182 panic("unreachable") |
182 panic("unreachable") |
183 } |
183 } |
184 } |
184 } |
185 } |
185 } |
186 |
186 |
|
187 // parseGenericRawTextElement implements the generic raw text element parsing |

188 // algorithm defined in 12.2.6.2. |

189 // https://html.spec.whatwg.org/multipage/parsing.html#parsing-elements-that-contain-only-text |

190 // TODO: The spec treats both the RAWTEXT and RCDATA states as part of the |

191 // tokenizer, so the tokenizer needs to be made aware of both states. |

192 func (p *parser) parseGenericRawTextElement() { |

193 p.addElement() |
// Record the current insertion mode in originalIM before switching to textIM.
194 p.originalIM = p.im |

195 p.im = textIM |

196 } |
|
197 |
187 // generateImpliedEndTags pops nodes off the stack of open elements as long as |
198 // generateImpliedEndTags pops nodes off the stack of open elements as long as |
188 // the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc. |
199 // the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc. |
189 // If exceptions are specified, nodes with that name will not be popped off. |
200 // If exceptions are specified, nodes with that name will not be popped off. |
190 func (p *parser) generateImpliedEndTags(exceptions ...string) { |
201 func (p *parser) generateImpliedEndTags(exceptions ...string) { |
191 var i int |
202 var i int |
192 loop: |
203 loop: |
193 for i = len(p.oe) - 1; i >= 0; i-- { |
204 for i = len(p.oe) - 1; i >= 0; i-- { |
194 n := p.oe[i] |
205 n := p.oe[i] |
195 if n.Type == ElementNode { |
206 if n.Type != ElementNode { |
196 switch n.DataAtom { |
207 break |
197 case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc: |
208 } |
198 for _, except := range exceptions { |
209 switch n.DataAtom { |
199 if n.Data == except { |
210 case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc: |
200 break loop |
211 for _, except := range exceptions { |
201 } |
212 if n.Data == except { |
202 } |
213 break loop |
203 continue |
214 } |
204 } |
215 } |
|
216 continue |
205 } |
217 } |
206 break |
218 break |
207 } |
219 } |
208 |
220 |
209 p.oe = p.oe[:i+1] |
221 p.oe = p.oe[:i+1] |
830 } |
894 } |
831 p.oe = p.oe[:1] |
895 p.oe = p.oe[:1] |
832 p.addElement() |
896 p.addElement() |
833 p.im = inFramesetIM |
897 p.im = inFramesetIM |
834 return true |
898 return true |
835 case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul: |
899 case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Main, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul: |
836 p.popUntil(buttonScope, a.P) |
900 p.popUntil(buttonScope, a.P) |
837 p.addElement() |
901 p.addElement() |
838 case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6: |
902 case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6: |
839 p.popUntil(buttonScope, a.P) |
903 p.popUntil(buttonScope, a.P) |
840 switch n := p.top(); n.DataAtom { |
904 switch n := p.top(); n.DataAtom { |
963 p.framesetOK = false |
1027 p.framesetOK = false |
964 case a.Image: |
1028 case a.Image: |
965 p.tok.DataAtom = a.Img |
1029 p.tok.DataAtom = a.Img |
966 p.tok.Data = a.Img.String() |
1030 p.tok.Data = a.Img.String() |
967 return false |
1031 return false |
968 case a.Isindex: |
|
969 if p.form != nil { |
|
970 // Ignore the token. |
|
971 return true |
|
972 } |
|
973 action := "" |
|
974 prompt := "This is a searchable index. Enter search keywords: " |
|
975 attr := []Attribute{{Key: "name", Val: "isindex"}} |
|
976 for _, t := range p.tok.Attr { |
|
977 switch t.Key { |
|
978 case "action": |
|
979 action = t.Val |
|
980 case "name": |
|
981 // Ignore the attribute. |
|
982 case "prompt": |
|
983 prompt = t.Val |
|
984 default: |
|
985 attr = append(attr, t) |
|
986 } |
|
987 } |
|
988 p.acknowledgeSelfClosingTag() |
|
989 p.popUntil(buttonScope, a.P) |
|
990 p.parseImpliedToken(StartTagToken, a.Form, a.Form.String()) |
|
991 if p.form == nil { |
|
992 // NOTE: The 'isindex' element has been removed, |
|
993 // and the 'template' element has not been designed to be |
|
994 // collaborative with the index element. |
|
995 // |
|
996 // Ignore the token. |
|
997 return true |
|
998 } |
|
999 if action != "" { |
|
1000 p.form.Attr = []Attribute{{Key: "action", Val: action}} |
|
1001 } |
|
1002 p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String()) |
|
1003 p.parseImpliedToken(StartTagToken, a.Label, a.Label.String()) |
|
1004 p.addText(prompt) |
|
1005 p.addChild(&Node{ |
|
1006 Type: ElementNode, |
|
1007 DataAtom: a.Input, |
|
1008 Data: a.Input.String(), |
|
1009 Attr: attr, |
|
1010 }) |
|
1011 p.oe.pop() |
|
1012 p.parseImpliedToken(EndTagToken, a.Label, a.Label.String()) |
|
1013 p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String()) |
|
1014 p.parseImpliedToken(EndTagToken, a.Form, a.Form.String()) |
|
1015 case a.Textarea: |
1032 case a.Textarea: |
1016 p.addElement() |
1033 p.addElement() |
1017 p.setOriginalIM() |
1034 p.setOriginalIM() |
1018 p.framesetOK = false |
1035 p.framesetOK = false |
1019 p.im = textIM |
1036 p.im = textIM |
1020 case a.Xmp: |
1037 case a.Xmp: |
1021 p.popUntil(buttonScope, a.P) |
1038 p.popUntil(buttonScope, a.P) |
1022 p.reconstructActiveFormattingElements() |
1039 p.reconstructActiveFormattingElements() |
1023 p.framesetOK = false |
1040 p.framesetOK = false |
1024 p.addElement() |
1041 p.parseGenericRawTextElement() |
1025 p.setOriginalIM() |
|
1026 p.im = textIM |
|
1027 case a.Iframe: |
1042 case a.Iframe: |
1028 p.framesetOK = false |
1043 p.framesetOK = false |
1029 p.addElement() |
1044 p.parseGenericRawTextElement() |
1030 p.setOriginalIM() |
1045 case a.Noembed: |
1031 p.im = textIM |
1046 p.parseGenericRawTextElement() |
1032 case a.Noembed, a.Noscript: |
1047 case a.Noscript: |
1033 p.addElement() |
1048 if p.scripting { |
1034 p.setOriginalIM() |
1049 p.parseGenericRawTextElement() |
1035 p.im = textIM |
1050 return true |
|
1051 } |
|
1052 p.reconstructActiveFormattingElements() |
|
1053 p.addElement() |
|
1054 // Don't let the tokenizer go into raw text mode when scripting is disabled. |
|
1055 p.tokenizer.NextIsNotRawText() |
1036 case a.Select: |
1056 case a.Select: |
1037 p.reconstructActiveFormattingElements() |
1057 p.reconstructActiveFormattingElements() |
1038 p.addElement() |
1058 p.addElement() |
1039 p.framesetOK = false |
1059 p.framesetOK = false |
1040 p.im = inSelectIM |
1060 p.im = inSelectIM |
1086 if p.elementInScope(defaultScope, a.Body) { |
1106 if p.elementInScope(defaultScope, a.Body) { |
1087 p.parseImpliedToken(EndTagToken, a.Body, a.Body.String()) |
1107 p.parseImpliedToken(EndTagToken, a.Body, a.Body.String()) |
1088 return false |
1108 return false |
1089 } |
1109 } |
1090 return true |
1110 return true |
1091 case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul: |
1111 case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Main, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul: |
1092 p.popUntil(defaultScope, p.tok.DataAtom) |
1112 p.popUntil(defaultScope, p.tok.DataAtom) |
1093 case a.Form: |
1113 case a.Form: |
1094 if p.oe.contains(a.Template) { |
1114 if p.oe.contains(a.Template) { |
1095 i := p.indexOfElementInScope(defaultScope, a.Form) |
1115 i := p.indexOfElementInScope(defaultScope, a.Form) |
1096 if i == -1 { |
1116 if i == -1 { |
1124 case a.Dd, a.Dt: |
1144 case a.Dd, a.Dt: |
1125 p.popUntil(defaultScope, p.tok.DataAtom) |
1145 p.popUntil(defaultScope, p.tok.DataAtom) |
1126 case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6: |
1146 case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6: |
1127 p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6) |
1147 p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6) |
1128 case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U: |
1148 case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U: |
1129 p.inBodyEndTagFormatting(p.tok.DataAtom) |
1149 p.inBodyEndTagFormatting(p.tok.DataAtom, p.tok.Data) |
1130 case a.Applet, a.Marquee, a.Object: |
1150 case a.Applet, a.Marquee, a.Object: |
1131 if p.popUntil(defaultScope, p.tok.DataAtom) { |
1151 if p.popUntil(defaultScope, p.tok.DataAtom) { |
1132 p.clearActiveFormattingElements() |
1152 p.clearActiveFormattingElements() |
1133 } |
1153 } |
1134 case a.Br: |
1154 case a.Br: |
1135 p.tok.Type = StartTagToken |
1155 p.tok.Type = StartTagToken |
1136 return false |
1156 return false |
1137 case a.Template: |
1157 case a.Template: |
1138 return inHeadIM(p) |
1158 return inHeadIM(p) |
1139 default: |
1159 default: |
1140 p.inBodyEndTagOther(p.tok.DataAtom) |
1160 p.inBodyEndTagOther(p.tok.DataAtom, p.tok.Data) |
1141 } |
1161 } |
1142 case CommentToken: |
1162 case CommentToken: |
1143 p.addChild(&Node{ |
1163 p.addChild(&Node{ |
1144 Type: CommentNode, |
1164 Type: CommentNode, |
1145 Data: p.tok.Data, |
1165 Data: p.tok.Data, |
1147 case ErrorToken: |
1167 case ErrorToken: |
1148 // TODO: remove this divergence from the HTML5 spec. |
1168 // TODO: remove this divergence from the HTML5 spec. |
1149 if len(p.templateStack) > 0 { |
1169 if len(p.templateStack) > 0 { |
1150 p.im = inTemplateIM |
1170 p.im = inTemplateIM |
1151 return false |
1171 return false |
1152 } else { |
1172 } |
1153 for _, e := range p.oe { |
1173 for _, e := range p.oe { |
1154 switch e.DataAtom { |
1174 switch e.DataAtom { |
1155 case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th, |
1175 case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th, |
1156 a.Thead, a.Tr, a.Body, a.Html: |
1176 a.Thead, a.Tr, a.Body, a.Html: |
1157 default: |
1177 default: |
1158 return true |
1178 return true |
1159 } |
|
1160 } |
1179 } |
1161 } |
1180 } |
1162 } |
1181 } |
1163 |
1182 |
1164 return true |
1183 return true |
1165 } |
1184 } |
1166 |
1185 |
1167 func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom) { |
1186 func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) { |
1168 // This is the "adoption agency" algorithm, described at |
1187 // This is the "adoption agency" algorithm, described at |
1169 // https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency |
1188 // https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency |
1170 |
1189 |
1171 // TODO: this is a fairly literal line-by-line translation of that algorithm. |
1190 // TODO: this is a fairly literal line-by-line translation of that algorithm. |
1172 // Once the code successfully parses the comprehensive test suite, we should |
1191 // Once the code successfully parses the comprehensive test suite, we should |
1173 // refactor this code to be more idiomatic. |
1192 // refactor this code to be more idiomatic. |
1174 |
1193 |
1175 // Steps 1-4. The outer loop. |
1194 // Steps 1-2 |
|
1195 if current := p.oe.top(); current.Data == tagName && p.afe.index(current) == -1 { |
|
1196 p.oe.pop() |
|
1197 return |
|
1198 } |
|
1199 |
|
1200 // Steps 3-5. The outer loop. |
1176 for i := 0; i < 8; i++ { |
1201 for i := 0; i < 8; i++ { |
1177 // Step 5. Find the formatting element. |
1202 // Step 6. Find the formatting element. |
1178 var formattingElement *Node |
1203 var formattingElement *Node |
1179 for j := len(p.afe) - 1; j >= 0; j-- { |
1204 for j := len(p.afe) - 1; j >= 0; j-- { |
1180 if p.afe[j].Type == scopeMarkerNode { |
1205 if p.afe[j].Type == scopeMarkerNode { |
1181 break |
1206 break |
1182 } |
1207 } |
1214 } |
1244 } |
1215 p.afe.remove(e) |
1245 p.afe.remove(e) |
1216 return |
1246 return |
1217 } |
1247 } |
1218 |
1248 |
1219 // Steps 11-12. Find the common ancestor and bookmark node. |
1249 // Steps 12-13. Find the common ancestor and bookmark node. |
1220 commonAncestor := p.oe[feIndex-1] |
1250 commonAncestor := p.oe[feIndex-1] |
1221 bookmark := p.afe.index(formattingElement) |
1251 bookmark := p.afe.index(formattingElement) |
1222 |
1252 |
1223 // Step 13. The inner loop. Find the lastNode to reparent. |
1253 // Step 14. The inner loop. Find the lastNode to reparent. |
1224 lastNode := furthestBlock |
1254 lastNode := furthestBlock |
1225 node := furthestBlock |
1255 node := furthestBlock |
1226 x := p.oe.index(node) |
1256 x := p.oe.index(node) |
1227 // Steps 13.1-13.2 |
1257 // Step 14.1. |
1228 for j := 0; j < 3; j++ { |
1258 j := 0 |
1229 // Step 13.3. |
1259 for { |
|
1260 // Step 14.2. |
|
1261 j++ |
|
1262 // Step 14.3. |
1230 x-- |
1263 x-- |
1231 node = p.oe[x] |
1264 node = p.oe[x] |
1232 // Step 13.4 - 13.5. |
1265 // Step 14.4. Go to the next step if node is formatting element. |
|
1266 if node == formattingElement { |
|
1267 break |
|
1268 } |
|
1269 // Step 14.5. Remove node from the list of active formatting elements if |
|
1270 // inner loop counter is greater than three and node is in the list of |
|
1271 // active formatting elements. |
|
1272 if ni := p.afe.index(node); j > 3 && ni > -1 { |
|
1273 p.afe.remove(node) |
|
1274 // If any element of the list of active formatting elements is removed, |
|
1275 // we need to take care whether bookmark should be decremented or not. |
|
1276 // This is because the value of bookmark may exceed the size of the |
|
1277 // list by removing elements from the list. |
|
1278 if ni <= bookmark { |
|
1279 bookmark-- |
|
1280 } |
|
1281 continue |
|
1282 } |
|
1283 // Step 14.6. Continue the next inner loop if node is not in the list of |
|
1284 // active formatting elements. |
1233 if p.afe.index(node) == -1 { |
1285 if p.afe.index(node) == -1 { |
1234 p.oe.remove(node) |
1286 p.oe.remove(node) |
1235 continue |
1287 continue |
1236 } |
1288 } |
1237 // Step 13.6. |
1289 // Step 14.7. |
1238 if node == formattingElement { |
|
1239 break |
|
1240 } |
|
1241 // Step 13.7. |
|
1242 clone := node.clone() |
1290 clone := node.clone() |
1243 p.afe[p.afe.index(node)] = clone |
1291 p.afe[p.afe.index(node)] = clone |
1244 p.oe[p.oe.index(node)] = clone |
1292 p.oe[p.oe.index(node)] = clone |
1245 node = clone |
1293 node = clone |
1246 // Step 13.8. |
1294 // Step 14.8. |
1247 if lastNode == furthestBlock { |
1295 if lastNode == furthestBlock { |
1248 bookmark = p.afe.index(node) + 1 |
1296 bookmark = p.afe.index(node) + 1 |
1249 } |
1297 } |
1250 // Step 13.9. |
1298 // Step 14.9. |
1251 if lastNode.Parent != nil { |
1299 if lastNode.Parent != nil { |
1252 lastNode.Parent.RemoveChild(lastNode) |
1300 lastNode.Parent.RemoveChild(lastNode) |
1253 } |
1301 } |
1254 node.AppendChild(lastNode) |
1302 node.AppendChild(lastNode) |
1255 // Step 13.10. |
1303 // Step 14.10. |
1256 lastNode = node |
1304 lastNode = node |
1257 } |
1305 } |
1258 |
1306 |
1259 // Step 14. Reparent lastNode to the common ancestor, |
1307 // Step 15. Reparent lastNode to the common ancestor, |
1260 // or for misnested table nodes, to the foster parent. |
1308 // or for misnested table nodes, to the foster parent. |
1261 if lastNode.Parent != nil { |
1309 if lastNode.Parent != nil { |
1262 lastNode.Parent.RemoveChild(lastNode) |
1310 lastNode.Parent.RemoveChild(lastNode) |
1263 } |
1311 } |
1264 switch commonAncestor.DataAtom { |
1312 switch commonAncestor.DataAtom { |
1265 case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr: |
1313 case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr: |
1266 p.fosterParent(lastNode) |
1314 p.fosterParent(lastNode) |
1267 case a.Template: |
|
1268 // TODO: remove namespace checking |
|
1269 if commonAncestor.Namespace == "html" { |
|
1270 commonAncestor = commonAncestor.LastChild |
|
1271 } |
|
1272 fallthrough |
|
1273 default: |
1315 default: |
1274 commonAncestor.AppendChild(lastNode) |
1316 commonAncestor.AppendChild(lastNode) |
1275 } |
1317 } |
1276 |
1318 |
1277 // Steps 15-17. Reparent nodes from the furthest block's children |
1319 // Steps 16-18. Reparent nodes from the furthest block's children |
1278 // to a clone of the formatting element. |
1320 // to a clone of the formatting element. |
1279 clone := formattingElement.clone() |
1321 clone := formattingElement.clone() |
1280 reparentChildren(clone, furthestBlock) |
1322 reparentChildren(clone, furthestBlock) |
1281 furthestBlock.AppendChild(clone) |
1323 furthestBlock.AppendChild(clone) |
1282 |
1324 |
1283 // Step 18. Fix up the list of active formatting elements. |
1325 // Step 19. Fix up the list of active formatting elements. |
1284 if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark { |
1326 if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark { |
1285 // Move the bookmark with the rest of the list. |
1327 // Move the bookmark with the rest of the list. |
1286 bookmark-- |
1328 bookmark-- |
1287 } |
1329 } |
1288 p.afe.remove(formattingElement) |
1330 p.afe.remove(formattingElement) |
1289 p.afe.insert(bookmark, clone) |
1331 p.afe.insert(bookmark, clone) |
1290 |
1332 |
1291 // Step 19. Fix up the stack of open elements. |
1333 // Step 20. Fix up the stack of open elements. |
1292 p.oe.remove(formattingElement) |
1334 p.oe.remove(formattingElement) |
1293 p.oe.insert(p.oe.index(furthestBlock)+1, clone) |
1335 p.oe.insert(p.oe.index(furthestBlock)+1, clone) |
1294 } |
1336 } |
1295 } |
1337 } |
1296 |
1338 |
1297 // inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM. |
1339 // inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM. |
1298 // "Any other end tag" handling from 12.2.6.5 The rules for parsing tokens in foreign content |
1340 // "Any other end tag" handling from 12.2.6.5 The rules for parsing tokens in foreign content |
1299 // https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign |
1341 // https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign |
1300 func (p *parser) inBodyEndTagOther(tagAtom a.Atom) { |
1342 func (p *parser) inBodyEndTagOther(tagAtom a.Atom, tagName string) { |
1301 for i := len(p.oe) - 1; i >= 0; i-- { |
1343 for i := len(p.oe) - 1; i >= 0; i-- { |
1302 if p.oe[i].DataAtom == tagAtom { |
1344 // Two element nodes have the same tag if they have the same Data (a |
|
1345 // string-typed field). As an optimization, for common HTML tags, each |
|
1346 // Data string is assigned a unique, non-zero DataAtom (a uint32-typed |
|
1347 // field), since integer comparison is faster than string comparison. |
|
1348 // Uncommon (custom) tags get a zero DataAtom. |
|
1349 // |
|
1350 // The if condition here is equivalent to (p.oe[i].Data == tagName). |
|
1351 if (p.oe[i].DataAtom == tagAtom) && |
|
1352 ((tagAtom != 0) || (p.oe[i].Data == tagName)) { |
1303 p.oe = p.oe[:i] |
1353 p.oe = p.oe[:i] |
1304 break |
1354 break |
1305 } |
1355 } |
1306 if isSpecialElement(p.oe[i]) { |
1356 if isSpecialElement(p.oe[i]) { |
1307 break |
1357 break |
// inSelectInTableIM handles a token for the insertion mode of the same name.
// Start and end tags for caption, table, tbody, tfoot, thead, tr, td and th
// close the open select and return false, except that an end tag with no
// matching element in table scope is ignored. Every other token is delegated
// to inSelectIM.
1779 func inSelectInTableIM(p *parser) bool { |
1833 func inSelectInTableIM(p *parser) bool { |
1780 switch p.tok.Type { |
1834 switch p.tok.Type { |
1781 case StartTagToken, EndTagToken: |
1835 case StartTagToken, EndTagToken: |
1782 switch p.tok.DataAtom { |
1836 switch p.tok.DataAtom { |
1783 case a.Caption, a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr, a.Td, a.Th: |
1837 case a.Caption, a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr, a.Td, a.Th: |
1784 if p.tok.Type == StartTagToken || p.elementInScope(tableScope, p.tok.DataAtom) { |
1838 if p.tok.Type == EndTagToken && !p.elementInScope(tableScope, p.tok.DataAtom) { |
1785 p.parseImpliedToken(EndTagToken, a.Select, a.Select.String()) |

1786 return false |

1787 } else { |

1788 // Ignore the token. |
1839 // Ignore the token. |
1789 return true |
1840 return true |
1790 } |
1841 } |

1842 // This is like p.popUntil(selectScope, a.Select), but it also |

1843 // matches <math select>, not just <select>. Matching the MathML |

1844 // tag is arguably incorrect (conceptually), but it mimics what |

1845 // Chromium does. |

1846 for i := len(p.oe) - 1; i >= 0; i-- { |

1847 if n := p.oe[i]; n.DataAtom == a.Select { |

1848 p.oe = p.oe[:i] |

1849 break |

1850 } |

1851 } |

1852 p.resetInsertionMode() |

1853 return false |
1791 } |
1854 } |
1792 } |
1855 } |
// Any token not handled above is processed by inSelectIM.
1793 return inSelectIM(p) |
1856 return inSelectIM(p) |
1794 } |
1857 } |
1795 |
1858 |
2230 // <tag>s. Conversely, explicit <tag>s in r's data can be silently dropped, |
2304 // <tag>s. Conversely, explicit <tag>s in r's data can be silently dropped, |
2231 // with no corresponding node in the resulting tree. |
2305 // with no corresponding node in the resulting tree. |
2232 // |
2306 // |
2233 // The input is assumed to be UTF-8 encoded. |
2307 // The input is assumed to be UTF-8 encoded. |
2234 func Parse(r io.Reader) (*Node, error) { |
2308 func Parse(r io.Reader) (*Node, error) { |
|
2309 return ParseWithOptions(r) |
|
2310 } |
|
2311 |
|
2312 // ParseFragment parses a fragment of HTML and returns the nodes that were |

2313 // found. If the fragment is the InnerHTML for an existing element, pass that |

2314 // element in context. |

2315 // |

2316 // It has the same intricacies as Parse. |
//
// It is shorthand for calling ParseFragmentWithOptions with no options.
2317 func ParseFragment(r io.Reader, context *Node) ([]*Node, error) { |

2318 return ParseFragmentWithOptions(r, context) |

2319 } |
|
2320 |
|
2321 // ParseOption configures a parser. |
// Options passed to ParseWithOptions are applied to the parser, in order,
// before parsing starts.
2322 type ParseOption func(p *parser) |
|
2323 |
|
2324 // ParseOptionEnableScripting configures the scripting flag. |

2325 // https://html.spec.whatwg.org/multipage/webappapis.html#enabling-and-disabling-scripting |

2326 // |

2327 // By default, scripting is enabled. |
//
// The returned option sets the parser's scripting field to enable.
2328 func ParseOptionEnableScripting(enable bool) ParseOption { |

2329 return func(p *parser) { |

2330 p.scripting = enable |

2331 } |

2332 } |
|
2333 |
|
2334 // ParseWithOptions is like Parse, with options. |
//
// The parser is created with scripting and framesetOK set to true and im set
// to initialIM; each option in opts is then applied, in order, before parsing
// begins. It returns the parser's document node, or a nil node and the error
// if parsing fails.
2335 func ParseWithOptions(r io.Reader, opts ...ParseOption) (*Node, error) { |
2235 p := &parser{ |
2336 p := &parser{ |
2236 tokenizer: NewTokenizer(r), |
2337 tokenizer: NewTokenizer(r), |
2237 doc: &Node{ |
2338 doc: &Node{ |
2238 Type: DocumentNode, |
2339 Type: DocumentNode, |
2239 }, |
2340 }, |
2240 scripting: true, |
2341 scripting: true, |
2241 framesetOK: true, |
2342 framesetOK: true, |
2242 im: initialIM, |
2343 im: initialIM, |
2243 } |
2344 } |
2244 err := p.parse() |
2345 |
2245 if err != nil { |
2346 for _, f := range opts { |

2347 f(p) |

2348 } |

2349 |

2350 if err := p.parse(); err != nil { |
2246 return nil, err |
2351 return nil, err |
2247 } |
2352 } |
2248 return p.doc, nil |
2353 return p.doc, nil |
2249 } |
2354 } |
2250 |
2355 |
2251 // ParseFragment parses a fragment of HTML and returns the nodes that were |
2356 // ParseFragmentWithOptions is like ParseFragment, with options. |
2252 // found. If the fragment is the InnerHTML for an existing element, pass that |
2357 func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) ([]*Node, error) { |
2253 // element in context. |
|
2254 // |
|
2255 // It has the same intricacies as Parse. |
|
2256 func ParseFragment(r io.Reader, context *Node) ([]*Node, error) { |
|
2257 contextTag := "" |
2358 contextTag := "" |
2258 if context != nil { |
2359 if context != nil { |
2259 if context.Type != ElementNode { |
2360 if context.Type != ElementNode { |
2260 return nil, errors.New("html: ParseFragment of non-element Node") |
2361 return nil, errors.New("html: ParseFragment of non-element Node") |
2261 } |
2362 } |