--- a/vendor/golang.org/x/net/html/parse.go Wed Sep 18 19:17:42 2019 +0200
+++ b/vendor/golang.org/x/net/html/parse.go Sun Feb 16 18:54:01 2020 +0100
@@ -184,6 +184,17 @@
}
}
+// parseGenericRawTextElements implements the generic raw text element parsing
+// algorithm defined in 12.2.6.2.
+// https://html.spec.whatwg.org/multipage/parsing.html#parsing-elements-that-contain-only-text
+// TODO: Since both RAWTEXT and RCDATA states are treated as tokenizer's part
+// officially, need to make tokenizer consider both states.
+func (p *parser) parseGenericRawTextElement() {
+ p.addElement()
+ p.originalIM = p.im
+ p.im = textIM
+}
+
// generateImpliedEndTags pops nodes off the stack of open elements as long as
// the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc.
// If exceptions are specified, nodes with that name will not be popped off.
@@ -192,16 +203,17 @@
loop:
for i = len(p.oe) - 1; i >= 0; i-- {
n := p.oe[i]
- if n.Type == ElementNode {
- switch n.DataAtom {
- case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc:
- for _, except := range exceptions {
- if n.Data == except {
- break loop
- }
+ if n.Type != ElementNode {
+ break
+ }
+ switch n.DataAtom {
+ case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc:
+ for _, except := range exceptions {
+ if n.Data == except {
+ break loop
}
- continue
}
+ continue
}
break
}
@@ -369,8 +381,7 @@
// Section 12.2.4.3.
func (p *parser) clearActiveFormattingElements() {
for {
- n := p.afe.pop()
- if len(p.afe) == 0 || n.Type == scopeMarkerNode {
+ if n := p.afe.pop(); len(p.afe) == 0 || n.Type == scopeMarkerNode {
return
}
}
@@ -439,9 +450,6 @@
case a.Select:
if !last {
for ancestor, first := n, p.oe[0]; ancestor != first; {
- if ancestor == first {
- break
- }
ancestor = p.oe[p.oe.index(ancestor)-1]
switch ancestor.DataAtom {
case a.Template:
@@ -628,16 +636,29 @@
switch p.tok.DataAtom {
case a.Html:
return inBodyIM(p)
- case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta:
+ case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta:
p.addElement()
p.oe.pop()
p.acknowledgeSelfClosingTag()
return true
- case a.Script, a.Title, a.Noscript, a.Noframes, a.Style:
+ case a.Noscript:
+ if p.scripting {
+ p.parseGenericRawTextElement()
+ return true
+ }
+ p.addElement()
+ p.im = inHeadNoscriptIM
+ // Don't let the tokenizer go into raw text mode when scripting is disabled.
+ p.tokenizer.NextIsNotRawText()
+ return true
+ case a.Script, a.Title:
p.addElement()
p.setOriginalIM()
p.im = textIM
return true
+ case a.Noframes, a.Style:
+ p.parseGenericRawTextElement()
+ return true
case a.Head:
// Ignore the token.
return true
@@ -695,6 +716,49 @@
return false
}
+// 12.2.6.4.5.
+func inHeadNoscriptIM(p *parser) bool {
+ switch p.tok.Type {
+ case DoctypeToken:
+ // Ignore the token.
+ return true
+ case StartTagToken:
+ switch p.tok.DataAtom {
+ case a.Html:
+ return inBodyIM(p)
+ case a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Style:
+ return inHeadIM(p)
+ case a.Head, a.Noscript:
+ // Ignore the token.
+ return true
+ }
+ case EndTagToken:
+ switch p.tok.DataAtom {
+ case a.Noscript, a.Br:
+ default:
+ // Ignore the token.
+ return true
+ }
+ case TextToken:
+ s := strings.TrimLeft(p.tok.Data, whitespace)
+ if len(s) == 0 {
+ // It was all whitespace.
+ return inHeadIM(p)
+ }
+ case CommentToken:
+ return inHeadIM(p)
+ }
+ p.oe.pop()
+ if p.top().DataAtom != a.Head {
+ panic("html: the new current node will be a head element.")
+ }
+ p.im = inHeadIM
+ if p.tok.DataAtom == a.Noscript {
+ return true
+ }
+ return false
+}
+
// Section 12.2.6.4.6.
func afterHeadIM(p *parser) bool {
switch p.tok.Type {
@@ -806,7 +870,7 @@
return true
}
copyAttributes(p.oe[0], p.tok)
- case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
+ case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
return inHeadIM(p)
case a.Body:
if p.oe.contains(a.Template) {
@@ -832,7 +896,7 @@
p.addElement()
p.im = inFramesetIM
return true
- case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:
+ case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Main, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:
p.popUntil(buttonScope, a.P)
p.addElement()
case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
@@ -904,7 +968,7 @@
case a.A:
for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- {
if n := p.afe[i]; n.Type == ElementNode && n.DataAtom == a.A {
- p.inBodyEndTagFormatting(a.A)
+ p.inBodyEndTagFormatting(a.A, "a")
p.oe.remove(n)
p.afe.remove(n)
break
@@ -918,7 +982,7 @@
case a.Nobr:
p.reconstructActiveFormattingElements()
if p.elementInScope(defaultScope, a.Nobr) {
- p.inBodyEndTagFormatting(a.Nobr)
+ p.inBodyEndTagFormatting(a.Nobr, "nobr")
p.reconstructActiveFormattingElements()
}
p.addFormattingElement()
@@ -965,53 +1029,6 @@
p.tok.DataAtom = a.Img
p.tok.Data = a.Img.String()
return false
- case a.Isindex:
- if p.form != nil {
- // Ignore the token.
- return true
- }
- action := ""
- prompt := "This is a searchable index. Enter search keywords: "
- attr := []Attribute{{Key: "name", Val: "isindex"}}
- for _, t := range p.tok.Attr {
- switch t.Key {
- case "action":
- action = t.Val
- case "name":
- // Ignore the attribute.
- case "prompt":
- prompt = t.Val
- default:
- attr = append(attr, t)
- }
- }
- p.acknowledgeSelfClosingTag()
- p.popUntil(buttonScope, a.P)
- p.parseImpliedToken(StartTagToken, a.Form, a.Form.String())
- if p.form == nil {
- // NOTE: The 'isindex' element has been removed,
- // and the 'template' element has not been designed to be
- // collaborative with the index element.
- //
- // Ignore the token.
- return true
- }
- if action != "" {
- p.form.Attr = []Attribute{{Key: "action", Val: action}}
- }
- p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
- p.parseImpliedToken(StartTagToken, a.Label, a.Label.String())
- p.addText(prompt)
- p.addChild(&Node{
- Type: ElementNode,
- DataAtom: a.Input,
- Data: a.Input.String(),
- Attr: attr,
- })
- p.oe.pop()
- p.parseImpliedToken(EndTagToken, a.Label, a.Label.String())
- p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
- p.parseImpliedToken(EndTagToken, a.Form, a.Form.String())
case a.Textarea:
p.addElement()
p.setOriginalIM()
@@ -1021,18 +1038,21 @@
p.popUntil(buttonScope, a.P)
p.reconstructActiveFormattingElements()
p.framesetOK = false
- p.addElement()
- p.setOriginalIM()
- p.im = textIM
+ p.parseGenericRawTextElement()
case a.Iframe:
p.framesetOK = false
+ p.parseGenericRawTextElement()
+ case a.Noembed:
+ p.parseGenericRawTextElement()
+ case a.Noscript:
+ if p.scripting {
+ p.parseGenericRawTextElement()
+ return true
+ }
+ p.reconstructActiveFormattingElements()
p.addElement()
- p.setOriginalIM()
- p.im = textIM
- case a.Noembed, a.Noscript:
- p.addElement()
- p.setOriginalIM()
- p.im = textIM
+ // Don't let the tokenizer go into raw text mode when scripting is disabled.
+ p.tokenizer.NextIsNotRawText()
case a.Select:
p.reconstructActiveFormattingElements()
p.addElement()
@@ -1088,7 +1108,7 @@
return false
}
return true
- case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
+ case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Main, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
p.popUntil(defaultScope, p.tok.DataAtom)
case a.Form:
if p.oe.contains(a.Template) {
@@ -1126,7 +1146,7 @@
case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6)
case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
- p.inBodyEndTagFormatting(p.tok.DataAtom)
+ p.inBodyEndTagFormatting(p.tok.DataAtom, p.tok.Data)
case a.Applet, a.Marquee, a.Object:
if p.popUntil(defaultScope, p.tok.DataAtom) {
p.clearActiveFormattingElements()
@@ -1137,7 +1157,7 @@
case a.Template:
return inHeadIM(p)
default:
- p.inBodyEndTagOther(p.tok.DataAtom)
+ p.inBodyEndTagOther(p.tok.DataAtom, p.tok.Data)
}
case CommentToken:
p.addChild(&Node{
@@ -1149,14 +1169,13 @@
if len(p.templateStack) > 0 {
p.im = inTemplateIM
return false
- } else {
- for _, e := range p.oe {
- switch e.DataAtom {
- case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th,
- a.Thead, a.Tr, a.Body, a.Html:
- default:
- return true
- }
+ }
+ for _, e := range p.oe {
+ switch e.DataAtom {
+ case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th,
+ a.Thead, a.Tr, a.Body, a.Html:
+ default:
+ return true
}
}
}
@@ -1164,7 +1183,7 @@
return true
}
-func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom) {
+func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
// This is the "adoption agency" algorithm, described at
// https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency
@@ -1172,9 +1191,15 @@
// Once the code successfully parses the comprehensive test suite, we should
// refactor this code to be more idiomatic.
- // Steps 1-4. The outer loop.
+ // Steps 1-2
+ if current := p.oe.top(); current.Data == tagName && p.afe.index(current) == -1 {
+ p.oe.pop()
+ return
+ }
+
+ // Steps 3-5. The outer loop.
for i := 0; i < 8; i++ {
- // Step 5. Find the formatting element.
+ // Step 6. Find the formatting element.
var formattingElement *Node
for j := len(p.afe) - 1; j >= 0; j-- {
if p.afe[j].Type == scopeMarkerNode {
@@ -1186,20 +1211,25 @@
}
}
if formattingElement == nil {
- p.inBodyEndTagOther(tagAtom)
+ p.inBodyEndTagOther(tagAtom, tagName)
return
}
+
+ // Step 7. Ignore the tag if formatting element is not in the stack of open elements.
feIndex := p.oe.index(formattingElement)
if feIndex == -1 {
p.afe.remove(formattingElement)
return
}
+ // Step 8. Ignore the tag if formatting element is not in the scope.
if !p.elementInScope(defaultScope, tagAtom) {
// Ignore the tag.
return
}
- // Steps 9-10. Find the furthest block.
+ // Step 9. This step is omitted because it's just a parse error but no need to return.
+
+ // Steps 10-11. Find the furthest block.
var furthestBlock *Node
for _, e := range p.oe[feIndex:] {
if isSpecialElement(e) {
@@ -1216,47 +1246,65 @@
return
}
- // Steps 11-12. Find the common ancestor and bookmark node.
+ // Steps 12-13. Find the common ancestor and bookmark node.
commonAncestor := p.oe[feIndex-1]
bookmark := p.afe.index(formattingElement)
- // Step 13. The inner loop. Find the lastNode to reparent.
+ // Step 14. The inner loop. Find the lastNode to reparent.
lastNode := furthestBlock
node := furthestBlock
x := p.oe.index(node)
- // Steps 13.1-13.2
- for j := 0; j < 3; j++ {
- // Step 13.3.
+ // Step 14.1.
+ j := 0
+ for {
+ // Step 14.2.
+ j++
+ // Step. 14.3.
x--
node = p.oe[x]
- // Step 13.4 - 13.5.
+ // Step 14.4. Go to the next step if node is formatting element.
+ if node == formattingElement {
+ break
+ }
+ // Step 14.5. Remove node from the list of active formatting elements if
+ // inner loop counter is greater than three and node is in the list of
+ // active formatting elements.
+ if ni := p.afe.index(node); j > 3 && ni > -1 {
+ p.afe.remove(node)
+ // If any element of the list of active formatting elements is removed,
+ // we need to take care whether bookmark should be decremented or not.
+ // This is because the value of bookmark may exceed the size of the
+ // list by removing elements from the list.
+ if ni <= bookmark {
+ bookmark--
+ }
+ continue
+ }
+ // Step 14.6. Continue the next inner loop if node is not in the list of
+ // active formatting elements.
if p.afe.index(node) == -1 {
p.oe.remove(node)
continue
}
- // Step 13.6.
- if node == formattingElement {
- break
- }
- // Step 13.7.
+ // Step 14.7.
clone := node.clone()
p.afe[p.afe.index(node)] = clone
p.oe[p.oe.index(node)] = clone
node = clone
- // Step 13.8.
+ // Step 14.8.
if lastNode == furthestBlock {
bookmark = p.afe.index(node) + 1
}
- // Step 13.9.
+ // Step 14.9.
if lastNode.Parent != nil {
lastNode.Parent.RemoveChild(lastNode)
}
node.AppendChild(lastNode)
- // Step 13.10.
+ // Step 14.10.
lastNode = node
}
- // Step 14. Reparent lastNode to the common ancestor,
+ // Step 15. Reparent lastNode to the common ancestor,
// or for misnested table nodes, to the foster parent.
if lastNode.Parent != nil {
lastNode.Parent.RemoveChild(lastNode)
@@ -1264,23 +1312,17 @@
switch commonAncestor.DataAtom {
case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
p.fosterParent(lastNode)
- case a.Template:
- // TODO: remove namespace checking
- if commonAncestor.Namespace == "html" {
- commonAncestor = commonAncestor.LastChild
- }
- fallthrough
default:
commonAncestor.AppendChild(lastNode)
}
- // Steps 15-17. Reparent nodes from the furthest block's children
+ // Steps 16-18. Reparent nodes from the furthest block's children
// to a clone of the formatting element.
clone := formattingElement.clone()
reparentChildren(clone, furthestBlock)
furthestBlock.AppendChild(clone)
- // Step 18. Fix up the list of active formatting elements.
+ // Step 19. Fix up the list of active formatting elements.
if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
// Move the bookmark with the rest of the list.
bookmark--
@@ -1288,7 +1330,7 @@
p.afe.remove(formattingElement)
p.afe.insert(bookmark, clone)
- // Step 19. Fix up the stack of open elements.
+ // Step 20. Fix up the stack of open elements.
p.oe.remove(formattingElement)
p.oe.insert(p.oe.index(furthestBlock)+1, clone)
}
@@ -1297,9 +1339,17 @@
// inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.
// "Any other end tag" handling from 12.2.6.5 The rules for parsing tokens in foreign content
// https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign
-func (p *parser) inBodyEndTagOther(tagAtom a.Atom) {
+func (p *parser) inBodyEndTagOther(tagAtom a.Atom, tagName string) {
for i := len(p.oe) - 1; i >= 0; i-- {
- if p.oe[i].DataAtom == tagAtom {
+ // Two element nodes have the same tag if they have the same Data (a
+ // string-typed field). As an optimization, for common HTML tags, each
+ // Data string is assigned a unique, non-zero DataAtom (a uint32-typed
+ // field), since integer comparison is faster than string comparison.
+ // Uncommon (custom) tags get a zero DataAtom.
+ //
+ // The if condition here is equivalent to (p.oe[i].Data == tagName).
+ if (p.oe[i].DataAtom == tagAtom) &&
+ ((tagAtom != 0) || (p.oe[i].Data == tagName)) {
p.oe = p.oe[:i]
break
}
@@ -1451,14 +1501,13 @@
case StartTagToken:
switch p.tok.DataAtom {
case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr:
- if p.popUntil(tableScope, a.Caption) {
- p.clearActiveFormattingElements()
- p.im = inTableIM
- return false
- } else {
+ if !p.popUntil(tableScope, a.Caption) {
// Ignore the token.
return true
}
+ p.clearActiveFormattingElements()
+ p.im = inTableIM
+ return false
case a.Select:
p.reconstructActiveFormattingElements()
p.addElement()
@@ -1475,14 +1524,13 @@
}
return true
case a.Table:
- if p.popUntil(tableScope, a.Caption) {
- p.clearActiveFormattingElements()
- p.im = inTableIM
- return false
- } else {
+ if !p.popUntil(tableScope, a.Caption) {
// Ignore the token.
return true
}
+ p.clearActiveFormattingElements()
+ p.im = inTableIM
+ return false
case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
// Ignore the token.
return true
@@ -1693,8 +1741,9 @@
return true
}
// Close the cell and reprocess.
- p.popUntil(tableScope, a.Td, a.Th)
- p.clearActiveFormattingElements()
+ if p.popUntil(tableScope, a.Td, a.Th) {
+ p.clearActiveFormattingElements()
+ }
p.im = inRowIM
return false
}
@@ -1725,8 +1774,11 @@
}
p.addElement()
case a.Select:
- p.tok.Type = EndTagToken
- return false
+ if !p.popUntil(selectScope, a.Select) {
+ // Ignore the token.
+ return true
+ }
+ p.resetInsertionMode()
case a.Input, a.Keygen, a.Textarea:
if p.elementInScope(selectScope, a.Select) {
p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
@@ -1754,9 +1806,11 @@
p.oe = p.oe[:i]
}
case a.Select:
- if p.popUntil(selectScope, a.Select) {
- p.resetInsertionMode()
+ if !p.popUntil(selectScope, a.Select) {
+ // Ignore the token.
+ return true
}
+ p.resetInsertionMode()
case a.Template:
return inHeadIM(p)
}
@@ -1781,13 +1835,22 @@
case StartTagToken, EndTagToken:
switch p.tok.DataAtom {
case a.Caption, a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr, a.Td, a.Th:
- if p.tok.Type == StartTagToken || p.elementInScope(tableScope, p.tok.DataAtom) {
- p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
- return false
- } else {
+ if p.tok.Type == EndTagToken && !p.elementInScope(tableScope, p.tok.DataAtom) {
// Ignore the token.
return true
}
+ // This is like p.popUntil(selectScope, a.Select), but it also
+ // matches <math select>, not just <select>. Matching the MathML
+ // tag is arguably incorrect (conceptually), but it mimics what
+ // Chromium does.
+ for i := len(p.oe) - 1; i >= 0; i-- {
+ if n := p.oe[i]; n.DataAtom == a.Select {
+ p.oe = p.oe[:i]
+ break
+ }
+ }
+ p.resetInsertionMode()
+ return false
}
}
return inSelectIM(p)
@@ -2068,28 +2131,31 @@
Data: p.tok.Data,
})
case StartTagToken:
- b := breakout[p.tok.Data]
- if p.tok.DataAtom == a.Font {
- loop:
- for _, attr := range p.tok.Attr {
- switch attr.Key {
- case "color", "face", "size":
- b = true
- break loop
+ if !p.fragment {
+ b := breakout[p.tok.Data]
+ if p.tok.DataAtom == a.Font {
+ loop:
+ for _, attr := range p.tok.Attr {
+ switch attr.Key {
+ case "color", "face", "size":
+ b = true
+ break loop
+ }
}
}
+ if b {
+ for i := len(p.oe) - 1; i >= 0; i-- {
+ n := p.oe[i]
+ if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) {
+ p.oe = p.oe[:i+1]
+ break
+ }
+ }
+ return false
+ }
}
- if b {
- for i := len(p.oe) - 1; i >= 0; i-- {
- n := p.oe[i]
- if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) {
- p.oe = p.oe[:i+1]
- break
- }
- }
- return false
- }
- switch p.top().Namespace {
+ current := p.adjustedCurrentNode()
+ switch current.Namespace {
case "math":
adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
case "svg":
@@ -2104,7 +2170,7 @@
panic("html: bad parser state: unexpected namespace")
}
adjustForeignAttributes(p.tok.Attr)
- namespace := p.top().Namespace
+ namespace := current.Namespace
p.addElement()
p.top().Namespace = namespace
if namespace != "" {
@@ -2133,12 +2199,20 @@
return true
}
+// Section 12.2.4.2.
+func (p *parser) adjustedCurrentNode() *Node {
+ if len(p.oe) == 1 && p.fragment && p.context != nil {
+ return p.context
+ }
+ return p.oe.top()
+}
+
// Section 12.2.6.
func (p *parser) inForeignContent() bool {
if len(p.oe) == 0 {
return false
}
- n := p.oe[len(p.oe)-1]
+ n := p.adjustedCurrentNode()
if n.Namespace == "" {
return false
}
@@ -2232,6 +2306,33 @@
//
// The input is assumed to be UTF-8 encoded.
func Parse(r io.Reader) (*Node, error) {
+ return ParseWithOptions(r)
+}
+
+// ParseFragment parses a fragment of HTML and returns the nodes that were
+// found. If the fragment is the InnerHTML for an existing element, pass that
+// element in context.
+//
+// It has the same intricacies as Parse.
+func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
+ return ParseFragmentWithOptions(r, context)
+}
+
+// ParseOption configures a parser.
+type ParseOption func(p *parser)
+
+// ParseOptionEnableScripting configures the scripting flag.
+// https://html.spec.whatwg.org/multipage/webappapis.html#enabling-and-disabling-scripting
+//
+// By default, scripting is enabled.
+func ParseOptionEnableScripting(enable bool) ParseOption {
+ return func(p *parser) {
+ p.scripting = enable
+ }
+}
+
+// ParseWithOptions is like Parse, with options.
+func ParseWithOptions(r io.Reader, opts ...ParseOption) (*Node, error) {
p := &parser{
tokenizer: NewTokenizer(r),
doc: &Node{
@@ -2241,19 +2342,19 @@
framesetOK: true,
im: initialIM,
}
- err := p.parse()
- if err != nil {
+
+ for _, f := range opts {
+ f(p)
+ }
+
+ if err := p.parse(); err != nil {
return nil, err
}
return p.doc, nil
}
-// ParseFragment parses a fragment of HTML and returns the nodes that were
-// found. If the fragment is the InnerHTML for an existing element, pass that
-// element in context.
-//
-// It has the same intricacies as Parse.
-func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
+// ParseFragmentWithOptions is like ParseFragment, with options.
+func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) ([]*Node, error) {
contextTag := ""
if context != nil {
if context.Type != ElementNode {
@@ -2268,7 +2369,6 @@
contextTag = context.DataAtom.String()
}
p := &parser{
- tokenizer: NewTokenizerFragment(r, contextTag),
doc: &Node{
Type: DocumentNode,
},
@@ -2276,6 +2376,15 @@
fragment: true,
context: context,
}
+ if context != nil && context.Namespace != "" {
+ p.tokenizer = NewTokenizer(r)
+ } else {
+ p.tokenizer = NewTokenizerFragment(r, contextTag)
+ }
+
+ for _, f := range opts {
+ f(p)
+ }
root := &Node{
Type: ElementNode,
@@ -2296,8 +2405,7 @@
}
}
- err := p.parse()
- if err != nil {
+ if err := p.parse(); err != nil {
return nil, err
}