|
1 // Copyright 2011 The Go Authors. All rights reserved. |
|
2 // Use of this source code is governed by a BSD-style |
|
3 // license that can be found in the LICENSE file. |
|
4 |
|
5 package html |
|
6 |
|
7 import ( |
|
8 "bufio" |
|
9 "errors" |
|
10 "fmt" |
|
11 "io" |
|
12 "strings" |
|
13 ) |
|
14 |
|
// writer is the sink the renderer writes to. On top of io.Writer it requires
// WriteByte and WriteString, so that single bytes and strings can be written
// without first converting them to byte slices. Render wraps destinations
// that lack these methods in a *bufio.Writer.
type writer interface {
	io.ByteWriter
	io.Writer
	WriteString(s string) (n int, err error)
}
|
20 |
|
21 // Render renders the parse tree n to the given writer. |
|
22 // |
|
23 // Rendering is done on a 'best effort' basis: calling Parse on the output of |
|
24 // Render will always result in something similar to the original tree, but it |
|
25 // is not necessarily an exact clone unless the original tree was 'well-formed'. |
|
26 // 'Well-formed' is not easily specified; the HTML5 specification is |
|
27 // complicated. |
|
28 // |
|
29 // Calling Parse on arbitrary input typically results in a 'well-formed' parse |
|
30 // tree. However, it is possible for Parse to yield a 'badly-formed' parse tree. |
|
31 // For example, in a 'well-formed' parse tree, no <a> element is a child of |
|
32 // another <a> element: parsing "<a><a>" results in two sibling elements. |
|
33 // Similarly, in a 'well-formed' parse tree, no <a> element is a child of a |
|
34 // <table> element: parsing "<p><table><a>" results in a <p> with two sibling |
|
35 // children; the <a> is reparented to the <table>'s parent. However, calling |
|
36 // Parse on "<a><table><a>" does not return an error, but the result has an <a> |
|
37 // element with an <a> child, and is therefore not 'well-formed'. |
|
38 // |
|
39 // Programmatically constructed trees are typically also 'well-formed', but it |
|
40 // is possible to construct a tree that looks innocuous but, when rendered and |
|
41 // re-parsed, results in a different tree. A simple example is that a solitary |
|
42 // text node would become a tree containing <html>, <head> and <body> elements. |
|
43 // Another example is that the programmatic equivalent of "a<head>b</head>c" |
|
44 // becomes "<html><head><head/><body>abc</body></html>". |
|
45 func Render(w io.Writer, n *Node) error { |
|
46 if x, ok := w.(writer); ok { |
|
47 return render(x, n) |
|
48 } |
|
49 buf := bufio.NewWriter(w) |
|
50 if err := render(buf, n); err != nil { |
|
51 return err |
|
52 } |
|
53 return buf.Flush() |
|
54 } |
|
55 |
|
// plaintextAbort is the sentinel error render1 returns once it has written a
// <plaintext> element. It tells the callers up the recursion that no further
// end tags may be written; render turns it back into a nil error.
var plaintextAbort = errors.New("html: internal error (plaintext abort)")
|
59 |
|
60 func render(w writer, n *Node) error { |
|
61 err := render1(w, n) |
|
62 if err == plaintextAbort { |
|
63 err = nil |
|
64 } |
|
65 return err |
|
66 } |
|
67 |
|
68 func render1(w writer, n *Node) error { |
|
69 // Render non-element nodes; these are the easy cases. |
|
70 switch n.Type { |
|
71 case ErrorNode: |
|
72 return errors.New("html: cannot render an ErrorNode node") |
|
73 case TextNode: |
|
74 return escape(w, n.Data) |
|
75 case DocumentNode: |
|
76 for c := n.FirstChild; c != nil; c = c.NextSibling { |
|
77 if err := render1(w, c); err != nil { |
|
78 return err |
|
79 } |
|
80 } |
|
81 return nil |
|
82 case ElementNode: |
|
83 // No-op. |
|
84 case CommentNode: |
|
85 if _, err := w.WriteString("<!--"); err != nil { |
|
86 return err |
|
87 } |
|
88 if _, err := w.WriteString(n.Data); err != nil { |
|
89 return err |
|
90 } |
|
91 if _, err := w.WriteString("-->"); err != nil { |
|
92 return err |
|
93 } |
|
94 return nil |
|
95 case DoctypeNode: |
|
96 if _, err := w.WriteString("<!DOCTYPE "); err != nil { |
|
97 return err |
|
98 } |
|
99 if _, err := w.WriteString(n.Data); err != nil { |
|
100 return err |
|
101 } |
|
102 if n.Attr != nil { |
|
103 var p, s string |
|
104 for _, a := range n.Attr { |
|
105 switch a.Key { |
|
106 case "public": |
|
107 p = a.Val |
|
108 case "system": |
|
109 s = a.Val |
|
110 } |
|
111 } |
|
112 if p != "" { |
|
113 if _, err := w.WriteString(" PUBLIC "); err != nil { |
|
114 return err |
|
115 } |
|
116 if err := writeQuoted(w, p); err != nil { |
|
117 return err |
|
118 } |
|
119 if s != "" { |
|
120 if err := w.WriteByte(' '); err != nil { |
|
121 return err |
|
122 } |
|
123 if err := writeQuoted(w, s); err != nil { |
|
124 return err |
|
125 } |
|
126 } |
|
127 } else if s != "" { |
|
128 if _, err := w.WriteString(" SYSTEM "); err != nil { |
|
129 return err |
|
130 } |
|
131 if err := writeQuoted(w, s); err != nil { |
|
132 return err |
|
133 } |
|
134 } |
|
135 } |
|
136 return w.WriteByte('>') |
|
137 default: |
|
138 return errors.New("html: unknown node type") |
|
139 } |
|
140 |
|
141 // Render the <xxx> opening tag. |
|
142 if err := w.WriteByte('<'); err != nil { |
|
143 return err |
|
144 } |
|
145 if _, err := w.WriteString(n.Data); err != nil { |
|
146 return err |
|
147 } |
|
148 for _, a := range n.Attr { |
|
149 if err := w.WriteByte(' '); err != nil { |
|
150 return err |
|
151 } |
|
152 if a.Namespace != "" { |
|
153 if _, err := w.WriteString(a.Namespace); err != nil { |
|
154 return err |
|
155 } |
|
156 if err := w.WriteByte(':'); err != nil { |
|
157 return err |
|
158 } |
|
159 } |
|
160 if _, err := w.WriteString(a.Key); err != nil { |
|
161 return err |
|
162 } |
|
163 if _, err := w.WriteString(`="`); err != nil { |
|
164 return err |
|
165 } |
|
166 if err := escape(w, a.Val); err != nil { |
|
167 return err |
|
168 } |
|
169 if err := w.WriteByte('"'); err != nil { |
|
170 return err |
|
171 } |
|
172 } |
|
173 if voidElements[n.Data] { |
|
174 if n.FirstChild != nil { |
|
175 return fmt.Errorf("html: void element <%s> has child nodes", n.Data) |
|
176 } |
|
177 _, err := w.WriteString("/>") |
|
178 return err |
|
179 } |
|
180 if err := w.WriteByte('>'); err != nil { |
|
181 return err |
|
182 } |
|
183 |
|
184 // Add initial newline where there is danger of a newline beging ignored. |
|
185 if c := n.FirstChild; c != nil && c.Type == TextNode && strings.HasPrefix(c.Data, "\n") { |
|
186 switch n.Data { |
|
187 case "pre", "listing", "textarea": |
|
188 if err := w.WriteByte('\n'); err != nil { |
|
189 return err |
|
190 } |
|
191 } |
|
192 } |
|
193 |
|
194 // Render any child nodes. |
|
195 switch n.Data { |
|
196 case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp": |
|
197 for c := n.FirstChild; c != nil; c = c.NextSibling { |
|
198 if c.Type == TextNode { |
|
199 if _, err := w.WriteString(c.Data); err != nil { |
|
200 return err |
|
201 } |
|
202 } else { |
|
203 if err := render1(w, c); err != nil { |
|
204 return err |
|
205 } |
|
206 } |
|
207 } |
|
208 if n.Data == "plaintext" { |
|
209 // Don't render anything else. <plaintext> must be the |
|
210 // last element in the file, with no closing tag. |
|
211 return plaintextAbort |
|
212 } |
|
213 default: |
|
214 for c := n.FirstChild; c != nil; c = c.NextSibling { |
|
215 if err := render1(w, c); err != nil { |
|
216 return err |
|
217 } |
|
218 } |
|
219 } |
|
220 |
|
221 // Render the </xxx> closing tag. |
|
222 if _, err := w.WriteString("</"); err != nil { |
|
223 return err |
|
224 } |
|
225 if _, err := w.WriteString(n.Data); err != nil { |
|
226 return err |
|
227 } |
|
228 return w.WriteByte('>') |
|
229 } |
|
230 |
|
231 // writeQuoted writes s to w surrounded by quotes. Normally it will use double |
|
232 // quotes, but if s contains a double quote, it will use single quotes. |
|
233 // It is used for writing the identifiers in a doctype declaration. |
|
234 // In valid HTML, they can't contain both types of quotes. |
|
235 func writeQuoted(w writer, s string) error { |
|
236 var q byte = '"' |
|
237 if strings.Contains(s, `"`) { |
|
238 q = '\'' |
|
239 } |
|
240 if err := w.WriteByte(q); err != nil { |
|
241 return err |
|
242 } |
|
243 if _, err := w.WriteString(s); err != nil { |
|
244 return err |
|
245 } |
|
246 if err := w.WriteByte(q); err != nil { |
|
247 return err |
|
248 } |
|
249 return nil |
|
250 } |
|
251 |
|
// voidElements is the set of void elements, i.e. elements that can't have any
// contents. The names are those listed in Section 12.1.2, "Elements". A
// lookup yields true for a void element name and false for anything else.
var voidElements = func() map[string]bool {
	names := [...]string{
		"area", "base", "br", "col",
		"command", "embed", "hr", "img",
		"input", "keygen", "link", "meta",
		"param", "source", "track", "wbr",
	}
	set := make(map[string]bool, len(names))
	for _, name := range names {
		set[name] = true
	}
	return set
}()