1 // Copyright 2012 The Go Authors. All rights reserved. |
|
2 // Use of this source code is governed by a BSD-style |
|
3 // license that can be found in the LICENSE file. |
|
4 |
|
5 // +build ignore |
|
6 |
|
7 //go:generate go run gen.go |
|
8 //go:generate go run gen.go -test |
|
9 |
|
10 package main |
|
11 |
|
12 import ( |
|
13 "bytes" |
|
14 "flag" |
|
15 "fmt" |
|
16 "go/format" |
|
17 "io/ioutil" |
|
18 "math/rand" |
|
19 "os" |
|
20 "sort" |
|
21 "strings" |
|
22 ) |
|
23 |
|
24 // identifier converts s to a Go exported identifier. |
|
25 // It converts "div" to "Div" and "accept-charset" to "AcceptCharset". |
|
26 func identifier(s string) string { |
|
27 b := make([]byte, 0, len(s)) |
|
28 cap := true |
|
29 for _, c := range s { |
|
30 if c == '-' { |
|
31 cap = true |
|
32 continue |
|
33 } |
|
34 if cap && 'a' <= c && c <= 'z' { |
|
35 c -= 'a' - 'A' |
|
36 } |
|
37 cap = false |
|
38 b = append(b, byte(c)) |
|
39 } |
|
40 return string(b) |
|
41 } |
|
42 |
|
43 var test = flag.Bool("test", false, "generate table_test.go") |
|
44 |
|
45 func genFile(name string, buf *bytes.Buffer) { |
|
46 b, err := format.Source(buf.Bytes()) |
|
47 if err != nil { |
|
48 fmt.Fprintln(os.Stderr, err) |
|
49 os.Exit(1) |
|
50 } |
|
51 if err := ioutil.WriteFile(name, b, 0644); err != nil { |
|
52 fmt.Fprintln(os.Stderr, err) |
|
53 os.Exit(1) |
|
54 } |
|
55 } |
|
56 |
|
57 func main() { |
|
58 flag.Parse() |
|
59 |
|
60 var all []string |
|
61 all = append(all, elements...) |
|
62 all = append(all, attributes...) |
|
63 all = append(all, eventHandlers...) |
|
64 all = append(all, extra...) |
|
65 sort.Strings(all) |
|
66 |
|
67 // uniq - lists have dups |
|
68 w := 0 |
|
69 for _, s := range all { |
|
70 if w == 0 || all[w-1] != s { |
|
71 all[w] = s |
|
72 w++ |
|
73 } |
|
74 } |
|
75 all = all[:w] |
|
76 |
|
77 if *test { |
|
78 var buf bytes.Buffer |
|
79 fmt.Fprintln(&buf, "// Code generated by go generate gen.go; DO NOT EDIT.\n") |
|
80 fmt.Fprintln(&buf, "//go:generate go run gen.go -test\n") |
|
81 fmt.Fprintln(&buf, "package atom\n") |
|
82 fmt.Fprintln(&buf, "var testAtomList = []string{") |
|
83 for _, s := range all { |
|
84 fmt.Fprintf(&buf, "\t%q,\n", s) |
|
85 } |
|
86 fmt.Fprintln(&buf, "}") |
|
87 |
|
88 genFile("table_test.go", &buf) |
|
89 return |
|
90 } |
|
91 |
|
92 // Find hash that minimizes table size. |
|
93 var best *table |
|
94 for i := 0; i < 1000000; i++ { |
|
95 if best != nil && 1<<(best.k-1) < len(all) { |
|
96 break |
|
97 } |
|
98 h := rand.Uint32() |
|
99 for k := uint(0); k <= 16; k++ { |
|
100 if best != nil && k >= best.k { |
|
101 break |
|
102 } |
|
103 var t table |
|
104 if t.init(h, k, all) { |
|
105 best = &t |
|
106 break |
|
107 } |
|
108 } |
|
109 } |
|
110 if best == nil { |
|
111 fmt.Fprintf(os.Stderr, "failed to construct string table\n") |
|
112 os.Exit(1) |
|
113 } |
|
114 |
|
115 // Lay out strings, using overlaps when possible. |
|
116 layout := append([]string{}, all...) |
|
117 |
|
118 // Remove strings that are substrings of other strings |
|
119 for changed := true; changed; { |
|
120 changed = false |
|
121 for i, s := range layout { |
|
122 if s == "" { |
|
123 continue |
|
124 } |
|
125 for j, t := range layout { |
|
126 if i != j && t != "" && strings.Contains(s, t) { |
|
127 changed = true |
|
128 layout[j] = "" |
|
129 } |
|
130 } |
|
131 } |
|
132 } |
|
133 |
|
134 // Join strings where one suffix matches another prefix. |
|
135 for { |
|
136 // Find best i, j, k such that layout[i][len-k:] == layout[j][:k], |
|
137 // maximizing overlap length k. |
|
138 besti := -1 |
|
139 bestj := -1 |
|
140 bestk := 0 |
|
141 for i, s := range layout { |
|
142 if s == "" { |
|
143 continue |
|
144 } |
|
145 for j, t := range layout { |
|
146 if i == j { |
|
147 continue |
|
148 } |
|
149 for k := bestk + 1; k <= len(s) && k <= len(t); k++ { |
|
150 if s[len(s)-k:] == t[:k] { |
|
151 besti = i |
|
152 bestj = j |
|
153 bestk = k |
|
154 } |
|
155 } |
|
156 } |
|
157 } |
|
158 if bestk > 0 { |
|
159 layout[besti] += layout[bestj][bestk:] |
|
160 layout[bestj] = "" |
|
161 continue |
|
162 } |
|
163 break |
|
164 } |
|
165 |
|
166 text := strings.Join(layout, "") |
|
167 |
|
168 atom := map[string]uint32{} |
|
169 for _, s := range all { |
|
170 off := strings.Index(text, s) |
|
171 if off < 0 { |
|
172 panic("lost string " + s) |
|
173 } |
|
174 atom[s] = uint32(off<<8 | len(s)) |
|
175 } |
|
176 |
|
177 var buf bytes.Buffer |
|
178 // Generate the Go code. |
|
179 fmt.Fprintln(&buf, "// Code generated by go generate gen.go; DO NOT EDIT.\n") |
|
180 fmt.Fprintln(&buf, "//go:generate go run gen.go\n") |
|
181 fmt.Fprintln(&buf, "package atom\n\nconst (") |
|
182 |
|
183 // compute max len |
|
184 maxLen := 0 |
|
185 for _, s := range all { |
|
186 if maxLen < len(s) { |
|
187 maxLen = len(s) |
|
188 } |
|
189 fmt.Fprintf(&buf, "\t%s Atom = %#x\n", identifier(s), atom[s]) |
|
190 } |
|
191 fmt.Fprintln(&buf, ")\n") |
|
192 |
|
193 fmt.Fprintf(&buf, "const hash0 = %#x\n\n", best.h0) |
|
194 fmt.Fprintf(&buf, "const maxAtomLen = %d\n\n", maxLen) |
|
195 |
|
196 fmt.Fprintf(&buf, "var table = [1<<%d]Atom{\n", best.k) |
|
197 for i, s := range best.tab { |
|
198 if s == "" { |
|
199 continue |
|
200 } |
|
201 fmt.Fprintf(&buf, "\t%#x: %#x, // %s\n", i, atom[s], s) |
|
202 } |
|
203 fmt.Fprintf(&buf, "}\n") |
|
204 datasize := (1 << best.k) * 4 |
|
205 |
|
206 fmt.Fprintln(&buf, "const atomText =") |
|
207 textsize := len(text) |
|
208 for len(text) > 60 { |
|
209 fmt.Fprintf(&buf, "\t%q +\n", text[:60]) |
|
210 text = text[60:] |
|
211 } |
|
212 fmt.Fprintf(&buf, "\t%q\n\n", text) |
|
213 |
|
214 genFile("table.go", &buf) |
|
215 |
|
216 fmt.Fprintf(os.Stdout, "%d atoms; %d string bytes + %d tables = %d total data\n", len(all), textsize, datasize, textsize+datasize) |
|
217 } |
|
218 |
|
219 type byLen []string |
|
220 |
|
221 func (x byLen) Less(i, j int) bool { return len(x[i]) > len(x[j]) } |
|
222 func (x byLen) Swap(i, j int) { x[i], x[j] = x[j], x[i] } |
|
223 func (x byLen) Len() int { return len(x) } |
|
224 |
|
225 // fnv computes the FNV hash with an arbitrary starting value h. |
|
226 func fnv(h uint32, s string) uint32 { |
|
227 for i := 0; i < len(s); i++ { |
|
228 h ^= uint32(s[i]) |
|
229 h *= 16777619 |
|
230 } |
|
231 return h |
|
232 } |
|
233 |
|
234 // A table represents an attempt at constructing the lookup table. |
|
235 // The lookup table uses cuckoo hashing, meaning that each string |
|
236 // can be found in one of two positions. |
|
237 type table struct { |
|
238 h0 uint32 |
|
239 k uint |
|
240 mask uint32 |
|
241 tab []string |
|
242 } |
|
243 |
|
244 // hash returns the two hashes for s. |
|
245 func (t *table) hash(s string) (h1, h2 uint32) { |
|
246 h := fnv(t.h0, s) |
|
247 h1 = h & t.mask |
|
248 h2 = (h >> 16) & t.mask |
|
249 return |
|
250 } |
|
251 |
|
252 // init initializes the table with the given parameters. |
|
253 // h0 is the initial hash value, |
|
254 // k is the number of bits of hash value to use, and |
|
255 // x is the list of strings to store in the table. |
|
256 // init returns false if the table cannot be constructed. |
|
257 func (t *table) init(h0 uint32, k uint, x []string) bool { |
|
258 t.h0 = h0 |
|
259 t.k = k |
|
260 t.tab = make([]string, 1<<k) |
|
261 t.mask = 1<<k - 1 |
|
262 for _, s := range x { |
|
263 if !t.insert(s) { |
|
264 return false |
|
265 } |
|
266 } |
|
267 return true |
|
268 } |
|
269 |
|
270 // insert inserts s in the table. |
|
271 func (t *table) insert(s string) bool { |
|
272 h1, h2 := t.hash(s) |
|
273 if t.tab[h1] == "" { |
|
274 t.tab[h1] = s |
|
275 return true |
|
276 } |
|
277 if t.tab[h2] == "" { |
|
278 t.tab[h2] = s |
|
279 return true |
|
280 } |
|
281 if t.push(h1, 0) { |
|
282 t.tab[h1] = s |
|
283 return true |
|
284 } |
|
285 if t.push(h2, 0) { |
|
286 t.tab[h2] = s |
|
287 return true |
|
288 } |
|
289 return false |
|
290 } |
|
291 |
|
292 // push attempts to push aside the entry in slot i. |
|
293 func (t *table) push(i uint32, depth int) bool { |
|
294 if depth > len(t.tab) { |
|
295 return false |
|
296 } |
|
297 s := t.tab[i] |
|
298 h1, h2 := t.hash(s) |
|
299 j := h1 + h2 - i |
|
300 if t.tab[j] != "" && !t.push(j, depth+1) { |
|
301 return false |
|
302 } |
|
303 t.tab[j] = s |
|
304 return true |
|
305 } |
|
306 |
|
307 // The lists of element names and attribute keys were taken from |
|
308 // https://html.spec.whatwg.org/multipage/indices.html#index |
|
309 // as of the "HTML Living Standard - Last Updated 16 April 2018" version. |
|
310 |
|
311 // "command", "keygen" and "menuitem" have been removed from the spec, |
|
312 // but are kept here for backwards compatibility. |
|
313 var elements = []string{ |
|
314 "a", |
|
315 "abbr", |
|
316 "address", |
|
317 "area", |
|
318 "article", |
|
319 "aside", |
|
320 "audio", |
|
321 "b", |
|
322 "base", |
|
323 "bdi", |
|
324 "bdo", |
|
325 "blockquote", |
|
326 "body", |
|
327 "br", |
|
328 "button", |
|
329 "canvas", |
|
330 "caption", |
|
331 "cite", |
|
332 "code", |
|
333 "col", |
|
334 "colgroup", |
|
335 "command", |
|
336 "data", |
|
337 "datalist", |
|
338 "dd", |
|
339 "del", |
|
340 "details", |
|
341 "dfn", |
|
342 "dialog", |
|
343 "div", |
|
344 "dl", |
|
345 "dt", |
|
346 "em", |
|
347 "embed", |
|
348 "fieldset", |
|
349 "figcaption", |
|
350 "figure", |
|
351 "footer", |
|
352 "form", |
|
353 "h1", |
|
354 "h2", |
|
355 "h3", |
|
356 "h4", |
|
357 "h5", |
|
358 "h6", |
|
359 "head", |
|
360 "header", |
|
361 "hgroup", |
|
362 "hr", |
|
363 "html", |
|
364 "i", |
|
365 "iframe", |
|
366 "img", |
|
367 "input", |
|
368 "ins", |
|
369 "kbd", |
|
370 "keygen", |
|
371 "label", |
|
372 "legend", |
|
373 "li", |
|
374 "link", |
|
375 "main", |
|
376 "map", |
|
377 "mark", |
|
378 "menu", |
|
379 "menuitem", |
|
380 "meta", |
|
381 "meter", |
|
382 "nav", |
|
383 "noscript", |
|
384 "object", |
|
385 "ol", |
|
386 "optgroup", |
|
387 "option", |
|
388 "output", |
|
389 "p", |
|
390 "param", |
|
391 "picture", |
|
392 "pre", |
|
393 "progress", |
|
394 "q", |
|
395 "rp", |
|
396 "rt", |
|
397 "ruby", |
|
398 "s", |
|
399 "samp", |
|
400 "script", |
|
401 "section", |
|
402 "select", |
|
403 "slot", |
|
404 "small", |
|
405 "source", |
|
406 "span", |
|
407 "strong", |
|
408 "style", |
|
409 "sub", |
|
410 "summary", |
|
411 "sup", |
|
412 "table", |
|
413 "tbody", |
|
414 "td", |
|
415 "template", |
|
416 "textarea", |
|
417 "tfoot", |
|
418 "th", |
|
419 "thead", |
|
420 "time", |
|
421 "title", |
|
422 "tr", |
|
423 "track", |
|
424 "u", |
|
425 "ul", |
|
426 "var", |
|
427 "video", |
|
428 "wbr", |
|
429 } |
|
430 |
|
431 // https://html.spec.whatwg.org/multipage/indices.html#attributes-3 |
|
432 // |
|
433 // "challenge", "command", "contextmenu", "dropzone", "icon", "keytype", "mediagroup", |
|
434 // "radiogroup", "spellcheck", "scoped", "seamless", "sortable" and "sorted" have been removed from the spec, |
|
435 // but are kept here for backwards compatibility. |
|
436 var attributes = []string{ |
|
437 "abbr", |
|
438 "accept", |
|
439 "accept-charset", |
|
440 "accesskey", |
|
441 "action", |
|
442 "allowfullscreen", |
|
443 "allowpaymentrequest", |
|
444 "allowusermedia", |
|
445 "alt", |
|
446 "as", |
|
447 "async", |
|
448 "autocomplete", |
|
449 "autofocus", |
|
450 "autoplay", |
|
451 "challenge", |
|
452 "charset", |
|
453 "checked", |
|
454 "cite", |
|
455 "class", |
|
456 "color", |
|
457 "cols", |
|
458 "colspan", |
|
459 "command", |
|
460 "content", |
|
461 "contenteditable", |
|
462 "contextmenu", |
|
463 "controls", |
|
464 "coords", |
|
465 "crossorigin", |
|
466 "data", |
|
467 "datetime", |
|
468 "default", |
|
469 "defer", |
|
470 "dir", |
|
471 "dirname", |
|
472 "disabled", |
|
473 "download", |
|
474 "draggable", |
|
475 "dropzone", |
|
476 "enctype", |
|
477 "for", |
|
478 "form", |
|
479 "formaction", |
|
480 "formenctype", |
|
481 "formmethod", |
|
482 "formnovalidate", |
|
483 "formtarget", |
|
484 "headers", |
|
485 "height", |
|
486 "hidden", |
|
487 "high", |
|
488 "href", |
|
489 "hreflang", |
|
490 "http-equiv", |
|
491 "icon", |
|
492 "id", |
|
493 "inputmode", |
|
494 "integrity", |
|
495 "is", |
|
496 "ismap", |
|
497 "itemid", |
|
498 "itemprop", |
|
499 "itemref", |
|
500 "itemscope", |
|
501 "itemtype", |
|
502 "keytype", |
|
503 "kind", |
|
504 "label", |
|
505 "lang", |
|
506 "list", |
|
507 "loop", |
|
508 "low", |
|
509 "manifest", |
|
510 "max", |
|
511 "maxlength", |
|
512 "media", |
|
513 "mediagroup", |
|
514 "method", |
|
515 "min", |
|
516 "minlength", |
|
517 "multiple", |
|
518 "muted", |
|
519 "name", |
|
520 "nomodule", |
|
521 "nonce", |
|
522 "novalidate", |
|
523 "open", |
|
524 "optimum", |
|
525 "pattern", |
|
526 "ping", |
|
527 "placeholder", |
|
528 "playsinline", |
|
529 "poster", |
|
530 "preload", |
|
531 "radiogroup", |
|
532 "readonly", |
|
533 "referrerpolicy", |
|
534 "rel", |
|
535 "required", |
|
536 "reversed", |
|
537 "rows", |
|
538 "rowspan", |
|
539 "sandbox", |
|
540 "spellcheck", |
|
541 "scope", |
|
542 "scoped", |
|
543 "seamless", |
|
544 "selected", |
|
545 "shape", |
|
546 "size", |
|
547 "sizes", |
|
548 "sortable", |
|
549 "sorted", |
|
550 "slot", |
|
551 "span", |
|
552 "spellcheck", |
|
553 "src", |
|
554 "srcdoc", |
|
555 "srclang", |
|
556 "srcset", |
|
557 "start", |
|
558 "step", |
|
559 "style", |
|
560 "tabindex", |
|
561 "target", |
|
562 "title", |
|
563 "translate", |
|
564 "type", |
|
565 "typemustmatch", |
|
566 "updateviacache", |
|
567 "usemap", |
|
568 "value", |
|
569 "width", |
|
570 "workertype", |
|
571 "wrap", |
|
572 } |
|
573 |
|
574 // "onautocomplete", "onautocompleteerror", "onmousewheel", |
|
575 // "onshow" and "onsort" have been removed from the spec, |
|
576 // but are kept here for backwards compatibility. |
|
577 var eventHandlers = []string{ |
|
578 "onabort", |
|
579 "onautocomplete", |
|
580 "onautocompleteerror", |
|
581 "onauxclick", |
|
582 "onafterprint", |
|
583 "onbeforeprint", |
|
584 "onbeforeunload", |
|
585 "onblur", |
|
586 "oncancel", |
|
587 "oncanplay", |
|
588 "oncanplaythrough", |
|
589 "onchange", |
|
590 "onclick", |
|
591 "onclose", |
|
592 "oncontextmenu", |
|
593 "oncopy", |
|
594 "oncuechange", |
|
595 "oncut", |
|
596 "ondblclick", |
|
597 "ondrag", |
|
598 "ondragend", |
|
599 "ondragenter", |
|
600 "ondragexit", |
|
601 "ondragleave", |
|
602 "ondragover", |
|
603 "ondragstart", |
|
604 "ondrop", |
|
605 "ondurationchange", |
|
606 "onemptied", |
|
607 "onended", |
|
608 "onerror", |
|
609 "onfocus", |
|
610 "onhashchange", |
|
611 "oninput", |
|
612 "oninvalid", |
|
613 "onkeydown", |
|
614 "onkeypress", |
|
615 "onkeyup", |
|
616 "onlanguagechange", |
|
617 "onload", |
|
618 "onloadeddata", |
|
619 "onloadedmetadata", |
|
620 "onloadend", |
|
621 "onloadstart", |
|
622 "onmessage", |
|
623 "onmessageerror", |
|
624 "onmousedown", |
|
625 "onmouseenter", |
|
626 "onmouseleave", |
|
627 "onmousemove", |
|
628 "onmouseout", |
|
629 "onmouseover", |
|
630 "onmouseup", |
|
631 "onmousewheel", |
|
632 "onwheel", |
|
633 "onoffline", |
|
634 "ononline", |
|
635 "onpagehide", |
|
636 "onpageshow", |
|
637 "onpaste", |
|
638 "onpause", |
|
639 "onplay", |
|
640 "onplaying", |
|
641 "onpopstate", |
|
642 "onprogress", |
|
643 "onratechange", |
|
644 "onreset", |
|
645 "onresize", |
|
646 "onrejectionhandled", |
|
647 "onscroll", |
|
648 "onsecuritypolicyviolation", |
|
649 "onseeked", |
|
650 "onseeking", |
|
651 "onselect", |
|
652 "onshow", |
|
653 "onsort", |
|
654 "onstalled", |
|
655 "onstorage", |
|
656 "onsubmit", |
|
657 "onsuspend", |
|
658 "ontimeupdate", |
|
659 "ontoggle", |
|
660 "onunhandledrejection", |
|
661 "onunload", |
|
662 "onvolumechange", |
|
663 "onwaiting", |
|
664 } |
|
665 |
|
666 // extra are ad-hoc values not covered by any of the lists above. |
|
667 var extra = []string{ |
|
668 "acronym", |
|
669 "align", |
|
670 "annotation", |
|
671 "annotation-xml", |
|
672 "applet", |
|
673 "basefont", |
|
674 "bgsound", |
|
675 "big", |
|
676 "blink", |
|
677 "center", |
|
678 "color", |
|
679 "desc", |
|
680 "face", |
|
681 "font", |
|
682 "foreignObject", // HTML is case-insensitive, but SVG-embedded-in-HTML is case-sensitive. |
|
683 "foreignobject", |
|
684 "frame", |
|
685 "frameset", |
|
686 "image", |
|
687 "isindex", |
|
688 "listing", |
|
689 "malignmark", |
|
690 "marquee", |
|
691 "math", |
|
692 "mglyph", |
|
693 "mi", |
|
694 "mn", |
|
695 "mo", |
|
696 "ms", |
|
697 "mtext", |
|
698 "nobr", |
|
699 "noembed", |
|
700 "noframes", |
|
701 "plaintext", |
|
702 "prompt", |
|
703 "public", |
|
704 "rb", |
|
705 "rtc", |
|
706 "spacer", |
|
707 "strike", |
|
708 "svg", |
|
709 "system", |
|
710 "tt", |
|
711 "xmp", |
|
712 } |
|