vendor/golang.org/x/net/html/atom/gen.go
changeset 251 1c52a0eeb952
parent 250 c040f992052f
child 252 8399cd48111b
equal deleted inserted replaced
250:c040f992052f 251:1c52a0eeb952
     1 // Copyright 2012 The Go Authors. All rights reserved.
       
     2 // Use of this source code is governed by a BSD-style
       
     3 // license that can be found in the LICENSE file.
       
     4 
       
     5 // +build ignore
       
     6 
       
     7 //go:generate go run gen.go
       
     8 //go:generate go run gen.go -test
       
     9 
       
    10 package main
       
    11 
       
    12 import (
       
    13 	"bytes"
       
    14 	"flag"
       
    15 	"fmt"
       
    16 	"go/format"
       
    17 	"io/ioutil"
       
    18 	"math/rand"
       
    19 	"os"
       
    20 	"sort"
       
    21 	"strings"
       
    22 )
       
    23 
       
    24 // identifier converts s to a Go exported identifier.
       
    25 // It converts "div" to "Div" and "accept-charset" to "AcceptCharset".
       
    26 func identifier(s string) string {
       
    27 	b := make([]byte, 0, len(s))
       
    28 	cap := true
       
    29 	for _, c := range s {
       
    30 		if c == '-' {
       
    31 			cap = true
       
    32 			continue
       
    33 		}
       
    34 		if cap && 'a' <= c && c <= 'z' {
       
    35 			c -= 'a' - 'A'
       
    36 		}
       
    37 		cap = false
       
    38 		b = append(b, byte(c))
       
    39 	}
       
    40 	return string(b)
       
    41 }
       
    42 
       
    43 var test = flag.Bool("test", false, "generate table_test.go")
       
    44 
       
    45 func genFile(name string, buf *bytes.Buffer) {
       
    46 	b, err := format.Source(buf.Bytes())
       
    47 	if err != nil {
       
    48 		fmt.Fprintln(os.Stderr, err)
       
    49 		os.Exit(1)
       
    50 	}
       
    51 	if err := ioutil.WriteFile(name, b, 0644); err != nil {
       
    52 		fmt.Fprintln(os.Stderr, err)
       
    53 		os.Exit(1)
       
    54 	}
       
    55 }
       
    56 
       
    57 func main() {
       
    58 	flag.Parse()
       
    59 
       
    60 	var all []string
       
    61 	all = append(all, elements...)
       
    62 	all = append(all, attributes...)
       
    63 	all = append(all, eventHandlers...)
       
    64 	all = append(all, extra...)
       
    65 	sort.Strings(all)
       
    66 
       
    67 	// uniq - lists have dups
       
    68 	w := 0
       
    69 	for _, s := range all {
       
    70 		if w == 0 || all[w-1] != s {
       
    71 			all[w] = s
       
    72 			w++
       
    73 		}
       
    74 	}
       
    75 	all = all[:w]
       
    76 
       
    77 	if *test {
       
    78 		var buf bytes.Buffer
       
    79 		fmt.Fprintln(&buf, "// Code generated by go generate gen.go; DO NOT EDIT.\n")
       
    80 		fmt.Fprintln(&buf, "//go:generate go run gen.go -test\n")
       
    81 		fmt.Fprintln(&buf, "package atom\n")
       
    82 		fmt.Fprintln(&buf, "var testAtomList = []string{")
       
    83 		for _, s := range all {
       
    84 			fmt.Fprintf(&buf, "\t%q,\n", s)
       
    85 		}
       
    86 		fmt.Fprintln(&buf, "}")
       
    87 
       
    88 		genFile("table_test.go", &buf)
       
    89 		return
       
    90 	}
       
    91 
       
    92 	// Find hash that minimizes table size.
       
    93 	var best *table
       
    94 	for i := 0; i < 1000000; i++ {
       
    95 		if best != nil && 1<<(best.k-1) < len(all) {
       
    96 			break
       
    97 		}
       
    98 		h := rand.Uint32()
       
    99 		for k := uint(0); k <= 16; k++ {
       
   100 			if best != nil && k >= best.k {
       
   101 				break
       
   102 			}
       
   103 			var t table
       
   104 			if t.init(h, k, all) {
       
   105 				best = &t
       
   106 				break
       
   107 			}
       
   108 		}
       
   109 	}
       
   110 	if best == nil {
       
   111 		fmt.Fprintf(os.Stderr, "failed to construct string table\n")
       
   112 		os.Exit(1)
       
   113 	}
       
   114 
       
   115 	// Lay out strings, using overlaps when possible.
       
   116 	layout := append([]string{}, all...)
       
   117 
       
   118 	// Remove strings that are substrings of other strings
       
   119 	for changed := true; changed; {
       
   120 		changed = false
       
   121 		for i, s := range layout {
       
   122 			if s == "" {
       
   123 				continue
       
   124 			}
       
   125 			for j, t := range layout {
       
   126 				if i != j && t != "" && strings.Contains(s, t) {
       
   127 					changed = true
       
   128 					layout[j] = ""
       
   129 				}
       
   130 			}
       
   131 		}
       
   132 	}
       
   133 
       
   134 	// Join strings where one suffix matches another prefix.
       
   135 	for {
       
   136 		// Find best i, j, k such that layout[i][len-k:] == layout[j][:k],
       
   137 		// maximizing overlap length k.
       
   138 		besti := -1
       
   139 		bestj := -1
       
   140 		bestk := 0
       
   141 		for i, s := range layout {
       
   142 			if s == "" {
       
   143 				continue
       
   144 			}
       
   145 			for j, t := range layout {
       
   146 				if i == j {
       
   147 					continue
       
   148 				}
       
   149 				for k := bestk + 1; k <= len(s) && k <= len(t); k++ {
       
   150 					if s[len(s)-k:] == t[:k] {
       
   151 						besti = i
       
   152 						bestj = j
       
   153 						bestk = k
       
   154 					}
       
   155 				}
       
   156 			}
       
   157 		}
       
   158 		if bestk > 0 {
       
   159 			layout[besti] += layout[bestj][bestk:]
       
   160 			layout[bestj] = ""
       
   161 			continue
       
   162 		}
       
   163 		break
       
   164 	}
       
   165 
       
   166 	text := strings.Join(layout, "")
       
   167 
       
   168 	atom := map[string]uint32{}
       
   169 	for _, s := range all {
       
   170 		off := strings.Index(text, s)
       
   171 		if off < 0 {
       
   172 			panic("lost string " + s)
       
   173 		}
       
   174 		atom[s] = uint32(off<<8 | len(s))
       
   175 	}
       
   176 
       
   177 	var buf bytes.Buffer
       
   178 	// Generate the Go code.
       
   179 	fmt.Fprintln(&buf, "// Code generated by go generate gen.go; DO NOT EDIT.\n")
       
   180 	fmt.Fprintln(&buf, "//go:generate go run gen.go\n")
       
   181 	fmt.Fprintln(&buf, "package atom\n\nconst (")
       
   182 
       
   183 	// compute max len
       
   184 	maxLen := 0
       
   185 	for _, s := range all {
       
   186 		if maxLen < len(s) {
       
   187 			maxLen = len(s)
       
   188 		}
       
   189 		fmt.Fprintf(&buf, "\t%s Atom = %#x\n", identifier(s), atom[s])
       
   190 	}
       
   191 	fmt.Fprintln(&buf, ")\n")
       
   192 
       
   193 	fmt.Fprintf(&buf, "const hash0 = %#x\n\n", best.h0)
       
   194 	fmt.Fprintf(&buf, "const maxAtomLen = %d\n\n", maxLen)
       
   195 
       
   196 	fmt.Fprintf(&buf, "var table = [1<<%d]Atom{\n", best.k)
       
   197 	for i, s := range best.tab {
       
   198 		if s == "" {
       
   199 			continue
       
   200 		}
       
   201 		fmt.Fprintf(&buf, "\t%#x: %#x, // %s\n", i, atom[s], s)
       
   202 	}
       
   203 	fmt.Fprintf(&buf, "}\n")
       
   204 	datasize := (1 << best.k) * 4
       
   205 
       
   206 	fmt.Fprintln(&buf, "const atomText =")
       
   207 	textsize := len(text)
       
   208 	for len(text) > 60 {
       
   209 		fmt.Fprintf(&buf, "\t%q +\n", text[:60])
       
   210 		text = text[60:]
       
   211 	}
       
   212 	fmt.Fprintf(&buf, "\t%q\n\n", text)
       
   213 
       
   214 	genFile("table.go", &buf)
       
   215 
       
   216 	fmt.Fprintf(os.Stdout, "%d atoms; %d string bytes + %d tables = %d total data\n", len(all), textsize, datasize, textsize+datasize)
       
   217 }
       
   218 
       
   219 type byLen []string
       
   220 
       
   221 func (x byLen) Less(i, j int) bool { return len(x[i]) > len(x[j]) }
       
   222 func (x byLen) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
       
   223 func (x byLen) Len() int           { return len(x) }
       
   224 
       
   225 // fnv computes the FNV hash with an arbitrary starting value h.
       
   226 func fnv(h uint32, s string) uint32 {
       
   227 	for i := 0; i < len(s); i++ {
       
   228 		h ^= uint32(s[i])
       
   229 		h *= 16777619
       
   230 	}
       
   231 	return h
       
   232 }
       
   233 
       
   234 // A table represents an attempt at constructing the lookup table.
       
   235 // The lookup table uses cuckoo hashing, meaning that each string
       
   236 // can be found in one of two positions.
       
   237 type table struct {
       
   238 	h0   uint32
       
   239 	k    uint
       
   240 	mask uint32
       
   241 	tab  []string
       
   242 }
       
   243 
       
   244 // hash returns the two hashes for s.
       
   245 func (t *table) hash(s string) (h1, h2 uint32) {
       
   246 	h := fnv(t.h0, s)
       
   247 	h1 = h & t.mask
       
   248 	h2 = (h >> 16) & t.mask
       
   249 	return
       
   250 }
       
   251 
       
   252 // init initializes the table with the given parameters.
       
   253 // h0 is the initial hash value,
       
   254 // k is the number of bits of hash value to use, and
       
   255 // x is the list of strings to store in the table.
       
   256 // init returns false if the table cannot be constructed.
       
   257 func (t *table) init(h0 uint32, k uint, x []string) bool {
       
   258 	t.h0 = h0
       
   259 	t.k = k
       
   260 	t.tab = make([]string, 1<<k)
       
   261 	t.mask = 1<<k - 1
       
   262 	for _, s := range x {
       
   263 		if !t.insert(s) {
       
   264 			return false
       
   265 		}
       
   266 	}
       
   267 	return true
       
   268 }
       
   269 
       
   270 // insert inserts s in the table.
       
   271 func (t *table) insert(s string) bool {
       
   272 	h1, h2 := t.hash(s)
       
   273 	if t.tab[h1] == "" {
       
   274 		t.tab[h1] = s
       
   275 		return true
       
   276 	}
       
   277 	if t.tab[h2] == "" {
       
   278 		t.tab[h2] = s
       
   279 		return true
       
   280 	}
       
   281 	if t.push(h1, 0) {
       
   282 		t.tab[h1] = s
       
   283 		return true
       
   284 	}
       
   285 	if t.push(h2, 0) {
       
   286 		t.tab[h2] = s
       
   287 		return true
       
   288 	}
       
   289 	return false
       
   290 }
       
   291 
       
   292 // push attempts to push aside the entry in slot i.
       
   293 func (t *table) push(i uint32, depth int) bool {
       
   294 	if depth > len(t.tab) {
       
   295 		return false
       
   296 	}
       
   297 	s := t.tab[i]
       
   298 	h1, h2 := t.hash(s)
       
   299 	j := h1 + h2 - i
       
   300 	if t.tab[j] != "" && !t.push(j, depth+1) {
       
   301 		return false
       
   302 	}
       
   303 	t.tab[j] = s
       
   304 	return true
       
   305 }
       
   306 
       
   307 // The lists of element names and attribute keys were taken from
       
   308 // https://html.spec.whatwg.org/multipage/indices.html#index
       
   309 // as of the "HTML Living Standard - Last Updated 16 April 2018" version.
       
   310 
       
   311 // "command", "keygen" and "menuitem" have been removed from the spec,
       
   312 // but are kept here for backwards compatibility.
       
   313 var elements = []string{
       
   314 	"a",
       
   315 	"abbr",
       
   316 	"address",
       
   317 	"area",
       
   318 	"article",
       
   319 	"aside",
       
   320 	"audio",
       
   321 	"b",
       
   322 	"base",
       
   323 	"bdi",
       
   324 	"bdo",
       
   325 	"blockquote",
       
   326 	"body",
       
   327 	"br",
       
   328 	"button",
       
   329 	"canvas",
       
   330 	"caption",
       
   331 	"cite",
       
   332 	"code",
       
   333 	"col",
       
   334 	"colgroup",
       
   335 	"command",
       
   336 	"data",
       
   337 	"datalist",
       
   338 	"dd",
       
   339 	"del",
       
   340 	"details",
       
   341 	"dfn",
       
   342 	"dialog",
       
   343 	"div",
       
   344 	"dl",
       
   345 	"dt",
       
   346 	"em",
       
   347 	"embed",
       
   348 	"fieldset",
       
   349 	"figcaption",
       
   350 	"figure",
       
   351 	"footer",
       
   352 	"form",
       
   353 	"h1",
       
   354 	"h2",
       
   355 	"h3",
       
   356 	"h4",
       
   357 	"h5",
       
   358 	"h6",
       
   359 	"head",
       
   360 	"header",
       
   361 	"hgroup",
       
   362 	"hr",
       
   363 	"html",
       
   364 	"i",
       
   365 	"iframe",
       
   366 	"img",
       
   367 	"input",
       
   368 	"ins",
       
   369 	"kbd",
       
   370 	"keygen",
       
   371 	"label",
       
   372 	"legend",
       
   373 	"li",
       
   374 	"link",
       
   375 	"main",
       
   376 	"map",
       
   377 	"mark",
       
   378 	"menu",
       
   379 	"menuitem",
       
   380 	"meta",
       
   381 	"meter",
       
   382 	"nav",
       
   383 	"noscript",
       
   384 	"object",
       
   385 	"ol",
       
   386 	"optgroup",
       
   387 	"option",
       
   388 	"output",
       
   389 	"p",
       
   390 	"param",
       
   391 	"picture",
       
   392 	"pre",
       
   393 	"progress",
       
   394 	"q",
       
   395 	"rp",
       
   396 	"rt",
       
   397 	"ruby",
       
   398 	"s",
       
   399 	"samp",
       
   400 	"script",
       
   401 	"section",
       
   402 	"select",
       
   403 	"slot",
       
   404 	"small",
       
   405 	"source",
       
   406 	"span",
       
   407 	"strong",
       
   408 	"style",
       
   409 	"sub",
       
   410 	"summary",
       
   411 	"sup",
       
   412 	"table",
       
   413 	"tbody",
       
   414 	"td",
       
   415 	"template",
       
   416 	"textarea",
       
   417 	"tfoot",
       
   418 	"th",
       
   419 	"thead",
       
   420 	"time",
       
   421 	"title",
       
   422 	"tr",
       
   423 	"track",
       
   424 	"u",
       
   425 	"ul",
       
   426 	"var",
       
   427 	"video",
       
   428 	"wbr",
       
   429 }
       
   430 
       
   431 // https://html.spec.whatwg.org/multipage/indices.html#attributes-3
       
   432 //
       
   433 // "challenge", "command", "contextmenu", "dropzone", "icon", "keytype", "mediagroup",
       
   434 // "radiogroup", "spellcheck", "scoped", "seamless", "sortable" and "sorted" have been removed from the spec,
       
   435 // but are kept here for backwards compatibility.
       
   436 var attributes = []string{
       
   437 	"abbr",
       
   438 	"accept",
       
   439 	"accept-charset",
       
   440 	"accesskey",
       
   441 	"action",
       
   442 	"allowfullscreen",
       
   443 	"allowpaymentrequest",
       
   444 	"allowusermedia",
       
   445 	"alt",
       
   446 	"as",
       
   447 	"async",
       
   448 	"autocomplete",
       
   449 	"autofocus",
       
   450 	"autoplay",
       
   451 	"challenge",
       
   452 	"charset",
       
   453 	"checked",
       
   454 	"cite",
       
   455 	"class",
       
   456 	"color",
       
   457 	"cols",
       
   458 	"colspan",
       
   459 	"command",
       
   460 	"content",
       
   461 	"contenteditable",
       
   462 	"contextmenu",
       
   463 	"controls",
       
   464 	"coords",
       
   465 	"crossorigin",
       
   466 	"data",
       
   467 	"datetime",
       
   468 	"default",
       
   469 	"defer",
       
   470 	"dir",
       
   471 	"dirname",
       
   472 	"disabled",
       
   473 	"download",
       
   474 	"draggable",
       
   475 	"dropzone",
       
   476 	"enctype",
       
   477 	"for",
       
   478 	"form",
       
   479 	"formaction",
       
   480 	"formenctype",
       
   481 	"formmethod",
       
   482 	"formnovalidate",
       
   483 	"formtarget",
       
   484 	"headers",
       
   485 	"height",
       
   486 	"hidden",
       
   487 	"high",
       
   488 	"href",
       
   489 	"hreflang",
       
   490 	"http-equiv",
       
   491 	"icon",
       
   492 	"id",
       
   493 	"inputmode",
       
   494 	"integrity",
       
   495 	"is",
       
   496 	"ismap",
       
   497 	"itemid",
       
   498 	"itemprop",
       
   499 	"itemref",
       
   500 	"itemscope",
       
   501 	"itemtype",
       
   502 	"keytype",
       
   503 	"kind",
       
   504 	"label",
       
   505 	"lang",
       
   506 	"list",
       
   507 	"loop",
       
   508 	"low",
       
   509 	"manifest",
       
   510 	"max",
       
   511 	"maxlength",
       
   512 	"media",
       
   513 	"mediagroup",
       
   514 	"method",
       
   515 	"min",
       
   516 	"minlength",
       
   517 	"multiple",
       
   518 	"muted",
       
   519 	"name",
       
   520 	"nomodule",
       
   521 	"nonce",
       
   522 	"novalidate",
       
   523 	"open",
       
   524 	"optimum",
       
   525 	"pattern",
       
   526 	"ping",
       
   527 	"placeholder",
       
   528 	"playsinline",
       
   529 	"poster",
       
   530 	"preload",
       
   531 	"radiogroup",
       
   532 	"readonly",
       
   533 	"referrerpolicy",
       
   534 	"rel",
       
   535 	"required",
       
   536 	"reversed",
       
   537 	"rows",
       
   538 	"rowspan",
       
   539 	"sandbox",
       
   540 	"spellcheck",
       
   541 	"scope",
       
   542 	"scoped",
       
   543 	"seamless",
       
   544 	"selected",
       
   545 	"shape",
       
   546 	"size",
       
   547 	"sizes",
       
   548 	"sortable",
       
   549 	"sorted",
       
   550 	"slot",
       
   551 	"span",
       
   552 	"spellcheck",
       
   553 	"src",
       
   554 	"srcdoc",
       
   555 	"srclang",
       
   556 	"srcset",
       
   557 	"start",
       
   558 	"step",
       
   559 	"style",
       
   560 	"tabindex",
       
   561 	"target",
       
   562 	"title",
       
   563 	"translate",
       
   564 	"type",
       
   565 	"typemustmatch",
       
   566 	"updateviacache",
       
   567 	"usemap",
       
   568 	"value",
       
   569 	"width",
       
   570 	"workertype",
       
   571 	"wrap",
       
   572 }
       
   573 
       
   574 // "onautocomplete", "onautocompleteerror", "onmousewheel",
       
   575 // "onshow" and "onsort" have been removed from the spec,
       
   576 // but are kept here for backwards compatibility.
       
   577 var eventHandlers = []string{
       
   578 	"onabort",
       
   579 	"onautocomplete",
       
   580 	"onautocompleteerror",
       
   581 	"onauxclick",
       
   582 	"onafterprint",
       
   583 	"onbeforeprint",
       
   584 	"onbeforeunload",
       
   585 	"onblur",
       
   586 	"oncancel",
       
   587 	"oncanplay",
       
   588 	"oncanplaythrough",
       
   589 	"onchange",
       
   590 	"onclick",
       
   591 	"onclose",
       
   592 	"oncontextmenu",
       
   593 	"oncopy",
       
   594 	"oncuechange",
       
   595 	"oncut",
       
   596 	"ondblclick",
       
   597 	"ondrag",
       
   598 	"ondragend",
       
   599 	"ondragenter",
       
   600 	"ondragexit",
       
   601 	"ondragleave",
       
   602 	"ondragover",
       
   603 	"ondragstart",
       
   604 	"ondrop",
       
   605 	"ondurationchange",
       
   606 	"onemptied",
       
   607 	"onended",
       
   608 	"onerror",
       
   609 	"onfocus",
       
   610 	"onhashchange",
       
   611 	"oninput",
       
   612 	"oninvalid",
       
   613 	"onkeydown",
       
   614 	"onkeypress",
       
   615 	"onkeyup",
       
   616 	"onlanguagechange",
       
   617 	"onload",
       
   618 	"onloadeddata",
       
   619 	"onloadedmetadata",
       
   620 	"onloadend",
       
   621 	"onloadstart",
       
   622 	"onmessage",
       
   623 	"onmessageerror",
       
   624 	"onmousedown",
       
   625 	"onmouseenter",
       
   626 	"onmouseleave",
       
   627 	"onmousemove",
       
   628 	"onmouseout",
       
   629 	"onmouseover",
       
   630 	"onmouseup",
       
   631 	"onmousewheel",
       
   632 	"onwheel",
       
   633 	"onoffline",
       
   634 	"ononline",
       
   635 	"onpagehide",
       
   636 	"onpageshow",
       
   637 	"onpaste",
       
   638 	"onpause",
       
   639 	"onplay",
       
   640 	"onplaying",
       
   641 	"onpopstate",
       
   642 	"onprogress",
       
   643 	"onratechange",
       
   644 	"onreset",
       
   645 	"onresize",
       
   646 	"onrejectionhandled",
       
   647 	"onscroll",
       
   648 	"onsecuritypolicyviolation",
       
   649 	"onseeked",
       
   650 	"onseeking",
       
   651 	"onselect",
       
   652 	"onshow",
       
   653 	"onsort",
       
   654 	"onstalled",
       
   655 	"onstorage",
       
   656 	"onsubmit",
       
   657 	"onsuspend",
       
   658 	"ontimeupdate",
       
   659 	"ontoggle",
       
   660 	"onunhandledrejection",
       
   661 	"onunload",
       
   662 	"onvolumechange",
       
   663 	"onwaiting",
       
   664 }
       
   665 
       
   666 // extra are ad-hoc values not covered by any of the lists above.
       
   667 var extra = []string{
       
   668 	"acronym",
       
   669 	"align",
       
   670 	"annotation",
       
   671 	"annotation-xml",
       
   672 	"applet",
       
   673 	"basefont",
       
   674 	"bgsound",
       
   675 	"big",
       
   676 	"blink",
       
   677 	"center",
       
   678 	"color",
       
   679 	"desc",
       
   680 	"face",
       
   681 	"font",
       
   682 	"foreignObject", // HTML is case-insensitive, but SVG-embedded-in-HTML is case-sensitive.
       
   683 	"foreignobject",
       
   684 	"frame",
       
   685 	"frameset",
       
   686 	"image",
       
   687 	"isindex",
       
   688 	"listing",
       
   689 	"malignmark",
       
   690 	"marquee",
       
   691 	"math",
       
   692 	"mglyph",
       
   693 	"mi",
       
   694 	"mn",
       
   695 	"mo",
       
   696 	"ms",
       
   697 	"mtext",
       
   698 	"nobr",
       
   699 	"noembed",
       
   700 	"noframes",
       
   701 	"plaintext",
       
   702 	"prompt",
       
   703 	"public",
       
   704 	"rb",
       
   705 	"rtc",
       
   706 	"spacer",
       
   707 	"strike",
       
   708 	"svg",
       
   709 	"system",
       
   710 	"tt",
       
   711 	"xmp",
       
   712 }