1 // Copyright 2011 The Go Authors. All rights reserved. |
|
2 // Use of this source code is governed by a BSD-style |
|
3 // license that can be found in the LICENSE file. |
|
4 |
|
5 // +build ignore |
|
6 |
|
7 // Trie table generator. |
|
8 // Used by make*tables tools to generate a go file with trie data structures |
|
9 // for mapping UTF-8 to a 16-bit value. All but the last byte in a UTF-8 byte |
|
10 // sequence are used to lookup offsets in the index table to be used for the |
|
11 // next byte. The last byte is used to index into a table with 16-bit values. |
|
12 |
|
13 package main |
|
14 |
|
15 import ( |
|
16 "fmt" |
|
17 "io" |
|
18 ) |
|
19 |
|
20 const maxSparseEntries = 16 |
|
21 |
|
22 type normCompacter struct { |
|
23 sparseBlocks [][]uint64 |
|
24 sparseOffset []uint16 |
|
25 sparseCount int |
|
26 name string |
|
27 } |
|
28 |
|
29 func mostFrequentStride(a []uint64) int { |
|
30 counts := make(map[int]int) |
|
31 var v int |
|
32 for _, x := range a { |
|
33 if stride := int(x) - v; v != 0 && stride >= 0 { |
|
34 counts[stride]++ |
|
35 } |
|
36 v = int(x) |
|
37 } |
|
38 var maxs, maxc int |
|
39 for stride, cnt := range counts { |
|
40 if cnt > maxc || (cnt == maxc && stride < maxs) { |
|
41 maxs, maxc = stride, cnt |
|
42 } |
|
43 } |
|
44 return maxs |
|
45 } |
|
46 |
|
47 func countSparseEntries(a []uint64) int { |
|
48 stride := mostFrequentStride(a) |
|
49 var v, count int |
|
50 for _, tv := range a { |
|
51 if int(tv)-v != stride { |
|
52 if tv != 0 { |
|
53 count++ |
|
54 } |
|
55 } |
|
56 v = int(tv) |
|
57 } |
|
58 return count |
|
59 } |
|
60 |
|
61 func (c *normCompacter) Size(v []uint64) (sz int, ok bool) { |
|
62 if n := countSparseEntries(v); n <= maxSparseEntries { |
|
63 return (n+1)*4 + 2, true |
|
64 } |
|
65 return 0, false |
|
66 } |
|
67 |
|
68 func (c *normCompacter) Store(v []uint64) uint32 { |
|
69 h := uint32(len(c.sparseOffset)) |
|
70 c.sparseBlocks = append(c.sparseBlocks, v) |
|
71 c.sparseOffset = append(c.sparseOffset, uint16(c.sparseCount)) |
|
72 c.sparseCount += countSparseEntries(v) + 1 |
|
73 return h |
|
74 } |
|
75 |
|
76 func (c *normCompacter) Handler() string { |
|
77 return c.name + "Sparse.lookup" |
|
78 } |
|
79 |
|
80 func (c *normCompacter) Print(w io.Writer) (retErr error) { |
|
81 p := func(f string, x ...interface{}) { |
|
82 if _, err := fmt.Fprintf(w, f, x...); retErr == nil && err != nil { |
|
83 retErr = err |
|
84 } |
|
85 } |
|
86 |
|
87 ls := len(c.sparseBlocks) |
|
88 p("// %sSparseOffset: %d entries, %d bytes\n", c.name, ls, ls*2) |
|
89 p("var %sSparseOffset = %#v\n\n", c.name, c.sparseOffset) |
|
90 |
|
91 ns := c.sparseCount |
|
92 p("// %sSparseValues: %d entries, %d bytes\n", c.name, ns, ns*4) |
|
93 p("var %sSparseValues = [%d]valueRange {", c.name, ns) |
|
94 for i, b := range c.sparseBlocks { |
|
95 p("\n// Block %#x, offset %#x", i, c.sparseOffset[i]) |
|
96 var v int |
|
97 stride := mostFrequentStride(b) |
|
98 n := countSparseEntries(b) |
|
99 p("\n{value:%#04x,lo:%#02x},", stride, uint8(n)) |
|
100 for i, nv := range b { |
|
101 if int(nv)-v != stride { |
|
102 if v != 0 { |
|
103 p(",hi:%#02x},", 0x80+i-1) |
|
104 } |
|
105 if nv != 0 { |
|
106 p("\n{value:%#04x,lo:%#02x", nv, 0x80+i) |
|
107 } |
|
108 } |
|
109 v = int(nv) |
|
110 } |
|
111 if v != 0 { |
|
112 p(",hi:%#02x},", 0x80+len(b)-1) |
|
113 } |
|
114 } |
|
115 p("\n}\n\n") |
|
116 return |
|
117 } |
|