vendor/golang.org/x/text/unicode/norm/triegen.go
changeset 251 1c52a0eeb952
parent 250 c040f992052f
child 252 8399cd48111b
equal deleted inserted replaced
250:c040f992052f 251:1c52a0eeb952
     1 // Copyright 2011 The Go Authors. All rights reserved.
       
     2 // Use of this source code is governed by a BSD-style
       
     3 // license that can be found in the LICENSE file.
       
     4 
       
     5 // +build ignore
       
     6 
       
     7 // Trie table generator.
       
     8 // Used by make*tables tools to generate a go file with trie data structures
       
     9 // for mapping UTF-8 to a 16-bit value. All but the last byte in a UTF-8 byte
       
    10 // sequence are used to lookup offsets in the index table to be used for the
       
    11 // next byte. The last byte is used to index into a table with 16-bit values.
       
    12 
       
    13 package main
       
    14 
       
    15 import (
       
    16 	"fmt"
       
    17 	"io"
       
    18 )
       
    19 
       
    20 const maxSparseEntries = 16
       
    21 
       
    22 type normCompacter struct {
       
    23 	sparseBlocks [][]uint64
       
    24 	sparseOffset []uint16
       
    25 	sparseCount  int
       
    26 	name         string
       
    27 }
       
    28 
       
    29 func mostFrequentStride(a []uint64) int {
       
    30 	counts := make(map[int]int)
       
    31 	var v int
       
    32 	for _, x := range a {
       
    33 		if stride := int(x) - v; v != 0 && stride >= 0 {
       
    34 			counts[stride]++
       
    35 		}
       
    36 		v = int(x)
       
    37 	}
       
    38 	var maxs, maxc int
       
    39 	for stride, cnt := range counts {
       
    40 		if cnt > maxc || (cnt == maxc && stride < maxs) {
       
    41 			maxs, maxc = stride, cnt
       
    42 		}
       
    43 	}
       
    44 	return maxs
       
    45 }
       
    46 
       
    47 func countSparseEntries(a []uint64) int {
       
    48 	stride := mostFrequentStride(a)
       
    49 	var v, count int
       
    50 	for _, tv := range a {
       
    51 		if int(tv)-v != stride {
       
    52 			if tv != 0 {
       
    53 				count++
       
    54 			}
       
    55 		}
       
    56 		v = int(tv)
       
    57 	}
       
    58 	return count
       
    59 }
       
    60 
       
    61 func (c *normCompacter) Size(v []uint64) (sz int, ok bool) {
       
    62 	if n := countSparseEntries(v); n <= maxSparseEntries {
       
    63 		return (n+1)*4 + 2, true
       
    64 	}
       
    65 	return 0, false
       
    66 }
       
    67 
       
    68 func (c *normCompacter) Store(v []uint64) uint32 {
       
    69 	h := uint32(len(c.sparseOffset))
       
    70 	c.sparseBlocks = append(c.sparseBlocks, v)
       
    71 	c.sparseOffset = append(c.sparseOffset, uint16(c.sparseCount))
       
    72 	c.sparseCount += countSparseEntries(v) + 1
       
    73 	return h
       
    74 }
       
    75 
       
    76 func (c *normCompacter) Handler() string {
       
    77 	return c.name + "Sparse.lookup"
       
    78 }
       
    79 
       
    80 func (c *normCompacter) Print(w io.Writer) (retErr error) {
       
    81 	p := func(f string, x ...interface{}) {
       
    82 		if _, err := fmt.Fprintf(w, f, x...); retErr == nil && err != nil {
       
    83 			retErr = err
       
    84 		}
       
    85 	}
       
    86 
       
    87 	ls := len(c.sparseBlocks)
       
    88 	p("// %sSparseOffset: %d entries, %d bytes\n", c.name, ls, ls*2)
       
    89 	p("var %sSparseOffset = %#v\n\n", c.name, c.sparseOffset)
       
    90 
       
    91 	ns := c.sparseCount
       
    92 	p("// %sSparseValues: %d entries, %d bytes\n", c.name, ns, ns*4)
       
    93 	p("var %sSparseValues = [%d]valueRange {", c.name, ns)
       
    94 	for i, b := range c.sparseBlocks {
       
    95 		p("\n// Block %#x, offset %#x", i, c.sparseOffset[i])
       
    96 		var v int
       
    97 		stride := mostFrequentStride(b)
       
    98 		n := countSparseEntries(b)
       
    99 		p("\n{value:%#04x,lo:%#02x},", stride, uint8(n))
       
   100 		for i, nv := range b {
       
   101 			if int(nv)-v != stride {
       
   102 				if v != 0 {
       
   103 					p(",hi:%#02x},", 0x80+i-1)
       
   104 				}
       
   105 				if nv != 0 {
       
   106 					p("\n{value:%#04x,lo:%#02x", nv, 0x80+i)
       
   107 				}
       
   108 			}
       
   109 			v = int(nv)
       
   110 		}
       
   111 		if v != 0 {
       
   112 			p(",hi:%#02x},", 0x80+len(b)-1)
       
   113 		}
       
   114 	}
       
   115 	p("\n}\n\n")
       
   116 	return
       
   117 }