|
1 // Copyright 2015 The Go Authors. All rights reserved. |
|
2 // Use of this source code is governed by a BSD-style |
|
3 // license that can be found in the LICENSE file. |
|
4 |
|
5 package runes |
|
6 |
|
7 import ( |
|
8 "unicode/utf8" |
|
9 |
|
10 "golang.org/x/text/transform" |
|
11 ) |
|
12 |
|
// Note: below we pass invalid UTF-8 to the tIn and tNotIn transformers as is.
// This is done for various reasons:
// - To retain the semantics of the Nop transformer: if input is passed to a Nop
//   one would expect it to be unchanged.
// - It would be very expensive to pass a converted RuneError to a transformer:
//   a transformer might need more source bytes after RuneError, meaning that
//   the only way to pass it safely is to create a new buffer and manage the
//   intermingling of RuneErrors and normal input.
// - Many transformers leave ill-formed UTF-8 as is, so this is not
//   inconsistent. Generally ill-formed UTF-8 is only replaced if it is a
//   logical consequence of the operation (as for Map) or if it otherwise would
//   pose security concerns (as for Remove).
// - An alternative would be to return an error on ill-formed UTF-8, but this
//   would be inconsistent with other operations.
|
27 |
|
28 // If returns a transformer that applies tIn to consecutive runes for which |
|
29 // s.Contains(r) and tNotIn to consecutive runes for which !s.Contains(r). Reset |
|
30 // is called on tIn and tNotIn at the start of each run. A Nop transformer will |
|
31 // substitute a nil value passed to tIn or tNotIn. Invalid UTF-8 is translated |
|
32 // to RuneError to determine which transformer to apply, but is passed as is to |
|
33 // the respective transformer. |
|
34 func If(s Set, tIn, tNotIn transform.Transformer) Transformer { |
|
35 if tIn == nil && tNotIn == nil { |
|
36 return Transformer{transform.Nop} |
|
37 } |
|
38 if tIn == nil { |
|
39 tIn = transform.Nop |
|
40 } |
|
41 if tNotIn == nil { |
|
42 tNotIn = transform.Nop |
|
43 } |
|
44 sIn, ok := tIn.(transform.SpanningTransformer) |
|
45 if !ok { |
|
46 sIn = dummySpan{tIn} |
|
47 } |
|
48 sNotIn, ok := tNotIn.(transform.SpanningTransformer) |
|
49 if !ok { |
|
50 sNotIn = dummySpan{tNotIn} |
|
51 } |
|
52 |
|
53 a := &cond{ |
|
54 tIn: sIn, |
|
55 tNotIn: sNotIn, |
|
56 f: s.Contains, |
|
57 } |
|
58 a.Reset() |
|
59 return Transformer{a} |
|
60 } |
|
61 |
|
62 type dummySpan struct{ transform.Transformer } |
|
63 |
|
64 func (d dummySpan) Span(src []byte, atEOF bool) (n int, err error) { |
|
65 return 0, transform.ErrEndOfSpan |
|
66 } |
|
67 |
|
68 type cond struct { |
|
69 tIn, tNotIn transform.SpanningTransformer |
|
70 f func(rune) bool |
|
71 check func(rune) bool // current check to perform |
|
72 t transform.SpanningTransformer // current transformer to use |
|
73 } |
|
74 |
|
75 // Reset implements transform.Transformer. |
|
76 func (t *cond) Reset() { |
|
77 t.check = t.is |
|
78 t.t = t.tIn |
|
79 t.t.Reset() // notIn will be reset on first usage. |
|
80 } |
|
81 |
|
82 func (t *cond) is(r rune) bool { |
|
83 if t.f(r) { |
|
84 return true |
|
85 } |
|
86 t.check = t.isNot |
|
87 t.t = t.tNotIn |
|
88 t.tNotIn.Reset() |
|
89 return false |
|
90 } |
|
91 |
|
92 func (t *cond) isNot(r rune) bool { |
|
93 if !t.f(r) { |
|
94 return true |
|
95 } |
|
96 t.check = t.is |
|
97 t.t = t.tIn |
|
98 t.tIn.Reset() |
|
99 return false |
|
100 } |
|
101 |
|
102 // This implementation of Span doesn't help all too much, but it needs to be |
|
103 // there to satisfy this package's Transformer interface. |
|
104 // TODO: there are certainly room for improvements, though. For example, if |
|
105 // t.t == transform.Nop (which will a common occurrence) it will save a bundle |
|
106 // to special-case that loop. |
|
107 func (t *cond) Span(src []byte, atEOF bool) (n int, err error) { |
|
108 p := 0 |
|
109 for n < len(src) && err == nil { |
|
110 // Don't process too much at a time as the Spanner that will be |
|
111 // called on this block may terminate early. |
|
112 const maxChunk = 4096 |
|
113 max := len(src) |
|
114 if v := n + maxChunk; v < max { |
|
115 max = v |
|
116 } |
|
117 atEnd := false |
|
118 size := 0 |
|
119 current := t.t |
|
120 for ; p < max; p += size { |
|
121 r := rune(src[p]) |
|
122 if r < utf8.RuneSelf { |
|
123 size = 1 |
|
124 } else if r, size = utf8.DecodeRune(src[p:]); size == 1 { |
|
125 if !atEOF && !utf8.FullRune(src[p:]) { |
|
126 err = transform.ErrShortSrc |
|
127 break |
|
128 } |
|
129 } |
|
130 if !t.check(r) { |
|
131 // The next rune will be the start of a new run. |
|
132 atEnd = true |
|
133 break |
|
134 } |
|
135 } |
|
136 n2, err2 := current.Span(src[n:p], atEnd || (atEOF && p == len(src))) |
|
137 n += n2 |
|
138 if err2 != nil { |
|
139 return n, err2 |
|
140 } |
|
141 // At this point either err != nil or t.check will pass for the rune at p. |
|
142 p = n + size |
|
143 } |
|
144 return n, err |
|
145 } |
|
146 |
|
147 func (t *cond) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { |
|
148 p := 0 |
|
149 for nSrc < len(src) && err == nil { |
|
150 // Don't process too much at a time, as the work might be wasted if the |
|
151 // destination buffer isn't large enough to hold the result or a |
|
152 // transform returns an error early. |
|
153 const maxChunk = 4096 |
|
154 max := len(src) |
|
155 if n := nSrc + maxChunk; n < len(src) { |
|
156 max = n |
|
157 } |
|
158 atEnd := false |
|
159 size := 0 |
|
160 current := t.t |
|
161 for ; p < max; p += size { |
|
162 r := rune(src[p]) |
|
163 if r < utf8.RuneSelf { |
|
164 size = 1 |
|
165 } else if r, size = utf8.DecodeRune(src[p:]); size == 1 { |
|
166 if !atEOF && !utf8.FullRune(src[p:]) { |
|
167 err = transform.ErrShortSrc |
|
168 break |
|
169 } |
|
170 } |
|
171 if !t.check(r) { |
|
172 // The next rune will be the start of a new run. |
|
173 atEnd = true |
|
174 break |
|
175 } |
|
176 } |
|
177 nDst2, nSrc2, err2 := current.Transform(dst[nDst:], src[nSrc:p], atEnd || (atEOF && p == len(src))) |
|
178 nDst += nDst2 |
|
179 nSrc += nSrc2 |
|
180 if err2 != nil { |
|
181 return nDst, nSrc, err2 |
|
182 } |
|
183 // At this point either err != nil or t.check will pass for the rune at p. |
|
184 p = nSrc + size |
|
185 } |
|
186 return nDst, nSrc, err |
|
187 } |