|
1 // Copyright 2018 Frank Schroeder. All rights reserved. |
|
2 // Use of this source code is governed by a BSD-style |
|
3 // license that can be found in the LICENSE file. |
|
4 // |
|
5 // Parts of the lexer are from the template/text/parser package |
|
6 // For these parts the following applies: |
|
7 // |
|
8 // Copyright 2011 The Go Authors. All rights reserved. |
|
9 // Use of this source code is governed by a BSD-style |
|
10 // license that can be found in the LICENSE file of the go 1.2 |
|
11 // distribution. |
|
12 |
|
13 package properties |
|
14 |
|
15 import ( |
|
16 "fmt" |
|
17 "strconv" |
|
18 "strings" |
|
19 "unicode/utf8" |
|
20 ) |
|
21 |
|
22 // item represents a token or text string returned from the scanner. |
|
23 type item struct { |
|
24 typ itemType // The type of this item. |
|
25 pos int // The starting position, in bytes, of this item in the input string. |
|
26 val string // The value of this item. |
|
27 } |
|
28 |
|
29 func (i item) String() string { |
|
30 switch { |
|
31 case i.typ == itemEOF: |
|
32 return "EOF" |
|
33 case i.typ == itemError: |
|
34 return i.val |
|
35 case len(i.val) > 10: |
|
36 return fmt.Sprintf("%.10q...", i.val) |
|
37 } |
|
38 return fmt.Sprintf("%q", i.val) |
|
39 } |
|
40 |
|
// itemType classifies the items produced by the lexer.
type itemType int

// The item kinds, numbered from zero in declaration order.
const (
	itemError   itemType = iota // scanning failed; the item value is the error text
	itemEOF                     // end of input
	itemKey                     // a key
	itemValue                   // a value
	itemComment                 // a comment
)
|
51 |
|
// eof is the sentinel rune value that signals the end of the input.
const eof = -1
|
54 |
|
// whitespace lists the characters treated as blanks: space, form feed and tab.
// Line breaks are deliberately not part of this set.
const whitespace = " \f\t"
|
57 |
|
58 // stateFn represents the state of the scanner as a function that returns the next state. |
|
59 type stateFn func(*lexer) stateFn |
|
60 |
|
61 // lexer holds the state of the scanner. |
|
62 type lexer struct { |
|
63 input string // the string being scanned |
|
64 state stateFn // the next lexing function to enter |
|
65 pos int // current position in the input |
|
66 start int // start position of this item |
|
67 width int // width of last rune read from input |
|
68 lastPos int // position of most recent item returned by nextItem |
|
69 runes []rune // scanned runes for this item |
|
70 items chan item // channel of scanned items |
|
71 } |
|
72 |
|
73 // next returns the next rune in the input. |
|
74 func (l *lexer) next() rune { |
|
75 if l.pos >= len(l.input) { |
|
76 l.width = 0 |
|
77 return eof |
|
78 } |
|
79 r, w := utf8.DecodeRuneInString(l.input[l.pos:]) |
|
80 l.width = w |
|
81 l.pos += l.width |
|
82 return r |
|
83 } |
|
84 |
|
85 // peek returns but does not consume the next rune in the input. |
|
86 func (l *lexer) peek() rune { |
|
87 r := l.next() |
|
88 l.backup() |
|
89 return r |
|
90 } |
|
91 |
|
92 // backup steps back one rune. Can only be called once per call of next. |
|
93 func (l *lexer) backup() { |
|
94 l.pos -= l.width |
|
95 } |
|
96 |
|
97 // emit passes an item back to the client. |
|
98 func (l *lexer) emit(t itemType) { |
|
99 i := item{t, l.start, string(l.runes)} |
|
100 l.items <- i |
|
101 l.start = l.pos |
|
102 l.runes = l.runes[:0] |
|
103 } |
|
104 |
|
105 // ignore skips over the pending input before this point. |
|
106 func (l *lexer) ignore() { |
|
107 l.start = l.pos |
|
108 } |
|
109 |
|
110 // appends the rune to the current value |
|
111 func (l *lexer) appendRune(r rune) { |
|
112 l.runes = append(l.runes, r) |
|
113 } |
|
114 |
|
115 // accept consumes the next rune if it's from the valid set. |
|
116 func (l *lexer) accept(valid string) bool { |
|
117 if strings.ContainsRune(valid, l.next()) { |
|
118 return true |
|
119 } |
|
120 l.backup() |
|
121 return false |
|
122 } |
|
123 |
|
124 // acceptRun consumes a run of runes from the valid set. |
|
125 func (l *lexer) acceptRun(valid string) { |
|
126 for strings.ContainsRune(valid, l.next()) { |
|
127 } |
|
128 l.backup() |
|
129 } |
|
130 |
|
131 // acceptRunUntil consumes a run of runes up to a terminator. |
|
132 func (l *lexer) acceptRunUntil(term rune) { |
|
133 for term != l.next() { |
|
134 } |
|
135 l.backup() |
|
136 } |
|
137 |
|
138 // hasText returns true if the current parsed text is not empty. |
|
139 func (l *lexer) isNotEmpty() bool { |
|
140 return l.pos > l.start |
|
141 } |
|
142 |
|
143 // lineNumber reports which line we're on, based on the position of |
|
144 // the previous item returned by nextItem. Doing it this way |
|
145 // means we don't have to worry about peek double counting. |
|
146 func (l *lexer) lineNumber() int { |
|
147 return 1 + strings.Count(l.input[:l.lastPos], "\n") |
|
148 } |
|
149 |
|
150 // errorf returns an error token and terminates the scan by passing |
|
151 // back a nil pointer that will be the next state, terminating l.nextItem. |
|
152 func (l *lexer) errorf(format string, args ...interface{}) stateFn { |
|
153 l.items <- item{itemError, l.start, fmt.Sprintf(format, args...)} |
|
154 return nil |
|
155 } |
|
156 |
|
157 // nextItem returns the next item from the input. |
|
158 func (l *lexer) nextItem() item { |
|
159 i := <-l.items |
|
160 l.lastPos = i.pos |
|
161 return i |
|
162 } |
|
163 |
|
164 // lex creates a new scanner for the input string. |
|
165 func lex(input string) *lexer { |
|
166 l := &lexer{ |
|
167 input: input, |
|
168 items: make(chan item), |
|
169 runes: make([]rune, 0, 32), |
|
170 } |
|
171 go l.run() |
|
172 return l |
|
173 } |
|
174 |
|
175 // run runs the state machine for the lexer. |
|
176 func (l *lexer) run() { |
|
177 for l.state = lexBeforeKey(l); l.state != nil; { |
|
178 l.state = l.state(l) |
|
179 } |
|
180 } |
|
181 |
|
182 // state functions |
|
183 |
|
184 // lexBeforeKey scans until a key begins. |
|
185 func lexBeforeKey(l *lexer) stateFn { |
|
186 switch r := l.next(); { |
|
187 case isEOF(r): |
|
188 l.emit(itemEOF) |
|
189 return nil |
|
190 |
|
191 case isEOL(r): |
|
192 l.ignore() |
|
193 return lexBeforeKey |
|
194 |
|
195 case isComment(r): |
|
196 return lexComment |
|
197 |
|
198 case isWhitespace(r): |
|
199 l.ignore() |
|
200 return lexBeforeKey |
|
201 |
|
202 default: |
|
203 l.backup() |
|
204 return lexKey |
|
205 } |
|
206 } |
|
207 |
|
208 // lexComment scans a comment line. The comment character has already been scanned. |
|
209 func lexComment(l *lexer) stateFn { |
|
210 l.acceptRun(whitespace) |
|
211 l.ignore() |
|
212 for { |
|
213 switch r := l.next(); { |
|
214 case isEOF(r): |
|
215 l.ignore() |
|
216 l.emit(itemEOF) |
|
217 return nil |
|
218 case isEOL(r): |
|
219 l.emit(itemComment) |
|
220 return lexBeforeKey |
|
221 default: |
|
222 l.appendRune(r) |
|
223 } |
|
224 } |
|
225 } |
|
226 |
|
227 // lexKey scans the key up to a delimiter |
|
228 func lexKey(l *lexer) stateFn { |
|
229 var r rune |
|
230 |
|
231 Loop: |
|
232 for { |
|
233 switch r = l.next(); { |
|
234 |
|
235 case isEscape(r): |
|
236 err := l.scanEscapeSequence() |
|
237 if err != nil { |
|
238 return l.errorf(err.Error()) |
|
239 } |
|
240 |
|
241 case isEndOfKey(r): |
|
242 l.backup() |
|
243 break Loop |
|
244 |
|
245 case isEOF(r): |
|
246 break Loop |
|
247 |
|
248 default: |
|
249 l.appendRune(r) |
|
250 } |
|
251 } |
|
252 |
|
253 if len(l.runes) > 0 { |
|
254 l.emit(itemKey) |
|
255 } |
|
256 |
|
257 if isEOF(r) { |
|
258 l.emit(itemEOF) |
|
259 return nil |
|
260 } |
|
261 |
|
262 return lexBeforeValue |
|
263 } |
|
264 |
|
265 // lexBeforeValue scans the delimiter between key and value. |
|
266 // Leading and trailing whitespace is ignored. |
|
267 // We expect to be just after the key. |
|
268 func lexBeforeValue(l *lexer) stateFn { |
|
269 l.acceptRun(whitespace) |
|
270 l.accept(":=") |
|
271 l.acceptRun(whitespace) |
|
272 l.ignore() |
|
273 return lexValue |
|
274 } |
|
275 |
|
276 // lexValue scans text until the end of the line. We expect to be just after the delimiter. |
|
277 func lexValue(l *lexer) stateFn { |
|
278 for { |
|
279 switch r := l.next(); { |
|
280 case isEscape(r): |
|
281 if isEOL(l.peek()) { |
|
282 l.next() |
|
283 l.acceptRun(whitespace) |
|
284 } else { |
|
285 err := l.scanEscapeSequence() |
|
286 if err != nil { |
|
287 return l.errorf(err.Error()) |
|
288 } |
|
289 } |
|
290 |
|
291 case isEOL(r): |
|
292 l.emit(itemValue) |
|
293 l.ignore() |
|
294 return lexBeforeKey |
|
295 |
|
296 case isEOF(r): |
|
297 l.emit(itemValue) |
|
298 l.emit(itemEOF) |
|
299 return nil |
|
300 |
|
301 default: |
|
302 l.appendRune(r) |
|
303 } |
|
304 } |
|
305 } |
|
306 |
|
307 // scanEscapeSequence scans either one of the escaped characters |
|
308 // or a unicode literal. We expect to be after the escape character. |
|
309 func (l *lexer) scanEscapeSequence() error { |
|
310 switch r := l.next(); { |
|
311 |
|
312 case isEscapedCharacter(r): |
|
313 l.appendRune(decodeEscapedCharacter(r)) |
|
314 return nil |
|
315 |
|
316 case atUnicodeLiteral(r): |
|
317 return l.scanUnicodeLiteral() |
|
318 |
|
319 case isEOF(r): |
|
320 return fmt.Errorf("premature EOF") |
|
321 |
|
322 // silently drop the escape character and append the rune as is |
|
323 default: |
|
324 l.appendRune(r) |
|
325 return nil |
|
326 } |
|
327 } |
|
328 |
|
329 // scans a unicode literal in the form \uXXXX. We expect to be after the \u. |
|
330 func (l *lexer) scanUnicodeLiteral() error { |
|
331 // scan the digits |
|
332 d := make([]rune, 4) |
|
333 for i := 0; i < 4; i++ { |
|
334 d[i] = l.next() |
|
335 if d[i] == eof || !strings.ContainsRune("0123456789abcdefABCDEF", d[i]) { |
|
336 return fmt.Errorf("invalid unicode literal") |
|
337 } |
|
338 } |
|
339 |
|
340 // decode the digits into a rune |
|
341 r, err := strconv.ParseInt(string(d), 16, 0) |
|
342 if err != nil { |
|
343 return err |
|
344 } |
|
345 |
|
346 l.appendRune(rune(r)) |
|
347 return nil |
|
348 } |
|
349 |
|
// decodeEscapedCharacter maps the rune that followed an escape character to
// the rune it stands for: f, n, r and t become form feed, line feed, carriage
// return and tab. Every other rune stands for itself.
func decodeEscapedCharacter(r rune) rune {
	decoded := r
	switch r {
	case 't':
		decoded = '\t'
	case 'n':
		decoded = '\n'
	case 'r':
		decoded = '\r'
	case 'f':
		decoded = '\f'
	}
	return decoded
}
|
365 |
|
// atUnicodeLiteral reports whether r, the rune right after an already consumed
// escape character, introduces a unicode literal.
func atUnicodeLiteral(r rune) bool {
	const marker = 'u'
	return r == marker
}
|
371 |
|
// isComment reports whether r is one of the characters that start a comment,
// '#' or '!'.
func isComment(r rune) bool {
	switch r {
	case '#', '!':
		return true
	}
	return false
}
|
376 |
|
// isEndOfKey reports whether r ends the key being scanned: a blank (space,
// form feed, tab), a line break, ':' or '='.
func isEndOfKey(r rune) bool {
	const keyTerminators = " \f\t\r\n:="
	return strings.IndexRune(keyTerminators, r) >= 0
}
|
381 |
|
382 // isEOF reports whether we are at EOF. |
|
383 func isEOF(r rune) bool { |
|
384 return r == eof |
|
385 } |
|
386 |
|
// isEOL reports whether r is a line break character, '\n' or '\r'.
func isEOL(r rune) bool {
	switch r {
	case '\r', '\n':
		return true
	default:
		return false
	}
}
|
391 |
|
// isEscape reports whether r is the escape character, a backslash, which
// introduces unicode literals and other escaped characters.
func isEscape(r rune) bool {
	const escape = '\\'
	return r == escape
}
|
397 |
|
// isEscapedCharacter reports whether r, the rune right after an already
// consumed escape character, is one of the characters with an escaped form:
// space, ':', '=', 'f', 'n', 'r' or 't'.
func isEscapedCharacter(r rune) bool {
	const escapable = " :=fnrt"
	return strings.IndexRune(escapable, r) >= 0
}
|
403 |
|
404 // isWhitespace reports whether the rune is a whitespace character. |
|
405 func isWhitespace(r rune) bool { |
|
406 return strings.ContainsRune(whitespace, r) |
|
407 } |