1 // TOML Parser. |
|
2 |
|
3 package toml |
|
4 |
|
5 import ( |
|
6 "errors" |
|
7 "fmt" |
|
8 "math" |
|
9 "reflect" |
|
10 "strconv" |
|
11 "strings" |
|
12 "time" |
|
13 ) |
|
14 |
|
15 type tomlParser struct { |
|
16 flowIdx int |
|
17 flow []token |
|
18 tree *Tree |
|
19 currentTable []string |
|
20 seenTableKeys []string |
|
21 } |
|
22 |
|
// tomlParserStateFn is one state of the parser's state machine. Calling it
// performs the work of that state and yields the state to run next; a nil
// result stops the machine.
type tomlParserStateFn func() tomlParserStateFn
|
24 |
|
25 // Formats and panics an error message based on a token |
|
26 func (p *tomlParser) raiseError(tok *token, msg string, args ...interface{}) { |
|
27 panic(tok.Position.String() + ": " + fmt.Sprintf(msg, args...)) |
|
28 } |
|
29 |
|
30 func (p *tomlParser) run() { |
|
31 for state := p.parseStart; state != nil; { |
|
32 state = state() |
|
33 } |
|
34 } |
|
35 |
|
36 func (p *tomlParser) peek() *token { |
|
37 if p.flowIdx >= len(p.flow) { |
|
38 return nil |
|
39 } |
|
40 return &p.flow[p.flowIdx] |
|
41 } |
|
42 |
|
43 func (p *tomlParser) assume(typ tokenType) { |
|
44 tok := p.getToken() |
|
45 if tok == nil { |
|
46 p.raiseError(tok, "was expecting token %s, but token stream is empty", tok) |
|
47 } |
|
48 if tok.typ != typ { |
|
49 p.raiseError(tok, "was expecting token %s, but got %s instead", typ, tok) |
|
50 } |
|
51 } |
|
52 |
|
53 func (p *tomlParser) getToken() *token { |
|
54 tok := p.peek() |
|
55 if tok == nil { |
|
56 return nil |
|
57 } |
|
58 p.flowIdx++ |
|
59 return tok |
|
60 } |
|
61 |
|
62 func (p *tomlParser) parseStart() tomlParserStateFn { |
|
63 tok := p.peek() |
|
64 |
|
65 // end of stream, parsing is finished |
|
66 if tok == nil { |
|
67 return nil |
|
68 } |
|
69 |
|
70 switch tok.typ { |
|
71 case tokenDoubleLeftBracket: |
|
72 return p.parseGroupArray |
|
73 case tokenLeftBracket: |
|
74 return p.parseGroup |
|
75 case tokenKey: |
|
76 return p.parseAssign |
|
77 case tokenEOF: |
|
78 return nil |
|
79 case tokenError: |
|
80 p.raiseError(tok, "parsing error: %s", tok.String()) |
|
81 default: |
|
82 p.raiseError(tok, "unexpected token %s", tok.typ) |
|
83 } |
|
84 return nil |
|
85 } |
|
86 |
|
87 func (p *tomlParser) parseGroupArray() tomlParserStateFn { |
|
88 startToken := p.getToken() // discard the [[ |
|
89 key := p.getToken() |
|
90 if key.typ != tokenKeyGroupArray { |
|
91 p.raiseError(key, "unexpected token %s, was expecting a table array key", key) |
|
92 } |
|
93 |
|
94 // get or create table array element at the indicated part in the path |
|
95 keys, err := parseKey(key.val) |
|
96 if err != nil { |
|
97 p.raiseError(key, "invalid table array key: %s", err) |
|
98 } |
|
99 p.tree.createSubTree(keys[:len(keys)-1], startToken.Position) // create parent entries |
|
100 destTree := p.tree.GetPath(keys) |
|
101 var array []*Tree |
|
102 if destTree == nil { |
|
103 array = make([]*Tree, 0) |
|
104 } else if target, ok := destTree.([]*Tree); ok && target != nil { |
|
105 array = destTree.([]*Tree) |
|
106 } else { |
|
107 p.raiseError(key, "key %s is already assigned and not of type table array", key) |
|
108 } |
|
109 p.currentTable = keys |
|
110 |
|
111 // add a new tree to the end of the table array |
|
112 newTree := newTree() |
|
113 newTree.position = startToken.Position |
|
114 array = append(array, newTree) |
|
115 p.tree.SetPath(p.currentTable, array) |
|
116 |
|
117 // remove all keys that were children of this table array |
|
118 prefix := key.val + "." |
|
119 found := false |
|
120 for ii := 0; ii < len(p.seenTableKeys); { |
|
121 tableKey := p.seenTableKeys[ii] |
|
122 if strings.HasPrefix(tableKey, prefix) { |
|
123 p.seenTableKeys = append(p.seenTableKeys[:ii], p.seenTableKeys[ii+1:]...) |
|
124 } else { |
|
125 found = (tableKey == key.val) |
|
126 ii++ |
|
127 } |
|
128 } |
|
129 |
|
130 // keep this key name from use by other kinds of assignments |
|
131 if !found { |
|
132 p.seenTableKeys = append(p.seenTableKeys, key.val) |
|
133 } |
|
134 |
|
135 // move to next parser state |
|
136 p.assume(tokenDoubleRightBracket) |
|
137 return p.parseStart |
|
138 } |
|
139 |
|
140 func (p *tomlParser) parseGroup() tomlParserStateFn { |
|
141 startToken := p.getToken() // discard the [ |
|
142 key := p.getToken() |
|
143 if key.typ != tokenKeyGroup { |
|
144 p.raiseError(key, "unexpected token %s, was expecting a table key", key) |
|
145 } |
|
146 for _, item := range p.seenTableKeys { |
|
147 if item == key.val { |
|
148 p.raiseError(key, "duplicated tables") |
|
149 } |
|
150 } |
|
151 |
|
152 p.seenTableKeys = append(p.seenTableKeys, key.val) |
|
153 keys, err := parseKey(key.val) |
|
154 if err != nil { |
|
155 p.raiseError(key, "invalid table array key: %s", err) |
|
156 } |
|
157 if err := p.tree.createSubTree(keys, startToken.Position); err != nil { |
|
158 p.raiseError(key, "%s", err) |
|
159 } |
|
160 destTree := p.tree.GetPath(keys) |
|
161 if target, ok := destTree.(*Tree); ok && target != nil && target.inline { |
|
162 p.raiseError(key, "could not re-define exist inline table or its sub-table : %s", |
|
163 strings.Join(keys, ".")) |
|
164 } |
|
165 p.assume(tokenRightBracket) |
|
166 p.currentTable = keys |
|
167 return p.parseStart |
|
168 } |
|
169 |
|
170 func (p *tomlParser) parseAssign() tomlParserStateFn { |
|
171 key := p.getToken() |
|
172 p.assume(tokenEqual) |
|
173 |
|
174 parsedKey, err := parseKey(key.val) |
|
175 if err != nil { |
|
176 p.raiseError(key, "invalid key: %s", err.Error()) |
|
177 } |
|
178 |
|
179 value := p.parseRvalue() |
|
180 var tableKey []string |
|
181 if len(p.currentTable) > 0 { |
|
182 tableKey = p.currentTable |
|
183 } else { |
|
184 tableKey = []string{} |
|
185 } |
|
186 |
|
187 prefixKey := parsedKey[0 : len(parsedKey)-1] |
|
188 tableKey = append(tableKey, prefixKey...) |
|
189 |
|
190 // find the table to assign, looking out for arrays of tables |
|
191 var targetNode *Tree |
|
192 switch node := p.tree.GetPath(tableKey).(type) { |
|
193 case []*Tree: |
|
194 targetNode = node[len(node)-1] |
|
195 case *Tree: |
|
196 targetNode = node |
|
197 case nil: |
|
198 // create intermediate |
|
199 if err := p.tree.createSubTree(tableKey, key.Position); err != nil { |
|
200 p.raiseError(key, "could not create intermediate group: %s", err) |
|
201 } |
|
202 targetNode = p.tree.GetPath(tableKey).(*Tree) |
|
203 default: |
|
204 p.raiseError(key, "Unknown table type for path: %s", |
|
205 strings.Join(tableKey, ".")) |
|
206 } |
|
207 |
|
208 if targetNode.inline { |
|
209 p.raiseError(key, "could not add key or sub-table to exist inline table or its sub-table : %s", |
|
210 strings.Join(tableKey, ".")) |
|
211 } |
|
212 |
|
213 // assign value to the found table |
|
214 keyVal := parsedKey[len(parsedKey)-1] |
|
215 localKey := []string{keyVal} |
|
216 finalKey := append(tableKey, keyVal) |
|
217 if targetNode.GetPath(localKey) != nil { |
|
218 p.raiseError(key, "The following key was defined twice: %s", |
|
219 strings.Join(finalKey, ".")) |
|
220 } |
|
221 var toInsert interface{} |
|
222 |
|
223 switch value.(type) { |
|
224 case *Tree, []*Tree: |
|
225 toInsert = value |
|
226 default: |
|
227 toInsert = &tomlValue{value: value, position: key.Position} |
|
228 } |
|
229 targetNode.values[keyVal] = toInsert |
|
230 return p.parseStart |
|
231 } |
|
232 |
|
233 var errInvalidUnderscore = errors.New("invalid use of _ in number") |
|
234 |
|
235 func numberContainsInvalidUnderscore(value string) error { |
|
236 // For large numbers, you may use underscores between digits to enhance |
|
237 // readability. Each underscore must be surrounded by at least one digit on |
|
238 // each side. |
|
239 |
|
240 hasBefore := false |
|
241 for idx, r := range value { |
|
242 if r == '_' { |
|
243 if !hasBefore || idx+1 >= len(value) { |
|
244 // can't end with an underscore |
|
245 return errInvalidUnderscore |
|
246 } |
|
247 } |
|
248 hasBefore = isDigit(r) |
|
249 } |
|
250 return nil |
|
251 } |
|
252 |
|
253 var errInvalidUnderscoreHex = errors.New("invalid use of _ in hex number") |
|
254 |
|
255 func hexNumberContainsInvalidUnderscore(value string) error { |
|
256 hasBefore := false |
|
257 for idx, r := range value { |
|
258 if r == '_' { |
|
259 if !hasBefore || idx+1 >= len(value) { |
|
260 // can't end with an underscore |
|
261 return errInvalidUnderscoreHex |
|
262 } |
|
263 } |
|
264 hasBefore = isHexDigit(r) |
|
265 } |
|
266 return nil |
|
267 } |
|
268 |
|
// cleanupNumberToken returns value with every underscore removed, leaving
// all other characters untouched.
func cleanupNumberToken(value string) string {
	return strings.Replace(value, "_", "", -1)
}
|
273 |
|
274 func (p *tomlParser) parseRvalue() interface{} { |
|
275 tok := p.getToken() |
|
276 if tok == nil || tok.typ == tokenEOF { |
|
277 p.raiseError(tok, "expecting a value") |
|
278 } |
|
279 |
|
280 switch tok.typ { |
|
281 case tokenString: |
|
282 return tok.val |
|
283 case tokenTrue: |
|
284 return true |
|
285 case tokenFalse: |
|
286 return false |
|
287 case tokenInf: |
|
288 if tok.val[0] == '-' { |
|
289 return math.Inf(-1) |
|
290 } |
|
291 return math.Inf(1) |
|
292 case tokenNan: |
|
293 return math.NaN() |
|
294 case tokenInteger: |
|
295 cleanedVal := cleanupNumberToken(tok.val) |
|
296 base := 10 |
|
297 s := cleanedVal |
|
298 checkInvalidUnderscore := numberContainsInvalidUnderscore |
|
299 if len(cleanedVal) >= 3 && cleanedVal[0] == '0' { |
|
300 switch cleanedVal[1] { |
|
301 case 'x': |
|
302 checkInvalidUnderscore = hexNumberContainsInvalidUnderscore |
|
303 base = 16 |
|
304 case 'o': |
|
305 base = 8 |
|
306 case 'b': |
|
307 base = 2 |
|
308 default: |
|
309 panic("invalid base") // the lexer should catch this first |
|
310 } |
|
311 s = cleanedVal[2:] |
|
312 } |
|
313 |
|
314 err := checkInvalidUnderscore(tok.val) |
|
315 if err != nil { |
|
316 p.raiseError(tok, "%s", err) |
|
317 } |
|
318 |
|
319 var val interface{} |
|
320 val, err = strconv.ParseInt(s, base, 64) |
|
321 if err == nil { |
|
322 return val |
|
323 } |
|
324 |
|
325 if s[0] != '-' { |
|
326 if val, err = strconv.ParseUint(s, base, 64); err == nil { |
|
327 return val |
|
328 } |
|
329 } |
|
330 p.raiseError(tok, "%s", err) |
|
331 case tokenFloat: |
|
332 err := numberContainsInvalidUnderscore(tok.val) |
|
333 if err != nil { |
|
334 p.raiseError(tok, "%s", err) |
|
335 } |
|
336 cleanedVal := cleanupNumberToken(tok.val) |
|
337 val, err := strconv.ParseFloat(cleanedVal, 64) |
|
338 if err != nil { |
|
339 p.raiseError(tok, "%s", err) |
|
340 } |
|
341 return val |
|
342 case tokenLocalTime: |
|
343 val, err := ParseLocalTime(tok.val) |
|
344 if err != nil { |
|
345 p.raiseError(tok, "%s", err) |
|
346 } |
|
347 return val |
|
348 case tokenLocalDate: |
|
349 // a local date may be followed by: |
|
350 // * nothing: this is a local date |
|
351 // * a local time: this is a local date-time |
|
352 |
|
353 next := p.peek() |
|
354 if next == nil || next.typ != tokenLocalTime { |
|
355 val, err := ParseLocalDate(tok.val) |
|
356 if err != nil { |
|
357 p.raiseError(tok, "%s", err) |
|
358 } |
|
359 return val |
|
360 } |
|
361 |
|
362 localDate := tok |
|
363 localTime := p.getToken() |
|
364 |
|
365 next = p.peek() |
|
366 if next == nil || next.typ != tokenTimeOffset { |
|
367 v := localDate.val + "T" + localTime.val |
|
368 val, err := ParseLocalDateTime(v) |
|
369 if err != nil { |
|
370 p.raiseError(tok, "%s", err) |
|
371 } |
|
372 return val |
|
373 } |
|
374 |
|
375 offset := p.getToken() |
|
376 |
|
377 layout := time.RFC3339Nano |
|
378 v := localDate.val + "T" + localTime.val + offset.val |
|
379 val, err := time.ParseInLocation(layout, v, time.UTC) |
|
380 if err != nil { |
|
381 p.raiseError(tok, "%s", err) |
|
382 } |
|
383 return val |
|
384 case tokenLeftBracket: |
|
385 return p.parseArray() |
|
386 case tokenLeftCurlyBrace: |
|
387 return p.parseInlineTable() |
|
388 case tokenEqual: |
|
389 p.raiseError(tok, "cannot have multiple equals for the same key") |
|
390 case tokenError: |
|
391 p.raiseError(tok, "%s", tok) |
|
392 default: |
|
393 panic(fmt.Errorf("unhandled token: %v", tok)) |
|
394 } |
|
395 |
|
396 return nil |
|
397 } |
|
398 |
|
399 func tokenIsComma(t *token) bool { |
|
400 return t != nil && t.typ == tokenComma |
|
401 } |
|
402 |
|
403 func (p *tomlParser) parseInlineTable() *Tree { |
|
404 tree := newTree() |
|
405 var previous *token |
|
406 Loop: |
|
407 for { |
|
408 follow := p.peek() |
|
409 if follow == nil || follow.typ == tokenEOF { |
|
410 p.raiseError(follow, "unterminated inline table") |
|
411 } |
|
412 switch follow.typ { |
|
413 case tokenRightCurlyBrace: |
|
414 p.getToken() |
|
415 break Loop |
|
416 case tokenKey, tokenInteger, tokenString: |
|
417 if !tokenIsComma(previous) && previous != nil { |
|
418 p.raiseError(follow, "comma expected between fields in inline table") |
|
419 } |
|
420 key := p.getToken() |
|
421 p.assume(tokenEqual) |
|
422 |
|
423 parsedKey, err := parseKey(key.val) |
|
424 if err != nil { |
|
425 p.raiseError(key, "invalid key: %s", err) |
|
426 } |
|
427 |
|
428 value := p.parseRvalue() |
|
429 tree.SetPath(parsedKey, value) |
|
430 case tokenComma: |
|
431 if tokenIsComma(previous) { |
|
432 p.raiseError(follow, "need field between two commas in inline table") |
|
433 } |
|
434 p.getToken() |
|
435 default: |
|
436 p.raiseError(follow, "unexpected token type in inline table: %s", follow.String()) |
|
437 } |
|
438 previous = follow |
|
439 } |
|
440 if tokenIsComma(previous) { |
|
441 p.raiseError(previous, "trailing comma at the end of inline table") |
|
442 } |
|
443 tree.inline = true |
|
444 return tree |
|
445 } |
|
446 |
|
447 func (p *tomlParser) parseArray() interface{} { |
|
448 var array []interface{} |
|
449 arrayType := reflect.TypeOf(newTree()) |
|
450 for { |
|
451 follow := p.peek() |
|
452 if follow == nil || follow.typ == tokenEOF { |
|
453 p.raiseError(follow, "unterminated array") |
|
454 } |
|
455 if follow.typ == tokenRightBracket { |
|
456 p.getToken() |
|
457 break |
|
458 } |
|
459 val := p.parseRvalue() |
|
460 if reflect.TypeOf(val) != arrayType { |
|
461 arrayType = nil |
|
462 } |
|
463 array = append(array, val) |
|
464 follow = p.peek() |
|
465 if follow == nil || follow.typ == tokenEOF { |
|
466 p.raiseError(follow, "unterminated array") |
|
467 } |
|
468 if follow.typ != tokenRightBracket && follow.typ != tokenComma { |
|
469 p.raiseError(follow, "missing comma") |
|
470 } |
|
471 if follow.typ == tokenComma { |
|
472 p.getToken() |
|
473 } |
|
474 } |
|
475 |
|
476 // if the array is a mixed-type array or its length is 0, |
|
477 // don't convert it to a table array |
|
478 if len(array) <= 0 { |
|
479 arrayType = nil |
|
480 } |
|
481 // An array of Trees is actually an array of inline |
|
482 // tables, which is a shorthand for a table array. If the |
|
483 // array was not converted from []interface{} to []*Tree, |
|
484 // the two notations would not be equivalent. |
|
485 if arrayType == reflect.TypeOf(newTree()) { |
|
486 tomlArray := make([]*Tree, len(array)) |
|
487 for i, v := range array { |
|
488 tomlArray[i] = v.(*Tree) |
|
489 } |
|
490 return tomlArray |
|
491 } |
|
492 return array |
|
493 } |
|
494 |
|
495 func parseToml(flow []token) *Tree { |
|
496 result := newTree() |
|
497 result.position = Position{1, 1} |
|
498 parser := &tomlParser{ |
|
499 flowIdx: 0, |
|
500 flow: flow, |
|
501 tree: result, |
|
502 currentTable: make([]string, 0), |
|
503 seenTableKeys: make([]string, 0), |
|
504 } |
|
505 parser.run() |
|
506 return result |
|
507 } |
|