|
1 package unstable |
|
2 |
|
3 import ( |
|
4 "bytes" |
|
5 "fmt" |
|
6 "unicode" |
|
7 |
|
8 "github.com/pelletier/go-toml/v2/internal/characters" |
|
9 "github.com/pelletier/go-toml/v2/internal/danger" |
|
10 ) |
|
11 |
|
// ParserError describes an error relative to the content of the document.
//
// It cannot outlive the instance of Parser it refers to, and may cause panics
// if the parser is reset.
type ParserError struct {
	// Highlight is the part of the document the error relates to. See
	// NewParserError for the constraint on which slices are valid here.
	Highlight []byte
	// Message is the human-readable description returned by Error.
	Message string
	Key     []string // optional
}
|
21 |
|
// Error is the implementation of the error interface. It returns the Message
// field as is; Highlight and Key are not included in the text.
func (e *ParserError) Error() string {
	return e.Message
}
|
26 |
|
27 // NewParserError is a convenience function to create a ParserError |
|
28 // |
|
29 // Warning: Highlight needs to be a subslice of Parser.data, so only slices |
|
30 // returned by Parser.Raw are valid candidates. |
|
31 func NewParserError(highlight []byte, format string, args ...interface{}) error { |
|
32 return &ParserError{ |
|
33 Highlight: highlight, |
|
34 Message: fmt.Errorf(format, args...).Error(), |
|
35 } |
|
36 } |
|
37 |
|
// Parser scans over a TOML-encoded document and generates an iterative AST.
//
// To prime the Parser, first reset it with the contents of a TOML document.
// Then, process all top-level expressions sequentially. See Example.
//
// Don't forget to check Error() after you're done parsing.
//
// Each top-level expression needs to be fully processed before calling
// NextExpression() again. Otherwise, calls to various Node methods may panic if
// the parser has moved on to the next expression.
//
// For performance reasons, go-toml doesn't make a copy of the input bytes to
// the parser. Make sure to copy all the bytes you need to outlive the slice
// given to the parser.
//
// The parser doesn't provide nodes for comments yet, nor for whitespace.
type Parser struct {
	// data is the slice given to the last call to Reset.
	data []byte
	// builder stores the nodes of the expression being parsed. It is reset
	// by Reset and at the start of NextExpression.
	builder builder
	// ref designates the node returned by Expression.
	ref reference
	// left is the part of data that has not been consumed yet.
	left []byte
	// err is the error that stopped parsing, if any. It is reported by Error.
	err error
	// first is true until an expression has been parsed without error. Once
	// false, NextExpression requires a newline before the next expression.
	first bool
}
|
62 |
|
// Data returns the slice provided to the last call to Reset. The slice is
// returned directly, not copied.
func (p *Parser) Data() []byte {
	return p.data
}
|
67 |
|
68 // Range returns a range description that corresponds to a given slice of the |
|
69 // input. If the argument is not a subslice of the parser input, this function |
|
70 // panics. |
|
71 func (p *Parser) Range(b []byte) Range { |
|
72 return Range{ |
|
73 Offset: uint32(danger.SubsliceOffset(p.data, b)), |
|
74 Length: uint32(len(b)), |
|
75 } |
|
76 } |
|
77 |
|
78 // Raw returns the slice corresponding to the bytes in the given range. |
|
79 func (p *Parser) Raw(raw Range) []byte { |
|
80 return p.data[raw.Offset : raw.Offset+raw.Length] |
|
81 } |
|
82 |
|
83 // Reset brings the parser to its initial state for a given input. It wipes an |
|
84 // reuses internal storage to reduce allocation. |
|
85 func (p *Parser) Reset(b []byte) { |
|
86 p.builder.Reset() |
|
87 p.ref = invalidReference |
|
88 p.data = b |
|
89 p.left = b |
|
90 p.err = nil |
|
91 p.first = true |
|
92 } |
|
93 |
|
94 // NextExpression parses the next top-level expression. If an expression was |
|
95 // successfully parsed, it returns true. If the parser is at the end of the |
|
96 // document or an error occurred, it returns false. |
|
97 // |
|
98 // Retrieve the parsed expression with Expression(). |
|
99 func (p *Parser) NextExpression() bool { |
|
100 if len(p.left) == 0 || p.err != nil { |
|
101 return false |
|
102 } |
|
103 |
|
104 p.builder.Reset() |
|
105 p.ref = invalidReference |
|
106 |
|
107 for { |
|
108 if len(p.left) == 0 || p.err != nil { |
|
109 return false |
|
110 } |
|
111 |
|
112 if !p.first { |
|
113 p.left, p.err = p.parseNewline(p.left) |
|
114 } |
|
115 |
|
116 if len(p.left) == 0 || p.err != nil { |
|
117 return false |
|
118 } |
|
119 |
|
120 p.ref, p.left, p.err = p.parseExpression(p.left) |
|
121 |
|
122 if p.err != nil { |
|
123 return false |
|
124 } |
|
125 |
|
126 p.first = false |
|
127 |
|
128 if p.ref.Valid() { |
|
129 return true |
|
130 } |
|
131 } |
|
132 } |
|
133 |
|
// Expression returns a pointer to the node representing the last successfully
// parsed expression.
func (p *Parser) Expression() *Node {
	return p.builder.NodeAt(p.ref)
}
|
139 |
|
// Error returns any error that has occurred during parsing. It returns nil if
// no error has been recorded since the last call to Reset.
func (p *Parser) Error() error {
	return p.err
}
|
144 |
|
145 func (p *Parser) parseNewline(b []byte) ([]byte, error) { |
|
146 if b[0] == '\n' { |
|
147 return b[1:], nil |
|
148 } |
|
149 |
|
150 if b[0] == '\r' { |
|
151 _, rest, err := scanWindowsNewline(b) |
|
152 return rest, err |
|
153 } |
|
154 |
|
155 return nil, NewParserError(b[0:1], "expected newline but got %#U", b[0]) |
|
156 } |
|
157 |
|
158 func (p *Parser) parseExpression(b []byte) (reference, []byte, error) { |
|
159 // expression = ws [ comment ] |
|
160 // expression =/ ws keyval ws [ comment ] |
|
161 // expression =/ ws table ws [ comment ] |
|
162 ref := invalidReference |
|
163 |
|
164 b = p.parseWhitespace(b) |
|
165 |
|
166 if len(b) == 0 { |
|
167 return ref, b, nil |
|
168 } |
|
169 |
|
170 if b[0] == '#' { |
|
171 _, rest, err := scanComment(b) |
|
172 return ref, rest, err |
|
173 } |
|
174 |
|
175 if b[0] == '\n' || b[0] == '\r' { |
|
176 return ref, b, nil |
|
177 } |
|
178 |
|
179 var err error |
|
180 if b[0] == '[' { |
|
181 ref, b, err = p.parseTable(b) |
|
182 } else { |
|
183 ref, b, err = p.parseKeyval(b) |
|
184 } |
|
185 |
|
186 if err != nil { |
|
187 return ref, nil, err |
|
188 } |
|
189 |
|
190 b = p.parseWhitespace(b) |
|
191 |
|
192 if len(b) > 0 && b[0] == '#' { |
|
193 _, rest, err := scanComment(b) |
|
194 return ref, rest, err |
|
195 } |
|
196 |
|
197 return ref, b, nil |
|
198 } |
|
199 |
|
200 func (p *Parser) parseTable(b []byte) (reference, []byte, error) { |
|
201 // table = std-table / array-table |
|
202 if len(b) > 1 && b[1] == '[' { |
|
203 return p.parseArrayTable(b) |
|
204 } |
|
205 |
|
206 return p.parseStdTable(b) |
|
207 } |
|
208 |
|
209 func (p *Parser) parseArrayTable(b []byte) (reference, []byte, error) { |
|
210 // array-table = array-table-open key array-table-close |
|
211 // array-table-open = %x5B.5B ws ; [[ Double left square bracket |
|
212 // array-table-close = ws %x5D.5D ; ]] Double right square bracket |
|
213 ref := p.builder.Push(Node{ |
|
214 Kind: ArrayTable, |
|
215 }) |
|
216 |
|
217 b = b[2:] |
|
218 b = p.parseWhitespace(b) |
|
219 |
|
220 k, b, err := p.parseKey(b) |
|
221 if err != nil { |
|
222 return ref, nil, err |
|
223 } |
|
224 |
|
225 p.builder.AttachChild(ref, k) |
|
226 b = p.parseWhitespace(b) |
|
227 |
|
228 b, err = expect(']', b) |
|
229 if err != nil { |
|
230 return ref, nil, err |
|
231 } |
|
232 |
|
233 b, err = expect(']', b) |
|
234 |
|
235 return ref, b, err |
|
236 } |
|
237 |
|
238 func (p *Parser) parseStdTable(b []byte) (reference, []byte, error) { |
|
239 // std-table = std-table-open key std-table-close |
|
240 // std-table-open = %x5B ws ; [ Left square bracket |
|
241 // std-table-close = ws %x5D ; ] Right square bracket |
|
242 ref := p.builder.Push(Node{ |
|
243 Kind: Table, |
|
244 }) |
|
245 |
|
246 b = b[1:] |
|
247 b = p.parseWhitespace(b) |
|
248 |
|
249 key, b, err := p.parseKey(b) |
|
250 if err != nil { |
|
251 return ref, nil, err |
|
252 } |
|
253 |
|
254 p.builder.AttachChild(ref, key) |
|
255 |
|
256 b = p.parseWhitespace(b) |
|
257 |
|
258 b, err = expect(']', b) |
|
259 |
|
260 return ref, b, err |
|
261 } |
|
262 |
|
263 func (p *Parser) parseKeyval(b []byte) (reference, []byte, error) { |
|
264 // keyval = key keyval-sep val |
|
265 ref := p.builder.Push(Node{ |
|
266 Kind: KeyValue, |
|
267 }) |
|
268 |
|
269 key, b, err := p.parseKey(b) |
|
270 if err != nil { |
|
271 return invalidReference, nil, err |
|
272 } |
|
273 |
|
274 // keyval-sep = ws %x3D ws ; = |
|
275 |
|
276 b = p.parseWhitespace(b) |
|
277 |
|
278 if len(b) == 0 { |
|
279 return invalidReference, nil, NewParserError(b, "expected = after a key, but the document ends there") |
|
280 } |
|
281 |
|
282 b, err = expect('=', b) |
|
283 if err != nil { |
|
284 return invalidReference, nil, err |
|
285 } |
|
286 |
|
287 b = p.parseWhitespace(b) |
|
288 |
|
289 valRef, b, err := p.parseVal(b) |
|
290 if err != nil { |
|
291 return ref, b, err |
|
292 } |
|
293 |
|
294 p.builder.Chain(valRef, key) |
|
295 p.builder.AttachChild(ref, valRef) |
|
296 |
|
297 return ref, b, err |
|
298 } |
|
299 |
|
300 //nolint:cyclop,funlen |
|
301 func (p *Parser) parseVal(b []byte) (reference, []byte, error) { |
|
302 // val = string / boolean / array / inline-table / date-time / float / integer |
|
303 ref := invalidReference |
|
304 |
|
305 if len(b) == 0 { |
|
306 return ref, nil, NewParserError(b, "expected value, not eof") |
|
307 } |
|
308 |
|
309 var err error |
|
310 c := b[0] |
|
311 |
|
312 switch c { |
|
313 case '"': |
|
314 var raw []byte |
|
315 var v []byte |
|
316 if scanFollowsMultilineBasicStringDelimiter(b) { |
|
317 raw, v, b, err = p.parseMultilineBasicString(b) |
|
318 } else { |
|
319 raw, v, b, err = p.parseBasicString(b) |
|
320 } |
|
321 |
|
322 if err == nil { |
|
323 ref = p.builder.Push(Node{ |
|
324 Kind: String, |
|
325 Raw: p.Range(raw), |
|
326 Data: v, |
|
327 }) |
|
328 } |
|
329 |
|
330 return ref, b, err |
|
331 case '\'': |
|
332 var raw []byte |
|
333 var v []byte |
|
334 if scanFollowsMultilineLiteralStringDelimiter(b) { |
|
335 raw, v, b, err = p.parseMultilineLiteralString(b) |
|
336 } else { |
|
337 raw, v, b, err = p.parseLiteralString(b) |
|
338 } |
|
339 |
|
340 if err == nil { |
|
341 ref = p.builder.Push(Node{ |
|
342 Kind: String, |
|
343 Raw: p.Range(raw), |
|
344 Data: v, |
|
345 }) |
|
346 } |
|
347 |
|
348 return ref, b, err |
|
349 case 't': |
|
350 if !scanFollowsTrue(b) { |
|
351 return ref, nil, NewParserError(atmost(b, 4), "expected 'true'") |
|
352 } |
|
353 |
|
354 ref = p.builder.Push(Node{ |
|
355 Kind: Bool, |
|
356 Data: b[:4], |
|
357 }) |
|
358 |
|
359 return ref, b[4:], nil |
|
360 case 'f': |
|
361 if !scanFollowsFalse(b) { |
|
362 return ref, nil, NewParserError(atmost(b, 5), "expected 'false'") |
|
363 } |
|
364 |
|
365 ref = p.builder.Push(Node{ |
|
366 Kind: Bool, |
|
367 Data: b[:5], |
|
368 }) |
|
369 |
|
370 return ref, b[5:], nil |
|
371 case '[': |
|
372 return p.parseValArray(b) |
|
373 case '{': |
|
374 return p.parseInlineTable(b) |
|
375 default: |
|
376 return p.parseIntOrFloatOrDateTime(b) |
|
377 } |
|
378 } |
|
379 |
|
// atmost returns the first n bytes of b, or b itself when it holds n bytes
// or fewer. The result shares memory with b.
func atmost(b []byte, n int) []byte {
	if len(b) > n {
		return b[:n]
	}

	return b
}
|
387 |
|
388 func (p *Parser) parseLiteralString(b []byte) ([]byte, []byte, []byte, error) { |
|
389 v, rest, err := scanLiteralString(b) |
|
390 if err != nil { |
|
391 return nil, nil, nil, err |
|
392 } |
|
393 |
|
394 return v, v[1 : len(v)-1], rest, nil |
|
395 } |
|
396 |
|
397 func (p *Parser) parseInlineTable(b []byte) (reference, []byte, error) { |
|
398 // inline-table = inline-table-open [ inline-table-keyvals ] inline-table-close |
|
399 // inline-table-open = %x7B ws ; { |
|
400 // inline-table-close = ws %x7D ; } |
|
401 // inline-table-sep = ws %x2C ws ; , Comma |
|
402 // inline-table-keyvals = keyval [ inline-table-sep inline-table-keyvals ] |
|
403 parent := p.builder.Push(Node{ |
|
404 Kind: InlineTable, |
|
405 }) |
|
406 |
|
407 first := true |
|
408 |
|
409 var child reference |
|
410 |
|
411 b = b[1:] |
|
412 |
|
413 var err error |
|
414 |
|
415 for len(b) > 0 { |
|
416 previousB := b |
|
417 b = p.parseWhitespace(b) |
|
418 |
|
419 if len(b) == 0 { |
|
420 return parent, nil, NewParserError(previousB[:1], "inline table is incomplete") |
|
421 } |
|
422 |
|
423 if b[0] == '}' { |
|
424 break |
|
425 } |
|
426 |
|
427 if !first { |
|
428 b, err = expect(',', b) |
|
429 if err != nil { |
|
430 return parent, nil, err |
|
431 } |
|
432 b = p.parseWhitespace(b) |
|
433 } |
|
434 |
|
435 var kv reference |
|
436 |
|
437 kv, b, err = p.parseKeyval(b) |
|
438 if err != nil { |
|
439 return parent, nil, err |
|
440 } |
|
441 |
|
442 if first { |
|
443 p.builder.AttachChild(parent, kv) |
|
444 } else { |
|
445 p.builder.Chain(child, kv) |
|
446 } |
|
447 child = kv |
|
448 |
|
449 first = false |
|
450 } |
|
451 |
|
452 rest, err := expect('}', b) |
|
453 |
|
454 return parent, rest, err |
|
455 } |
|
456 |
|
457 //nolint:funlen,cyclop |
|
458 func (p *Parser) parseValArray(b []byte) (reference, []byte, error) { |
|
459 // array = array-open [ array-values ] ws-comment-newline array-close |
|
460 // array-open = %x5B ; [ |
|
461 // array-close = %x5D ; ] |
|
462 // array-values = ws-comment-newline val ws-comment-newline array-sep array-values |
|
463 // array-values =/ ws-comment-newline val ws-comment-newline [ array-sep ] |
|
464 // array-sep = %x2C ; , Comma |
|
465 // ws-comment-newline = *( wschar / [ comment ] newline ) |
|
466 arrayStart := b |
|
467 b = b[1:] |
|
468 |
|
469 parent := p.builder.Push(Node{ |
|
470 Kind: Array, |
|
471 }) |
|
472 |
|
473 first := true |
|
474 |
|
475 var lastChild reference |
|
476 |
|
477 var err error |
|
478 for len(b) > 0 { |
|
479 b, err = p.parseOptionalWhitespaceCommentNewline(b) |
|
480 if err != nil { |
|
481 return parent, nil, err |
|
482 } |
|
483 |
|
484 if len(b) == 0 { |
|
485 return parent, nil, NewParserError(arrayStart[:1], "array is incomplete") |
|
486 } |
|
487 |
|
488 if b[0] == ']' { |
|
489 break |
|
490 } |
|
491 |
|
492 if b[0] == ',' { |
|
493 if first { |
|
494 return parent, nil, NewParserError(b[0:1], "array cannot start with comma") |
|
495 } |
|
496 b = b[1:] |
|
497 |
|
498 b, err = p.parseOptionalWhitespaceCommentNewline(b) |
|
499 if err != nil { |
|
500 return parent, nil, err |
|
501 } |
|
502 } else if !first { |
|
503 return parent, nil, NewParserError(b[0:1], "array elements must be separated by commas") |
|
504 } |
|
505 |
|
506 // TOML allows trailing commas in arrays. |
|
507 if len(b) > 0 && b[0] == ']' { |
|
508 break |
|
509 } |
|
510 |
|
511 var valueRef reference |
|
512 valueRef, b, err = p.parseVal(b) |
|
513 if err != nil { |
|
514 return parent, nil, err |
|
515 } |
|
516 |
|
517 if first { |
|
518 p.builder.AttachChild(parent, valueRef) |
|
519 } else { |
|
520 p.builder.Chain(lastChild, valueRef) |
|
521 } |
|
522 lastChild = valueRef |
|
523 |
|
524 b, err = p.parseOptionalWhitespaceCommentNewline(b) |
|
525 if err != nil { |
|
526 return parent, nil, err |
|
527 } |
|
528 first = false |
|
529 } |
|
530 |
|
531 rest, err := expect(']', b) |
|
532 |
|
533 return parent, rest, err |
|
534 } |
|
535 |
|
536 func (p *Parser) parseOptionalWhitespaceCommentNewline(b []byte) ([]byte, error) { |
|
537 for len(b) > 0 { |
|
538 var err error |
|
539 b = p.parseWhitespace(b) |
|
540 |
|
541 if len(b) > 0 && b[0] == '#' { |
|
542 _, b, err = scanComment(b) |
|
543 if err != nil { |
|
544 return nil, err |
|
545 } |
|
546 } |
|
547 |
|
548 if len(b) == 0 { |
|
549 break |
|
550 } |
|
551 |
|
552 if b[0] == '\n' || b[0] == '\r' { |
|
553 b, err = p.parseNewline(b) |
|
554 if err != nil { |
|
555 return nil, err |
|
556 } |
|
557 } else { |
|
558 break |
|
559 } |
|
560 } |
|
561 |
|
562 return b, nil |
|
563 } |
|
564 |
|
565 func (p *Parser) parseMultilineLiteralString(b []byte) ([]byte, []byte, []byte, error) { |
|
566 token, rest, err := scanMultilineLiteralString(b) |
|
567 if err != nil { |
|
568 return nil, nil, nil, err |
|
569 } |
|
570 |
|
571 i := 3 |
|
572 |
|
573 // skip the immediate new line |
|
574 if token[i] == '\n' { |
|
575 i++ |
|
576 } else if token[i] == '\r' && token[i+1] == '\n' { |
|
577 i += 2 |
|
578 } |
|
579 |
|
580 return token, token[i : len(token)-3], rest, err |
|
581 } |
|
582 |
|
// parseMultilineBasicString scans a multiline basic string at the start of b
// and decodes its escape sequences.
//
// It returns the scanned token (delimiters included), the decoded content,
// the remaining input, and an error. When the scanner reports no escape
// sequence, the content is a subslice of the token; otherwise it is a newly
// built buffer. A newline immediately after the opening delimiter is not part
// of the content.
//
//nolint:funlen,gocognit,cyclop
func (p *Parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, error) {
	// ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
	//                   ml-basic-string-delim
	// ml-basic-string-delim = 3quotation-mark
	// ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]
	//
	// mlb-content = mlb-char / newline / mlb-escaped-nl
	// mlb-char = mlb-unescaped / escaped
	// mlb-quotes = 1*2quotation-mark
	// mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
	// mlb-escaped-nl = escape ws newline *( wschar / newline )
	token, escaped, rest, err := scanMultilineBasicString(b)
	if err != nil {
		return nil, nil, nil, err
	}

	// i starts right after the opening delimiter.
	i := 3

	// skip the immediate new line
	if token[i] == '\n' {
		i++
	} else if token[i] == '\r' && token[i+1] == '\n' {
		i += 2
	}

	// fast path
	startIdx := i
	endIdx := len(token) - len(`"""`)

	if !escaped {
		// No escape sequence: validate and return a view into the token.
		str := token[startIdx:endIdx]
		verr := characters.Utf8TomlValidAlreadyEscaped(str)
		if verr.Zero() {
			return token, str, rest, nil
		}
		return nil, nil, nil, NewParserError(str[verr.Index:verr.Index+verr.Size], "invalid UTF-8")
	}

	var builder bytes.Buffer

	// The scanner ensures that the token starts and ends with quotes and that
	// escapes are balanced.
	for i < len(token)-3 {
		c := token[i]

		//nolint:nestif
		if c == '\\' {
			// When the last non-whitespace character on a line is an unescaped \,
			// it will be trimmed along with all whitespace (including newlines) up
			// to the next non-whitespace character or closing delimiter.

			isLastNonWhitespaceOnLine := false
			j := 1
		findEOLLoop:
			// Look ahead past spaces and tabs (and the '\r' of a "\r\n" pair);
			// the backslash is a line continuation only if '\n' is reached.
			for ; j < len(token)-3-i; j++ {
				switch token[i+j] {
				case ' ', '\t':
					continue
				case '\r':
					if token[i+j+1] == '\n' {
						continue
					}
				case '\n':
					isLastNonWhitespaceOnLine = true
				}
				break findEOLLoop
			}
			if isLastNonWhitespaceOnLine {
				i += j
				// Skip all whitespace and newlines that follow.
				for ; i < len(token)-3; i++ {
					c := token[i]
					if !(c == '\n' || c == '\r' || c == ' ' || c == '\t') {
						// Step back so that the i++ below lands on this byte.
						i--
						break
					}
				}
				i++
				continue
			}

			// handle escaping
			i++
			c = token[i]

			switch c {
			case '"', '\\':
				builder.WriteByte(c)
			case 'b':
				builder.WriteByte('\b')
			case 'f':
				builder.WriteByte('\f')
			case 'n':
				builder.WriteByte('\n')
			case 'r':
				builder.WriteByte('\r')
			case 't':
				builder.WriteByte('\t')
			case 'e':
				builder.WriteByte(0x1B)
			case 'u':
				// \uXXXX: four hex digits follow.
				x, err := hexToRune(atmost(token[i+1:], 4), 4)
				if err != nil {
					return nil, nil, nil, err
				}
				builder.WriteRune(x)
				i += 4
			case 'U':
				// \UXXXXXXXX: eight hex digits follow.
				x, err := hexToRune(atmost(token[i+1:], 8), 8)
				if err != nil {
					return nil, nil, nil, err
				}

				builder.WriteRune(x)
				i += 8
			default:
				return nil, nil, nil, NewParserError(token[i:i+1], "invalid escaped character %#U", c)
			}
			i++
		} else {
			// Copy one validated character; a size of 0 signals an invalid one.
			size := characters.Utf8ValidNext(token[i:])
			if size == 0 {
				return nil, nil, nil, NewParserError(token[i:i+1], "invalid character %#U", c)
			}
			builder.Write(token[i : i+size])
			i += size
		}
	}

	return token, builder.Bytes(), rest, nil
}
|
714 |
|
715 func (p *Parser) parseKey(b []byte) (reference, []byte, error) { |
|
716 // key = simple-key / dotted-key |
|
717 // simple-key = quoted-key / unquoted-key |
|
718 // |
|
719 // unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _ |
|
720 // quoted-key = basic-string / literal-string |
|
721 // dotted-key = simple-key 1*( dot-sep simple-key ) |
|
722 // |
|
723 // dot-sep = ws %x2E ws ; . Period |
|
724 raw, key, b, err := p.parseSimpleKey(b) |
|
725 if err != nil { |
|
726 return invalidReference, nil, err |
|
727 } |
|
728 |
|
729 ref := p.builder.Push(Node{ |
|
730 Kind: Key, |
|
731 Raw: p.Range(raw), |
|
732 Data: key, |
|
733 }) |
|
734 |
|
735 for { |
|
736 b = p.parseWhitespace(b) |
|
737 if len(b) > 0 && b[0] == '.' { |
|
738 b = p.parseWhitespace(b[1:]) |
|
739 |
|
740 raw, key, b, err = p.parseSimpleKey(b) |
|
741 if err != nil { |
|
742 return ref, nil, err |
|
743 } |
|
744 |
|
745 p.builder.PushAndChain(Node{ |
|
746 Kind: Key, |
|
747 Raw: p.Range(raw), |
|
748 Data: key, |
|
749 }) |
|
750 } else { |
|
751 break |
|
752 } |
|
753 } |
|
754 |
|
755 return ref, b, nil |
|
756 } |
|
757 |
|
758 func (p *Parser) parseSimpleKey(b []byte) (raw, key, rest []byte, err error) { |
|
759 if len(b) == 0 { |
|
760 return nil, nil, nil, NewParserError(b, "expected key but found none") |
|
761 } |
|
762 |
|
763 // simple-key = quoted-key / unquoted-key |
|
764 // unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _ |
|
765 // quoted-key = basic-string / literal-string |
|
766 switch { |
|
767 case b[0] == '\'': |
|
768 return p.parseLiteralString(b) |
|
769 case b[0] == '"': |
|
770 return p.parseBasicString(b) |
|
771 case isUnquotedKeyChar(b[0]): |
|
772 key, rest = scanUnquotedKey(b) |
|
773 return key, key, rest, nil |
|
774 default: |
|
775 return nil, nil, nil, NewParserError(b[0:1], "invalid character at start of key: %c", b[0]) |
|
776 } |
|
777 } |
|
778 |
|
// parseBasicString scans a single-line basic string at the start of b and
// decodes its escape sequences.
//
// It returns the scanned token (quotes included), the decoded content, the
// remaining input, and an error. When the scanner reports no escape sequence,
// the content is a subslice of the token; otherwise it is a newly built
// buffer.
//
//nolint:funlen,cyclop
func (p *Parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) {
	// basic-string = quotation-mark *basic-char quotation-mark
	// quotation-mark = %x22 ; "
	// basic-char = basic-unescaped / escaped
	// basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
	// escaped = escape escape-seq-char
	// escape-seq-char = %x22 ; " quotation mark U+0022
	// escape-seq-char =/ %x5C ; \ reverse solidus U+005C
	// escape-seq-char =/ %x62 ; b backspace U+0008
	// escape-seq-char =/ %x66 ; f form feed U+000C
	// escape-seq-char =/ %x6E ; n line feed U+000A
	// escape-seq-char =/ %x72 ; r carriage return U+000D
	// escape-seq-char =/ %x74 ; t tab U+0009
	// escape-seq-char =/ %x75 4HEXDIG ; uXXXX U+XXXX
	// escape-seq-char =/ %x55 8HEXDIG ; UXXXXXXXX U+XXXXXXXX
	token, escaped, rest, err := scanBasicString(b)
	if err != nil {
		return nil, nil, nil, err
	}

	// Bounds of the content between the quotes.
	startIdx := len(`"`)
	endIdx := len(token) - len(`"`)

	// Fast path. If there is no escape sequence, the string should just be
	// an UTF-8 encoded string, which is the same as Go. In that case,
	// validate the string and return a direct reference to the buffer.
	if !escaped {
		str := token[startIdx:endIdx]
		verr := characters.Utf8TomlValidAlreadyEscaped(str)
		if verr.Zero() {
			return token, str, rest, nil
		}
		return nil, nil, nil, NewParserError(str[verr.Index:verr.Index+verr.Size], "invalid UTF-8")
	}

	i := startIdx

	var builder bytes.Buffer

	// The scanner ensures that the token starts and ends with quotes and that
	// escapes are balanced.
	for i < len(token)-1 {
		c := token[i]
		if c == '\\' {
			// Move to the byte that selects the escape sequence.
			i++
			c = token[i]

			switch c {
			case '"', '\\':
				builder.WriteByte(c)
			case 'b':
				builder.WriteByte('\b')
			case 'f':
				builder.WriteByte('\f')
			case 'n':
				builder.WriteByte('\n')
			case 'r':
				builder.WriteByte('\r')
			case 't':
				builder.WriteByte('\t')
			case 'e':
				builder.WriteByte(0x1B)
			case 'u':
				// \uXXXX: the digits are looked up before the closing quote.
				x, err := hexToRune(token[i+1:len(token)-1], 4)
				if err != nil {
					return nil, nil, nil, err
				}

				builder.WriteRune(x)
				i += 4
			case 'U':
				// \UXXXXXXXX: the digits are looked up before the closing quote.
				x, err := hexToRune(token[i+1:len(token)-1], 8)
				if err != nil {
					return nil, nil, nil, err
				}

				builder.WriteRune(x)
				i += 8
			default:
				return nil, nil, nil, NewParserError(token[i:i+1], "invalid escaped character %#U", c)
			}
			i++
		} else {
			// Copy one validated character; a size of 0 signals an invalid one.
			size := characters.Utf8ValidNext(token[i:])
			if size == 0 {
				return nil, nil, nil, NewParserError(token[i:i+1], "invalid character %#U", c)
			}
			builder.Write(token[i : i+size])
			i += size
		}
	}

	return token, builder.Bytes(), rest, nil
}
|
874 |
|
875 func hexToRune(b []byte, length int) (rune, error) { |
|
876 if len(b) < length { |
|
877 return -1, NewParserError(b, "unicode point needs %d character, not %d", length, len(b)) |
|
878 } |
|
879 b = b[:length] |
|
880 |
|
881 var r uint32 |
|
882 for i, c := range b { |
|
883 d := uint32(0) |
|
884 switch { |
|
885 case '0' <= c && c <= '9': |
|
886 d = uint32(c - '0') |
|
887 case 'a' <= c && c <= 'f': |
|
888 d = uint32(c - 'a' + 10) |
|
889 case 'A' <= c && c <= 'F': |
|
890 d = uint32(c - 'A' + 10) |
|
891 default: |
|
892 return -1, NewParserError(b[i:i+1], "non-hex character") |
|
893 } |
|
894 r = r*16 + d |
|
895 } |
|
896 |
|
897 if r > unicode.MaxRune || 0xD800 <= r && r < 0xE000 { |
|
898 return -1, NewParserError(b, "escape sequence is invalid Unicode code point") |
|
899 } |
|
900 |
|
901 return rune(r), nil |
|
902 } |
|
903 |
|
// parseWhitespace returns what scanWhitespace reports as the rest of b,
// discarding the scanned whitespace itself.
func (p *Parser) parseWhitespace(b []byte) []byte {
	// ws = *wschar
	// wschar = %x20 ; Space
	// wschar =/ %x09 ; Horizontal tab
	_, rest := scanWhitespace(b)

	return rest
}
|
912 |
|
913 //nolint:cyclop |
|
914 func (p *Parser) parseIntOrFloatOrDateTime(b []byte) (reference, []byte, error) { |
|
915 switch b[0] { |
|
916 case 'i': |
|
917 if !scanFollowsInf(b) { |
|
918 return invalidReference, nil, NewParserError(atmost(b, 3), "expected 'inf'") |
|
919 } |
|
920 |
|
921 return p.builder.Push(Node{ |
|
922 Kind: Float, |
|
923 Data: b[:3], |
|
924 }), b[3:], nil |
|
925 case 'n': |
|
926 if !scanFollowsNan(b) { |
|
927 return invalidReference, nil, NewParserError(atmost(b, 3), "expected 'nan'") |
|
928 } |
|
929 |
|
930 return p.builder.Push(Node{ |
|
931 Kind: Float, |
|
932 Data: b[:3], |
|
933 }), b[3:], nil |
|
934 case '+', '-': |
|
935 return p.scanIntOrFloat(b) |
|
936 } |
|
937 |
|
938 if len(b) < 3 { |
|
939 return p.scanIntOrFloat(b) |
|
940 } |
|
941 |
|
942 s := 5 |
|
943 if len(b) < s { |
|
944 s = len(b) |
|
945 } |
|
946 |
|
947 for idx, c := range b[:s] { |
|
948 if isDigit(c) { |
|
949 continue |
|
950 } |
|
951 |
|
952 if idx == 2 && c == ':' || (idx == 4 && c == '-') { |
|
953 return p.scanDateTime(b) |
|
954 } |
|
955 |
|
956 break |
|
957 } |
|
958 |
|
959 return p.scanIntOrFloat(b) |
|
960 } |
|
961 |
|
962 func (p *Parser) scanDateTime(b []byte) (reference, []byte, error) { |
|
963 // scans for contiguous characters in [0-9T:Z.+-], and up to one space if |
|
964 // followed by a digit. |
|
965 hasDate := false |
|
966 hasTime := false |
|
967 hasTz := false |
|
968 seenSpace := false |
|
969 |
|
970 i := 0 |
|
971 byteLoop: |
|
972 for ; i < len(b); i++ { |
|
973 c := b[i] |
|
974 |
|
975 switch { |
|
976 case isDigit(c): |
|
977 case c == '-': |
|
978 hasDate = true |
|
979 const minOffsetOfTz = 8 |
|
980 if i >= minOffsetOfTz { |
|
981 hasTz = true |
|
982 } |
|
983 case c == 'T' || c == 't' || c == ':' || c == '.': |
|
984 hasTime = true |
|
985 case c == '+' || c == '-' || c == 'Z' || c == 'z': |
|
986 hasTz = true |
|
987 case c == ' ': |
|
988 if !seenSpace && i+1 < len(b) && isDigit(b[i+1]) { |
|
989 i += 2 |
|
990 // Avoid reaching past the end of the document in case the time |
|
991 // is malformed. See TestIssue585. |
|
992 if i >= len(b) { |
|
993 i-- |
|
994 } |
|
995 seenSpace = true |
|
996 hasTime = true |
|
997 } else { |
|
998 break byteLoop |
|
999 } |
|
1000 default: |
|
1001 break byteLoop |
|
1002 } |
|
1003 } |
|
1004 |
|
1005 var kind Kind |
|
1006 |
|
1007 if hasTime { |
|
1008 if hasDate { |
|
1009 if hasTz { |
|
1010 kind = DateTime |
|
1011 } else { |
|
1012 kind = LocalDateTime |
|
1013 } |
|
1014 } else { |
|
1015 kind = LocalTime |
|
1016 } |
|
1017 } else { |
|
1018 kind = LocalDate |
|
1019 } |
|
1020 |
|
1021 return p.builder.Push(Node{ |
|
1022 Kind: kind, |
|
1023 Data: b[:i], |
|
1024 }), b[i:], nil |
|
1025 } |
|
1026 |
|
1027 //nolint:funlen,gocognit,cyclop |
|
1028 func (p *Parser) scanIntOrFloat(b []byte) (reference, []byte, error) { |
|
1029 i := 0 |
|
1030 |
|
1031 if len(b) > 2 && b[0] == '0' && b[1] != '.' && b[1] != 'e' && b[1] != 'E' { |
|
1032 var isValidRune validRuneFn |
|
1033 |
|
1034 switch b[1] { |
|
1035 case 'x': |
|
1036 isValidRune = isValidHexRune |
|
1037 case 'o': |
|
1038 isValidRune = isValidOctalRune |
|
1039 case 'b': |
|
1040 isValidRune = isValidBinaryRune |
|
1041 default: |
|
1042 i++ |
|
1043 } |
|
1044 |
|
1045 if isValidRune != nil { |
|
1046 i += 2 |
|
1047 for ; i < len(b); i++ { |
|
1048 if !isValidRune(b[i]) { |
|
1049 break |
|
1050 } |
|
1051 } |
|
1052 } |
|
1053 |
|
1054 return p.builder.Push(Node{ |
|
1055 Kind: Integer, |
|
1056 Data: b[:i], |
|
1057 }), b[i:], nil |
|
1058 } |
|
1059 |
|
1060 isFloat := false |
|
1061 |
|
1062 for ; i < len(b); i++ { |
|
1063 c := b[i] |
|
1064 |
|
1065 if c >= '0' && c <= '9' || c == '+' || c == '-' || c == '_' { |
|
1066 continue |
|
1067 } |
|
1068 |
|
1069 if c == '.' || c == 'e' || c == 'E' { |
|
1070 isFloat = true |
|
1071 |
|
1072 continue |
|
1073 } |
|
1074 |
|
1075 if c == 'i' { |
|
1076 if scanFollowsInf(b[i:]) { |
|
1077 return p.builder.Push(Node{ |
|
1078 Kind: Float, |
|
1079 Data: b[:i+3], |
|
1080 }), b[i+3:], nil |
|
1081 } |
|
1082 |
|
1083 return invalidReference, nil, NewParserError(b[i:i+1], "unexpected character 'i' while scanning for a number") |
|
1084 } |
|
1085 |
|
1086 if c == 'n' { |
|
1087 if scanFollowsNan(b[i:]) { |
|
1088 return p.builder.Push(Node{ |
|
1089 Kind: Float, |
|
1090 Data: b[:i+3], |
|
1091 }), b[i+3:], nil |
|
1092 } |
|
1093 |
|
1094 return invalidReference, nil, NewParserError(b[i:i+1], "unexpected character 'n' while scanning for a number") |
|
1095 } |
|
1096 |
|
1097 break |
|
1098 } |
|
1099 |
|
1100 if i == 0 { |
|
1101 return invalidReference, b, NewParserError(b, "incomplete number") |
|
1102 } |
|
1103 |
|
1104 kind := Integer |
|
1105 |
|
1106 if isFloat { |
|
1107 kind = Float |
|
1108 } |
|
1109 |
|
1110 return p.builder.Push(Node{ |
|
1111 Kind: kind, |
|
1112 Data: b[:i], |
|
1113 }), b[i:], nil |
|
1114 } |
|
1115 |
|
// isDigit reports whether r is an ASCII decimal digit.
func isDigit(r byte) bool {
	return '0' <= r && r <= '9'
}
|
1119 |
|
// validRuneFn reports whether the byte r is accepted in the body of a number
// written in a given base. See isValidHexRune, isValidOctalRune and
// isValidBinaryRune.
type validRuneFn func(r byte) bool
|
1121 |
|
// isValidHexRune reports whether r is a hexadecimal digit (either case) or an
// underscore.
func isValidHexRune(r byte) bool {
	switch {
	case '0' <= r && r <= '9', 'a' <= r && r <= 'f', 'A' <= r && r <= 'F':
		return true
	default:
		return r == '_'
	}
}
|
1128 |
|
// isValidOctalRune reports whether r is an octal digit or an underscore.
func isValidOctalRune(r byte) bool {
	if r == '_' {
		return true
	}

	return '0' <= r && r <= '7'
}
|
1132 |
|
// isValidBinaryRune reports whether r is a binary digit or an underscore.
func isValidBinaryRune(r byte) bool {
	switch r {
	case '0', '1', '_':
		return true
	default:
		return false
	}
}
|
1136 |
|
1137 func expect(x byte, b []byte) ([]byte, error) { |
|
1138 if len(b) == 0 { |
|
1139 return nil, NewParserError(b, "expected character %c but the document ended here", x) |
|
1140 } |
|
1141 |
|
1142 if b[0] != x { |
|
1143 return nil, NewParserError(b[0:1], "expected character %c", x) |
|
1144 } |
|
1145 |
|
1146 return b[1:], nil |
|
1147 } |