1 package toml |
|
2 |
|
3 import ( |
|
4 "bytes" |
|
5 "unicode" |
|
6 |
|
7 "github.com/pelletier/go-toml/v2/internal/ast" |
|
8 "github.com/pelletier/go-toml/v2/internal/danger" |
|
9 ) |
|
10 |
|
11 type parser struct { |
|
12 builder ast.Builder |
|
13 ref ast.Reference |
|
14 data []byte |
|
15 left []byte |
|
16 err error |
|
17 first bool |
|
18 } |
|
19 |
|
20 func (p *parser) Range(b []byte) ast.Range { |
|
21 return ast.Range{ |
|
22 Offset: uint32(danger.SubsliceOffset(p.data, b)), |
|
23 Length: uint32(len(b)), |
|
24 } |
|
25 } |
|
26 |
|
27 func (p *parser) Raw(raw ast.Range) []byte { |
|
28 return p.data[raw.Offset : raw.Offset+raw.Length] |
|
29 } |
|
30 |
|
31 func (p *parser) Reset(b []byte) { |
|
32 p.builder.Reset() |
|
33 p.ref = ast.InvalidReference |
|
34 p.data = b |
|
35 p.left = b |
|
36 p.err = nil |
|
37 p.first = true |
|
38 } |
|
39 |
|
40 //nolint:cyclop |
|
41 func (p *parser) NextExpression() bool { |
|
42 if len(p.left) == 0 || p.err != nil { |
|
43 return false |
|
44 } |
|
45 |
|
46 p.builder.Reset() |
|
47 p.ref = ast.InvalidReference |
|
48 |
|
49 for { |
|
50 if len(p.left) == 0 || p.err != nil { |
|
51 return false |
|
52 } |
|
53 |
|
54 if !p.first { |
|
55 p.left, p.err = p.parseNewline(p.left) |
|
56 } |
|
57 |
|
58 if len(p.left) == 0 || p.err != nil { |
|
59 return false |
|
60 } |
|
61 |
|
62 p.ref, p.left, p.err = p.parseExpression(p.left) |
|
63 |
|
64 if p.err != nil { |
|
65 return false |
|
66 } |
|
67 |
|
68 p.first = false |
|
69 |
|
70 if p.ref.Valid() { |
|
71 return true |
|
72 } |
|
73 } |
|
74 } |
|
75 |
|
76 func (p *parser) Expression() *ast.Node { |
|
77 return p.builder.NodeAt(p.ref) |
|
78 } |
|
79 |
|
80 func (p *parser) Error() error { |
|
81 return p.err |
|
82 } |
|
83 |
|
84 func (p *parser) parseNewline(b []byte) ([]byte, error) { |
|
85 if b[0] == '\n' { |
|
86 return b[1:], nil |
|
87 } |
|
88 |
|
89 if b[0] == '\r' { |
|
90 _, rest, err := scanWindowsNewline(b) |
|
91 return rest, err |
|
92 } |
|
93 |
|
94 return nil, newDecodeError(b[0:1], "expected newline but got %#U", b[0]) |
|
95 } |
|
96 |
|
97 func (p *parser) parseExpression(b []byte) (ast.Reference, []byte, error) { |
|
98 // expression = ws [ comment ] |
|
99 // expression =/ ws keyval ws [ comment ] |
|
100 // expression =/ ws table ws [ comment ] |
|
101 ref := ast.InvalidReference |
|
102 |
|
103 b = p.parseWhitespace(b) |
|
104 |
|
105 if len(b) == 0 { |
|
106 return ref, b, nil |
|
107 } |
|
108 |
|
109 if b[0] == '#' { |
|
110 _, rest, err := scanComment(b) |
|
111 return ref, rest, err |
|
112 } |
|
113 |
|
114 if b[0] == '\n' || b[0] == '\r' { |
|
115 return ref, b, nil |
|
116 } |
|
117 |
|
118 var err error |
|
119 if b[0] == '[' { |
|
120 ref, b, err = p.parseTable(b) |
|
121 } else { |
|
122 ref, b, err = p.parseKeyval(b) |
|
123 } |
|
124 |
|
125 if err != nil { |
|
126 return ref, nil, err |
|
127 } |
|
128 |
|
129 b = p.parseWhitespace(b) |
|
130 |
|
131 if len(b) > 0 && b[0] == '#' { |
|
132 _, rest, err := scanComment(b) |
|
133 return ref, rest, err |
|
134 } |
|
135 |
|
136 return ref, b, nil |
|
137 } |
|
138 |
|
139 func (p *parser) parseTable(b []byte) (ast.Reference, []byte, error) { |
|
140 // table = std-table / array-table |
|
141 if len(b) > 1 && b[1] == '[' { |
|
142 return p.parseArrayTable(b) |
|
143 } |
|
144 |
|
145 return p.parseStdTable(b) |
|
146 } |
|
147 |
|
148 func (p *parser) parseArrayTable(b []byte) (ast.Reference, []byte, error) { |
|
149 // array-table = array-table-open key array-table-close |
|
150 // array-table-open = %x5B.5B ws ; [[ Double left square bracket |
|
151 // array-table-close = ws %x5D.5D ; ]] Double right square bracket |
|
152 ref := p.builder.Push(ast.Node{ |
|
153 Kind: ast.ArrayTable, |
|
154 }) |
|
155 |
|
156 b = b[2:] |
|
157 b = p.parseWhitespace(b) |
|
158 |
|
159 k, b, err := p.parseKey(b) |
|
160 if err != nil { |
|
161 return ref, nil, err |
|
162 } |
|
163 |
|
164 p.builder.AttachChild(ref, k) |
|
165 b = p.parseWhitespace(b) |
|
166 |
|
167 b, err = expect(']', b) |
|
168 if err != nil { |
|
169 return ref, nil, err |
|
170 } |
|
171 |
|
172 b, err = expect(']', b) |
|
173 |
|
174 return ref, b, err |
|
175 } |
|
176 |
|
177 func (p *parser) parseStdTable(b []byte) (ast.Reference, []byte, error) { |
|
178 // std-table = std-table-open key std-table-close |
|
179 // std-table-open = %x5B ws ; [ Left square bracket |
|
180 // std-table-close = ws %x5D ; ] Right square bracket |
|
181 ref := p.builder.Push(ast.Node{ |
|
182 Kind: ast.Table, |
|
183 }) |
|
184 |
|
185 b = b[1:] |
|
186 b = p.parseWhitespace(b) |
|
187 |
|
188 key, b, err := p.parseKey(b) |
|
189 if err != nil { |
|
190 return ref, nil, err |
|
191 } |
|
192 |
|
193 p.builder.AttachChild(ref, key) |
|
194 |
|
195 b = p.parseWhitespace(b) |
|
196 |
|
197 b, err = expect(']', b) |
|
198 |
|
199 return ref, b, err |
|
200 } |
|
201 |
|
202 func (p *parser) parseKeyval(b []byte) (ast.Reference, []byte, error) { |
|
203 // keyval = key keyval-sep val |
|
204 ref := p.builder.Push(ast.Node{ |
|
205 Kind: ast.KeyValue, |
|
206 }) |
|
207 |
|
208 key, b, err := p.parseKey(b) |
|
209 if err != nil { |
|
210 return ast.InvalidReference, nil, err |
|
211 } |
|
212 |
|
213 // keyval-sep = ws %x3D ws ; = |
|
214 |
|
215 b = p.parseWhitespace(b) |
|
216 |
|
217 if len(b) == 0 { |
|
218 return ast.InvalidReference, nil, newDecodeError(b, "expected = after a key, but the document ends there") |
|
219 } |
|
220 |
|
221 b, err = expect('=', b) |
|
222 if err != nil { |
|
223 return ast.InvalidReference, nil, err |
|
224 } |
|
225 |
|
226 b = p.parseWhitespace(b) |
|
227 |
|
228 valRef, b, err := p.parseVal(b) |
|
229 if err != nil { |
|
230 return ref, b, err |
|
231 } |
|
232 |
|
233 p.builder.Chain(valRef, key) |
|
234 p.builder.AttachChild(ref, valRef) |
|
235 |
|
236 return ref, b, err |
|
237 } |
|
238 |
|
239 //nolint:cyclop,funlen |
|
240 func (p *parser) parseVal(b []byte) (ast.Reference, []byte, error) { |
|
241 // val = string / boolean / array / inline-table / date-time / float / integer |
|
242 ref := ast.InvalidReference |
|
243 |
|
244 if len(b) == 0 { |
|
245 return ref, nil, newDecodeError(b, "expected value, not eof") |
|
246 } |
|
247 |
|
248 var err error |
|
249 c := b[0] |
|
250 |
|
251 switch c { |
|
252 case '"': |
|
253 var raw []byte |
|
254 var v []byte |
|
255 if scanFollowsMultilineBasicStringDelimiter(b) { |
|
256 raw, v, b, err = p.parseMultilineBasicString(b) |
|
257 } else { |
|
258 raw, v, b, err = p.parseBasicString(b) |
|
259 } |
|
260 |
|
261 if err == nil { |
|
262 ref = p.builder.Push(ast.Node{ |
|
263 Kind: ast.String, |
|
264 Raw: p.Range(raw), |
|
265 Data: v, |
|
266 }) |
|
267 } |
|
268 |
|
269 return ref, b, err |
|
270 case '\'': |
|
271 var raw []byte |
|
272 var v []byte |
|
273 if scanFollowsMultilineLiteralStringDelimiter(b) { |
|
274 raw, v, b, err = p.parseMultilineLiteralString(b) |
|
275 } else { |
|
276 raw, v, b, err = p.parseLiteralString(b) |
|
277 } |
|
278 |
|
279 if err == nil { |
|
280 ref = p.builder.Push(ast.Node{ |
|
281 Kind: ast.String, |
|
282 Raw: p.Range(raw), |
|
283 Data: v, |
|
284 }) |
|
285 } |
|
286 |
|
287 return ref, b, err |
|
288 case 't': |
|
289 if !scanFollowsTrue(b) { |
|
290 return ref, nil, newDecodeError(atmost(b, 4), "expected 'true'") |
|
291 } |
|
292 |
|
293 ref = p.builder.Push(ast.Node{ |
|
294 Kind: ast.Bool, |
|
295 Data: b[:4], |
|
296 }) |
|
297 |
|
298 return ref, b[4:], nil |
|
299 case 'f': |
|
300 if !scanFollowsFalse(b) { |
|
301 return ref, nil, newDecodeError(atmost(b, 5), "expected 'false'") |
|
302 } |
|
303 |
|
304 ref = p.builder.Push(ast.Node{ |
|
305 Kind: ast.Bool, |
|
306 Data: b[:5], |
|
307 }) |
|
308 |
|
309 return ref, b[5:], nil |
|
310 case '[': |
|
311 return p.parseValArray(b) |
|
312 case '{': |
|
313 return p.parseInlineTable(b) |
|
314 default: |
|
315 return p.parseIntOrFloatOrDateTime(b) |
|
316 } |
|
317 } |
|
318 |
|
// atmost returns the first n bytes of b, or all of b when it holds n bytes or
// fewer. The result shares its backing array with b.
func atmost(b []byte, n int) []byte {
	if len(b) > n {
		return b[:n]
	}

	return b
}
|
326 |
|
327 func (p *parser) parseLiteralString(b []byte) ([]byte, []byte, []byte, error) { |
|
328 v, rest, err := scanLiteralString(b) |
|
329 if err != nil { |
|
330 return nil, nil, nil, err |
|
331 } |
|
332 |
|
333 return v, v[1 : len(v)-1], rest, nil |
|
334 } |
|
335 |
|
336 func (p *parser) parseInlineTable(b []byte) (ast.Reference, []byte, error) { |
|
337 // inline-table = inline-table-open [ inline-table-keyvals ] inline-table-close |
|
338 // inline-table-open = %x7B ws ; { |
|
339 // inline-table-close = ws %x7D ; } |
|
340 // inline-table-sep = ws %x2C ws ; , Comma |
|
341 // inline-table-keyvals = keyval [ inline-table-sep inline-table-keyvals ] |
|
342 parent := p.builder.Push(ast.Node{ |
|
343 Kind: ast.InlineTable, |
|
344 }) |
|
345 |
|
346 first := true |
|
347 |
|
348 var child ast.Reference |
|
349 |
|
350 b = b[1:] |
|
351 |
|
352 var err error |
|
353 |
|
354 for len(b) > 0 { |
|
355 previousB := b |
|
356 b = p.parseWhitespace(b) |
|
357 |
|
358 if len(b) == 0 { |
|
359 return parent, nil, newDecodeError(previousB[:1], "inline table is incomplete") |
|
360 } |
|
361 |
|
362 if b[0] == '}' { |
|
363 break |
|
364 } |
|
365 |
|
366 if !first { |
|
367 b, err = expect(',', b) |
|
368 if err != nil { |
|
369 return parent, nil, err |
|
370 } |
|
371 b = p.parseWhitespace(b) |
|
372 } |
|
373 |
|
374 var kv ast.Reference |
|
375 |
|
376 kv, b, err = p.parseKeyval(b) |
|
377 if err != nil { |
|
378 return parent, nil, err |
|
379 } |
|
380 |
|
381 if first { |
|
382 p.builder.AttachChild(parent, kv) |
|
383 } else { |
|
384 p.builder.Chain(child, kv) |
|
385 } |
|
386 child = kv |
|
387 |
|
388 first = false |
|
389 } |
|
390 |
|
391 rest, err := expect('}', b) |
|
392 |
|
393 return parent, rest, err |
|
394 } |
|
395 |
|
396 //nolint:funlen,cyclop |
|
397 func (p *parser) parseValArray(b []byte) (ast.Reference, []byte, error) { |
|
398 // array = array-open [ array-values ] ws-comment-newline array-close |
|
399 // array-open = %x5B ; [ |
|
400 // array-close = %x5D ; ] |
|
401 // array-values = ws-comment-newline val ws-comment-newline array-sep array-values |
|
402 // array-values =/ ws-comment-newline val ws-comment-newline [ array-sep ] |
|
403 // array-sep = %x2C ; , Comma |
|
404 // ws-comment-newline = *( wschar / [ comment ] newline ) |
|
405 arrayStart := b |
|
406 b = b[1:] |
|
407 |
|
408 parent := p.builder.Push(ast.Node{ |
|
409 Kind: ast.Array, |
|
410 }) |
|
411 |
|
412 first := true |
|
413 |
|
414 var lastChild ast.Reference |
|
415 |
|
416 var err error |
|
417 for len(b) > 0 { |
|
418 b, err = p.parseOptionalWhitespaceCommentNewline(b) |
|
419 if err != nil { |
|
420 return parent, nil, err |
|
421 } |
|
422 |
|
423 if len(b) == 0 { |
|
424 return parent, nil, newDecodeError(arrayStart[:1], "array is incomplete") |
|
425 } |
|
426 |
|
427 if b[0] == ']' { |
|
428 break |
|
429 } |
|
430 |
|
431 if b[0] == ',' { |
|
432 if first { |
|
433 return parent, nil, newDecodeError(b[0:1], "array cannot start with comma") |
|
434 } |
|
435 b = b[1:] |
|
436 |
|
437 b, err = p.parseOptionalWhitespaceCommentNewline(b) |
|
438 if err != nil { |
|
439 return parent, nil, err |
|
440 } |
|
441 } else if !first { |
|
442 return parent, nil, newDecodeError(b[0:1], "array elements must be separated by commas") |
|
443 } |
|
444 |
|
445 // TOML allows trailing commas in arrays. |
|
446 if len(b) > 0 && b[0] == ']' { |
|
447 break |
|
448 } |
|
449 |
|
450 var valueRef ast.Reference |
|
451 valueRef, b, err = p.parseVal(b) |
|
452 if err != nil { |
|
453 return parent, nil, err |
|
454 } |
|
455 |
|
456 if first { |
|
457 p.builder.AttachChild(parent, valueRef) |
|
458 } else { |
|
459 p.builder.Chain(lastChild, valueRef) |
|
460 } |
|
461 lastChild = valueRef |
|
462 |
|
463 b, err = p.parseOptionalWhitespaceCommentNewline(b) |
|
464 if err != nil { |
|
465 return parent, nil, err |
|
466 } |
|
467 first = false |
|
468 } |
|
469 |
|
470 rest, err := expect(']', b) |
|
471 |
|
472 return parent, rest, err |
|
473 } |
|
474 |
|
475 func (p *parser) parseOptionalWhitespaceCommentNewline(b []byte) ([]byte, error) { |
|
476 for len(b) > 0 { |
|
477 var err error |
|
478 b = p.parseWhitespace(b) |
|
479 |
|
480 if len(b) > 0 && b[0] == '#' { |
|
481 _, b, err = scanComment(b) |
|
482 if err != nil { |
|
483 return nil, err |
|
484 } |
|
485 } |
|
486 |
|
487 if len(b) == 0 { |
|
488 break |
|
489 } |
|
490 |
|
491 if b[0] == '\n' || b[0] == '\r' { |
|
492 b, err = p.parseNewline(b) |
|
493 if err != nil { |
|
494 return nil, err |
|
495 } |
|
496 } else { |
|
497 break |
|
498 } |
|
499 } |
|
500 |
|
501 return b, nil |
|
502 } |
|
503 |
|
504 func (p *parser) parseMultilineLiteralString(b []byte) ([]byte, []byte, []byte, error) { |
|
505 token, rest, err := scanMultilineLiteralString(b) |
|
506 if err != nil { |
|
507 return nil, nil, nil, err |
|
508 } |
|
509 |
|
510 i := 3 |
|
511 |
|
512 // skip the immediate new line |
|
513 if token[i] == '\n' { |
|
514 i++ |
|
515 } else if token[i] == '\r' && token[i+1] == '\n' { |
|
516 i += 2 |
|
517 } |
|
518 |
|
519 return token, token[i : len(token)-3], rest, err |
|
520 } |
|
521 |
|
522 //nolint:funlen,gocognit,cyclop |
|
523 func (p *parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, error) { |
|
524 // ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body |
|
525 // ml-basic-string-delim |
|
526 // ml-basic-string-delim = 3quotation-mark |
|
527 // ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ] |
|
528 // |
|
529 // mlb-content = mlb-char / newline / mlb-escaped-nl |
|
530 // mlb-char = mlb-unescaped / escaped |
|
531 // mlb-quotes = 1*2quotation-mark |
|
532 // mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii |
|
533 // mlb-escaped-nl = escape ws newline *( wschar / newline ) |
|
534 token, escaped, rest, err := scanMultilineBasicString(b) |
|
535 if err != nil { |
|
536 return nil, nil, nil, err |
|
537 } |
|
538 |
|
539 i := 3 |
|
540 |
|
541 // skip the immediate new line |
|
542 if token[i] == '\n' { |
|
543 i++ |
|
544 } else if token[i] == '\r' && token[i+1] == '\n' { |
|
545 i += 2 |
|
546 } |
|
547 |
|
548 // fast path |
|
549 startIdx := i |
|
550 endIdx := len(token) - len(`"""`) |
|
551 |
|
552 if !escaped { |
|
553 str := token[startIdx:endIdx] |
|
554 verr := utf8TomlValidAlreadyEscaped(str) |
|
555 if verr.Zero() { |
|
556 return token, str, rest, nil |
|
557 } |
|
558 return nil, nil, nil, newDecodeError(str[verr.Index:verr.Index+verr.Size], "invalid UTF-8") |
|
559 } |
|
560 |
|
561 var builder bytes.Buffer |
|
562 |
|
563 // The scanner ensures that the token starts and ends with quotes and that |
|
564 // escapes are balanced. |
|
565 for i < len(token)-3 { |
|
566 c := token[i] |
|
567 |
|
568 //nolint:nestif |
|
569 if c == '\\' { |
|
570 // When the last non-whitespace character on a line is an unescaped \, |
|
571 // it will be trimmed along with all whitespace (including newlines) up |
|
572 // to the next non-whitespace character or closing delimiter. |
|
573 |
|
574 isLastNonWhitespaceOnLine := false |
|
575 j := 1 |
|
576 findEOLLoop: |
|
577 for ; j < len(token)-3-i; j++ { |
|
578 switch token[i+j] { |
|
579 case ' ', '\t': |
|
580 continue |
|
581 case '\r': |
|
582 if token[i+j+1] == '\n' { |
|
583 continue |
|
584 } |
|
585 case '\n': |
|
586 isLastNonWhitespaceOnLine = true |
|
587 } |
|
588 break findEOLLoop |
|
589 } |
|
590 if isLastNonWhitespaceOnLine { |
|
591 i += j |
|
592 for ; i < len(token)-3; i++ { |
|
593 c := token[i] |
|
594 if !(c == '\n' || c == '\r' || c == ' ' || c == '\t') { |
|
595 i-- |
|
596 break |
|
597 } |
|
598 } |
|
599 i++ |
|
600 continue |
|
601 } |
|
602 |
|
603 // handle escaping |
|
604 i++ |
|
605 c = token[i] |
|
606 |
|
607 switch c { |
|
608 case '"', '\\': |
|
609 builder.WriteByte(c) |
|
610 case 'b': |
|
611 builder.WriteByte('\b') |
|
612 case 'f': |
|
613 builder.WriteByte('\f') |
|
614 case 'n': |
|
615 builder.WriteByte('\n') |
|
616 case 'r': |
|
617 builder.WriteByte('\r') |
|
618 case 't': |
|
619 builder.WriteByte('\t') |
|
620 case 'e': |
|
621 builder.WriteByte(0x1B) |
|
622 case 'u': |
|
623 x, err := hexToRune(atmost(token[i+1:], 4), 4) |
|
624 if err != nil { |
|
625 return nil, nil, nil, err |
|
626 } |
|
627 builder.WriteRune(x) |
|
628 i += 4 |
|
629 case 'U': |
|
630 x, err := hexToRune(atmost(token[i+1:], 8), 8) |
|
631 if err != nil { |
|
632 return nil, nil, nil, err |
|
633 } |
|
634 |
|
635 builder.WriteRune(x) |
|
636 i += 8 |
|
637 default: |
|
638 return nil, nil, nil, newDecodeError(token[i:i+1], "invalid escaped character %#U", c) |
|
639 } |
|
640 i++ |
|
641 } else { |
|
642 size := utf8ValidNext(token[i:]) |
|
643 if size == 0 { |
|
644 return nil, nil, nil, newDecodeError(token[i:i+1], "invalid character %#U", c) |
|
645 } |
|
646 builder.Write(token[i : i+size]) |
|
647 i += size |
|
648 } |
|
649 } |
|
650 |
|
651 return token, builder.Bytes(), rest, nil |
|
652 } |
|
653 |
|
654 func (p *parser) parseKey(b []byte) (ast.Reference, []byte, error) { |
|
655 // key = simple-key / dotted-key |
|
656 // simple-key = quoted-key / unquoted-key |
|
657 // |
|
658 // unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _ |
|
659 // quoted-key = basic-string / literal-string |
|
660 // dotted-key = simple-key 1*( dot-sep simple-key ) |
|
661 // |
|
662 // dot-sep = ws %x2E ws ; . Period |
|
663 raw, key, b, err := p.parseSimpleKey(b) |
|
664 if err != nil { |
|
665 return ast.InvalidReference, nil, err |
|
666 } |
|
667 |
|
668 ref := p.builder.Push(ast.Node{ |
|
669 Kind: ast.Key, |
|
670 Raw: p.Range(raw), |
|
671 Data: key, |
|
672 }) |
|
673 |
|
674 for { |
|
675 b = p.parseWhitespace(b) |
|
676 if len(b) > 0 && b[0] == '.' { |
|
677 b = p.parseWhitespace(b[1:]) |
|
678 |
|
679 raw, key, b, err = p.parseSimpleKey(b) |
|
680 if err != nil { |
|
681 return ref, nil, err |
|
682 } |
|
683 |
|
684 p.builder.PushAndChain(ast.Node{ |
|
685 Kind: ast.Key, |
|
686 Raw: p.Range(raw), |
|
687 Data: key, |
|
688 }) |
|
689 } else { |
|
690 break |
|
691 } |
|
692 } |
|
693 |
|
694 return ref, b, nil |
|
695 } |
|
696 |
|
697 func (p *parser) parseSimpleKey(b []byte) (raw, key, rest []byte, err error) { |
|
698 if len(b) == 0 { |
|
699 return nil, nil, nil, newDecodeError(b, "expected key but found none") |
|
700 } |
|
701 |
|
702 // simple-key = quoted-key / unquoted-key |
|
703 // unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _ |
|
704 // quoted-key = basic-string / literal-string |
|
705 switch { |
|
706 case b[0] == '\'': |
|
707 return p.parseLiteralString(b) |
|
708 case b[0] == '"': |
|
709 return p.parseBasicString(b) |
|
710 case isUnquotedKeyChar(b[0]): |
|
711 key, rest = scanUnquotedKey(b) |
|
712 return key, key, rest, nil |
|
713 default: |
|
714 return nil, nil, nil, newDecodeError(b[0:1], "invalid character at start of key: %c", b[0]) |
|
715 } |
|
716 } |
|
717 |
|
718 //nolint:funlen,cyclop |
|
719 func (p *parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) { |
|
720 // basic-string = quotation-mark *basic-char quotation-mark |
|
721 // quotation-mark = %x22 ; " |
|
722 // basic-char = basic-unescaped / escaped |
|
723 // basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii |
|
724 // escaped = escape escape-seq-char |
|
725 // escape-seq-char = %x22 ; " quotation mark U+0022 |
|
726 // escape-seq-char =/ %x5C ; \ reverse solidus U+005C |
|
727 // escape-seq-char =/ %x62 ; b backspace U+0008 |
|
728 // escape-seq-char =/ %x66 ; f form feed U+000C |
|
729 // escape-seq-char =/ %x6E ; n line feed U+000A |
|
730 // escape-seq-char =/ %x72 ; r carriage return U+000D |
|
731 // escape-seq-char =/ %x74 ; t tab U+0009 |
|
732 // escape-seq-char =/ %x75 4HEXDIG ; uXXXX U+XXXX |
|
733 // escape-seq-char =/ %x55 8HEXDIG ; UXXXXXXXX U+XXXXXXXX |
|
734 token, escaped, rest, err := scanBasicString(b) |
|
735 if err != nil { |
|
736 return nil, nil, nil, err |
|
737 } |
|
738 |
|
739 startIdx := len(`"`) |
|
740 endIdx := len(token) - len(`"`) |
|
741 |
|
742 // Fast path. If there is no escape sequence, the string should just be |
|
743 // an UTF-8 encoded string, which is the same as Go. In that case, |
|
744 // validate the string and return a direct reference to the buffer. |
|
745 if !escaped { |
|
746 str := token[startIdx:endIdx] |
|
747 verr := utf8TomlValidAlreadyEscaped(str) |
|
748 if verr.Zero() { |
|
749 return token, str, rest, nil |
|
750 } |
|
751 return nil, nil, nil, newDecodeError(str[verr.Index:verr.Index+verr.Size], "invalid UTF-8") |
|
752 } |
|
753 |
|
754 i := startIdx |
|
755 |
|
756 var builder bytes.Buffer |
|
757 |
|
758 // The scanner ensures that the token starts and ends with quotes and that |
|
759 // escapes are balanced. |
|
760 for i < len(token)-1 { |
|
761 c := token[i] |
|
762 if c == '\\' { |
|
763 i++ |
|
764 c = token[i] |
|
765 |
|
766 switch c { |
|
767 case '"', '\\': |
|
768 builder.WriteByte(c) |
|
769 case 'b': |
|
770 builder.WriteByte('\b') |
|
771 case 'f': |
|
772 builder.WriteByte('\f') |
|
773 case 'n': |
|
774 builder.WriteByte('\n') |
|
775 case 'r': |
|
776 builder.WriteByte('\r') |
|
777 case 't': |
|
778 builder.WriteByte('\t') |
|
779 case 'e': |
|
780 builder.WriteByte(0x1B) |
|
781 case 'u': |
|
782 x, err := hexToRune(token[i+1:len(token)-1], 4) |
|
783 if err != nil { |
|
784 return nil, nil, nil, err |
|
785 } |
|
786 |
|
787 builder.WriteRune(x) |
|
788 i += 4 |
|
789 case 'U': |
|
790 x, err := hexToRune(token[i+1:len(token)-1], 8) |
|
791 if err != nil { |
|
792 return nil, nil, nil, err |
|
793 } |
|
794 |
|
795 builder.WriteRune(x) |
|
796 i += 8 |
|
797 default: |
|
798 return nil, nil, nil, newDecodeError(token[i:i+1], "invalid escaped character %#U", c) |
|
799 } |
|
800 i++ |
|
801 } else { |
|
802 size := utf8ValidNext(token[i:]) |
|
803 if size == 0 { |
|
804 return nil, nil, nil, newDecodeError(token[i:i+1], "invalid character %#U", c) |
|
805 } |
|
806 builder.Write(token[i : i+size]) |
|
807 i += size |
|
808 } |
|
809 } |
|
810 |
|
811 return token, builder.Bytes(), rest, nil |
|
812 } |
|
813 |
|
814 func hexToRune(b []byte, length int) (rune, error) { |
|
815 if len(b) < length { |
|
816 return -1, newDecodeError(b, "unicode point needs %d character, not %d", length, len(b)) |
|
817 } |
|
818 b = b[:length] |
|
819 |
|
820 var r uint32 |
|
821 for i, c := range b { |
|
822 d := uint32(0) |
|
823 switch { |
|
824 case '0' <= c && c <= '9': |
|
825 d = uint32(c - '0') |
|
826 case 'a' <= c && c <= 'f': |
|
827 d = uint32(c - 'a' + 10) |
|
828 case 'A' <= c && c <= 'F': |
|
829 d = uint32(c - 'A' + 10) |
|
830 default: |
|
831 return -1, newDecodeError(b[i:i+1], "non-hex character") |
|
832 } |
|
833 r = r*16 + d |
|
834 } |
|
835 |
|
836 if r > unicode.MaxRune || 0xD800 <= r && r < 0xE000 { |
|
837 return -1, newDecodeError(b, "escape sequence is invalid Unicode code point") |
|
838 } |
|
839 |
|
840 return rune(r), nil |
|
841 } |
|
842 |
|
843 func (p *parser) parseWhitespace(b []byte) []byte { |
|
844 // ws = *wschar |
|
845 // wschar = %x20 ; Space |
|
846 // wschar =/ %x09 ; Horizontal tab |
|
847 _, rest := scanWhitespace(b) |
|
848 |
|
849 return rest |
|
850 } |
|
851 |
|
852 //nolint:cyclop |
|
853 func (p *parser) parseIntOrFloatOrDateTime(b []byte) (ast.Reference, []byte, error) { |
|
854 switch b[0] { |
|
855 case 'i': |
|
856 if !scanFollowsInf(b) { |
|
857 return ast.InvalidReference, nil, newDecodeError(atmost(b, 3), "expected 'inf'") |
|
858 } |
|
859 |
|
860 return p.builder.Push(ast.Node{ |
|
861 Kind: ast.Float, |
|
862 Data: b[:3], |
|
863 }), b[3:], nil |
|
864 case 'n': |
|
865 if !scanFollowsNan(b) { |
|
866 return ast.InvalidReference, nil, newDecodeError(atmost(b, 3), "expected 'nan'") |
|
867 } |
|
868 |
|
869 return p.builder.Push(ast.Node{ |
|
870 Kind: ast.Float, |
|
871 Data: b[:3], |
|
872 }), b[3:], nil |
|
873 case '+', '-': |
|
874 return p.scanIntOrFloat(b) |
|
875 } |
|
876 |
|
877 if len(b) < 3 { |
|
878 return p.scanIntOrFloat(b) |
|
879 } |
|
880 |
|
881 s := 5 |
|
882 if len(b) < s { |
|
883 s = len(b) |
|
884 } |
|
885 |
|
886 for idx, c := range b[:s] { |
|
887 if isDigit(c) { |
|
888 continue |
|
889 } |
|
890 |
|
891 if idx == 2 && c == ':' || (idx == 4 && c == '-') { |
|
892 return p.scanDateTime(b) |
|
893 } |
|
894 |
|
895 break |
|
896 } |
|
897 |
|
898 return p.scanIntOrFloat(b) |
|
899 } |
|
900 |
|
901 func (p *parser) scanDateTime(b []byte) (ast.Reference, []byte, error) { |
|
902 // scans for contiguous characters in [0-9T:Z.+-], and up to one space if |
|
903 // followed by a digit. |
|
904 hasDate := false |
|
905 hasTime := false |
|
906 hasTz := false |
|
907 seenSpace := false |
|
908 |
|
909 i := 0 |
|
910 byteLoop: |
|
911 for ; i < len(b); i++ { |
|
912 c := b[i] |
|
913 |
|
914 switch { |
|
915 case isDigit(c): |
|
916 case c == '-': |
|
917 hasDate = true |
|
918 const minOffsetOfTz = 8 |
|
919 if i >= minOffsetOfTz { |
|
920 hasTz = true |
|
921 } |
|
922 case c == 'T' || c == 't' || c == ':' || c == '.': |
|
923 hasTime = true |
|
924 case c == '+' || c == '-' || c == 'Z' || c == 'z': |
|
925 hasTz = true |
|
926 case c == ' ': |
|
927 if !seenSpace && i+1 < len(b) && isDigit(b[i+1]) { |
|
928 i += 2 |
|
929 // Avoid reaching past the end of the document in case the time |
|
930 // is malformed. See TestIssue585. |
|
931 if i >= len(b) { |
|
932 i-- |
|
933 } |
|
934 seenSpace = true |
|
935 hasTime = true |
|
936 } else { |
|
937 break byteLoop |
|
938 } |
|
939 default: |
|
940 break byteLoop |
|
941 } |
|
942 } |
|
943 |
|
944 var kind ast.Kind |
|
945 |
|
946 if hasTime { |
|
947 if hasDate { |
|
948 if hasTz { |
|
949 kind = ast.DateTime |
|
950 } else { |
|
951 kind = ast.LocalDateTime |
|
952 } |
|
953 } else { |
|
954 kind = ast.LocalTime |
|
955 } |
|
956 } else { |
|
957 kind = ast.LocalDate |
|
958 } |
|
959 |
|
960 return p.builder.Push(ast.Node{ |
|
961 Kind: kind, |
|
962 Data: b[:i], |
|
963 }), b[i:], nil |
|
964 } |
|
965 |
|
966 //nolint:funlen,gocognit,cyclop |
|
967 func (p *parser) scanIntOrFloat(b []byte) (ast.Reference, []byte, error) { |
|
968 i := 0 |
|
969 |
|
970 if len(b) > 2 && b[0] == '0' && b[1] != '.' && b[1] != 'e' && b[1] != 'E' { |
|
971 var isValidRune validRuneFn |
|
972 |
|
973 switch b[1] { |
|
974 case 'x': |
|
975 isValidRune = isValidHexRune |
|
976 case 'o': |
|
977 isValidRune = isValidOctalRune |
|
978 case 'b': |
|
979 isValidRune = isValidBinaryRune |
|
980 default: |
|
981 i++ |
|
982 } |
|
983 |
|
984 if isValidRune != nil { |
|
985 i += 2 |
|
986 for ; i < len(b); i++ { |
|
987 if !isValidRune(b[i]) { |
|
988 break |
|
989 } |
|
990 } |
|
991 } |
|
992 |
|
993 return p.builder.Push(ast.Node{ |
|
994 Kind: ast.Integer, |
|
995 Data: b[:i], |
|
996 }), b[i:], nil |
|
997 } |
|
998 |
|
999 isFloat := false |
|
1000 |
|
1001 for ; i < len(b); i++ { |
|
1002 c := b[i] |
|
1003 |
|
1004 if c >= '0' && c <= '9' || c == '+' || c == '-' || c == '_' { |
|
1005 continue |
|
1006 } |
|
1007 |
|
1008 if c == '.' || c == 'e' || c == 'E' { |
|
1009 isFloat = true |
|
1010 |
|
1011 continue |
|
1012 } |
|
1013 |
|
1014 if c == 'i' { |
|
1015 if scanFollowsInf(b[i:]) { |
|
1016 return p.builder.Push(ast.Node{ |
|
1017 Kind: ast.Float, |
|
1018 Data: b[:i+3], |
|
1019 }), b[i+3:], nil |
|
1020 } |
|
1021 |
|
1022 return ast.InvalidReference, nil, newDecodeError(b[i:i+1], "unexpected character 'i' while scanning for a number") |
|
1023 } |
|
1024 |
|
1025 if c == 'n' { |
|
1026 if scanFollowsNan(b[i:]) { |
|
1027 return p.builder.Push(ast.Node{ |
|
1028 Kind: ast.Float, |
|
1029 Data: b[:i+3], |
|
1030 }), b[i+3:], nil |
|
1031 } |
|
1032 |
|
1033 return ast.InvalidReference, nil, newDecodeError(b[i:i+1], "unexpected character 'n' while scanning for a number") |
|
1034 } |
|
1035 |
|
1036 break |
|
1037 } |
|
1038 |
|
1039 if i == 0 { |
|
1040 return ast.InvalidReference, b, newDecodeError(b, "incomplete number") |
|
1041 } |
|
1042 |
|
1043 kind := ast.Integer |
|
1044 |
|
1045 if isFloat { |
|
1046 kind = ast.Float |
|
1047 } |
|
1048 |
|
1049 return p.builder.Push(ast.Node{ |
|
1050 Kind: kind, |
|
1051 Data: b[:i], |
|
1052 }), b[i:], nil |
|
1053 } |
|
1054 |
|
// isDigit reports whether r is an ASCII decimal digit.
func isDigit(r byte) bool {
	return '0' <= r && r <= '9'
}
|
1058 |
|
// validRuneFn reports whether a byte is accepted as part of the digits of a
// prefixed integer literal.
type validRuneFn func(r byte) bool

// isValidHexRune reports whether r is a hexadecimal digit (either case) or an
// underscore.
func isValidHexRune(r byte) bool {
	switch {
	case r == '_':
		return true
	case '0' <= r && r <= '9':
		return true
	case 'a' <= r && r <= 'f':
		return true
	case 'A' <= r && r <= 'F':
		return true
	}

	return false
}
|
1067 |
|
// isValidOctalRune reports whether r is an octal digit or an underscore.
func isValidOctalRune(r byte) bool {
	if r == '_' {
		return true
	}

	return '0' <= r && r <= '7'
}
|
1071 |
|
// isValidBinaryRune reports whether r is a binary digit or an underscore.
func isValidBinaryRune(r byte) bool {
	switch r {
	case '0', '1', '_':
		return true
	}

	return false
}
|
1075 |
|
1076 func expect(x byte, b []byte) ([]byte, error) { |
|
1077 if len(b) == 0 { |
|
1078 return nil, newDecodeError(b, "expected character %c but the document ended here", x) |
|
1079 } |
|
1080 |
|
1081 if b[0] != x { |
|
1082 return nil, newDecodeError(b[0:1], "expected character %c", x) |
|
1083 } |
|
1084 |
|
1085 return b[1:], nil |
|
1086 } |
|