vendor/github.com/pelletier/go-toml/v2/unstable/parser.go
changeset 265 05c40b36d3b2
parent 260 445e01aede7e
child 268 4dd196a4ee7c
equal deleted inserted replaced
264:8f478162d991 265:05c40b36d3b2
       
     1 package unstable
       
     2 
       
     3 import (
       
     4 	"bytes"
       
     5 	"fmt"
       
     6 	"unicode"
       
     7 
       
     8 	"github.com/pelletier/go-toml/v2/internal/characters"
       
     9 	"github.com/pelletier/go-toml/v2/internal/danger"
       
    10 )
       
    11 
       
    12 // ParserError describes an error relative to the content of the document.
       
    13 //
       
    14 // It cannot outlive the instance of Parser it refers to, and may cause panics
       
    15 // if the parser is reset.
       
    16 type ParserError struct {
       
    17 	Highlight []byte
       
    18 	Message   string
       
    19 	Key       []string // optional
       
    20 }
       
    21 
       
    22 // Error is the implementation of the error interface.
       
    23 func (e *ParserError) Error() string {
       
    24 	return e.Message
       
    25 }
       
    26 
       
    27 // NewParserError is a convenience function to create a ParserError
       
    28 //
       
    29 // Warning: Highlight needs to be a subslice of Parser.data, so only slices
       
    30 // returned by Parser.Raw are valid candidates.
       
    31 func NewParserError(highlight []byte, format string, args ...interface{}) error {
       
    32 	return &ParserError{
       
    33 		Highlight: highlight,
       
    34 		Message:   fmt.Errorf(format, args...).Error(),
       
    35 	}
       
    36 }
       
    37 
       
// Parser scans over a TOML-encoded document and generates an iterative AST.
//
// To prime the Parser, first reset it with the contents of a TOML document.
// Then, process all top-level expressions sequentially. See Example.
//
// Don't forget to check Error() after you're done parsing.
//
// Each top-level expression needs to be fully processed before calling
// NextExpression() again. Otherwise, calls to various Node methods may panic if
// the parser has moved on to the next expression.
//
// For performance reasons, go-toml doesn't make a copy of the input bytes to
// the parser. Make sure to copy all the bytes you need to outlive the slice
// given to the parser.
//
// The parser doesn't provide nodes for comments yet, nor for whitespace.
type Parser struct {
	data    []byte    // input given to the last call to Reset
	builder builder   // node storage, reset at the start of each NextExpression
	ref     reference // root of the last parsed expression, returned by Expression
	left    []byte    // part of the input not consumed yet
	err     error     // parse error, if any; once set, NextExpression returns false
	first   bool      // true until an expression has been parsed; when false, a newline is consumed before the next one
}
       
    62 
       
// Data returns the slice provided to the last call to Reset. The slice is
// returned as-is, not copied.
func (p *Parser) Data() []byte {
	return p.data
}
       
    67 
       
    68 // Range returns a range description that corresponds to a given slice of the
       
    69 // input. If the argument is not a subslice of the parser input, this function
       
    70 // panics.
       
    71 func (p *Parser) Range(b []byte) Range {
       
    72 	return Range{
       
    73 		Offset: uint32(danger.SubsliceOffset(p.data, b)),
       
    74 		Length: uint32(len(b)),
       
    75 	}
       
    76 }
       
    77 
       
    78 // Raw returns the slice corresponding to the bytes in the given range.
       
    79 func (p *Parser) Raw(raw Range) []byte {
       
    80 	return p.data[raw.Offset : raw.Offset+raw.Length]
       
    81 }
       
    82 
       
    83 // Reset brings the parser to its initial state for a given input. It wipes an
       
    84 // reuses internal storage to reduce allocation.
       
    85 func (p *Parser) Reset(b []byte) {
       
    86 	p.builder.Reset()
       
    87 	p.ref = invalidReference
       
    88 	p.data = b
       
    89 	p.left = b
       
    90 	p.err = nil
       
    91 	p.first = true
       
    92 }
       
    93 
       
    94 // NextExpression parses the next top-level expression. If an expression was
       
    95 // successfully parsed, it returns true. If the parser is at the end of the
       
    96 // document or an error occurred, it returns false.
       
    97 //
       
    98 // Retrieve the parsed expression with Expression().
       
    99 func (p *Parser) NextExpression() bool {
       
   100 	if len(p.left) == 0 || p.err != nil {
       
   101 		return false
       
   102 	}
       
   103 
       
   104 	p.builder.Reset()
       
   105 	p.ref = invalidReference
       
   106 
       
   107 	for {
       
   108 		if len(p.left) == 0 || p.err != nil {
       
   109 			return false
       
   110 		}
       
   111 
       
   112 		if !p.first {
       
   113 			p.left, p.err = p.parseNewline(p.left)
       
   114 		}
       
   115 
       
   116 		if len(p.left) == 0 || p.err != nil {
       
   117 			return false
       
   118 		}
       
   119 
       
   120 		p.ref, p.left, p.err = p.parseExpression(p.left)
       
   121 
       
   122 		if p.err != nil {
       
   123 			return false
       
   124 		}
       
   125 
       
   126 		p.first = false
       
   127 
       
   128 		if p.ref.Valid() {
       
   129 			return true
       
   130 		}
       
   131 	}
       
   132 }
       
   133 
       
// Expression returns a pointer to the node representing the last successfully
// parsed expression.
func (p *Parser) Expression() *Node {
	return p.builder.NodeAt(p.ref)
}
       
   139 
       
// Error returns any error that has occurred during parsing.
func (p *Parser) Error() error {
	return p.err
}
       
   144 
       
   145 func (p *Parser) parseNewline(b []byte) ([]byte, error) {
       
   146 	if b[0] == '\n' {
       
   147 		return b[1:], nil
       
   148 	}
       
   149 
       
   150 	if b[0] == '\r' {
       
   151 		_, rest, err := scanWindowsNewline(b)
       
   152 		return rest, err
       
   153 	}
       
   154 
       
   155 	return nil, NewParserError(b[0:1], "expected newline but got %#U", b[0])
       
   156 }
       
   157 
       
   158 func (p *Parser) parseExpression(b []byte) (reference, []byte, error) {
       
   159 	// expression =  ws [ comment ]
       
   160 	// expression =/ ws keyval ws [ comment ]
       
   161 	// expression =/ ws table ws [ comment ]
       
   162 	ref := invalidReference
       
   163 
       
   164 	b = p.parseWhitespace(b)
       
   165 
       
   166 	if len(b) == 0 {
       
   167 		return ref, b, nil
       
   168 	}
       
   169 
       
   170 	if b[0] == '#' {
       
   171 		_, rest, err := scanComment(b)
       
   172 		return ref, rest, err
       
   173 	}
       
   174 
       
   175 	if b[0] == '\n' || b[0] == '\r' {
       
   176 		return ref, b, nil
       
   177 	}
       
   178 
       
   179 	var err error
       
   180 	if b[0] == '[' {
       
   181 		ref, b, err = p.parseTable(b)
       
   182 	} else {
       
   183 		ref, b, err = p.parseKeyval(b)
       
   184 	}
       
   185 
       
   186 	if err != nil {
       
   187 		return ref, nil, err
       
   188 	}
       
   189 
       
   190 	b = p.parseWhitespace(b)
       
   191 
       
   192 	if len(b) > 0 && b[0] == '#' {
       
   193 		_, rest, err := scanComment(b)
       
   194 		return ref, rest, err
       
   195 	}
       
   196 
       
   197 	return ref, b, nil
       
   198 }
       
   199 
       
   200 func (p *Parser) parseTable(b []byte) (reference, []byte, error) {
       
   201 	// table = std-table / array-table
       
   202 	if len(b) > 1 && b[1] == '[' {
       
   203 		return p.parseArrayTable(b)
       
   204 	}
       
   205 
       
   206 	return p.parseStdTable(b)
       
   207 }
       
   208 
       
   209 func (p *Parser) parseArrayTable(b []byte) (reference, []byte, error) {
       
   210 	// array-table = array-table-open key array-table-close
       
   211 	// array-table-open  = %x5B.5B ws  ; [[ Double left square bracket
       
   212 	// array-table-close = ws %x5D.5D  ; ]] Double right square bracket
       
   213 	ref := p.builder.Push(Node{
       
   214 		Kind: ArrayTable,
       
   215 	})
       
   216 
       
   217 	b = b[2:]
       
   218 	b = p.parseWhitespace(b)
       
   219 
       
   220 	k, b, err := p.parseKey(b)
       
   221 	if err != nil {
       
   222 		return ref, nil, err
       
   223 	}
       
   224 
       
   225 	p.builder.AttachChild(ref, k)
       
   226 	b = p.parseWhitespace(b)
       
   227 
       
   228 	b, err = expect(']', b)
       
   229 	if err != nil {
       
   230 		return ref, nil, err
       
   231 	}
       
   232 
       
   233 	b, err = expect(']', b)
       
   234 
       
   235 	return ref, b, err
       
   236 }
       
   237 
       
   238 func (p *Parser) parseStdTable(b []byte) (reference, []byte, error) {
       
   239 	// std-table = std-table-open key std-table-close
       
   240 	// std-table-open  = %x5B ws     ; [ Left square bracket
       
   241 	// std-table-close = ws %x5D     ; ] Right square bracket
       
   242 	ref := p.builder.Push(Node{
       
   243 		Kind: Table,
       
   244 	})
       
   245 
       
   246 	b = b[1:]
       
   247 	b = p.parseWhitespace(b)
       
   248 
       
   249 	key, b, err := p.parseKey(b)
       
   250 	if err != nil {
       
   251 		return ref, nil, err
       
   252 	}
       
   253 
       
   254 	p.builder.AttachChild(ref, key)
       
   255 
       
   256 	b = p.parseWhitespace(b)
       
   257 
       
   258 	b, err = expect(']', b)
       
   259 
       
   260 	return ref, b, err
       
   261 }
       
   262 
       
   263 func (p *Parser) parseKeyval(b []byte) (reference, []byte, error) {
       
   264 	// keyval = key keyval-sep val
       
   265 	ref := p.builder.Push(Node{
       
   266 		Kind: KeyValue,
       
   267 	})
       
   268 
       
   269 	key, b, err := p.parseKey(b)
       
   270 	if err != nil {
       
   271 		return invalidReference, nil, err
       
   272 	}
       
   273 
       
   274 	// keyval-sep = ws %x3D ws ; =
       
   275 
       
   276 	b = p.parseWhitespace(b)
       
   277 
       
   278 	if len(b) == 0 {
       
   279 		return invalidReference, nil, NewParserError(b, "expected = after a key, but the document ends there")
       
   280 	}
       
   281 
       
   282 	b, err = expect('=', b)
       
   283 	if err != nil {
       
   284 		return invalidReference, nil, err
       
   285 	}
       
   286 
       
   287 	b = p.parseWhitespace(b)
       
   288 
       
   289 	valRef, b, err := p.parseVal(b)
       
   290 	if err != nil {
       
   291 		return ref, b, err
       
   292 	}
       
   293 
       
   294 	p.builder.Chain(valRef, key)
       
   295 	p.builder.AttachChild(ref, valRef)
       
   296 
       
   297 	return ref, b, err
       
   298 }
       
   299 
       
   300 //nolint:cyclop,funlen
       
   301 func (p *Parser) parseVal(b []byte) (reference, []byte, error) {
       
   302 	// val = string / boolean / array / inline-table / date-time / float / integer
       
   303 	ref := invalidReference
       
   304 
       
   305 	if len(b) == 0 {
       
   306 		return ref, nil, NewParserError(b, "expected value, not eof")
       
   307 	}
       
   308 
       
   309 	var err error
       
   310 	c := b[0]
       
   311 
       
   312 	switch c {
       
   313 	case '"':
       
   314 		var raw []byte
       
   315 		var v []byte
       
   316 		if scanFollowsMultilineBasicStringDelimiter(b) {
       
   317 			raw, v, b, err = p.parseMultilineBasicString(b)
       
   318 		} else {
       
   319 			raw, v, b, err = p.parseBasicString(b)
       
   320 		}
       
   321 
       
   322 		if err == nil {
       
   323 			ref = p.builder.Push(Node{
       
   324 				Kind: String,
       
   325 				Raw:  p.Range(raw),
       
   326 				Data: v,
       
   327 			})
       
   328 		}
       
   329 
       
   330 		return ref, b, err
       
   331 	case '\'':
       
   332 		var raw []byte
       
   333 		var v []byte
       
   334 		if scanFollowsMultilineLiteralStringDelimiter(b) {
       
   335 			raw, v, b, err = p.parseMultilineLiteralString(b)
       
   336 		} else {
       
   337 			raw, v, b, err = p.parseLiteralString(b)
       
   338 		}
       
   339 
       
   340 		if err == nil {
       
   341 			ref = p.builder.Push(Node{
       
   342 				Kind: String,
       
   343 				Raw:  p.Range(raw),
       
   344 				Data: v,
       
   345 			})
       
   346 		}
       
   347 
       
   348 		return ref, b, err
       
   349 	case 't':
       
   350 		if !scanFollowsTrue(b) {
       
   351 			return ref, nil, NewParserError(atmost(b, 4), "expected 'true'")
       
   352 		}
       
   353 
       
   354 		ref = p.builder.Push(Node{
       
   355 			Kind: Bool,
       
   356 			Data: b[:4],
       
   357 		})
       
   358 
       
   359 		return ref, b[4:], nil
       
   360 	case 'f':
       
   361 		if !scanFollowsFalse(b) {
       
   362 			return ref, nil, NewParserError(atmost(b, 5), "expected 'false'")
       
   363 		}
       
   364 
       
   365 		ref = p.builder.Push(Node{
       
   366 			Kind: Bool,
       
   367 			Data: b[:5],
       
   368 		})
       
   369 
       
   370 		return ref, b[5:], nil
       
   371 	case '[':
       
   372 		return p.parseValArray(b)
       
   373 	case '{':
       
   374 		return p.parseInlineTable(b)
       
   375 	default:
       
   376 		return p.parseIntOrFloatOrDateTime(b)
       
   377 	}
       
   378 }
       
   379 
       
   380 func atmost(b []byte, n int) []byte {
       
   381 	if n >= len(b) {
       
   382 		return b
       
   383 	}
       
   384 
       
   385 	return b[:n]
       
   386 }
       
   387 
       
   388 func (p *Parser) parseLiteralString(b []byte) ([]byte, []byte, []byte, error) {
       
   389 	v, rest, err := scanLiteralString(b)
       
   390 	if err != nil {
       
   391 		return nil, nil, nil, err
       
   392 	}
       
   393 
       
   394 	return v, v[1 : len(v)-1], rest, nil
       
   395 }
       
   396 
       
   397 func (p *Parser) parseInlineTable(b []byte) (reference, []byte, error) {
       
   398 	// inline-table = inline-table-open [ inline-table-keyvals ] inline-table-close
       
   399 	// inline-table-open  = %x7B ws     ; {
       
   400 	// inline-table-close = ws %x7D     ; }
       
   401 	// inline-table-sep   = ws %x2C ws  ; , Comma
       
   402 	// inline-table-keyvals = keyval [ inline-table-sep inline-table-keyvals ]
       
   403 	parent := p.builder.Push(Node{
       
   404 		Kind: InlineTable,
       
   405 	})
       
   406 
       
   407 	first := true
       
   408 
       
   409 	var child reference
       
   410 
       
   411 	b = b[1:]
       
   412 
       
   413 	var err error
       
   414 
       
   415 	for len(b) > 0 {
       
   416 		previousB := b
       
   417 		b = p.parseWhitespace(b)
       
   418 
       
   419 		if len(b) == 0 {
       
   420 			return parent, nil, NewParserError(previousB[:1], "inline table is incomplete")
       
   421 		}
       
   422 
       
   423 		if b[0] == '}' {
       
   424 			break
       
   425 		}
       
   426 
       
   427 		if !first {
       
   428 			b, err = expect(',', b)
       
   429 			if err != nil {
       
   430 				return parent, nil, err
       
   431 			}
       
   432 			b = p.parseWhitespace(b)
       
   433 		}
       
   434 
       
   435 		var kv reference
       
   436 
       
   437 		kv, b, err = p.parseKeyval(b)
       
   438 		if err != nil {
       
   439 			return parent, nil, err
       
   440 		}
       
   441 
       
   442 		if first {
       
   443 			p.builder.AttachChild(parent, kv)
       
   444 		} else {
       
   445 			p.builder.Chain(child, kv)
       
   446 		}
       
   447 		child = kv
       
   448 
       
   449 		first = false
       
   450 	}
       
   451 
       
   452 	rest, err := expect('}', b)
       
   453 
       
   454 	return parent, rest, err
       
   455 }
       
   456 
       
   457 //nolint:funlen,cyclop
       
   458 func (p *Parser) parseValArray(b []byte) (reference, []byte, error) {
       
   459 	// array = array-open [ array-values ] ws-comment-newline array-close
       
   460 	// array-open =  %x5B ; [
       
   461 	// array-close = %x5D ; ]
       
   462 	// array-values =  ws-comment-newline val ws-comment-newline array-sep array-values
       
   463 	// array-values =/ ws-comment-newline val ws-comment-newline [ array-sep ]
       
   464 	// array-sep = %x2C  ; , Comma
       
   465 	// ws-comment-newline = *( wschar / [ comment ] newline )
       
   466 	arrayStart := b
       
   467 	b = b[1:]
       
   468 
       
   469 	parent := p.builder.Push(Node{
       
   470 		Kind: Array,
       
   471 	})
       
   472 
       
   473 	first := true
       
   474 
       
   475 	var lastChild reference
       
   476 
       
   477 	var err error
       
   478 	for len(b) > 0 {
       
   479 		b, err = p.parseOptionalWhitespaceCommentNewline(b)
       
   480 		if err != nil {
       
   481 			return parent, nil, err
       
   482 		}
       
   483 
       
   484 		if len(b) == 0 {
       
   485 			return parent, nil, NewParserError(arrayStart[:1], "array is incomplete")
       
   486 		}
       
   487 
       
   488 		if b[0] == ']' {
       
   489 			break
       
   490 		}
       
   491 
       
   492 		if b[0] == ',' {
       
   493 			if first {
       
   494 				return parent, nil, NewParserError(b[0:1], "array cannot start with comma")
       
   495 			}
       
   496 			b = b[1:]
       
   497 
       
   498 			b, err = p.parseOptionalWhitespaceCommentNewline(b)
       
   499 			if err != nil {
       
   500 				return parent, nil, err
       
   501 			}
       
   502 		} else if !first {
       
   503 			return parent, nil, NewParserError(b[0:1], "array elements must be separated by commas")
       
   504 		}
       
   505 
       
   506 		// TOML allows trailing commas in arrays.
       
   507 		if len(b) > 0 && b[0] == ']' {
       
   508 			break
       
   509 		}
       
   510 
       
   511 		var valueRef reference
       
   512 		valueRef, b, err = p.parseVal(b)
       
   513 		if err != nil {
       
   514 			return parent, nil, err
       
   515 		}
       
   516 
       
   517 		if first {
       
   518 			p.builder.AttachChild(parent, valueRef)
       
   519 		} else {
       
   520 			p.builder.Chain(lastChild, valueRef)
       
   521 		}
       
   522 		lastChild = valueRef
       
   523 
       
   524 		b, err = p.parseOptionalWhitespaceCommentNewline(b)
       
   525 		if err != nil {
       
   526 			return parent, nil, err
       
   527 		}
       
   528 		first = false
       
   529 	}
       
   530 
       
   531 	rest, err := expect(']', b)
       
   532 
       
   533 	return parent, rest, err
       
   534 }
       
   535 
       
   536 func (p *Parser) parseOptionalWhitespaceCommentNewline(b []byte) ([]byte, error) {
       
   537 	for len(b) > 0 {
       
   538 		var err error
       
   539 		b = p.parseWhitespace(b)
       
   540 
       
   541 		if len(b) > 0 && b[0] == '#' {
       
   542 			_, b, err = scanComment(b)
       
   543 			if err != nil {
       
   544 				return nil, err
       
   545 			}
       
   546 		}
       
   547 
       
   548 		if len(b) == 0 {
       
   549 			break
       
   550 		}
       
   551 
       
   552 		if b[0] == '\n' || b[0] == '\r' {
       
   553 			b, err = p.parseNewline(b)
       
   554 			if err != nil {
       
   555 				return nil, err
       
   556 			}
       
   557 		} else {
       
   558 			break
       
   559 		}
       
   560 	}
       
   561 
       
   562 	return b, nil
       
   563 }
       
   564 
       
   565 func (p *Parser) parseMultilineLiteralString(b []byte) ([]byte, []byte, []byte, error) {
       
   566 	token, rest, err := scanMultilineLiteralString(b)
       
   567 	if err != nil {
       
   568 		return nil, nil, nil, err
       
   569 	}
       
   570 
       
   571 	i := 3
       
   572 
       
   573 	// skip the immediate new line
       
   574 	if token[i] == '\n' {
       
   575 		i++
       
   576 	} else if token[i] == '\r' && token[i+1] == '\n' {
       
   577 		i += 2
       
   578 	}
       
   579 
       
   580 	return token, token[i : len(token)-3], rest, err
       
   581 }
       
   582 
       
// parseMultilineBasicString scans and decodes a multiline basic string
// (delimited by three quotation marks) at the start of b.
//
// It returns, in order: the raw token including its delimiters, the decoded
// content, and the unparsed remainder of the input. When the scanner reports no
// escape sequence, the decoded content is a subslice of the token; otherwise it
// is a newly built buffer.
//
//nolint:funlen,gocognit,cyclop
func (p *Parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, error) {
	// ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
	// ml-basic-string-delim
	// ml-basic-string-delim = 3quotation-mark
	// ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]
	//
	// mlb-content = mlb-char / newline / mlb-escaped-nl
	// mlb-char = mlb-unescaped / escaped
	// mlb-quotes = 1*2quotation-mark
	// mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
	// mlb-escaped-nl = escape ws newline *( wschar / newline )
	token, escaped, rest, err := scanMultilineBasicString(b)
	if err != nil {
		return nil, nil, nil, err
	}

	// Index of the first content byte, right after the opening delimiter.
	i := 3

	// skip the immediate new line
	if token[i] == '\n' {
		i++
	} else if token[i] == '\r' && token[i+1] == '\n' {
		i += 2
	}

	// fast path: without escape sequences the content only needs validation
	// and can be returned without copying.
	startIdx := i
	endIdx := len(token) - len(`"""`)

	if !escaped {
		str := token[startIdx:endIdx]
		verr := characters.Utf8TomlValidAlreadyEscaped(str)
		if verr.Zero() {
			return token, str, rest, nil
		}
		return nil, nil, nil, NewParserError(str[verr.Index:verr.Index+verr.Size], "invalid UTF-8")
	}

	var builder bytes.Buffer

	// The scanner ensures that the token starts and ends with quotes and that
	// escapes are balanced.
	for i < len(token)-3 {
		c := token[i]

		//nolint:nestif
		if c == '\\' {
			// When the last non-whitespace character on a line is an unescaped \,
			// it will be trimmed along with all whitespace (including newlines) up
			// to the next non-whitespace character or closing delimiter.

			// Look ahead from the backslash: j counts the bytes inspected. The
			// scan skips spaces, tabs and "\r\n" pairs, and stops at the first
			// other byte; only a '\n' marks the backslash as line-ending.
			isLastNonWhitespaceOnLine := false
			j := 1
		findEOLLoop:
			for ; j < len(token)-3-i; j++ {
				switch token[i+j] {
				case ' ', '\t':
					continue
				case '\r':
					if token[i+j+1] == '\n' {
						continue
					}
				case '\n':
					isLastNonWhitespaceOnLine = true
				}
				break findEOLLoop
			}
			if isLastNonWhitespaceOnLine {
				// Jump to the newline, then skip every following whitespace
				// and newline byte. The i-- / i++ pair leaves i on the first
				// byte that is not whitespace; if the content ends first, i
				// moves past the content and the outer loop terminates.
				i += j
				for ; i < len(token)-3; i++ {
					c := token[i]
					if !(c == '\n' || c == '\r' || c == ' ' || c == '\t') {
						i--
						break
					}
				}
				i++
				continue
			}

			// handle escaping
			i++
			c = token[i]

			switch c {
			case '"', '\\':
				builder.WriteByte(c)
			case 'b':
				builder.WriteByte('\b')
			case 'f':
				builder.WriteByte('\f')
			case 'n':
				builder.WriteByte('\n')
			case 'r':
				builder.WriteByte('\r')
			case 't':
				builder.WriteByte('\t')
			case 'e':
				builder.WriteByte(0x1B)
			case 'u':
				x, err := hexToRune(atmost(token[i+1:], 4), 4)
				if err != nil {
					return nil, nil, nil, err
				}
				builder.WriteRune(x)
				// Skip the four hex digits; the i++ below skips the 'u'.
				i += 4
			case 'U':
				x, err := hexToRune(atmost(token[i+1:], 8), 8)
				if err != nil {
					return nil, nil, nil, err
				}

				builder.WriteRune(x)
				// Skip the eight hex digits; the i++ below skips the 'U'.
				i += 8
			default:
				return nil, nil, nil, NewParserError(token[i:i+1], "invalid escaped character %#U", c)
			}
			i++
		} else {
			// Not an escape: copy the next character verbatim. A size of 0
			// is reported as an invalid character.
			size := characters.Utf8ValidNext(token[i:])
			if size == 0 {
				return nil, nil, nil, NewParserError(token[i:i+1], "invalid character %#U", c)
			}
			builder.Write(token[i : i+size])
			i += size
		}
	}

	return token, builder.Bytes(), rest, nil
}
       
   714 
       
   715 func (p *Parser) parseKey(b []byte) (reference, []byte, error) {
       
   716 	// key = simple-key / dotted-key
       
   717 	// simple-key = quoted-key / unquoted-key
       
   718 	//
       
   719 	// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
       
   720 	// quoted-key = basic-string / literal-string
       
   721 	// dotted-key = simple-key 1*( dot-sep simple-key )
       
   722 	//
       
   723 	// dot-sep   = ws %x2E ws  ; . Period
       
   724 	raw, key, b, err := p.parseSimpleKey(b)
       
   725 	if err != nil {
       
   726 		return invalidReference, nil, err
       
   727 	}
       
   728 
       
   729 	ref := p.builder.Push(Node{
       
   730 		Kind: Key,
       
   731 		Raw:  p.Range(raw),
       
   732 		Data: key,
       
   733 	})
       
   734 
       
   735 	for {
       
   736 		b = p.parseWhitespace(b)
       
   737 		if len(b) > 0 && b[0] == '.' {
       
   738 			b = p.parseWhitespace(b[1:])
       
   739 
       
   740 			raw, key, b, err = p.parseSimpleKey(b)
       
   741 			if err != nil {
       
   742 				return ref, nil, err
       
   743 			}
       
   744 
       
   745 			p.builder.PushAndChain(Node{
       
   746 				Kind: Key,
       
   747 				Raw:  p.Range(raw),
       
   748 				Data: key,
       
   749 			})
       
   750 		} else {
       
   751 			break
       
   752 		}
       
   753 	}
       
   754 
       
   755 	return ref, b, nil
       
   756 }
       
   757 
       
   758 func (p *Parser) parseSimpleKey(b []byte) (raw, key, rest []byte, err error) {
       
   759 	if len(b) == 0 {
       
   760 		return nil, nil, nil, NewParserError(b, "expected key but found none")
       
   761 	}
       
   762 
       
   763 	// simple-key = quoted-key / unquoted-key
       
   764 	// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
       
   765 	// quoted-key = basic-string / literal-string
       
   766 	switch {
       
   767 	case b[0] == '\'':
       
   768 		return p.parseLiteralString(b)
       
   769 	case b[0] == '"':
       
   770 		return p.parseBasicString(b)
       
   771 	case isUnquotedKeyChar(b[0]):
       
   772 		key, rest = scanUnquotedKey(b)
       
   773 		return key, key, rest, nil
       
   774 	default:
       
   775 		return nil, nil, nil, NewParserError(b[0:1], "invalid character at start of key: %c", b[0])
       
   776 	}
       
   777 }
       
   778 
       
// parseBasicString scans and decodes a single-line basic string (delimited by
// one quotation mark on each side) at the start of b.
//
// It returns, in order: the raw token including its quotes, the decoded
// content, and the unparsed remainder of the input. When the scanner reports no
// escape sequence, the decoded content is a subslice of the token; otherwise it
// is a newly built buffer.
//
//nolint:funlen,cyclop
func (p *Parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) {
	// basic-string = quotation-mark *basic-char quotation-mark
	// quotation-mark = %x22            ; "
	// basic-char = basic-unescaped / escaped
	// basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
	// escaped = escape escape-seq-char
	// escape-seq-char =  %x22         ; "    quotation mark  U+0022
	// escape-seq-char =/ %x5C         ; \    reverse solidus U+005C
	// escape-seq-char =/ %x62         ; b    backspace       U+0008
	// escape-seq-char =/ %x66         ; f    form feed       U+000C
	// escape-seq-char =/ %x6E         ; n    line feed       U+000A
	// escape-seq-char =/ %x72         ; r    carriage return U+000D
	// escape-seq-char =/ %x74         ; t    tab             U+0009
	// escape-seq-char =/ %x75 4HEXDIG ; uXXXX                U+XXXX
	// escape-seq-char =/ %x55 8HEXDIG ; UXXXXXXXX            U+XXXXXXXX
	//
	// In addition to the grammar above, the code below also decodes \e as
	// the byte 0x1B.
	token, escaped, rest, err := scanBasicString(b)
	if err != nil {
		return nil, nil, nil, err
	}

	startIdx := len(`"`)
	endIdx := len(token) - len(`"`)

	// Fast path. If there is no escape sequence, the string should just be
	// an UTF-8 encoded string, which is the same as Go. In that case,
	// validate the string and return a direct reference to the buffer.
	if !escaped {
		str := token[startIdx:endIdx]
		verr := characters.Utf8TomlValidAlreadyEscaped(str)
		if verr.Zero() {
			return token, str, rest, nil
		}
		return nil, nil, nil, NewParserError(str[verr.Index:verr.Index+verr.Size], "invalid UTF-8")
	}

	i := startIdx

	var builder bytes.Buffer

	// The scanner ensures that the token starts and ends with quotes and that
	// escapes are balanced.
	for i < len(token)-1 {
		c := token[i]
		if c == '\\' {
			// Move onto the character that selects the escape.
			i++
			c = token[i]

			switch c {
			case '"', '\\':
				builder.WriteByte(c)
			case 'b':
				builder.WriteByte('\b')
			case 'f':
				builder.WriteByte('\f')
			case 'n':
				builder.WriteByte('\n')
			case 'r':
				builder.WriteByte('\r')
			case 't':
				builder.WriteByte('\t')
			case 'e':
				builder.WriteByte(0x1B)
			case 'u':
				// The closing quote is excluded from the candidate digits.
				x, err := hexToRune(token[i+1:len(token)-1], 4)
				if err != nil {
					return nil, nil, nil, err
				}

				builder.WriteRune(x)
				// Skip the four hex digits; the i++ below skips the 'u'.
				i += 4
			case 'U':
				// The closing quote is excluded from the candidate digits.
				x, err := hexToRune(token[i+1:len(token)-1], 8)
				if err != nil {
					return nil, nil, nil, err
				}

				builder.WriteRune(x)
				// Skip the eight hex digits; the i++ below skips the 'U'.
				i += 8
			default:
				return nil, nil, nil, NewParserError(token[i:i+1], "invalid escaped character %#U", c)
			}
			i++
		} else {
			// Not an escape: copy the next character verbatim. A size of 0
			// is reported as an invalid character.
			size := characters.Utf8ValidNext(token[i:])
			if size == 0 {
				return nil, nil, nil, NewParserError(token[i:i+1], "invalid character %#U", c)
			}
			builder.Write(token[i : i+size])
			i += size
		}
	}

	return token, builder.Bytes(), rest, nil
}
       
   874 
       
   875 func hexToRune(b []byte, length int) (rune, error) {
       
   876 	if len(b) < length {
       
   877 		return -1, NewParserError(b, "unicode point needs %d character, not %d", length, len(b))
       
   878 	}
       
   879 	b = b[:length]
       
   880 
       
   881 	var r uint32
       
   882 	for i, c := range b {
       
   883 		d := uint32(0)
       
   884 		switch {
       
   885 		case '0' <= c && c <= '9':
       
   886 			d = uint32(c - '0')
       
   887 		case 'a' <= c && c <= 'f':
       
   888 			d = uint32(c - 'a' + 10)
       
   889 		case 'A' <= c && c <= 'F':
       
   890 			d = uint32(c - 'A' + 10)
       
   891 		default:
       
   892 			return -1, NewParserError(b[i:i+1], "non-hex character")
       
   893 		}
       
   894 		r = r*16 + d
       
   895 	}
       
   896 
       
   897 	if r > unicode.MaxRune || 0xD800 <= r && r < 0xE000 {
       
   898 		return -1, NewParserError(b, "escape sequence is invalid Unicode code point")
       
   899 	}
       
   900 
       
   901 	return rune(r), nil
       
   902 }
       
   903 
       
   904 func (p *Parser) parseWhitespace(b []byte) []byte {
       
   905 	// ws = *wschar
       
   906 	// wschar =  %x20  ; Space
       
   907 	// wschar =/ %x09  ; Horizontal tab
       
   908 	_, rest := scanWhitespace(b)
       
   909 
       
   910 	return rest
       
   911 }
       
   912 
       
   913 //nolint:cyclop
       
   914 func (p *Parser) parseIntOrFloatOrDateTime(b []byte) (reference, []byte, error) {
       
   915 	switch b[0] {
       
   916 	case 'i':
       
   917 		if !scanFollowsInf(b) {
       
   918 			return invalidReference, nil, NewParserError(atmost(b, 3), "expected 'inf'")
       
   919 		}
       
   920 
       
   921 		return p.builder.Push(Node{
       
   922 			Kind: Float,
       
   923 			Data: b[:3],
       
   924 		}), b[3:], nil
       
   925 	case 'n':
       
   926 		if !scanFollowsNan(b) {
       
   927 			return invalidReference, nil, NewParserError(atmost(b, 3), "expected 'nan'")
       
   928 		}
       
   929 
       
   930 		return p.builder.Push(Node{
       
   931 			Kind: Float,
       
   932 			Data: b[:3],
       
   933 		}), b[3:], nil
       
   934 	case '+', '-':
       
   935 		return p.scanIntOrFloat(b)
       
   936 	}
       
   937 
       
   938 	if len(b) < 3 {
       
   939 		return p.scanIntOrFloat(b)
       
   940 	}
       
   941 
       
   942 	s := 5
       
   943 	if len(b) < s {
       
   944 		s = len(b)
       
   945 	}
       
   946 
       
   947 	for idx, c := range b[:s] {
       
   948 		if isDigit(c) {
       
   949 			continue
       
   950 		}
       
   951 
       
   952 		if idx == 2 && c == ':' || (idx == 4 && c == '-') {
       
   953 			return p.scanDateTime(b)
       
   954 		}
       
   955 
       
   956 		break
       
   957 	}
       
   958 
       
   959 	return p.scanIntOrFloat(b)
       
   960 }
       
   961 
       
   962 func (p *Parser) scanDateTime(b []byte) (reference, []byte, error) {
       
   963 	// scans for contiguous characters in [0-9T:Z.+-], and up to one space if
       
   964 	// followed by a digit.
       
   965 	hasDate := false
       
   966 	hasTime := false
       
   967 	hasTz := false
       
   968 	seenSpace := false
       
   969 
       
   970 	i := 0
       
   971 byteLoop:
       
   972 	for ; i < len(b); i++ {
       
   973 		c := b[i]
       
   974 
       
   975 		switch {
       
   976 		case isDigit(c):
       
   977 		case c == '-':
       
   978 			hasDate = true
       
   979 			const minOffsetOfTz = 8
       
   980 			if i >= minOffsetOfTz {
       
   981 				hasTz = true
       
   982 			}
       
   983 		case c == 'T' || c == 't' || c == ':' || c == '.':
       
   984 			hasTime = true
       
   985 		case c == '+' || c == '-' || c == 'Z' || c == 'z':
       
   986 			hasTz = true
       
   987 		case c == ' ':
       
   988 			if !seenSpace && i+1 < len(b) && isDigit(b[i+1]) {
       
   989 				i += 2
       
   990 				// Avoid reaching past the end of the document in case the time
       
   991 				// is malformed. See TestIssue585.
       
   992 				if i >= len(b) {
       
   993 					i--
       
   994 				}
       
   995 				seenSpace = true
       
   996 				hasTime = true
       
   997 			} else {
       
   998 				break byteLoop
       
   999 			}
       
  1000 		default:
       
  1001 			break byteLoop
       
  1002 		}
       
  1003 	}
       
  1004 
       
  1005 	var kind Kind
       
  1006 
       
  1007 	if hasTime {
       
  1008 		if hasDate {
       
  1009 			if hasTz {
       
  1010 				kind = DateTime
       
  1011 			} else {
       
  1012 				kind = LocalDateTime
       
  1013 			}
       
  1014 		} else {
       
  1015 			kind = LocalTime
       
  1016 		}
       
  1017 	} else {
       
  1018 		kind = LocalDate
       
  1019 	}
       
  1020 
       
  1021 	return p.builder.Push(Node{
       
  1022 		Kind: kind,
       
  1023 		Data: b[:i],
       
  1024 	}), b[i:], nil
       
  1025 }
       
  1026 
       
  1027 //nolint:funlen,gocognit,cyclop
       
  1028 func (p *Parser) scanIntOrFloat(b []byte) (reference, []byte, error) {
       
  1029 	i := 0
       
  1030 
       
  1031 	if len(b) > 2 && b[0] == '0' && b[1] != '.' && b[1] != 'e' && b[1] != 'E' {
       
  1032 		var isValidRune validRuneFn
       
  1033 
       
  1034 		switch b[1] {
       
  1035 		case 'x':
       
  1036 			isValidRune = isValidHexRune
       
  1037 		case 'o':
       
  1038 			isValidRune = isValidOctalRune
       
  1039 		case 'b':
       
  1040 			isValidRune = isValidBinaryRune
       
  1041 		default:
       
  1042 			i++
       
  1043 		}
       
  1044 
       
  1045 		if isValidRune != nil {
       
  1046 			i += 2
       
  1047 			for ; i < len(b); i++ {
       
  1048 				if !isValidRune(b[i]) {
       
  1049 					break
       
  1050 				}
       
  1051 			}
       
  1052 		}
       
  1053 
       
  1054 		return p.builder.Push(Node{
       
  1055 			Kind: Integer,
       
  1056 			Data: b[:i],
       
  1057 		}), b[i:], nil
       
  1058 	}
       
  1059 
       
  1060 	isFloat := false
       
  1061 
       
  1062 	for ; i < len(b); i++ {
       
  1063 		c := b[i]
       
  1064 
       
  1065 		if c >= '0' && c <= '9' || c == '+' || c == '-' || c == '_' {
       
  1066 			continue
       
  1067 		}
       
  1068 
       
  1069 		if c == '.' || c == 'e' || c == 'E' {
       
  1070 			isFloat = true
       
  1071 
       
  1072 			continue
       
  1073 		}
       
  1074 
       
  1075 		if c == 'i' {
       
  1076 			if scanFollowsInf(b[i:]) {
       
  1077 				return p.builder.Push(Node{
       
  1078 					Kind: Float,
       
  1079 					Data: b[:i+3],
       
  1080 				}), b[i+3:], nil
       
  1081 			}
       
  1082 
       
  1083 			return invalidReference, nil, NewParserError(b[i:i+1], "unexpected character 'i' while scanning for a number")
       
  1084 		}
       
  1085 
       
  1086 		if c == 'n' {
       
  1087 			if scanFollowsNan(b[i:]) {
       
  1088 				return p.builder.Push(Node{
       
  1089 					Kind: Float,
       
  1090 					Data: b[:i+3],
       
  1091 				}), b[i+3:], nil
       
  1092 			}
       
  1093 
       
  1094 			return invalidReference, nil, NewParserError(b[i:i+1], "unexpected character 'n' while scanning for a number")
       
  1095 		}
       
  1096 
       
  1097 		break
       
  1098 	}
       
  1099 
       
  1100 	if i == 0 {
       
  1101 		return invalidReference, b, NewParserError(b, "incomplete number")
       
  1102 	}
       
  1103 
       
  1104 	kind := Integer
       
  1105 
       
  1106 	if isFloat {
       
  1107 		kind = Float
       
  1108 	}
       
  1109 
       
  1110 	return p.builder.Push(Node{
       
  1111 		Kind: kind,
       
  1112 		Data: b[:i],
       
  1113 	}), b[i:], nil
       
  1114 }
       
  1115 
       
  1116 func isDigit(r byte) bool {
       
  1117 	return r >= '0' && r <= '9'
       
  1118 }
       
  1119 
       
  1120 type validRuneFn func(r byte) bool
       
  1121 
       
  1122 func isValidHexRune(r byte) bool {
       
  1123 	return r >= 'a' && r <= 'f' ||
       
  1124 		r >= 'A' && r <= 'F' ||
       
  1125 		r >= '0' && r <= '9' ||
       
  1126 		r == '_'
       
  1127 }
       
  1128 
       
  1129 func isValidOctalRune(r byte) bool {
       
  1130 	return r >= '0' && r <= '7' || r == '_'
       
  1131 }
       
  1132 
       
  1133 func isValidBinaryRune(r byte) bool {
       
  1134 	return r == '0' || r == '1' || r == '_'
       
  1135 }
       
  1136 
       
  1137 func expect(x byte, b []byte) ([]byte, error) {
       
  1138 	if len(b) == 0 {
       
  1139 		return nil, NewParserError(b, "expected character %c but the document ended here", x)
       
  1140 	}
       
  1141 
       
  1142 	if b[0] != x {
       
  1143 		return nil, NewParserError(b[0:1], "expected character %c", x)
       
  1144 	}
       
  1145 
       
  1146 	return b[1:], nil
       
  1147 }