vendor/github.com/pelletier/go-toml/lexer.go

// TOML lexer.
//
// Written using the principles developed by Rob Pike in
// http://www.youtube.com/watch?v=HxaD_trXwRE

package toml

import (
	"bytes"
	"errors"
	"fmt"
	"strconv"
	"strings"
)

// Define state functions
type tomlLexStateFn func() tomlLexStateFn

// Define lexer
type tomlLexer struct {
	inputIdx          int
	input             []rune // Textual source
	currentTokenStart int
	currentTokenStop  int
	tokens            []token
	brackets          []rune
	line              int
	col               int
	endbufferLine     int
	endbufferCol      int
}
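
// Bookkeeping: currentTokenStart and currentTokenStop delimit the rune range
// of the token being built, while line and col record the position at which
// that token starts. endbufferLine and endbufferCol follow the read cursor
// (inputIdx); ignore() resynchronizes the token start with the cursor once a
// token has been emitted or skipped.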
       
// Basic read operations on input

func (l *tomlLexer) read() rune {
	r := l.peek()
	if r == '\n' {
		l.endbufferLine++
		l.endbufferCol = 1
	} else {
		l.endbufferCol++
	}
	l.inputIdx++
	return r
}

func (l *tomlLexer) next() rune {
	r := l.read()

	if r != eof {
		l.currentTokenStop++
	}
	return r
}

func (l *tomlLexer) ignore() {
	l.currentTokenStart = l.currentTokenStop
	l.line = l.endbufferLine
	l.col = l.endbufferCol
}

func (l *tomlLexer) skip() {
	l.next()
	l.ignore()
}

func (l *tomlLexer) fastForward(n int) {
	for i := 0; i < n; i++ {
		l.next()
	}
}

func (l *tomlLexer) emitWithValue(t tokenType, value string) {
	l.tokens = append(l.tokens, token{
		Position: Position{l.line, l.col},
		typ:      t,
		val:      value,
	})
	l.ignore()
}

func (l *tomlLexer) emit(t tokenType) {
	l.emitWithValue(t, string(l.input[l.currentTokenStart:l.currentTokenStop]))
}

func (l *tomlLexer) peek() rune {
	if l.inputIdx >= len(l.input) {
		return eof
	}
	return l.input[l.inputIdx]
}

func (l *tomlLexer) peekString(size int) string {
	maxIdx := len(l.input)
	upperIdx := l.inputIdx + size // FIXME: potential overflow
	if upperIdx > maxIdx {
		upperIdx = maxIdx
	}
	return string(l.input[l.inputIdx:upperIdx])
}

func (l *tomlLexer) follow(next string) bool {
	return next == l.peekString(len(next))
}
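
// For example, with input "abc" and inputIdx at 0: peek() returns 'a',
// peekString(5) returns "abc" (clamped to the remaining input), follow("ab")
// is true and follow("abd") is false.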
       
// Error management

func (l *tomlLexer) errorf(format string, args ...interface{}) tomlLexStateFn {
	l.tokens = append(l.tokens, token{
		Position: Position{l.line, l.col},
		typ:      tokenError,
		val:      fmt.Sprintf(format, args...),
	})
	return nil
}

// State functions

func (l *tomlLexer) lexVoid() tomlLexStateFn {
	for {
		next := l.peek()
		switch next {
		case '}': // after '{'
			return l.lexRightCurlyBrace
		case '[':
			return l.lexTableKey
		case '#':
			return l.lexComment(l.lexVoid)
		case '=':
			return l.lexEqual
		case '\r':
			fallthrough
		case '\n':
			l.skip()
			continue
		}

		if isSpace(next) {
			l.skip()
		}

		if isKeyStartChar(next) {
			return l.lexKey
		}

		if next == eof {
			l.next()
			break
		}
	}

	l.emit(tokenEOF)
	return nil
}

func (l *tomlLexer) lexRvalue() tomlLexStateFn {
	for {
		next := l.peek()
		switch next {
		case '.':
			return l.errorf("cannot start float with a dot")
		case '=':
			return l.lexEqual
		case '[':
			return l.lexLeftBracket
		case ']':
			return l.lexRightBracket
		case '{':
			return l.lexLeftCurlyBrace
		case '}':
			return l.lexRightCurlyBrace
		case '#':
			return l.lexComment(l.lexRvalue)
		case '"':
			return l.lexString
		case '\'':
			return l.lexLiteralString
		case ',':
			return l.lexComma
		case '\r':
			fallthrough
		case '\n':
			l.skip()
			if len(l.brackets) > 0 && l.brackets[len(l.brackets)-1] == '[' {
				return l.lexRvalue
			}
			return l.lexVoid
		}

		if l.follow("true") {
			return l.lexTrue
		}

		if l.follow("false") {
			return l.lexFalse
		}

		if l.follow("inf") {
			return l.lexInf
		}

		if l.follow("nan") {
			return l.lexNan
		}

		if isSpace(next) {
			l.skip()
			continue
		}

		if next == eof {
			l.next()
			break
		}

		if next == '+' || next == '-' {
			return l.lexNumber
		}

		if isDigit(next) {
			return l.lexDateTimeOrNumber
		}

		return l.errorf("no value can start with %c", next)
	}

	l.emit(tokenEOF)
	return nil
}

func (l *tomlLexer) lexDateTimeOrNumber() tomlLexStateFn {
	// Could be either a date/time, or a number.
	// The options for date/times are:
	//   YYYY-... => date or date-time
	//   HH:... => time
	// Anything else should be a number.

	lookAhead := l.peekString(5)
	if len(lookAhead) < 3 {
		return l.lexNumber()
	}

	for idx, r := range lookAhead {
		if !isDigit(r) {
			if idx == 2 && r == ':' {
				return l.lexDateTimeOrTime()
			}
			if idx == 4 && r == '-' {
				return l.lexDateTimeOrTime()
			}
			return l.lexNumber()
		}
	}
	return l.lexNumber()
}
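
// Examples of the dispatch above: "07:32..." (':' at index 2) and "1979-..."
// ('-' at index 4) go to lexDateTimeOrTime, while inputs such as "1234" or
// "3.14" fall through to lexNumber.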
       
func (l *tomlLexer) lexLeftCurlyBrace() tomlLexStateFn {
	l.next()
	l.emit(tokenLeftCurlyBrace)
	l.brackets = append(l.brackets, '{')
	return l.lexVoid
}

func (l *tomlLexer) lexRightCurlyBrace() tomlLexStateFn {
	l.next()
	l.emit(tokenRightCurlyBrace)
	if len(l.brackets) == 0 || l.brackets[len(l.brackets)-1] != '{' {
		return l.errorf("cannot have '}' here")
	}
	l.brackets = l.brackets[:len(l.brackets)-1]
	return l.lexRvalue
}

func (l *tomlLexer) lexDateTimeOrTime() tomlLexStateFn {
	// Example matches:
	// 1979-05-27T07:32:00Z
	// 1979-05-27T00:32:00-07:00
	// 1979-05-27T00:32:00.999999-07:00
	// 1979-05-27 07:32:00Z
	// 1979-05-27 00:32:00-07:00
	// 1979-05-27 00:32:00.999999-07:00
	// 1979-05-27T07:32:00
	// 1979-05-27T00:32:00.999999
	// 1979-05-27 07:32:00
	// 1979-05-27 00:32:00.999999
	// 1979-05-27
	// 07:32:00
	// 00:32:00.999999

	// we already know those two are digits
	l.next()
	l.next()

	// Got 2 digits. At that point it could be either a time or a date(-time).

	r := l.next()
	if r == ':' {
		return l.lexTime()
	}

	return l.lexDateTime()
}

func (l *tomlLexer) lexDateTime() tomlLexStateFn {
	// This state accepts an offset date-time, a local date-time, or a local date.
	//
	//   v--- cursor
	// 1979-05-27T07:32:00Z
	// 1979-05-27T00:32:00-07:00
	// 1979-05-27T00:32:00.999999-07:00
	// 1979-05-27 07:32:00Z
	// 1979-05-27 00:32:00-07:00
	// 1979-05-27 00:32:00.999999-07:00
	// 1979-05-27T07:32:00
	// 1979-05-27T00:32:00.999999
	// 1979-05-27 07:32:00
	// 1979-05-27 00:32:00.999999
	// 1979-05-27

	// date

	// already checked by lexRvalue
	l.next() // digit
	l.next() // -

	for i := 0; i < 2; i++ {
		r := l.next()
		if !isDigit(r) {
			return l.errorf("invalid month digit in date: %c", r)
		}
	}

	r := l.next()
	if r != '-' {
		return l.errorf("expected - to separate month of a date, not %c", r)
	}

	for i := 0; i < 2; i++ {
		r := l.next()
		if !isDigit(r) {
			return l.errorf("invalid day digit in date: %c", r)
		}
	}

	l.emit(tokenLocalDate)

	r = l.peek()

	if r == eof {
		return l.lexRvalue
	}

	if r != ' ' && r != 'T' {
		return l.errorf("incorrect date/time separation character: %c", r)
	}

	if r == ' ' {
		lookAhead := l.peekString(3)[1:]
		if len(lookAhead) < 2 {
			return l.lexRvalue
		}
		for _, r := range lookAhead {
			if !isDigit(r) {
				return l.lexRvalue
			}
		}
	}

	l.skip() // skip the T or ' '

	// time

	for i := 0; i < 2; i++ {
		r := l.next()
		if !isDigit(r) {
			return l.errorf("invalid hour digit in time: %c", r)
		}
	}

	r = l.next()
	if r != ':' {
		return l.errorf("time hour/minute separator should be :, not %c", r)
	}

	for i := 0; i < 2; i++ {
		r := l.next()
		if !isDigit(r) {
			return l.errorf("invalid minute digit in time: %c", r)
		}
	}

	r = l.next()
	if r != ':' {
		return l.errorf("time minute/second separator should be :, not %c", r)
	}

	for i := 0; i < 2; i++ {
		r := l.next()
		if !isDigit(r) {
			return l.errorf("invalid second digit in time: %c", r)
		}
	}

	r = l.peek()
	if r == '.' {
		l.next()
		r := l.next()
		if !isDigit(r) {
			return l.errorf("expected at least one digit in time's fraction, not %c", r)
		}

		for {
			r := l.peek()
			if !isDigit(r) {
				break
			}
			l.next()
		}
	}

	l.emit(tokenLocalTime)

	return l.lexTimeOffset
}

func (l *tomlLexer) lexTimeOffset() tomlLexStateFn {
	// potential offset

	// Z
	// -07:00
	// +07:00
	// nothing

	r := l.peek()

	if r == 'Z' {
		l.next()
		l.emit(tokenTimeOffset)
	} else if r == '+' || r == '-' {
		l.next()

		for i := 0; i < 2; i++ {
			r := l.next()
			if !isDigit(r) {
				return l.errorf("invalid hour digit in time offset: %c", r)
			}
		}

		r = l.next()
		if r != ':' {
			return l.errorf("time offset hour/minute separator should be :, not %c", r)
		}

		for i := 0; i < 2; i++ {
			r := l.next()
			if !isDigit(r) {
				return l.errorf("invalid minute digit in time offset: %c", r)
			}
		}

		l.emit(tokenTimeOffset)
	}

	return l.lexRvalue
}

func (l *tomlLexer) lexTime() tomlLexStateFn {
	//   v--- cursor
	// 07:32:00
	// 00:32:00.999999

	for i := 0; i < 2; i++ {
		r := l.next()
		if !isDigit(r) {
			return l.errorf("invalid minute digit in time: %c", r)
		}
	}

	r := l.next()
	if r != ':' {
		return l.errorf("time minute/second separator should be :, not %c", r)
	}

	for i := 0; i < 2; i++ {
		r := l.next()
		if !isDigit(r) {
			return l.errorf("invalid second digit in time: %c", r)
		}
	}

	r = l.peek()
	if r == '.' {
		l.next()
		r := l.next()
		if !isDigit(r) {
			return l.errorf("expected at least one digit in time's fraction, not %c", r)
		}

		for {
			r := l.peek()
			if !isDigit(r) {
				break
			}
			l.next()
		}
	}

	l.emit(tokenLocalTime)
	return l.lexRvalue
}

func (l *tomlLexer) lexTrue() tomlLexStateFn {
	l.fastForward(4)
	l.emit(tokenTrue)
	return l.lexRvalue
}

func (l *tomlLexer) lexFalse() tomlLexStateFn {
	l.fastForward(5)
	l.emit(tokenFalse)
	return l.lexRvalue
}

func (l *tomlLexer) lexInf() tomlLexStateFn {
	l.fastForward(3)
	l.emit(tokenInf)
	return l.lexRvalue
}

func (l *tomlLexer) lexNan() tomlLexStateFn {
	l.fastForward(3)
	l.emit(tokenNan)
	return l.lexRvalue
}

func (l *tomlLexer) lexEqual() tomlLexStateFn {
	l.next()
	l.emit(tokenEqual)
	return l.lexRvalue
}

func (l *tomlLexer) lexComma() tomlLexStateFn {
	l.next()
	l.emit(tokenComma)
	if len(l.brackets) > 0 && l.brackets[len(l.brackets)-1] == '{' {
		return l.lexVoid
	}
	return l.lexRvalue
}

// Parse the key and emit its value without escape sequences.
// Bare keys, basic string keys, and literal string keys are supported.
func (l *tomlLexer) lexKey() tomlLexStateFn {
	var sb strings.Builder

	for r := l.peek(); isKeyChar(r) || r == '\n' || r == '\r'; r = l.peek() {
		if r == '"' {
			l.next()
			str, err := l.lexStringAsString(`"`, false, true)
			if err != nil {
				return l.errorf(err.Error())
			}
			sb.WriteString("\"")
			sb.WriteString(str)
			sb.WriteString("\"")
			l.next()
			continue
		} else if r == '\'' {
			l.next()
			str, err := l.lexLiteralStringAsString(`'`, false)
			if err != nil {
				return l.errorf(err.Error())
			}
			sb.WriteString("'")
			sb.WriteString(str)
			sb.WriteString("'")
			l.next()
			continue
		} else if r == '\n' {
			return l.errorf("keys cannot contain new lines")
		} else if isSpace(r) {
			var str strings.Builder
			str.WriteString(" ")

			// skip trailing whitespace
			l.next()
			for r = l.peek(); isSpace(r); r = l.peek() {
				str.WriteRune(r)
				l.next()
			}
			// break loop if not a dot
			if r != '.' {
				break
			}
			str.WriteString(".")
			// skip trailing whitespace after dot
			l.next()
			for r = l.peek(); isSpace(r); r = l.peek() {
				str.WriteRune(r)
				l.next()
			}
			sb.WriteString(str.String())
			continue
		} else if r == '.' {
			// skip
		} else if !isValidBareChar(r) {
			return l.errorf("keys cannot contain %c character", r)
		}
		sb.WriteRune(r)
		l.next()
	}
	l.emitWithValue(tokenKey, sb.String())
	return l.lexVoid
}
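
// Dotted keys such as `physical.color` are emitted as a single tokenKey
// value; quoted segments keep their surrounding quotes (e.g. `site."google.com"`
// yields the key value `site."google.com"`) so the segments can be told apart
// later. Whitespace around the dots is preserved in the emitted value.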
       
func (l *tomlLexer) lexComment(previousState tomlLexStateFn) tomlLexStateFn {
	return func() tomlLexStateFn {
		for next := l.peek(); next != '\n' && next != eof; next = l.peek() {
			if next == '\r' && l.follow("\r\n") {
				break
			}
			l.next()
		}
		l.ignore()
		return previousState
	}
}

func (l *tomlLexer) lexLeftBracket() tomlLexStateFn {
	l.next()
	l.emit(tokenLeftBracket)
	l.brackets = append(l.brackets, '[')
	return l.lexRvalue
}

func (l *tomlLexer) lexLiteralStringAsString(terminator string, discardLeadingNewLine bool) (string, error) {
	var sb strings.Builder

	if discardLeadingNewLine {
		if l.follow("\r\n") {
			l.skip()
			l.skip()
		} else if l.peek() == '\n' {
			l.skip()
		}
	}

	// find end of string
	for {
		if l.follow(terminator) {
			return sb.String(), nil
		}

		next := l.peek()
		if next == eof {
			break
		}
		sb.WriteRune(l.next())
	}

	return "", errors.New("unclosed string")
}

func (l *tomlLexer) lexLiteralString() tomlLexStateFn {
	l.skip()

	// handle special case for triple-quote
	terminator := "'"
	discardLeadingNewLine := false
	if l.follow("''") {
		l.skip()
		l.skip()
		terminator = "'''"
		discardLeadingNewLine = true
	}

	str, err := l.lexLiteralStringAsString(terminator, discardLeadingNewLine)
	if err != nil {
		return l.errorf(err.Error())
	}

	l.emitWithValue(tokenString, str)
	l.fastForward(len(terminator))
	l.ignore()
	return l.lexRvalue
}

// Lex a string and return the results as a string.
// Terminator is the substring indicating the end of the token.
// The resulting string does not include the terminator.
func (l *tomlLexer) lexStringAsString(terminator string, discardLeadingNewLine, acceptNewLines bool) (string, error) {
	var sb strings.Builder

	if discardLeadingNewLine {
		if l.follow("\r\n") {
			l.skip()
			l.skip()
		} else if l.peek() == '\n' {
			l.skip()
		}
	}

	for {
		if l.follow(terminator) {
			return sb.String(), nil
		}

		if l.follow("\\") {
			l.next()
			switch l.peek() {
			case '\r':
				fallthrough
			case '\n':
				fallthrough
			case '\t':
				fallthrough
			case ' ':
				// skip all whitespace chars following backslash
				for strings.ContainsRune("\r\n\t ", l.peek()) {
					l.next()
				}
			case '"':
				sb.WriteString("\"")
				l.next()
			case 'n':
				sb.WriteString("\n")
				l.next()
			case 'b':
				sb.WriteString("\b")
				l.next()
			case 'f':
				sb.WriteString("\f")
				l.next()
			case '/':
				sb.WriteString("/")
				l.next()
			case 't':
				sb.WriteString("\t")
				l.next()
			case 'r':
				sb.WriteString("\r")
				l.next()
			case '\\':
				sb.WriteString("\\")
				l.next()
			case 'u':
				l.next()
				var code strings.Builder
				for i := 0; i < 4; i++ {
					c := l.peek()
					if !isHexDigit(c) {
						return "", errors.New("unfinished unicode escape")
					}
					l.next()
					code.WriteRune(c)
				}
				intcode, err := strconv.ParseInt(code.String(), 16, 32)
				if err != nil {
					return "", errors.New("invalid unicode escape: \\u" + code.String())
				}
				sb.WriteRune(rune(intcode))
			case 'U':
				l.next()
				var code strings.Builder
				for i := 0; i < 8; i++ {
					c := l.peek()
					if !isHexDigit(c) {
						return "", errors.New("unfinished unicode escape")
					}
					l.next()
					code.WriteRune(c)
				}
				intcode, err := strconv.ParseInt(code.String(), 16, 64)
				if err != nil {
					return "", errors.New("invalid unicode escape: \\U" + code.String())
				}
				sb.WriteRune(rune(intcode))
			default:
				return "", errors.New("invalid escape sequence: \\" + string(l.peek()))
			}
		} else {
			r := l.peek()

			if 0x00 <= r && r <= 0x1F && r != '\t' && !(acceptNewLines && (r == '\n' || r == '\r')) {
				return "", fmt.Errorf("unescaped control character %U", r)
			}
			l.next()
			sb.WriteRune(r)
		}

		if l.peek() == eof {
			break
		}
	}

	return "", errors.New("unclosed string")
}

func (l *tomlLexer) lexString() tomlLexStateFn {
	l.skip()

	// handle special case for triple-quote
	terminator := `"`
	discardLeadingNewLine := false
	acceptNewLines := false
	if l.follow(`""`) {
		l.skip()
		l.skip()
		terminator = `"""`
		discardLeadingNewLine = true
		acceptNewLines = true
	}

	str, err := l.lexStringAsString(terminator, discardLeadingNewLine, acceptNewLines)
	if err != nil {
		return l.errorf(err.Error())
	}

	l.emitWithValue(tokenString, str)
	l.fastForward(len(terminator))
	l.ignore()
	return l.lexRvalue
}

func (l *tomlLexer) lexTableKey() tomlLexStateFn {
	l.next()

	if l.peek() == '[' {
		// token '[[' signifies an array of tables
		l.next()
		l.emit(tokenDoubleLeftBracket)
		return l.lexInsideTableArrayKey
	}
	// vanilla table key
	l.emit(tokenLeftBracket)
	return l.lexInsideTableKey
}

// Parse the key till "]]", but only bare keys are supported
func (l *tomlLexer) lexInsideTableArrayKey() tomlLexStateFn {
	for r := l.peek(); r != eof; r = l.peek() {
		switch r {
		case ']':
			if l.currentTokenStop > l.currentTokenStart {
				l.emit(tokenKeyGroupArray)
			}
			l.next()
			if l.peek() != ']' {
				break
			}
			l.next()
			l.emit(tokenDoubleRightBracket)
			return l.lexVoid
		case '[':
			return l.errorf("table array key cannot contain '['")
		default:
			l.next()
		}
	}
	return l.errorf("unclosed table array key")
}

// Parse the key till "]" but only bare keys are supported
func (l *tomlLexer) lexInsideTableKey() tomlLexStateFn {
	for r := l.peek(); r != eof; r = l.peek() {
		switch r {
		case ']':
			if l.currentTokenStop > l.currentTokenStart {
				l.emit(tokenKeyGroup)
			}
			l.next()
			l.emit(tokenRightBracket)
			return l.lexVoid
		case '[':
			return l.errorf("table key cannot contain '['")
		default:
			l.next()
		}
	}
	return l.errorf("unclosed table key")
}

func (l *tomlLexer) lexRightBracket() tomlLexStateFn {
	l.next()
	l.emit(tokenRightBracket)
	if len(l.brackets) == 0 || l.brackets[len(l.brackets)-1] != '[' {
		return l.errorf("cannot have ']' here")
	}
	l.brackets = l.brackets[:len(l.brackets)-1]
	return l.lexRvalue
}

type validRuneFn func(r rune) bool

func isValidHexRune(r rune) bool {
	return r >= 'a' && r <= 'f' ||
		r >= 'A' && r <= 'F' ||
		r >= '0' && r <= '9' ||
		r == '_'
}

func isValidOctalRune(r rune) bool {
	return r >= '0' && r <= '7' || r == '_'
}

func isValidBinaryRune(r rune) bool {
	return r == '0' || r == '1' || r == '_'
}

func (l *tomlLexer) lexNumber() tomlLexStateFn {
	r := l.peek()

	if r == '0' {
		follow := l.peekString(2)
		if len(follow) == 2 {
			var isValidRune validRuneFn
			switch follow[1] {
			case 'x':
				isValidRune = isValidHexRune
			case 'o':
				isValidRune = isValidOctalRune
			case 'b':
				isValidRune = isValidBinaryRune
			default:
				if follow[1] >= 'a' && follow[1] <= 'z' || follow[1] >= 'A' && follow[1] <= 'Z' {
					return l.errorf("unknown number base: %s. possible options are x (hex) o (octal) b (binary)", string(follow[1]))
				}
			}

			if isValidRune != nil {
				l.next()
				l.next()
				digitSeen := false
				for {
					next := l.peek()
					if !isValidRune(next) {
						break
					}
					digitSeen = true
					l.next()
				}

				if !digitSeen {
					return l.errorf("number needs at least one digit")
				}

				l.emit(tokenInteger)

				return l.lexRvalue
			}
		}
	}

	if r == '+' || r == '-' {
		l.next()
		if l.follow("inf") {
			return l.lexInf
		}
		if l.follow("nan") {
			return l.lexNan
		}
	}

	pointSeen := false
	expSeen := false
	digitSeen := false
	for {
		next := l.peek()
		if next == '.' {
			if pointSeen {
				return l.errorf("cannot have two dots in one float")
			}
			l.next()
			if !isDigit(l.peek()) {
				return l.errorf("float cannot end with a dot")
			}
			pointSeen = true
		} else if next == 'e' || next == 'E' {
			expSeen = true
			l.next()
			r := l.peek()
			if r == '+' || r == '-' {
				l.next()
			}
		} else if isDigit(next) {
			digitSeen = true
			l.next()
		} else if next == '_' {
			l.next()
		} else {
			break
		}
		if pointSeen && !digitSeen {
			return l.errorf("cannot start float with a dot")
		}
	}

	if !digitSeen {
		return l.errorf("no digit in that number")
	}
	if pointSeen || expSeen {
		l.emit(tokenFloat)
	} else {
		l.emit(tokenInteger)
	}
	return l.lexRvalue
}
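
// Examples accepted by lexNumber: "0xDEAD_BEEF", "0o755" and "0b1010"
// (prefixed integers), "+42" and "1_000" (decimal integers), "3.14", "1e6"
// and "6.626e-34" (floats). A leading sign followed by "inf" or "nan" is
// routed to lexInf/lexNan.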
       
func (l *tomlLexer) run() {
	for state := l.lexVoid; state != nil; {
		state = state()
	}
}

// Entry point
func lexToml(inputBytes []byte) []token {
	runes := bytes.Runes(inputBytes)
	l := &tomlLexer{
		input:         runes,
		tokens:        make([]token, 0, 256),
		line:          1,
		col:           1,
		endbufferLine: 1,
		endbufferCol:  1,
	}
	l.run()
	return l.tokens
}
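
// A minimal usage sketch (illustrative only; token and tokenType are defined
// elsewhere in this package):
//
//	for _, tok := range lexToml([]byte("answer = 42\n")) {
//		fmt.Println(tok.typ, tok.val)
//	}
//
// Lexing never returns an error directly: a failure is reported as a single
// tokenError token carrying the message built by errorf, after which the
// state machine stops.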