vendor/github.com/pelletier/go-toml/v2/scanner.go
changeset 260 445e01aede7e
equal deleted inserted replaced
259:db4911b0c721 260:445e01aede7e
       
     1 package toml
       
     2 
       
     3 func scanFollows(b []byte, pattern string) bool {
       
     4 	n := len(pattern)
       
     5 
       
     6 	return len(b) >= n && string(b[:n]) == pattern
       
     7 }
       
     8 
       
     9 func scanFollowsMultilineBasicStringDelimiter(b []byte) bool {
       
    10 	return scanFollows(b, `"""`)
       
    11 }
       
    12 
       
    13 func scanFollowsMultilineLiteralStringDelimiter(b []byte) bool {
       
    14 	return scanFollows(b, `'''`)
       
    15 }
       
    16 
       
    17 func scanFollowsTrue(b []byte) bool {
       
    18 	return scanFollows(b, `true`)
       
    19 }
       
    20 
       
    21 func scanFollowsFalse(b []byte) bool {
       
    22 	return scanFollows(b, `false`)
       
    23 }
       
    24 
       
    25 func scanFollowsInf(b []byte) bool {
       
    26 	return scanFollows(b, `inf`)
       
    27 }
       
    28 
       
    29 func scanFollowsNan(b []byte) bool {
       
    30 	return scanFollows(b, `nan`)
       
    31 }
       
    32 
       
    33 func scanUnquotedKey(b []byte) ([]byte, []byte) {
       
    34 	// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
       
    35 	for i := 0; i < len(b); i++ {
       
    36 		if !isUnquotedKeyChar(b[i]) {
       
    37 			return b[:i], b[i:]
       
    38 		}
       
    39 	}
       
    40 
       
    41 	return b, b[len(b):]
       
    42 }
       
    43 
       
    44 func isUnquotedKeyChar(r byte) bool {
       
    45 	return (r >= 'A' && r <= 'Z') || (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '-' || r == '_'
       
    46 }
       
    47 
       
    48 func scanLiteralString(b []byte) ([]byte, []byte, error) {
       
    49 	// literal-string = apostrophe *literal-char apostrophe
       
    50 	// apostrophe = %x27 ; ' apostrophe
       
    51 	// literal-char = %x09 / %x20-26 / %x28-7E / non-ascii
       
    52 	for i := 1; i < len(b); {
       
    53 		switch b[i] {
       
    54 		case '\'':
       
    55 			return b[:i+1], b[i+1:], nil
       
    56 		case '\n', '\r':
       
    57 			return nil, nil, newDecodeError(b[i:i+1], "literal strings cannot have new lines")
       
    58 		}
       
    59 		size := utf8ValidNext(b[i:])
       
    60 		if size == 0 {
       
    61 			return nil, nil, newDecodeError(b[i:i+1], "invalid character")
       
    62 		}
       
    63 		i += size
       
    64 	}
       
    65 
       
    66 	return nil, nil, newDecodeError(b[len(b):], "unterminated literal string")
       
    67 }
       
    68 
       
    69 func scanMultilineLiteralString(b []byte) ([]byte, []byte, error) {
       
    70 	// ml-literal-string = ml-literal-string-delim [ newline ] ml-literal-body
       
    71 	// ml-literal-string-delim
       
    72 	// ml-literal-string-delim = 3apostrophe
       
    73 	// ml-literal-body = *mll-content *( mll-quotes 1*mll-content ) [ mll-quotes ]
       
    74 	//
       
    75 	// mll-content = mll-char / newline
       
    76 	// mll-char = %x09 / %x20-26 / %x28-7E / non-ascii
       
    77 	// mll-quotes = 1*2apostrophe
       
    78 	for i := 3; i < len(b); {
       
    79 		switch b[i] {
       
    80 		case '\'':
       
    81 			if scanFollowsMultilineLiteralStringDelimiter(b[i:]) {
       
    82 				i += 3
       
    83 
       
    84 				// At that point we found 3 apostrophe, and i is the
       
    85 				// index of the byte after the third one. The scanner
       
    86 				// needs to be eager, because there can be an extra 2
       
    87 				// apostrophe that can be accepted at the end of the
       
    88 				// string.
       
    89 
       
    90 				if i >= len(b) || b[i] != '\'' {
       
    91 					return b[:i], b[i:], nil
       
    92 				}
       
    93 				i++
       
    94 
       
    95 				if i >= len(b) || b[i] != '\'' {
       
    96 					return b[:i], b[i:], nil
       
    97 				}
       
    98 				i++
       
    99 
       
   100 				if i < len(b) && b[i] == '\'' {
       
   101 					return nil, nil, newDecodeError(b[i-3:i+1], "''' not allowed in multiline literal string")
       
   102 				}
       
   103 
       
   104 				return b[:i], b[i:], nil
       
   105 			}
       
   106 		case '\r':
       
   107 			if len(b) < i+2 {
       
   108 				return nil, nil, newDecodeError(b[len(b):], `need a \n after \r`)
       
   109 			}
       
   110 			if b[i+1] != '\n' {
       
   111 				return nil, nil, newDecodeError(b[i:i+2], `need a \n after \r`)
       
   112 			}
       
   113 			i += 2 // skip the \n
       
   114 			continue
       
   115 		}
       
   116 		size := utf8ValidNext(b[i:])
       
   117 		if size == 0 {
       
   118 			return nil, nil, newDecodeError(b[i:i+1], "invalid character")
       
   119 		}
       
   120 		i += size
       
   121 	}
       
   122 
       
   123 	return nil, nil, newDecodeError(b[len(b):], `multiline literal string not terminated by '''`)
       
   124 }
       
   125 
       
   126 func scanWindowsNewline(b []byte) ([]byte, []byte, error) {
       
   127 	const lenCRLF = 2
       
   128 	if len(b) < lenCRLF {
       
   129 		return nil, nil, newDecodeError(b, "windows new line expected")
       
   130 	}
       
   131 
       
   132 	if b[1] != '\n' {
       
   133 		return nil, nil, newDecodeError(b, `windows new line should be \r\n`)
       
   134 	}
       
   135 
       
   136 	return b[:lenCRLF], b[lenCRLF:], nil
       
   137 }
       
   138 
       
   139 func scanWhitespace(b []byte) ([]byte, []byte) {
       
   140 	for i := 0; i < len(b); i++ {
       
   141 		switch b[i] {
       
   142 		case ' ', '\t':
       
   143 			continue
       
   144 		default:
       
   145 			return b[:i], b[i:]
       
   146 		}
       
   147 	}
       
   148 
       
   149 	return b, b[len(b):]
       
   150 }
       
   151 
       
   152 //nolint:unparam
       
   153 func scanComment(b []byte) ([]byte, []byte, error) {
       
   154 	// comment-start-symbol = %x23 ; #
       
   155 	// non-ascii = %x80-D7FF / %xE000-10FFFF
       
   156 	// non-eol = %x09 / %x20-7F / non-ascii
       
   157 	//
       
   158 	// comment = comment-start-symbol *non-eol
       
   159 
       
   160 	for i := 1; i < len(b); {
       
   161 		if b[i] == '\n' {
       
   162 			return b[:i], b[i:], nil
       
   163 		}
       
   164 		if b[i] == '\r' {
       
   165 			if i+1 < len(b) && b[i+1] == '\n' {
       
   166 				return b[:i+1], b[i+1:], nil
       
   167 			}
       
   168 			return nil, nil, newDecodeError(b[i:i+1], "invalid character in comment")
       
   169 		}
       
   170 		size := utf8ValidNext(b[i:])
       
   171 		if size == 0 {
       
   172 			return nil, nil, newDecodeError(b[i:i+1], "invalid character in comment")
       
   173 		}
       
   174 
       
   175 		i += size
       
   176 	}
       
   177 
       
   178 	return b, b[len(b):], nil
       
   179 }
       
   180 
       
   181 func scanBasicString(b []byte) ([]byte, bool, []byte, error) {
       
   182 	// basic-string = quotation-mark *basic-char quotation-mark
       
   183 	// quotation-mark = %x22            ; "
       
   184 	// basic-char = basic-unescaped / escaped
       
   185 	// basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
       
   186 	// escaped = escape escape-seq-char
       
   187 	escaped := false
       
   188 	i := 1
       
   189 
       
   190 	for ; i < len(b); i++ {
       
   191 		switch b[i] {
       
   192 		case '"':
       
   193 			return b[:i+1], escaped, b[i+1:], nil
       
   194 		case '\n', '\r':
       
   195 			return nil, escaped, nil, newDecodeError(b[i:i+1], "basic strings cannot have new lines")
       
   196 		case '\\':
       
   197 			if len(b) < i+2 {
       
   198 				return nil, escaped, nil, newDecodeError(b[i:i+1], "need a character after \\")
       
   199 			}
       
   200 			escaped = true
       
   201 			i++ // skip the next character
       
   202 		}
       
   203 	}
       
   204 
       
   205 	return nil, escaped, nil, newDecodeError(b[len(b):], `basic string not terminated by "`)
       
   206 }
       
   207 
       
   208 func scanMultilineBasicString(b []byte) ([]byte, bool, []byte, error) {
       
   209 	// ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
       
   210 	// ml-basic-string-delim
       
   211 	// ml-basic-string-delim = 3quotation-mark
       
   212 	// ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]
       
   213 	//
       
   214 	// mlb-content = mlb-char / newline / mlb-escaped-nl
       
   215 	// mlb-char = mlb-unescaped / escaped
       
   216 	// mlb-quotes = 1*2quotation-mark
       
   217 	// mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
       
   218 	// mlb-escaped-nl = escape ws newline *( wschar / newline )
       
   219 
       
   220 	escaped := false
       
   221 	i := 3
       
   222 
       
   223 	for ; i < len(b); i++ {
       
   224 		switch b[i] {
       
   225 		case '"':
       
   226 			if scanFollowsMultilineBasicStringDelimiter(b[i:]) {
       
   227 				i += 3
       
   228 
       
   229 				// At that point we found 3 apostrophe, and i is the
       
   230 				// index of the byte after the third one. The scanner
       
   231 				// needs to be eager, because there can be an extra 2
       
   232 				// apostrophe that can be accepted at the end of the
       
   233 				// string.
       
   234 
       
   235 				if i >= len(b) || b[i] != '"' {
       
   236 					return b[:i], escaped, b[i:], nil
       
   237 				}
       
   238 				i++
       
   239 
       
   240 				if i >= len(b) || b[i] != '"' {
       
   241 					return b[:i], escaped, b[i:], nil
       
   242 				}
       
   243 				i++
       
   244 
       
   245 				if i < len(b) && b[i] == '"' {
       
   246 					return nil, escaped, nil, newDecodeError(b[i-3:i+1], `""" not allowed in multiline basic string`)
       
   247 				}
       
   248 
       
   249 				return b[:i], escaped, b[i:], nil
       
   250 			}
       
   251 		case '\\':
       
   252 			if len(b) < i+2 {
       
   253 				return nil, escaped, nil, newDecodeError(b[len(b):], "need a character after \\")
       
   254 			}
       
   255 			escaped = true
       
   256 			i++ // skip the next character
       
   257 		case '\r':
       
   258 			if len(b) < i+2 {
       
   259 				return nil, escaped, nil, newDecodeError(b[len(b):], `need a \n after \r`)
       
   260 			}
       
   261 			if b[i+1] != '\n' {
       
   262 				return nil, escaped, nil, newDecodeError(b[i:i+2], `need a \n after \r`)
       
   263 			}
       
   264 			i++ // skip the \n
       
   265 		}
       
   266 	}
       
   267 
       
   268 	return nil, escaped, nil, newDecodeError(b[len(b):], `multiline basic string not terminated by """`)
       
   269 }