vendor/github.com/pelletier/go-toml/v2/unstable/scanner.go
changeset 265 05c40b36d3b2
parent 260 445e01aede7e
child 268 4dd196a4ee7c
equal deleted inserted replaced
264:8f478162d991 265:05c40b36d3b2
       
     1 package unstable
       
     2 
       
     3 import "github.com/pelletier/go-toml/v2/internal/characters"
       
     4 
       
     5 func scanFollows(b []byte, pattern string) bool {
       
     6 	n := len(pattern)
       
     7 
       
     8 	return len(b) >= n && string(b[:n]) == pattern
       
     9 }
       
    10 
       
    11 func scanFollowsMultilineBasicStringDelimiter(b []byte) bool {
       
    12 	return scanFollows(b, `"""`)
       
    13 }
       
    14 
       
    15 func scanFollowsMultilineLiteralStringDelimiter(b []byte) bool {
       
    16 	return scanFollows(b, `'''`)
       
    17 }
       
    18 
       
    19 func scanFollowsTrue(b []byte) bool {
       
    20 	return scanFollows(b, `true`)
       
    21 }
       
    22 
       
    23 func scanFollowsFalse(b []byte) bool {
       
    24 	return scanFollows(b, `false`)
       
    25 }
       
    26 
       
    27 func scanFollowsInf(b []byte) bool {
       
    28 	return scanFollows(b, `inf`)
       
    29 }
       
    30 
       
    31 func scanFollowsNan(b []byte) bool {
       
    32 	return scanFollows(b, `nan`)
       
    33 }
       
    34 
       
    35 func scanUnquotedKey(b []byte) ([]byte, []byte) {
       
    36 	// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
       
    37 	for i := 0; i < len(b); i++ {
       
    38 		if !isUnquotedKeyChar(b[i]) {
       
    39 			return b[:i], b[i:]
       
    40 		}
       
    41 	}
       
    42 
       
    43 	return b, b[len(b):]
       
    44 }
       
    45 
       
    46 func isUnquotedKeyChar(r byte) bool {
       
    47 	return (r >= 'A' && r <= 'Z') || (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '-' || r == '_'
       
    48 }
       
    49 
       
    50 func scanLiteralString(b []byte) ([]byte, []byte, error) {
       
    51 	// literal-string = apostrophe *literal-char apostrophe
       
    52 	// apostrophe = %x27 ; ' apostrophe
       
    53 	// literal-char = %x09 / %x20-26 / %x28-7E / non-ascii
       
    54 	for i := 1; i < len(b); {
       
    55 		switch b[i] {
       
    56 		case '\'':
       
    57 			return b[:i+1], b[i+1:], nil
       
    58 		case '\n', '\r':
       
    59 			return nil, nil, NewParserError(b[i:i+1], "literal strings cannot have new lines")
       
    60 		}
       
    61 		size := characters.Utf8ValidNext(b[i:])
       
    62 		if size == 0 {
       
    63 			return nil, nil, NewParserError(b[i:i+1], "invalid character")
       
    64 		}
       
    65 		i += size
       
    66 	}
       
    67 
       
    68 	return nil, nil, NewParserError(b[len(b):], "unterminated literal string")
       
    69 }
       
    70 
       
    71 func scanMultilineLiteralString(b []byte) ([]byte, []byte, error) {
       
    72 	// ml-literal-string = ml-literal-string-delim [ newline ] ml-literal-body
       
    73 	// ml-literal-string-delim
       
    74 	// ml-literal-string-delim = 3apostrophe
       
    75 	// ml-literal-body = *mll-content *( mll-quotes 1*mll-content ) [ mll-quotes ]
       
    76 	//
       
    77 	// mll-content = mll-char / newline
       
    78 	// mll-char = %x09 / %x20-26 / %x28-7E / non-ascii
       
    79 	// mll-quotes = 1*2apostrophe
       
    80 	for i := 3; i < len(b); {
       
    81 		switch b[i] {
       
    82 		case '\'':
       
    83 			if scanFollowsMultilineLiteralStringDelimiter(b[i:]) {
       
    84 				i += 3
       
    85 
       
    86 				// At that point we found 3 apostrophe, and i is the
       
    87 				// index of the byte after the third one. The scanner
       
    88 				// needs to be eager, because there can be an extra 2
       
    89 				// apostrophe that can be accepted at the end of the
       
    90 				// string.
       
    91 
       
    92 				if i >= len(b) || b[i] != '\'' {
       
    93 					return b[:i], b[i:], nil
       
    94 				}
       
    95 				i++
       
    96 
       
    97 				if i >= len(b) || b[i] != '\'' {
       
    98 					return b[:i], b[i:], nil
       
    99 				}
       
   100 				i++
       
   101 
       
   102 				if i < len(b) && b[i] == '\'' {
       
   103 					return nil, nil, NewParserError(b[i-3:i+1], "''' not allowed in multiline literal string")
       
   104 				}
       
   105 
       
   106 				return b[:i], b[i:], nil
       
   107 			}
       
   108 		case '\r':
       
   109 			if len(b) < i+2 {
       
   110 				return nil, nil, NewParserError(b[len(b):], `need a \n after \r`)
       
   111 			}
       
   112 			if b[i+1] != '\n' {
       
   113 				return nil, nil, NewParserError(b[i:i+2], `need a \n after \r`)
       
   114 			}
       
   115 			i += 2 // skip the \n
       
   116 			continue
       
   117 		}
       
   118 		size := characters.Utf8ValidNext(b[i:])
       
   119 		if size == 0 {
       
   120 			return nil, nil, NewParserError(b[i:i+1], "invalid character")
       
   121 		}
       
   122 		i += size
       
   123 	}
       
   124 
       
   125 	return nil, nil, NewParserError(b[len(b):], `multiline literal string not terminated by '''`)
       
   126 }
       
   127 
       
   128 func scanWindowsNewline(b []byte) ([]byte, []byte, error) {
       
   129 	const lenCRLF = 2
       
   130 	if len(b) < lenCRLF {
       
   131 		return nil, nil, NewParserError(b, "windows new line expected")
       
   132 	}
       
   133 
       
   134 	if b[1] != '\n' {
       
   135 		return nil, nil, NewParserError(b, `windows new line should be \r\n`)
       
   136 	}
       
   137 
       
   138 	return b[:lenCRLF], b[lenCRLF:], nil
       
   139 }
       
   140 
       
   141 func scanWhitespace(b []byte) ([]byte, []byte) {
       
   142 	for i := 0; i < len(b); i++ {
       
   143 		switch b[i] {
       
   144 		case ' ', '\t':
       
   145 			continue
       
   146 		default:
       
   147 			return b[:i], b[i:]
       
   148 		}
       
   149 	}
       
   150 
       
   151 	return b, b[len(b):]
       
   152 }
       
   153 
       
   154 //nolint:unparam
       
   155 func scanComment(b []byte) ([]byte, []byte, error) {
       
   156 	// comment-start-symbol = %x23 ; #
       
   157 	// non-ascii = %x80-D7FF / %xE000-10FFFF
       
   158 	// non-eol = %x09 / %x20-7F / non-ascii
       
   159 	//
       
   160 	// comment = comment-start-symbol *non-eol
       
   161 
       
   162 	for i := 1; i < len(b); {
       
   163 		if b[i] == '\n' {
       
   164 			return b[:i], b[i:], nil
       
   165 		}
       
   166 		if b[i] == '\r' {
       
   167 			if i+1 < len(b) && b[i+1] == '\n' {
       
   168 				return b[:i+1], b[i+1:], nil
       
   169 			}
       
   170 			return nil, nil, NewParserError(b[i:i+1], "invalid character in comment")
       
   171 		}
       
   172 		size := characters.Utf8ValidNext(b[i:])
       
   173 		if size == 0 {
       
   174 			return nil, nil, NewParserError(b[i:i+1], "invalid character in comment")
       
   175 		}
       
   176 
       
   177 		i += size
       
   178 	}
       
   179 
       
   180 	return b, b[len(b):], nil
       
   181 }
       
   182 
       
   183 func scanBasicString(b []byte) ([]byte, bool, []byte, error) {
       
   184 	// basic-string = quotation-mark *basic-char quotation-mark
       
   185 	// quotation-mark = %x22            ; "
       
   186 	// basic-char = basic-unescaped / escaped
       
   187 	// basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
       
   188 	// escaped = escape escape-seq-char
       
   189 	escaped := false
       
   190 	i := 1
       
   191 
       
   192 	for ; i < len(b); i++ {
       
   193 		switch b[i] {
       
   194 		case '"':
       
   195 			return b[:i+1], escaped, b[i+1:], nil
       
   196 		case '\n', '\r':
       
   197 			return nil, escaped, nil, NewParserError(b[i:i+1], "basic strings cannot have new lines")
       
   198 		case '\\':
       
   199 			if len(b) < i+2 {
       
   200 				return nil, escaped, nil, NewParserError(b[i:i+1], "need a character after \\")
       
   201 			}
       
   202 			escaped = true
       
   203 			i++ // skip the next character
       
   204 		}
       
   205 	}
       
   206 
       
   207 	return nil, escaped, nil, NewParserError(b[len(b):], `basic string not terminated by "`)
       
   208 }
       
   209 
       
   210 func scanMultilineBasicString(b []byte) ([]byte, bool, []byte, error) {
       
   211 	// ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
       
   212 	// ml-basic-string-delim
       
   213 	// ml-basic-string-delim = 3quotation-mark
       
   214 	// ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]
       
   215 	//
       
   216 	// mlb-content = mlb-char / newline / mlb-escaped-nl
       
   217 	// mlb-char = mlb-unescaped / escaped
       
   218 	// mlb-quotes = 1*2quotation-mark
       
   219 	// mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
       
   220 	// mlb-escaped-nl = escape ws newline *( wschar / newline )
       
   221 
       
   222 	escaped := false
       
   223 	i := 3
       
   224 
       
   225 	for ; i < len(b); i++ {
       
   226 		switch b[i] {
       
   227 		case '"':
       
   228 			if scanFollowsMultilineBasicStringDelimiter(b[i:]) {
       
   229 				i += 3
       
   230 
       
   231 				// At that point we found 3 apostrophe, and i is the
       
   232 				// index of the byte after the third one. The scanner
       
   233 				// needs to be eager, because there can be an extra 2
       
   234 				// apostrophe that can be accepted at the end of the
       
   235 				// string.
       
   236 
       
   237 				if i >= len(b) || b[i] != '"' {
       
   238 					return b[:i], escaped, b[i:], nil
       
   239 				}
       
   240 				i++
       
   241 
       
   242 				if i >= len(b) || b[i] != '"' {
       
   243 					return b[:i], escaped, b[i:], nil
       
   244 				}
       
   245 				i++
       
   246 
       
   247 				if i < len(b) && b[i] == '"' {
       
   248 					return nil, escaped, nil, NewParserError(b[i-3:i+1], `""" not allowed in multiline basic string`)
       
   249 				}
       
   250 
       
   251 				return b[:i], escaped, b[i:], nil
       
   252 			}
       
   253 		case '\\':
       
   254 			if len(b) < i+2 {
       
   255 				return nil, escaped, nil, NewParserError(b[len(b):], "need a character after \\")
       
   256 			}
       
   257 			escaped = true
       
   258 			i++ // skip the next character
       
   259 		case '\r':
       
   260 			if len(b) < i+2 {
       
   261 				return nil, escaped, nil, NewParserError(b[len(b):], `need a \n after \r`)
       
   262 			}
       
   263 			if b[i+1] != '\n' {
       
   264 				return nil, escaped, nil, NewParserError(b[i:i+2], `need a \n after \r`)
       
   265 			}
       
   266 			i++ // skip the \n
       
   267 		}
       
   268 	}
       
   269 
       
   270 	return nil, escaped, nil, NewParserError(b[len(b):], `multiline basic string not terminated by """`)
       
   271 }