260
|
1 |
package toml |
|
2 |
|
|
3 |
// scanFollows reports whether b begins with the bytes of pattern.
//
// It returns false when b is shorter than pattern. An empty pattern matches
// any input, including a nil or empty one.
func scanFollows(b []byte, pattern string) bool {
	if len(b) < len(pattern) {
		return false
	}

	// Only the leading len(pattern) bytes take part in the comparison;
	// whatever follows them in b is ignored.
	return string(b[:len(pattern)]) == pattern
}
|
8 |
|
|
9 |
func scanFollowsMultilineBasicStringDelimiter(b []byte) bool { |
|
10 |
return scanFollows(b, `"""`) |
|
11 |
} |
|
12 |
|
|
13 |
func scanFollowsMultilineLiteralStringDelimiter(b []byte) bool { |
|
14 |
return scanFollows(b, `'''`) |
|
15 |
} |
|
16 |
|
|
17 |
func scanFollowsTrue(b []byte) bool { |
|
18 |
return scanFollows(b, `true`) |
|
19 |
} |
|
20 |
|
|
21 |
func scanFollowsFalse(b []byte) bool { |
|
22 |
return scanFollows(b, `false`) |
|
23 |
} |
|
24 |
|
|
25 |
func scanFollowsInf(b []byte) bool { |
|
26 |
return scanFollows(b, `inf`) |
|
27 |
} |
|
28 |
|
|
29 |
func scanFollowsNan(b []byte) bool { |
|
30 |
return scanFollows(b, `nan`) |
|
31 |
} |
|
32 |
|
|
33 |
func scanUnquotedKey(b []byte) ([]byte, []byte) { |
|
34 |
// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _ |
|
35 |
for i := 0; i < len(b); i++ { |
|
36 |
if !isUnquotedKeyChar(b[i]) { |
|
37 |
return b[:i], b[i:] |
|
38 |
} |
|
39 |
} |
|
40 |
|
|
41 |
return b, b[len(b):] |
|
42 |
} |
|
43 |
|
|
44 |
// isUnquotedKeyChar reports whether r is an ASCII letter, an ASCII digit, a
// hyphen, or an underscore.
func isUnquotedKeyChar(r byte) bool {
	switch {
	case 'A' <= r && r <= 'Z':
		return true
	case 'a' <= r && r <= 'z':
		return true
	case '0' <= r && r <= '9':
		return true
	default:
		return r == '-' || r == '_'
	}
}
|
47 |
|
|
48 |
func scanLiteralString(b []byte) ([]byte, []byte, error) { |
|
49 |
// literal-string = apostrophe *literal-char apostrophe |
|
50 |
// apostrophe = %x27 ; ' apostrophe |
|
51 |
// literal-char = %x09 / %x20-26 / %x28-7E / non-ascii |
|
52 |
for i := 1; i < len(b); { |
|
53 |
switch b[i] { |
|
54 |
case '\'': |
|
55 |
return b[:i+1], b[i+1:], nil |
|
56 |
case '\n', '\r': |
|
57 |
return nil, nil, newDecodeError(b[i:i+1], "literal strings cannot have new lines") |
|
58 |
} |
|
59 |
size := utf8ValidNext(b[i:]) |
|
60 |
if size == 0 { |
|
61 |
return nil, nil, newDecodeError(b[i:i+1], "invalid character") |
|
62 |
} |
|
63 |
i += size |
|
64 |
} |
|
65 |
|
|
66 |
return nil, nil, newDecodeError(b[len(b):], "unterminated literal string") |
|
67 |
} |
|
68 |
|
|
69 |
func scanMultilineLiteralString(b []byte) ([]byte, []byte, error) { |
|
70 |
// ml-literal-string = ml-literal-string-delim [ newline ] ml-literal-body |
|
71 |
// ml-literal-string-delim |
|
72 |
// ml-literal-string-delim = 3apostrophe |
|
73 |
// ml-literal-body = *mll-content *( mll-quotes 1*mll-content ) [ mll-quotes ] |
|
74 |
// |
|
75 |
// mll-content = mll-char / newline |
|
76 |
// mll-char = %x09 / %x20-26 / %x28-7E / non-ascii |
|
77 |
// mll-quotes = 1*2apostrophe |
|
78 |
for i := 3; i < len(b); { |
|
79 |
switch b[i] { |
|
80 |
case '\'': |
|
81 |
if scanFollowsMultilineLiteralStringDelimiter(b[i:]) { |
|
82 |
i += 3 |
|
83 |
|
|
84 |
// At that point we found 3 apostrophe, and i is the |
|
85 |
// index of the byte after the third one. The scanner |
|
86 |
// needs to be eager, because there can be an extra 2 |
|
87 |
// apostrophe that can be accepted at the end of the |
|
88 |
// string. |
|
89 |
|
|
90 |
if i >= len(b) || b[i] != '\'' { |
|
91 |
return b[:i], b[i:], nil |
|
92 |
} |
|
93 |
i++ |
|
94 |
|
|
95 |
if i >= len(b) || b[i] != '\'' { |
|
96 |
return b[:i], b[i:], nil |
|
97 |
} |
|
98 |
i++ |
|
99 |
|
|
100 |
if i < len(b) && b[i] == '\'' { |
|
101 |
return nil, nil, newDecodeError(b[i-3:i+1], "''' not allowed in multiline literal string") |
|
102 |
} |
|
103 |
|
|
104 |
return b[:i], b[i:], nil |
|
105 |
} |
|
106 |
case '\r': |
|
107 |
if len(b) < i+2 { |
|
108 |
return nil, nil, newDecodeError(b[len(b):], `need a \n after \r`) |
|
109 |
} |
|
110 |
if b[i+1] != '\n' { |
|
111 |
return nil, nil, newDecodeError(b[i:i+2], `need a \n after \r`) |
|
112 |
} |
|
113 |
i += 2 // skip the \n |
|
114 |
continue |
|
115 |
} |
|
116 |
size := utf8ValidNext(b[i:]) |
|
117 |
if size == 0 { |
|
118 |
return nil, nil, newDecodeError(b[i:i+1], "invalid character") |
|
119 |
} |
|
120 |
i += size |
|
121 |
} |
|
122 |
|
|
123 |
return nil, nil, newDecodeError(b[len(b):], `multiline literal string not terminated by '''`) |
|
124 |
} |
|
125 |
|
|
126 |
func scanWindowsNewline(b []byte) ([]byte, []byte, error) { |
|
127 |
const lenCRLF = 2 |
|
128 |
if len(b) < lenCRLF { |
|
129 |
return nil, nil, newDecodeError(b, "windows new line expected") |
|
130 |
} |
|
131 |
|
|
132 |
if b[1] != '\n' { |
|
133 |
return nil, nil, newDecodeError(b, `windows new line should be \r\n`) |
|
134 |
} |
|
135 |
|
|
136 |
return b[:lenCRLF], b[lenCRLF:], nil |
|
137 |
} |
|
138 |
|
|
139 |
// scanWhitespace splits b after its leading run of spaces and tabs.
//
// The first result is that run (possibly empty) and the second is the rest of
// b. Newlines and every other byte end the run.
func scanWhitespace(b []byte) ([]byte, []byte) {
	n := 0
	for n < len(b) && (b[n] == ' ' || b[n] == '\t') {
		n++
	}

	return b[:n], b[n:]
}
|
151 |
|
|
152 |
//nolint:unparam |
|
153 |
func scanComment(b []byte) ([]byte, []byte, error) { |
|
154 |
// comment-start-symbol = %x23 ; # |
|
155 |
// non-ascii = %x80-D7FF / %xE000-10FFFF |
|
156 |
// non-eol = %x09 / %x20-7F / non-ascii |
|
157 |
// |
|
158 |
// comment = comment-start-symbol *non-eol |
|
159 |
|
|
160 |
for i := 1; i < len(b); { |
|
161 |
if b[i] == '\n' { |
|
162 |
return b[:i], b[i:], nil |
|
163 |
} |
|
164 |
if b[i] == '\r' { |
|
165 |
if i+1 < len(b) && b[i+1] == '\n' { |
|
166 |
return b[:i+1], b[i+1:], nil |
|
167 |
} |
|
168 |
return nil, nil, newDecodeError(b[i:i+1], "invalid character in comment") |
|
169 |
} |
|
170 |
size := utf8ValidNext(b[i:]) |
|
171 |
if size == 0 { |
|
172 |
return nil, nil, newDecodeError(b[i:i+1], "invalid character in comment") |
|
173 |
} |
|
174 |
|
|
175 |
i += size |
|
176 |
} |
|
177 |
|
|
178 |
return b, b[len(b):], nil |
|
179 |
} |
|
180 |
|
|
181 |
func scanBasicString(b []byte) ([]byte, bool, []byte, error) { |
|
182 |
// basic-string = quotation-mark *basic-char quotation-mark |
|
183 |
// quotation-mark = %x22 ; " |
|
184 |
// basic-char = basic-unescaped / escaped |
|
185 |
// basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii |
|
186 |
// escaped = escape escape-seq-char |
|
187 |
escaped := false |
|
188 |
i := 1 |
|
189 |
|
|
190 |
for ; i < len(b); i++ { |
|
191 |
switch b[i] { |
|
192 |
case '"': |
|
193 |
return b[:i+1], escaped, b[i+1:], nil |
|
194 |
case '\n', '\r': |
|
195 |
return nil, escaped, nil, newDecodeError(b[i:i+1], "basic strings cannot have new lines") |
|
196 |
case '\\': |
|
197 |
if len(b) < i+2 { |
|
198 |
return nil, escaped, nil, newDecodeError(b[i:i+1], "need a character after \\") |
|
199 |
} |
|
200 |
escaped = true |
|
201 |
i++ // skip the next character |
|
202 |
} |
|
203 |
} |
|
204 |
|
|
205 |
return nil, escaped, nil, newDecodeError(b[len(b):], `basic string not terminated by "`) |
|
206 |
} |
|
207 |
|
|
208 |
func scanMultilineBasicString(b []byte) ([]byte, bool, []byte, error) { |
|
209 |
// ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body |
|
210 |
// ml-basic-string-delim |
|
211 |
// ml-basic-string-delim = 3quotation-mark |
|
212 |
// ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ] |
|
213 |
// |
|
214 |
// mlb-content = mlb-char / newline / mlb-escaped-nl |
|
215 |
// mlb-char = mlb-unescaped / escaped |
|
216 |
// mlb-quotes = 1*2quotation-mark |
|
217 |
// mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii |
|
218 |
// mlb-escaped-nl = escape ws newline *( wschar / newline ) |
|
219 |
|
|
220 |
escaped := false |
|
221 |
i := 3 |
|
222 |
|
|
223 |
for ; i < len(b); i++ { |
|
224 |
switch b[i] { |
|
225 |
case '"': |
|
226 |
if scanFollowsMultilineBasicStringDelimiter(b[i:]) { |
|
227 |
i += 3 |
|
228 |
|
|
229 |
// At that point we found 3 apostrophe, and i is the |
|
230 |
// index of the byte after the third one. The scanner |
|
231 |
// needs to be eager, because there can be an extra 2 |
|
232 |
// apostrophe that can be accepted at the end of the |
|
233 |
// string. |
|
234 |
|
|
235 |
if i >= len(b) || b[i] != '"' { |
|
236 |
return b[:i], escaped, b[i:], nil |
|
237 |
} |
|
238 |
i++ |
|
239 |
|
|
240 |
if i >= len(b) || b[i] != '"' { |
|
241 |
return b[:i], escaped, b[i:], nil |
|
242 |
} |
|
243 |
i++ |
|
244 |
|
|
245 |
if i < len(b) && b[i] == '"' { |
|
246 |
return nil, escaped, nil, newDecodeError(b[i-3:i+1], `""" not allowed in multiline basic string`) |
|
247 |
} |
|
248 |
|
|
249 |
return b[:i], escaped, b[i:], nil |
|
250 |
} |
|
251 |
case '\\': |
|
252 |
if len(b) < i+2 { |
|
253 |
return nil, escaped, nil, newDecodeError(b[len(b):], "need a character after \\") |
|
254 |
} |
|
255 |
escaped = true |
|
256 |
i++ // skip the next character |
|
257 |
case '\r': |
|
258 |
if len(b) < i+2 { |
|
259 |
return nil, escaped, nil, newDecodeError(b[len(b):], `need a \n after \r`) |
|
260 |
} |
|
261 |
if b[i+1] != '\n' { |
|
262 |
return nil, escaped, nil, newDecodeError(b[i:i+2], `need a \n after \r`) |
|
263 |
} |
|
264 |
i++ // skip the \n |
|
265 |
} |
|
266 |
} |
|
267 |
|
|
268 |
return nil, escaped, nil, newDecodeError(b[len(b):], `multiline basic string not terminated by """`) |
|
269 |
} |