|
1 package toml |
|
2 |
|
// scanFollows reports whether b begins with the bytes of pattern.
//
// It returns false when b is shorter than pattern, and true for an empty
// pattern regardless of b.
func scanFollows(b []byte, pattern string) bool {
	if len(b) < len(pattern) {
		return false
	}

	// Converting only for the comparison lets the compiler avoid a copy.
	return string(b[:len(pattern)]) == pattern
}
|
8 |
|
9 func scanFollowsMultilineBasicStringDelimiter(b []byte) bool { |
|
10 return scanFollows(b, `"""`) |
|
11 } |
|
12 |
|
13 func scanFollowsMultilineLiteralStringDelimiter(b []byte) bool { |
|
14 return scanFollows(b, `'''`) |
|
15 } |
|
16 |
|
17 func scanFollowsTrue(b []byte) bool { |
|
18 return scanFollows(b, `true`) |
|
19 } |
|
20 |
|
21 func scanFollowsFalse(b []byte) bool { |
|
22 return scanFollows(b, `false`) |
|
23 } |
|
24 |
|
25 func scanFollowsInf(b []byte) bool { |
|
26 return scanFollows(b, `inf`) |
|
27 } |
|
28 |
|
29 func scanFollowsNan(b []byte) bool { |
|
30 return scanFollows(b, `nan`) |
|
31 } |
|
32 |
|
33 func scanUnquotedKey(b []byte) ([]byte, []byte) { |
|
34 // unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _ |
|
35 for i := 0; i < len(b); i++ { |
|
36 if !isUnquotedKeyChar(b[i]) { |
|
37 return b[:i], b[i:] |
|
38 } |
|
39 } |
|
40 |
|
41 return b, b[len(b):] |
|
42 } |
|
43 |
|
// isUnquotedKeyChar reports whether r is an ASCII letter, an ASCII digit,
// a hyphen, or an underscore.
func isUnquotedKeyChar(r byte) bool {
	switch {
	case 'A' <= r && r <= 'Z':
		return true
	case 'a' <= r && r <= 'z':
		return true
	case '0' <= r && r <= '9':
		return true
	case r == '-', r == '_':
		return true
	default:
		return false
	}
}
|
47 |
|
48 func scanLiteralString(b []byte) ([]byte, []byte, error) { |
|
49 // literal-string = apostrophe *literal-char apostrophe |
|
50 // apostrophe = %x27 ; ' apostrophe |
|
51 // literal-char = %x09 / %x20-26 / %x28-7E / non-ascii |
|
52 for i := 1; i < len(b); { |
|
53 switch b[i] { |
|
54 case '\'': |
|
55 return b[:i+1], b[i+1:], nil |
|
56 case '\n', '\r': |
|
57 return nil, nil, newDecodeError(b[i:i+1], "literal strings cannot have new lines") |
|
58 } |
|
59 size := utf8ValidNext(b[i:]) |
|
60 if size == 0 { |
|
61 return nil, nil, newDecodeError(b[i:i+1], "invalid character") |
|
62 } |
|
63 i += size |
|
64 } |
|
65 |
|
66 return nil, nil, newDecodeError(b[len(b):], "unterminated literal string") |
|
67 } |
|
68 |
|
69 func scanMultilineLiteralString(b []byte) ([]byte, []byte, error) { |
|
70 // ml-literal-string = ml-literal-string-delim [ newline ] ml-literal-body |
|
71 // ml-literal-string-delim |
|
72 // ml-literal-string-delim = 3apostrophe |
|
73 // ml-literal-body = *mll-content *( mll-quotes 1*mll-content ) [ mll-quotes ] |
|
74 // |
|
75 // mll-content = mll-char / newline |
|
76 // mll-char = %x09 / %x20-26 / %x28-7E / non-ascii |
|
77 // mll-quotes = 1*2apostrophe |
|
78 for i := 3; i < len(b); { |
|
79 switch b[i] { |
|
80 case '\'': |
|
81 if scanFollowsMultilineLiteralStringDelimiter(b[i:]) { |
|
82 i += 3 |
|
83 |
|
84 // At that point we found 3 apostrophe, and i is the |
|
85 // index of the byte after the third one. The scanner |
|
86 // needs to be eager, because there can be an extra 2 |
|
87 // apostrophe that can be accepted at the end of the |
|
88 // string. |
|
89 |
|
90 if i >= len(b) || b[i] != '\'' { |
|
91 return b[:i], b[i:], nil |
|
92 } |
|
93 i++ |
|
94 |
|
95 if i >= len(b) || b[i] != '\'' { |
|
96 return b[:i], b[i:], nil |
|
97 } |
|
98 i++ |
|
99 |
|
100 if i < len(b) && b[i] == '\'' { |
|
101 return nil, nil, newDecodeError(b[i-3:i+1], "''' not allowed in multiline literal string") |
|
102 } |
|
103 |
|
104 return b[:i], b[i:], nil |
|
105 } |
|
106 case '\r': |
|
107 if len(b) < i+2 { |
|
108 return nil, nil, newDecodeError(b[len(b):], `need a \n after \r`) |
|
109 } |
|
110 if b[i+1] != '\n' { |
|
111 return nil, nil, newDecodeError(b[i:i+2], `need a \n after \r`) |
|
112 } |
|
113 i += 2 // skip the \n |
|
114 continue |
|
115 } |
|
116 size := utf8ValidNext(b[i:]) |
|
117 if size == 0 { |
|
118 return nil, nil, newDecodeError(b[i:i+1], "invalid character") |
|
119 } |
|
120 i += size |
|
121 } |
|
122 |
|
123 return nil, nil, newDecodeError(b[len(b):], `multiline literal string not terminated by '''`) |
|
124 } |
|
125 |
|
126 func scanWindowsNewline(b []byte) ([]byte, []byte, error) { |
|
127 const lenCRLF = 2 |
|
128 if len(b) < lenCRLF { |
|
129 return nil, nil, newDecodeError(b, "windows new line expected") |
|
130 } |
|
131 |
|
132 if b[1] != '\n' { |
|
133 return nil, nil, newDecodeError(b, `windows new line should be \r\n`) |
|
134 } |
|
135 |
|
136 return b[:lenCRLF], b[lenCRLF:], nil |
|
137 } |
|
138 |
|
// scanWhitespace splits b into its longest leading run of spaces and tabs
// and the remaining bytes. Either result may be empty.
func scanWhitespace(b []byte) ([]byte, []byte) {
	end := 0
	for end < len(b) && (b[end] == ' ' || b[end] == '\t') {
		end++
	}

	return b[:end], b[end:]
}
|
151 |
|
152 //nolint:unparam |
|
153 func scanComment(b []byte) ([]byte, []byte, error) { |
|
154 // comment-start-symbol = %x23 ; # |
|
155 // non-ascii = %x80-D7FF / %xE000-10FFFF |
|
156 // non-eol = %x09 / %x20-7F / non-ascii |
|
157 // |
|
158 // comment = comment-start-symbol *non-eol |
|
159 |
|
160 for i := 1; i < len(b); { |
|
161 if b[i] == '\n' { |
|
162 return b[:i], b[i:], nil |
|
163 } |
|
164 if b[i] == '\r' { |
|
165 if i+1 < len(b) && b[i+1] == '\n' { |
|
166 return b[:i+1], b[i+1:], nil |
|
167 } |
|
168 return nil, nil, newDecodeError(b[i:i+1], "invalid character in comment") |
|
169 } |
|
170 size := utf8ValidNext(b[i:]) |
|
171 if size == 0 { |
|
172 return nil, nil, newDecodeError(b[i:i+1], "invalid character in comment") |
|
173 } |
|
174 |
|
175 i += size |
|
176 } |
|
177 |
|
178 return b, b[len(b):], nil |
|
179 } |
|
180 |
|
181 func scanBasicString(b []byte) ([]byte, bool, []byte, error) { |
|
182 // basic-string = quotation-mark *basic-char quotation-mark |
|
183 // quotation-mark = %x22 ; " |
|
184 // basic-char = basic-unescaped / escaped |
|
185 // basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii |
|
186 // escaped = escape escape-seq-char |
|
187 escaped := false |
|
188 i := 1 |
|
189 |
|
190 for ; i < len(b); i++ { |
|
191 switch b[i] { |
|
192 case '"': |
|
193 return b[:i+1], escaped, b[i+1:], nil |
|
194 case '\n', '\r': |
|
195 return nil, escaped, nil, newDecodeError(b[i:i+1], "basic strings cannot have new lines") |
|
196 case '\\': |
|
197 if len(b) < i+2 { |
|
198 return nil, escaped, nil, newDecodeError(b[i:i+1], "need a character after \\") |
|
199 } |
|
200 escaped = true |
|
201 i++ // skip the next character |
|
202 } |
|
203 } |
|
204 |
|
205 return nil, escaped, nil, newDecodeError(b[len(b):], `basic string not terminated by "`) |
|
206 } |
|
207 |
|
208 func scanMultilineBasicString(b []byte) ([]byte, bool, []byte, error) { |
|
209 // ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body |
|
210 // ml-basic-string-delim |
|
211 // ml-basic-string-delim = 3quotation-mark |
|
212 // ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ] |
|
213 // |
|
214 // mlb-content = mlb-char / newline / mlb-escaped-nl |
|
215 // mlb-char = mlb-unescaped / escaped |
|
216 // mlb-quotes = 1*2quotation-mark |
|
217 // mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii |
|
218 // mlb-escaped-nl = escape ws newline *( wschar / newline ) |
|
219 |
|
220 escaped := false |
|
221 i := 3 |
|
222 |
|
223 for ; i < len(b); i++ { |
|
224 switch b[i] { |
|
225 case '"': |
|
226 if scanFollowsMultilineBasicStringDelimiter(b[i:]) { |
|
227 i += 3 |
|
228 |
|
229 // At that point we found 3 apostrophe, and i is the |
|
230 // index of the byte after the third one. The scanner |
|
231 // needs to be eager, because there can be an extra 2 |
|
232 // apostrophe that can be accepted at the end of the |
|
233 // string. |
|
234 |
|
235 if i >= len(b) || b[i] != '"' { |
|
236 return b[:i], escaped, b[i:], nil |
|
237 } |
|
238 i++ |
|
239 |
|
240 if i >= len(b) || b[i] != '"' { |
|
241 return b[:i], escaped, b[i:], nil |
|
242 } |
|
243 i++ |
|
244 |
|
245 if i < len(b) && b[i] == '"' { |
|
246 return nil, escaped, nil, newDecodeError(b[i-3:i+1], `""" not allowed in multiline basic string`) |
|
247 } |
|
248 |
|
249 return b[:i], escaped, b[i:], nil |
|
250 } |
|
251 case '\\': |
|
252 if len(b) < i+2 { |
|
253 return nil, escaped, nil, newDecodeError(b[len(b):], "need a character after \\") |
|
254 } |
|
255 escaped = true |
|
256 i++ // skip the next character |
|
257 case '\r': |
|
258 if len(b) < i+2 { |
|
259 return nil, escaped, nil, newDecodeError(b[len(b):], `need a \n after \r`) |
|
260 } |
|
261 if b[i+1] != '\n' { |
|
262 return nil, escaped, nil, newDecodeError(b[i:i+2], `need a \n after \r`) |
|
263 } |
|
264 i++ // skip the \n |
|
265 } |
|
266 } |
|
267 |
|
268 return nil, escaped, nil, newDecodeError(b[len(b):], `multiline basic string not terminated by """`) |
|
269 } |