|
1 package unstable |
|
2 |
|
3 import "github.com/pelletier/go-toml/v2/internal/characters" |
|
4 |
|
// scanFollows reports whether b begins with the bytes of pattern. An empty
// pattern always matches; a b shorter than pattern never does.
func scanFollows(b []byte, pattern string) bool {
	if len(b) < len(pattern) {
		return false
	}

	// Comparing a converted byte slice directly against a string keeps the
	// conversion inside the comparison expression.
	prefix := b[:len(pattern)]

	return string(prefix) == pattern
}
|
10 |
|
11 func scanFollowsMultilineBasicStringDelimiter(b []byte) bool { |
|
12 return scanFollows(b, `"""`) |
|
13 } |
|
14 |
|
15 func scanFollowsMultilineLiteralStringDelimiter(b []byte) bool { |
|
16 return scanFollows(b, `'''`) |
|
17 } |
|
18 |
|
19 func scanFollowsTrue(b []byte) bool { |
|
20 return scanFollows(b, `true`) |
|
21 } |
|
22 |
|
23 func scanFollowsFalse(b []byte) bool { |
|
24 return scanFollows(b, `false`) |
|
25 } |
|
26 |
|
27 func scanFollowsInf(b []byte) bool { |
|
28 return scanFollows(b, `inf`) |
|
29 } |
|
30 |
|
31 func scanFollowsNan(b []byte) bool { |
|
32 return scanFollows(b, `nan`) |
|
33 } |
|
34 |
|
35 func scanUnquotedKey(b []byte) ([]byte, []byte) { |
|
36 // unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _ |
|
37 for i := 0; i < len(b); i++ { |
|
38 if !isUnquotedKeyChar(b[i]) { |
|
39 return b[:i], b[i:] |
|
40 } |
|
41 } |
|
42 |
|
43 return b, b[len(b):] |
|
44 } |
|
45 |
|
// isUnquotedKeyChar reports whether r is an ASCII letter, an ASCII digit, a
// hyphen, or an underscore.
func isUnquotedKeyChar(r byte) bool {
	switch {
	case 'A' <= r && r <= 'Z':
		return true
	case 'a' <= r && r <= 'z':
		return true
	case '0' <= r && r <= '9':
		return true
	default:
		return r == '-' || r == '_'
	}
}
|
49 |
|
50 func scanLiteralString(b []byte) ([]byte, []byte, error) { |
|
51 // literal-string = apostrophe *literal-char apostrophe |
|
52 // apostrophe = %x27 ; ' apostrophe |
|
53 // literal-char = %x09 / %x20-26 / %x28-7E / non-ascii |
|
54 for i := 1; i < len(b); { |
|
55 switch b[i] { |
|
56 case '\'': |
|
57 return b[:i+1], b[i+1:], nil |
|
58 case '\n', '\r': |
|
59 return nil, nil, NewParserError(b[i:i+1], "literal strings cannot have new lines") |
|
60 } |
|
61 size := characters.Utf8ValidNext(b[i:]) |
|
62 if size == 0 { |
|
63 return nil, nil, NewParserError(b[i:i+1], "invalid character") |
|
64 } |
|
65 i += size |
|
66 } |
|
67 |
|
68 return nil, nil, NewParserError(b[len(b):], "unterminated literal string") |
|
69 } |
|
70 |
|
71 func scanMultilineLiteralString(b []byte) ([]byte, []byte, error) { |
|
72 // ml-literal-string = ml-literal-string-delim [ newline ] ml-literal-body |
|
73 // ml-literal-string-delim |
|
74 // ml-literal-string-delim = 3apostrophe |
|
75 // ml-literal-body = *mll-content *( mll-quotes 1*mll-content ) [ mll-quotes ] |
|
76 // |
|
77 // mll-content = mll-char / newline |
|
78 // mll-char = %x09 / %x20-26 / %x28-7E / non-ascii |
|
79 // mll-quotes = 1*2apostrophe |
|
80 for i := 3; i < len(b); { |
|
81 switch b[i] { |
|
82 case '\'': |
|
83 if scanFollowsMultilineLiteralStringDelimiter(b[i:]) { |
|
84 i += 3 |
|
85 |
|
86 // At that point we found 3 apostrophe, and i is the |
|
87 // index of the byte after the third one. The scanner |
|
88 // needs to be eager, because there can be an extra 2 |
|
89 // apostrophe that can be accepted at the end of the |
|
90 // string. |
|
91 |
|
92 if i >= len(b) || b[i] != '\'' { |
|
93 return b[:i], b[i:], nil |
|
94 } |
|
95 i++ |
|
96 |
|
97 if i >= len(b) || b[i] != '\'' { |
|
98 return b[:i], b[i:], nil |
|
99 } |
|
100 i++ |
|
101 |
|
102 if i < len(b) && b[i] == '\'' { |
|
103 return nil, nil, NewParserError(b[i-3:i+1], "''' not allowed in multiline literal string") |
|
104 } |
|
105 |
|
106 return b[:i], b[i:], nil |
|
107 } |
|
108 case '\r': |
|
109 if len(b) < i+2 { |
|
110 return nil, nil, NewParserError(b[len(b):], `need a \n after \r`) |
|
111 } |
|
112 if b[i+1] != '\n' { |
|
113 return nil, nil, NewParserError(b[i:i+2], `need a \n after \r`) |
|
114 } |
|
115 i += 2 // skip the \n |
|
116 continue |
|
117 } |
|
118 size := characters.Utf8ValidNext(b[i:]) |
|
119 if size == 0 { |
|
120 return nil, nil, NewParserError(b[i:i+1], "invalid character") |
|
121 } |
|
122 i += size |
|
123 } |
|
124 |
|
125 return nil, nil, NewParserError(b[len(b):], `multiline literal string not terminated by '''`) |
|
126 } |
|
127 |
|
128 func scanWindowsNewline(b []byte) ([]byte, []byte, error) { |
|
129 const lenCRLF = 2 |
|
130 if len(b) < lenCRLF { |
|
131 return nil, nil, NewParserError(b, "windows new line expected") |
|
132 } |
|
133 |
|
134 if b[1] != '\n' { |
|
135 return nil, nil, NewParserError(b, `windows new line should be \r\n`) |
|
136 } |
|
137 |
|
138 return b[:lenCRLF], b[lenCRLF:], nil |
|
139 } |
|
140 |
|
// scanWhitespace splits b after its leading run of spaces and tabs. It
// returns that run followed by the rest of b; either part may be empty.
func scanWhitespace(b []byte) ([]byte, []byte) {
	end := 0
	for end < len(b) && (b[end] == ' ' || b[end] == '\t') {
		end++
	}

	return b[:end], b[end:]
}
|
153 |
|
154 //nolint:unparam |
|
155 func scanComment(b []byte) ([]byte, []byte, error) { |
|
156 // comment-start-symbol = %x23 ; # |
|
157 // non-ascii = %x80-D7FF / %xE000-10FFFF |
|
158 // non-eol = %x09 / %x20-7F / non-ascii |
|
159 // |
|
160 // comment = comment-start-symbol *non-eol |
|
161 |
|
162 for i := 1; i < len(b); { |
|
163 if b[i] == '\n' { |
|
164 return b[:i], b[i:], nil |
|
165 } |
|
166 if b[i] == '\r' { |
|
167 if i+1 < len(b) && b[i+1] == '\n' { |
|
168 return b[:i+1], b[i+1:], nil |
|
169 } |
|
170 return nil, nil, NewParserError(b[i:i+1], "invalid character in comment") |
|
171 } |
|
172 size := characters.Utf8ValidNext(b[i:]) |
|
173 if size == 0 { |
|
174 return nil, nil, NewParserError(b[i:i+1], "invalid character in comment") |
|
175 } |
|
176 |
|
177 i += size |
|
178 } |
|
179 |
|
180 return b, b[len(b):], nil |
|
181 } |
|
182 |
|
183 func scanBasicString(b []byte) ([]byte, bool, []byte, error) { |
|
184 // basic-string = quotation-mark *basic-char quotation-mark |
|
185 // quotation-mark = %x22 ; " |
|
186 // basic-char = basic-unescaped / escaped |
|
187 // basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii |
|
188 // escaped = escape escape-seq-char |
|
189 escaped := false |
|
190 i := 1 |
|
191 |
|
192 for ; i < len(b); i++ { |
|
193 switch b[i] { |
|
194 case '"': |
|
195 return b[:i+1], escaped, b[i+1:], nil |
|
196 case '\n', '\r': |
|
197 return nil, escaped, nil, NewParserError(b[i:i+1], "basic strings cannot have new lines") |
|
198 case '\\': |
|
199 if len(b) < i+2 { |
|
200 return nil, escaped, nil, NewParserError(b[i:i+1], "need a character after \\") |
|
201 } |
|
202 escaped = true |
|
203 i++ // skip the next character |
|
204 } |
|
205 } |
|
206 |
|
207 return nil, escaped, nil, NewParserError(b[len(b):], `basic string not terminated by "`) |
|
208 } |
|
209 |
|
210 func scanMultilineBasicString(b []byte) ([]byte, bool, []byte, error) { |
|
211 // ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body |
|
212 // ml-basic-string-delim |
|
213 // ml-basic-string-delim = 3quotation-mark |
|
214 // ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ] |
|
215 // |
|
216 // mlb-content = mlb-char / newline / mlb-escaped-nl |
|
217 // mlb-char = mlb-unescaped / escaped |
|
218 // mlb-quotes = 1*2quotation-mark |
|
219 // mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii |
|
220 // mlb-escaped-nl = escape ws newline *( wschar / newline ) |
|
221 |
|
222 escaped := false |
|
223 i := 3 |
|
224 |
|
225 for ; i < len(b); i++ { |
|
226 switch b[i] { |
|
227 case '"': |
|
228 if scanFollowsMultilineBasicStringDelimiter(b[i:]) { |
|
229 i += 3 |
|
230 |
|
231 // At that point we found 3 apostrophe, and i is the |
|
232 // index of the byte after the third one. The scanner |
|
233 // needs to be eager, because there can be an extra 2 |
|
234 // apostrophe that can be accepted at the end of the |
|
235 // string. |
|
236 |
|
237 if i >= len(b) || b[i] != '"' { |
|
238 return b[:i], escaped, b[i:], nil |
|
239 } |
|
240 i++ |
|
241 |
|
242 if i >= len(b) || b[i] != '"' { |
|
243 return b[:i], escaped, b[i:], nil |
|
244 } |
|
245 i++ |
|
246 |
|
247 if i < len(b) && b[i] == '"' { |
|
248 return nil, escaped, nil, NewParserError(b[i-3:i+1], `""" not allowed in multiline basic string`) |
|
249 } |
|
250 |
|
251 return b[:i], escaped, b[i:], nil |
|
252 } |
|
253 case '\\': |
|
254 if len(b) < i+2 { |
|
255 return nil, escaped, nil, NewParserError(b[len(b):], "need a character after \\") |
|
256 } |
|
257 escaped = true |
|
258 i++ // skip the next character |
|
259 case '\r': |
|
260 if len(b) < i+2 { |
|
261 return nil, escaped, nil, NewParserError(b[len(b):], `need a \n after \r`) |
|
262 } |
|
263 if b[i+1] != '\n' { |
|
264 return nil, escaped, nil, NewParserError(b[i:i+2], `need a \n after \r`) |
|
265 } |
|
266 i++ // skip the \n |
|
267 } |
|
268 } |
|
269 |
|
270 return nil, escaped, nil, NewParserError(b[len(b):], `multiline basic string not terminated by """`) |
|
271 } |