251
|
// Copyright 2015 Unknwon
//
// Licensed under the Apache License, Version 2.0 (the "License"): you may
// not use this file except in compliance with the License. You may obtain
// a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
|
14 |
|
|
15 |
package ini |
|
16 |
|
|
17 |
import ( |
|
18 |
"bufio" |
|
19 |
"bytes" |
|
20 |
"fmt" |
|
21 |
"io" |
|
22 |
"regexp" |
|
23 |
"strconv" |
|
24 |
"strings" |
|
25 |
"unicode" |
|
26 |
) |
|
27 |
|
|
28 |
// minReaderBufferSize is the smallest buffer size, in bytes, that newParser
// gives to the bufio.Reader it creates; smaller requests are raised to it.
const minReaderBufferSize = 4096
|
29 |
|
|
30 |
// pythonMultiline matches a line that begins with one or more tabs, form
// feeds or spaces. Group 1 captures that leading indentation and group 2 the
// remainder of the line (up to, but not including, a line break).
var pythonMultiline = regexp.MustCompile(`^([\t\f ]+)(.*)`)
|
31 |
|
|
32 |
type parserOptions struct { |
|
33 |
IgnoreContinuation bool |
|
34 |
IgnoreInlineComment bool |
|
35 |
AllowPythonMultilineValues bool |
|
36 |
SpaceBeforeInlineComment bool |
|
37 |
UnescapeValueDoubleQuotes bool |
|
38 |
UnescapeValueCommentSymbols bool |
|
39 |
PreserveSurroundedQuote bool |
|
40 |
DebugFunc DebugFunc |
|
41 |
ReaderBufferSize int |
|
42 |
} |
|
43 |
|
|
44 |
type parser struct { |
|
45 |
buf *bufio.Reader |
|
46 |
options parserOptions |
|
47 |
|
|
48 |
isEOF bool |
|
49 |
count int |
|
50 |
comment *bytes.Buffer |
|
51 |
} |
|
52 |
|
|
53 |
func (p *parser) debug(format string, args ...interface{}) { |
|
54 |
if p.options.DebugFunc != nil { |
|
55 |
p.options.DebugFunc(fmt.Sprintf(format, args...)) |
|
56 |
} |
|
57 |
} |
|
58 |
|
|
59 |
func newParser(r io.Reader, opts parserOptions) *parser { |
|
60 |
size := opts.ReaderBufferSize |
|
61 |
if size < minReaderBufferSize { |
|
62 |
size = minReaderBufferSize |
|
63 |
} |
|
64 |
|
|
65 |
return &parser{ |
|
66 |
buf: bufio.NewReaderSize(r, size), |
|
67 |
options: opts, |
|
68 |
count: 1, |
|
69 |
comment: &bytes.Buffer{}, |
|
70 |
} |
|
71 |
} |
|
72 |
|
|
73 |
// BOM handles header of UTF-8, UTF-16 LE and UTF-16 BE's BOM format. |
|
74 |
// http://en.wikipedia.org/wiki/Byte_order_mark#Representations_of_byte_order_marks_by_encoding |
|
75 |
func (p *parser) BOM() error { |
|
76 |
mask, err := p.buf.Peek(2) |
|
77 |
if err != nil && err != io.EOF { |
|
78 |
return err |
|
79 |
} else if len(mask) < 2 { |
|
80 |
return nil |
|
81 |
} |
|
82 |
|
|
83 |
switch { |
|
84 |
case mask[0] == 254 && mask[1] == 255: |
|
85 |
fallthrough |
|
86 |
case mask[0] == 255 && mask[1] == 254: |
|
87 |
p.buf.Read(mask) |
|
88 |
case mask[0] == 239 && mask[1] == 187: |
|
89 |
mask, err := p.buf.Peek(3) |
|
90 |
if err != nil && err != io.EOF { |
|
91 |
return err |
|
92 |
} else if len(mask) < 3 { |
|
93 |
return nil |
|
94 |
} |
|
95 |
if mask[2] == 191 { |
|
96 |
p.buf.Read(mask) |
|
97 |
} |
|
98 |
} |
|
99 |
return nil |
|
100 |
} |
|
101 |
|
|
102 |
func (p *parser) readUntil(delim byte) ([]byte, error) { |
|
103 |
data, err := p.buf.ReadBytes(delim) |
|
104 |
if err != nil { |
|
105 |
if err == io.EOF { |
|
106 |
p.isEOF = true |
|
107 |
} else { |
|
108 |
return nil, err |
|
109 |
} |
|
110 |
} |
|
111 |
return data, nil |
|
112 |
} |
|
113 |
|
|
114 |
// cleanComment looks for the first comment symbol ('#' or ';') in in. When
// one is found it returns the part of in starting at that symbol and true;
// otherwise it returns nil and false.
func cleanComment(in []byte) ([]byte, bool) {
	if start := bytes.IndexAny(in, "#;"); start >= 0 {
		return in[start:], true
	}
	return nil, false
}
|
121 |
|
|
122 |
func readKeyName(delimiters string, in []byte) (string, int, error) { |
|
123 |
line := string(in) |
|
124 |
|
|
125 |
// Check if key name surrounded by quotes. |
|
126 |
var keyQuote string |
|
127 |
if line[0] == '"' { |
|
128 |
if len(line) > 6 && string(line[0:3]) == `"""` { |
|
129 |
keyQuote = `"""` |
|
130 |
} else { |
|
131 |
keyQuote = `"` |
|
132 |
} |
|
133 |
} else if line[0] == '`' { |
|
134 |
keyQuote = "`" |
|
135 |
} |
|
136 |
|
|
137 |
// Get out key name |
|
138 |
endIdx := -1 |
|
139 |
if len(keyQuote) > 0 { |
|
140 |
startIdx := len(keyQuote) |
|
141 |
// FIXME: fail case -> """"""name"""=value |
|
142 |
pos := strings.Index(line[startIdx:], keyQuote) |
|
143 |
if pos == -1 { |
|
144 |
return "", -1, fmt.Errorf("missing closing key quote: %s", line) |
|
145 |
} |
|
146 |
pos += startIdx |
|
147 |
|
|
148 |
// Find key-value delimiter |
|
149 |
i := strings.IndexAny(line[pos+startIdx:], delimiters) |
|
150 |
if i < 0 { |
|
151 |
return "", -1, ErrDelimiterNotFound{line} |
|
152 |
} |
|
153 |
endIdx = pos + i |
|
154 |
return strings.TrimSpace(line[startIdx:pos]), endIdx + startIdx + 1, nil |
|
155 |
} |
|
156 |
|
|
157 |
endIdx = strings.IndexAny(line, delimiters) |
|
158 |
if endIdx < 0 { |
|
159 |
return "", -1, ErrDelimiterNotFound{line} |
|
160 |
} |
|
161 |
return strings.TrimSpace(line[0:endIdx]), endIdx + 1, nil |
|
162 |
} |
|
163 |
|
|
164 |
func (p *parser) readMultilines(line, val, valQuote string) (string, error) { |
|
165 |
for { |
|
166 |
data, err := p.readUntil('\n') |
|
167 |
if err != nil { |
|
168 |
return "", err |
|
169 |
} |
|
170 |
next := string(data) |
|
171 |
|
|
172 |
pos := strings.LastIndex(next, valQuote) |
|
173 |
if pos > -1 { |
|
174 |
val += next[:pos] |
|
175 |
|
|
176 |
comment, has := cleanComment([]byte(next[pos:])) |
|
177 |
if has { |
|
178 |
p.comment.Write(bytes.TrimSpace(comment)) |
|
179 |
} |
|
180 |
break |
|
181 |
} |
|
182 |
val += next |
|
183 |
if p.isEOF { |
|
184 |
return "", fmt.Errorf("missing closing key quote from '%s' to '%s'", line, next) |
|
185 |
} |
|
186 |
} |
|
187 |
return val, nil |
|
188 |
} |
|
189 |
|
|
190 |
func (p *parser) readContinuationLines(val string) (string, error) { |
|
191 |
for { |
|
192 |
data, err := p.readUntil('\n') |
|
193 |
if err != nil { |
|
194 |
return "", err |
|
195 |
} |
|
196 |
next := strings.TrimSpace(string(data)) |
|
197 |
|
|
198 |
if len(next) == 0 { |
|
199 |
break |
|
200 |
} |
|
201 |
val += next |
|
202 |
if val[len(val)-1] != '\\' { |
|
203 |
break |
|
204 |
} |
|
205 |
val = val[:len(val)-1] |
|
206 |
} |
|
207 |
return val, nil |
|
208 |
} |
|
209 |
|
|
210 |
// hasSurroundedQuote reports whether in starts and ends with quote and
// contains no further occurrence of quote in between. Strings shorter than
// two bytes are never considered surrounded.
func hasSurroundedQuote(in string, quote byte) bool {
	if len(in) < 2 {
		return false
	}
	last := len(in) - 1
	if in[0] != quote || in[last] != quote {
		return false
	}
	// The first quote after the opening one has to be the final byte.
	return strings.IndexByte(in[1:], quote) == last-1
}
|
217 |
|
|
218 |
func (p *parser) readValue(in []byte, bufferSize int) (string, error) { |
|
219 |
|
|
220 |
line := strings.TrimLeftFunc(string(in), unicode.IsSpace) |
|
221 |
if len(line) == 0 { |
|
222 |
if p.options.AllowPythonMultilineValues && len(in) > 0 && in[len(in)-1] == '\n' { |
|
223 |
return p.readPythonMultilines(line, bufferSize) |
|
224 |
} |
|
225 |
return "", nil |
|
226 |
} |
|
227 |
|
|
228 |
var valQuote string |
|
229 |
if len(line) > 3 && string(line[0:3]) == `"""` { |
|
230 |
valQuote = `"""` |
|
231 |
} else if line[0] == '`' { |
|
232 |
valQuote = "`" |
|
233 |
} else if p.options.UnescapeValueDoubleQuotes && line[0] == '"' { |
|
234 |
valQuote = `"` |
|
235 |
} |
|
236 |
|
|
237 |
if len(valQuote) > 0 { |
|
238 |
startIdx := len(valQuote) |
|
239 |
pos := strings.LastIndex(line[startIdx:], valQuote) |
|
240 |
// Check for multi-line value |
|
241 |
if pos == -1 { |
|
242 |
return p.readMultilines(line, line[startIdx:], valQuote) |
|
243 |
} |
|
244 |
|
|
245 |
if p.options.UnescapeValueDoubleQuotes && valQuote == `"` { |
|
246 |
return strings.Replace(line[startIdx:pos+startIdx], `\"`, `"`, -1), nil |
|
247 |
} |
|
248 |
return line[startIdx : pos+startIdx], nil |
|
249 |
} |
|
250 |
|
|
251 |
lastChar := line[len(line)-1] |
|
252 |
// Won't be able to reach here if value only contains whitespace |
|
253 |
line = strings.TrimSpace(line) |
|
254 |
trimmedLastChar := line[len(line)-1] |
|
255 |
|
|
256 |
// Check continuation lines when desired |
|
257 |
if !p.options.IgnoreContinuation && trimmedLastChar == '\\' { |
|
258 |
return p.readContinuationLines(line[:len(line)-1]) |
|
259 |
} |
|
260 |
|
|
261 |
// Check if ignore inline comment |
|
262 |
if !p.options.IgnoreInlineComment { |
|
263 |
var i int |
|
264 |
if p.options.SpaceBeforeInlineComment { |
|
265 |
i = strings.Index(line, " #") |
|
266 |
if i == -1 { |
|
267 |
i = strings.Index(line, " ;") |
|
268 |
} |
|
269 |
|
|
270 |
} else { |
|
271 |
i = strings.IndexAny(line, "#;") |
|
272 |
} |
|
273 |
|
|
274 |
if i > -1 { |
|
275 |
p.comment.WriteString(line[i:]) |
|
276 |
line = strings.TrimSpace(line[:i]) |
|
277 |
} |
|
278 |
|
|
279 |
} |
|
280 |
|
|
281 |
// Trim single and double quotes |
|
282 |
if (hasSurroundedQuote(line, '\'') || |
|
283 |
hasSurroundedQuote(line, '"')) && !p.options.PreserveSurroundedQuote { |
|
284 |
line = line[1 : len(line)-1] |
|
285 |
} else if len(valQuote) == 0 && p.options.UnescapeValueCommentSymbols { |
|
286 |
if strings.Contains(line, `\;`) { |
|
287 |
line = strings.Replace(line, `\;`, ";", -1) |
|
288 |
} |
|
289 |
if strings.Contains(line, `\#`) { |
|
290 |
line = strings.Replace(line, `\#`, "#", -1) |
|
291 |
} |
|
292 |
} else if p.options.AllowPythonMultilineValues && lastChar == '\n' { |
|
293 |
return p.readPythonMultilines(line, bufferSize) |
|
294 |
} |
|
295 |
|
|
296 |
return line, nil |
|
297 |
} |
|
298 |
|
|
299 |
func (p *parser) readPythonMultilines(line string, bufferSize int) (string, error) { |
|
300 |
parserBufferPeekResult, _ := p.buf.Peek(bufferSize) |
|
301 |
peekBuffer := bytes.NewBuffer(parserBufferPeekResult) |
|
302 |
|
|
303 |
indentSize := 0 |
|
304 |
for { |
|
305 |
peekData, peekErr := peekBuffer.ReadBytes('\n') |
|
306 |
if peekErr != nil { |
|
307 |
if peekErr == io.EOF { |
|
308 |
p.debug("readPythonMultilines: io.EOF, peekData: %q, line: %q", string(peekData), line) |
|
309 |
return line, nil |
|
310 |
} |
|
311 |
|
|
312 |
p.debug("readPythonMultilines: failed to peek with error: %v", peekErr) |
|
313 |
return "", peekErr |
|
314 |
} |
|
315 |
|
|
316 |
p.debug("readPythonMultilines: parsing %q", string(peekData)) |
|
317 |
|
|
318 |
peekMatches := pythonMultiline.FindStringSubmatch(string(peekData)) |
|
319 |
p.debug("readPythonMultilines: matched %d parts", len(peekMatches)) |
|
320 |
for n, v := range peekMatches { |
|
321 |
p.debug(" %d: %q", n, v) |
|
322 |
} |
|
323 |
|
|
324 |
// Return if not a Python multiline value. |
|
325 |
if len(peekMatches) != 3 { |
|
326 |
p.debug("readPythonMultilines: end of value, got: %q", line) |
|
327 |
return line, nil |
|
328 |
} |
|
329 |
|
|
330 |
// Determine indent size and line prefix. |
|
331 |
currentIndentSize := len(peekMatches[1]) |
|
332 |
if indentSize < 1 { |
|
333 |
indentSize = currentIndentSize |
|
334 |
p.debug("readPythonMultilines: indent size is %d", indentSize) |
|
335 |
} |
|
336 |
|
|
337 |
// Make sure each line is indented at least as far as first line. |
|
338 |
if currentIndentSize < indentSize { |
|
339 |
p.debug("readPythonMultilines: end of value, current indent: %d, expected indent: %d, line: %q", currentIndentSize, indentSize, line) |
|
340 |
return line, nil |
|
341 |
} |
|
342 |
|
|
343 |
// Advance the parser reader (buffer) in-sync with the peek buffer. |
|
344 |
_, err := p.buf.Discard(len(peekData)) |
|
345 |
if err != nil { |
|
346 |
p.debug("readPythonMultilines: failed to skip to the end, returning error") |
|
347 |
return "", err |
|
348 |
} |
|
349 |
|
|
350 |
// Handle indented empty line. |
|
351 |
line += "\n" + peekMatches[1][indentSize:] + peekMatches[2] |
|
352 |
} |
|
353 |
} |
|
354 |
|
|
355 |
// parse parses data through an io.Reader. |
|
356 |
func (f *File) parse(reader io.Reader) (err error) { |
|
357 |
p := newParser(reader, parserOptions{ |
|
358 |
IgnoreContinuation: f.options.IgnoreContinuation, |
|
359 |
IgnoreInlineComment: f.options.IgnoreInlineComment, |
|
360 |
AllowPythonMultilineValues: f.options.AllowPythonMultilineValues, |
|
361 |
SpaceBeforeInlineComment: f.options.SpaceBeforeInlineComment, |
|
362 |
UnescapeValueDoubleQuotes: f.options.UnescapeValueDoubleQuotes, |
|
363 |
UnescapeValueCommentSymbols: f.options.UnescapeValueCommentSymbols, |
|
364 |
PreserveSurroundedQuote: f.options.PreserveSurroundedQuote, |
|
365 |
DebugFunc: f.options.DebugFunc, |
|
366 |
ReaderBufferSize: f.options.ReaderBufferSize, |
|
367 |
}) |
|
368 |
if err = p.BOM(); err != nil { |
|
369 |
return fmt.Errorf("BOM: %v", err) |
|
370 |
} |
|
371 |
|
|
372 |
// Ignore error because default section name is never empty string. |
|
373 |
name := DefaultSection |
|
374 |
if f.options.Insensitive { |
|
375 |
name = strings.ToLower(DefaultSection) |
|
376 |
} |
|
377 |
section, _ := f.NewSection(name) |
|
378 |
|
|
379 |
// This "last" is not strictly equivalent to "previous one" if current key is not the first nested key |
|
380 |
var isLastValueEmpty bool |
|
381 |
var lastRegularKey *Key |
|
382 |
|
|
383 |
var line []byte |
|
384 |
var inUnparseableSection bool |
|
385 |
|
|
386 |
// NOTE: Iterate and increase `currentPeekSize` until |
|
387 |
// the size of the parser buffer is found. |
|
388 |
// TODO(unknwon): When Golang 1.10 is the lowest version supported, replace with `parserBufferSize := p.buf.Size()`. |
|
389 |
parserBufferSize := 0 |
|
390 |
// NOTE: Peek 4kb at a time. |
|
391 |
currentPeekSize := minReaderBufferSize |
|
392 |
|
|
393 |
if f.options.AllowPythonMultilineValues { |
|
394 |
for { |
|
395 |
peekBytes, _ := p.buf.Peek(currentPeekSize) |
|
396 |
peekBytesLength := len(peekBytes) |
|
397 |
|
|
398 |
if parserBufferSize >= peekBytesLength { |
|
399 |
break |
|
400 |
} |
|
401 |
|
|
402 |
currentPeekSize *= 2 |
|
403 |
parserBufferSize = peekBytesLength |
|
404 |
} |
|
405 |
} |
|
406 |
|
|
407 |
for !p.isEOF { |
|
408 |
line, err = p.readUntil('\n') |
|
409 |
if err != nil { |
|
410 |
return err |
|
411 |
} |
|
412 |
|
|
413 |
if f.options.AllowNestedValues && |
|
414 |
isLastValueEmpty && len(line) > 0 { |
|
415 |
if line[0] == ' ' || line[0] == '\t' { |
|
416 |
lastRegularKey.addNestedValue(string(bytes.TrimSpace(line))) |
|
417 |
continue |
|
418 |
} |
|
419 |
} |
|
420 |
|
|
421 |
line = bytes.TrimLeftFunc(line, unicode.IsSpace) |
|
422 |
if len(line) == 0 { |
|
423 |
continue |
|
424 |
} |
|
425 |
|
|
426 |
// Comments |
|
427 |
if line[0] == '#' || line[0] == ';' { |
|
428 |
// Note: we do not care ending line break, |
|
429 |
// it is needed for adding second line, |
|
430 |
// so just clean it once at the end when set to value. |
|
431 |
p.comment.Write(line) |
|
432 |
continue |
|
433 |
} |
|
434 |
|
|
435 |
// Section |
|
436 |
if line[0] == '[' { |
|
437 |
// Read to the next ']' (TODO: support quoted strings) |
|
438 |
closeIdx := bytes.LastIndexByte(line, ']') |
|
439 |
if closeIdx == -1 { |
|
440 |
return fmt.Errorf("unclosed section: %s", line) |
|
441 |
} |
|
442 |
|
|
443 |
name := string(line[1:closeIdx]) |
|
444 |
section, err = f.NewSection(name) |
|
445 |
if err != nil { |
|
446 |
return err |
|
447 |
} |
|
448 |
|
|
449 |
comment, has := cleanComment(line[closeIdx+1:]) |
|
450 |
if has { |
|
451 |
p.comment.Write(comment) |
|
452 |
} |
|
453 |
|
|
454 |
section.Comment = strings.TrimSpace(p.comment.String()) |
|
455 |
|
|
456 |
// Reset auto-counter and comments |
|
457 |
p.comment.Reset() |
|
458 |
p.count = 1 |
|
459 |
|
|
460 |
inUnparseableSection = false |
|
461 |
for i := range f.options.UnparseableSections { |
|
462 |
if f.options.UnparseableSections[i] == name || |
|
463 |
(f.options.Insensitive && strings.ToLower(f.options.UnparseableSections[i]) == strings.ToLower(name)) { |
|
464 |
inUnparseableSection = true |
|
465 |
continue |
|
466 |
} |
|
467 |
} |
|
468 |
continue |
|
469 |
} |
|
470 |
|
|
471 |
if inUnparseableSection { |
|
472 |
section.isRawSection = true |
|
473 |
section.rawBody += string(line) |
|
474 |
continue |
|
475 |
} |
|
476 |
|
|
477 |
kname, offset, err := readKeyName(f.options.KeyValueDelimiters, line) |
|
478 |
if err != nil { |
|
479 |
// Treat as boolean key when desired, and whole line is key name. |
|
480 |
if IsErrDelimiterNotFound(err) { |
|
481 |
switch { |
|
482 |
case f.options.AllowBooleanKeys: |
|
483 |
kname, err := p.readValue(line, parserBufferSize) |
|
484 |
if err != nil { |
|
485 |
return err |
|
486 |
} |
|
487 |
key, err := section.NewBooleanKey(kname) |
|
488 |
if err != nil { |
|
489 |
return err |
|
490 |
} |
|
491 |
key.Comment = strings.TrimSpace(p.comment.String()) |
|
492 |
p.comment.Reset() |
|
493 |
continue |
|
494 |
|
|
495 |
case f.options.SkipUnrecognizableLines: |
|
496 |
continue |
|
497 |
} |
|
498 |
} |
|
499 |
return err |
|
500 |
} |
|
501 |
|
|
502 |
// Auto increment. |
|
503 |
isAutoIncr := false |
|
504 |
if kname == "-" { |
|
505 |
isAutoIncr = true |
|
506 |
kname = "#" + strconv.Itoa(p.count) |
|
507 |
p.count++ |
|
508 |
} |
|
509 |
|
|
510 |
value, err := p.readValue(line[offset:], parserBufferSize) |
|
511 |
if err != nil { |
|
512 |
return err |
|
513 |
} |
|
514 |
isLastValueEmpty = len(value) == 0 |
|
515 |
|
|
516 |
key, err := section.NewKey(kname, value) |
|
517 |
if err != nil { |
|
518 |
return err |
|
519 |
} |
|
520 |
key.isAutoIncrement = isAutoIncr |
|
521 |
key.Comment = strings.TrimSpace(p.comment.String()) |
|
522 |
p.comment.Reset() |
|
523 |
lastRegularKey = key |
|
524 |
} |
|
525 |
return nil |
|
526 |
} |