vendor/gopkg.in/ini.v1/parser.go
changeset 251 1c52a0eeb952
child 256 6d9efbef00a9
equal deleted inserted replaced
250:c040f992052f 251:1c52a0eeb952
       
     1 // Copyright 2015 Unknwon
       
     2 //
       
     3 // Licensed under the Apache License, Version 2.0 (the "License"): you may
       
     4 // not use this file except in compliance with the License. You may obtain
       
     5 // a copy of the License at
       
     6 //
       
     7 //     http://www.apache.org/licenses/LICENSE-2.0
       
     8 //
       
     9 // Unless required by applicable law or agreed to in writing, software
       
    10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
       
    11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
       
    12 // License for the specific language governing permissions and limitations
       
    13 // under the License.
       
    14 
       
    15 package ini
       
    16 
       
    17 import (
       
    18 	"bufio"
       
    19 	"bytes"
       
    20 	"fmt"
       
    21 	"io"
       
    22 	"regexp"
       
    23 	"strconv"
       
    24 	"strings"
       
    25 	"unicode"
       
    26 )
       
    27 
       
       // minReaderBufferSize is the lower bound, in bytes, for the size of the
       // bufio.Reader created by newParser; smaller requested sizes are raised
       // to this value. parse also uses it as the initial peek size when probing
       // the reader's buffer.
    28 const minReaderBufferSize = 4096
       
    29 
       
       // pythonMultiline matches a line that begins with at least one tab, form
       // feed or space. Group 1 captures that leading whitespace and group 2 the
       // rest of the line; readPythonMultilines uses it to recognise indented
       // continuation lines.
    30 var pythonMultiline = regexp.MustCompile(`^([\t\f ]+)(.*)`)
       
    31 
       
       // parserOptions carries the settings that control how a parser reads
       // values. parse fills it from the corresponding File options.
    32 type parserOptions struct {
       
       	// IgnoreContinuation stops readValue from treating a trailing backslash
       	// as a request to join the following line(s).
    33 	IgnoreContinuation          bool
       
       	// IgnoreInlineComment stops readValue from splitting "#" / ";" comments
       	// off the end of an unquoted value.
    34 	IgnoreInlineComment         bool
       
       	// AllowPythonMultilineValues lets readValue continue a value across the
       	// following indented lines via readPythonMultilines.
    35 	AllowPythonMultilineValues  bool
       
       	// SpaceBeforeInlineComment restricts inline comments to those introduced
       	// by " #" or " ;" (comment symbol preceded by a space).
    36 	SpaceBeforeInlineComment    bool
       
       	// UnescapeValueDoubleQuotes makes a value starting with `"` a quoted
       	// value and replaces `\"` with `"` inside it.
    37 	UnescapeValueDoubleQuotes   bool
       
       	// UnescapeValueCommentSymbols replaces `\;` and `\#` with ";" and "#" in
       	// values that are not stripped of surrounding quotes.
    38 	UnescapeValueCommentSymbols bool
       
       	// PreserveSurroundedQuote keeps a single pair of surrounding single or
       	// double quotes instead of trimming it.
    39 	PreserveSurroundedQuote     bool
       
       	// DebugFunc, when non-nil, receives each formatted debug message.
    40 	DebugFunc                   DebugFunc
       
       	// ReaderBufferSize is the requested size of the underlying bufio.Reader;
       	// newParser raises values below minReaderBufferSize.
    41 	ReaderBufferSize            int
       
    42 }
       
    43 
       
       // parser holds the reading state for a single pass over one input stream.
    44 type parser struct {
       
       	// buf is the buffered reader wrapped around the input.
    45 	buf     *bufio.Reader
       
       	// options are the settings supplied to newParser.
    46 	options parserOptions
       
    47 
       
       	// isEOF is set by readUntil once the underlying reader reports io.EOF.
    48 	isEOF   bool
       
       	// count is the next number handed to an auto-increment ("-") key; it
       	// starts at 1 and parse resets it to 1 at every section header.
    49 	count   int
       
       	// comment accumulates comment text until parse attaches it to the next
       	// section or key and resets the buffer.
    50 	comment *bytes.Buffer
       
    51 }
       
    52 
       
    53 func (p *parser) debug(format string, args ...interface{}) {
       
    54 	if p.options.DebugFunc != nil {
       
    55 		p.options.DebugFunc(fmt.Sprintf(format, args...))
       
    56 	}
       
    57 }
       
    58 
       
    59 func newParser(r io.Reader, opts parserOptions) *parser {
       
    60 	size := opts.ReaderBufferSize
       
    61 	if size < minReaderBufferSize {
       
    62 		size = minReaderBufferSize
       
    63 	}
       
    64 
       
    65 	return &parser{
       
    66 		buf:     bufio.NewReaderSize(r, size),
       
    67 		options: opts,
       
    68 		count:   1,
       
    69 		comment: &bytes.Buffer{},
       
    70 	}
       
    71 }
       
    72 
       
    73 // BOM handles header of UTF-8, UTF-16 LE and UTF-16 BE's BOM format.
       
    74 // http://en.wikipedia.org/wiki/Byte_order_mark#Representations_of_byte_order_marks_by_encoding
       
    75 func (p *parser) BOM() error {
       
    76 	mask, err := p.buf.Peek(2)
       
    77 	if err != nil && err != io.EOF {
       
    78 		return err
       
    79 	} else if len(mask) < 2 {
       
    80 		return nil
       
    81 	}
       
    82 
       
    83 	switch {
       
    84 	case mask[0] == 254 && mask[1] == 255:
       
    85 		fallthrough
       
    86 	case mask[0] == 255 && mask[1] == 254:
       
    87 		p.buf.Read(mask)
       
    88 	case mask[0] == 239 && mask[1] == 187:
       
    89 		mask, err := p.buf.Peek(3)
       
    90 		if err != nil && err != io.EOF {
       
    91 			return err
       
    92 		} else if len(mask) < 3 {
       
    93 			return nil
       
    94 		}
       
    95 		if mask[2] == 191 {
       
    96 			p.buf.Read(mask)
       
    97 		}
       
    98 	}
       
    99 	return nil
       
   100 }
       
   101 
       
   102 func (p *parser) readUntil(delim byte) ([]byte, error) {
       
   103 	data, err := p.buf.ReadBytes(delim)
       
   104 	if err != nil {
       
   105 		if err == io.EOF {
       
   106 			p.isEOF = true
       
   107 		} else {
       
   108 			return nil, err
       
   109 		}
       
   110 	}
       
   111 	return data, nil
       
   112 }
       
   113 
       
   114 func cleanComment(in []byte) ([]byte, bool) {
       
   115 	i := bytes.IndexAny(in, "#;")
       
   116 	if i == -1 {
       
   117 		return nil, false
       
   118 	}
       
   119 	return in[i:], true
       
   120 }
       
   121 
       
   122 func readKeyName(delimiters string, in []byte) (string, int, error) {
       
   123 	line := string(in)
       
   124 
       
   125 	// Check if key name surrounded by quotes.
       
   126 	var keyQuote string
       
   127 	if line[0] == '"' {
       
   128 		if len(line) > 6 && string(line[0:3]) == `"""` {
       
   129 			keyQuote = `"""`
       
   130 		} else {
       
   131 			keyQuote = `"`
       
   132 		}
       
   133 	} else if line[0] == '`' {
       
   134 		keyQuote = "`"
       
   135 	}
       
   136 
       
   137 	// Get out key name
       
   138 	endIdx := -1
       
   139 	if len(keyQuote) > 0 {
       
   140 		startIdx := len(keyQuote)
       
   141 		// FIXME: fail case -> """"""name"""=value
       
   142 		pos := strings.Index(line[startIdx:], keyQuote)
       
   143 		if pos == -1 {
       
   144 			return "", -1, fmt.Errorf("missing closing key quote: %s", line)
       
   145 		}
       
   146 		pos += startIdx
       
   147 
       
   148 		// Find key-value delimiter
       
   149 		i := strings.IndexAny(line[pos+startIdx:], delimiters)
       
   150 		if i < 0 {
       
   151 			return "", -1, ErrDelimiterNotFound{line}
       
   152 		}
       
   153 		endIdx = pos + i
       
   154 		return strings.TrimSpace(line[startIdx:pos]), endIdx + startIdx + 1, nil
       
   155 	}
       
   156 
       
   157 	endIdx = strings.IndexAny(line, delimiters)
       
   158 	if endIdx < 0 {
       
   159 		return "", -1, ErrDelimiterNotFound{line}
       
   160 	}
       
   161 	return strings.TrimSpace(line[0:endIdx]), endIdx + 1, nil
       
   162 }
       
   163 
       
   164 func (p *parser) readMultilines(line, val, valQuote string) (string, error) {
       
   165 	for {
       
   166 		data, err := p.readUntil('\n')
       
   167 		if err != nil {
       
   168 			return "", err
       
   169 		}
       
   170 		next := string(data)
       
   171 
       
   172 		pos := strings.LastIndex(next, valQuote)
       
   173 		if pos > -1 {
       
   174 			val += next[:pos]
       
   175 
       
   176 			comment, has := cleanComment([]byte(next[pos:]))
       
   177 			if has {
       
   178 				p.comment.Write(bytes.TrimSpace(comment))
       
   179 			}
       
   180 			break
       
   181 		}
       
   182 		val += next
       
   183 		if p.isEOF {
       
   184 			return "", fmt.Errorf("missing closing key quote from '%s' to '%s'", line, next)
       
   185 		}
       
   186 	}
       
   187 	return val, nil
       
   188 }
       
   189 
       
   190 func (p *parser) readContinuationLines(val string) (string, error) {
       
   191 	for {
       
   192 		data, err := p.readUntil('\n')
       
   193 		if err != nil {
       
   194 			return "", err
       
   195 		}
       
   196 		next := strings.TrimSpace(string(data))
       
   197 
       
   198 		if len(next) == 0 {
       
   199 			break
       
   200 		}
       
   201 		val += next
       
   202 		if val[len(val)-1] != '\\' {
       
   203 			break
       
   204 		}
       
   205 		val = val[:len(val)-1]
       
   206 	}
       
   207 	return val, nil
       
   208 }
       
   209 
       
   210 // hasSurroundedQuote check if and only if the first and last characters
       
   211 // are quotes \" or \'.
       
   212 // It returns false if any other parts also contain same kind of quotes.
       
   213 func hasSurroundedQuote(in string, quote byte) bool {
       
   214 	return len(in) >= 2 && in[0] == quote && in[len(in)-1] == quote &&
       
   215 		strings.IndexByte(in[1:], quote) == len(in)-2
       
   216 }
       
   217 
       
   218 func (p *parser) readValue(in []byte, bufferSize int) (string, error) {
       
   219 
       
   220 	line := strings.TrimLeftFunc(string(in), unicode.IsSpace)
       
   221 	if len(line) == 0 {
       
   222 		if p.options.AllowPythonMultilineValues && len(in) > 0 && in[len(in)-1] == '\n' {
       
   223 			return p.readPythonMultilines(line, bufferSize)
       
   224 		}
       
   225 		return "", nil
       
   226 	}
       
   227 
       
   228 	var valQuote string
       
   229 	if len(line) > 3 && string(line[0:3]) == `"""` {
       
   230 		valQuote = `"""`
       
   231 	} else if line[0] == '`' {
       
   232 		valQuote = "`"
       
   233 	} else if p.options.UnescapeValueDoubleQuotes && line[0] == '"' {
       
   234 		valQuote = `"`
       
   235 	}
       
   236 
       
   237 	if len(valQuote) > 0 {
       
   238 		startIdx := len(valQuote)
       
   239 		pos := strings.LastIndex(line[startIdx:], valQuote)
       
   240 		// Check for multi-line value
       
   241 		if pos == -1 {
       
   242 			return p.readMultilines(line, line[startIdx:], valQuote)
       
   243 		}
       
   244 
       
   245 		if p.options.UnescapeValueDoubleQuotes && valQuote == `"` {
       
   246 			return strings.Replace(line[startIdx:pos+startIdx], `\"`, `"`, -1), nil
       
   247 		}
       
   248 		return line[startIdx : pos+startIdx], nil
       
   249 	}
       
   250 
       
   251 	lastChar := line[len(line)-1]
       
   252 	// Won't be able to reach here if value only contains whitespace
       
   253 	line = strings.TrimSpace(line)
       
   254 	trimmedLastChar := line[len(line)-1]
       
   255 
       
   256 	// Check continuation lines when desired
       
   257 	if !p.options.IgnoreContinuation && trimmedLastChar == '\\' {
       
   258 		return p.readContinuationLines(line[:len(line)-1])
       
   259 	}
       
   260 
       
   261 	// Check if ignore inline comment
       
   262 	if !p.options.IgnoreInlineComment {
       
   263 		var i int
       
   264 		if p.options.SpaceBeforeInlineComment {
       
   265 			i = strings.Index(line, " #")
       
   266 			if i == -1 {
       
   267 				i = strings.Index(line, " ;")
       
   268 			}
       
   269 
       
   270 		} else {
       
   271 			i = strings.IndexAny(line, "#;")
       
   272 		}
       
   273 
       
   274 		if i > -1 {
       
   275 			p.comment.WriteString(line[i:])
       
   276 			line = strings.TrimSpace(line[:i])
       
   277 		}
       
   278 
       
   279 	}
       
   280 
       
   281 	// Trim single and double quotes
       
   282 	if (hasSurroundedQuote(line, '\'') ||
       
   283 		hasSurroundedQuote(line, '"')) && !p.options.PreserveSurroundedQuote {
       
   284 		line = line[1 : len(line)-1]
       
   285 	} else if len(valQuote) == 0 && p.options.UnescapeValueCommentSymbols {
       
   286 		if strings.Contains(line, `\;`) {
       
   287 			line = strings.Replace(line, `\;`, ";", -1)
       
   288 		}
       
   289 		if strings.Contains(line, `\#`) {
       
   290 			line = strings.Replace(line, `\#`, "#", -1)
       
   291 		}
       
   292 	} else if p.options.AllowPythonMultilineValues && lastChar == '\n' {
       
   293 		return p.readPythonMultilines(line, bufferSize)
       
   294 	}
       
   295 
       
   296 	return line, nil
       
   297 }
       
   298 
       
   299 func (p *parser) readPythonMultilines(line string, bufferSize int) (string, error) {
       
   300 	parserBufferPeekResult, _ := p.buf.Peek(bufferSize)
       
   301 	peekBuffer := bytes.NewBuffer(parserBufferPeekResult)
       
   302 
       
   303 	indentSize := 0
       
   304 	for {
       
   305 		peekData, peekErr := peekBuffer.ReadBytes('\n')
       
   306 		if peekErr != nil {
       
   307 			if peekErr == io.EOF {
       
   308 				p.debug("readPythonMultilines: io.EOF, peekData: %q, line: %q", string(peekData), line)
       
   309 				return line, nil
       
   310 			}
       
   311 
       
   312 			p.debug("readPythonMultilines: failed to peek with error: %v", peekErr)
       
   313 			return "", peekErr
       
   314 		}
       
   315 
       
   316 		p.debug("readPythonMultilines: parsing %q", string(peekData))
       
   317 
       
   318 		peekMatches := pythonMultiline.FindStringSubmatch(string(peekData))
       
   319 		p.debug("readPythonMultilines: matched %d parts", len(peekMatches))
       
   320 		for n, v := range peekMatches {
       
   321 			p.debug("   %d: %q", n, v)
       
   322 		}
       
   323 
       
   324 		// Return if not a Python multiline value.
       
   325 		if len(peekMatches) != 3 {
       
   326 			p.debug("readPythonMultilines: end of value, got: %q", line)
       
   327 			return line, nil
       
   328 		}
       
   329 
       
   330 		// Determine indent size and line prefix.
       
   331 		currentIndentSize := len(peekMatches[1])
       
   332 		if indentSize < 1 {
       
   333 			indentSize = currentIndentSize
       
   334 			p.debug("readPythonMultilines: indent size is %d", indentSize)
       
   335 		}
       
   336 
       
   337 		// Make sure each line is indented at least as far as first line.
       
   338 		if currentIndentSize < indentSize {
       
   339 			p.debug("readPythonMultilines: end of value, current indent: %d, expected indent: %d, line: %q", currentIndentSize, indentSize, line)
       
   340 			return line, nil
       
   341 		}
       
   342 
       
   343 		// Advance the parser reader (buffer) in-sync with the peek buffer.
       
   344 		_, err := p.buf.Discard(len(peekData))
       
   345 		if err != nil {
       
   346 			p.debug("readPythonMultilines: failed to skip to the end, returning error")
       
   347 			return "", err
       
   348 		}
       
   349 
       
   350 		// Handle indented empty line.
       
   351 		line += "\n" + peekMatches[1][indentSize:] + peekMatches[2]
       
   352 	}
       
   353 }
       
   354 
       
   355 // parse parses data through an io.Reader.
       
   356 func (f *File) parse(reader io.Reader) (err error) {
       
   357 	p := newParser(reader, parserOptions{
       
   358 		IgnoreContinuation:          f.options.IgnoreContinuation,
       
   359 		IgnoreInlineComment:         f.options.IgnoreInlineComment,
       
   360 		AllowPythonMultilineValues:  f.options.AllowPythonMultilineValues,
       
   361 		SpaceBeforeInlineComment:    f.options.SpaceBeforeInlineComment,
       
   362 		UnescapeValueDoubleQuotes:   f.options.UnescapeValueDoubleQuotes,
       
   363 		UnescapeValueCommentSymbols: f.options.UnescapeValueCommentSymbols,
       
   364 		PreserveSurroundedQuote:     f.options.PreserveSurroundedQuote,
       
   365 		DebugFunc:                   f.options.DebugFunc,
       
   366 		ReaderBufferSize:            f.options.ReaderBufferSize,
       
   367 	})
       
   368 	if err = p.BOM(); err != nil {
       
   369 		return fmt.Errorf("BOM: %v", err)
       
   370 	}
       
   371 
       
   372 	// Ignore error because default section name is never empty string.
       
   373 	name := DefaultSection
       
   374 	if f.options.Insensitive {
       
   375 		name = strings.ToLower(DefaultSection)
       
   376 	}
       
   377 	section, _ := f.NewSection(name)
       
   378 
       
   379 	// This "last" is not strictly equivalent to "previous one" if current key is not the first nested key
       
   380 	var isLastValueEmpty bool
       
   381 	var lastRegularKey *Key
       
   382 
       
   383 	var line []byte
       
   384 	var inUnparseableSection bool
       
   385 
       
   386 	// NOTE: Iterate and increase `currentPeekSize` until
       
   387 	// the size of the parser buffer is found.
       
   388 	// TODO(unknwon): When Golang 1.10 is the lowest version supported, replace with `parserBufferSize := p.buf.Size()`.
       
   389 	parserBufferSize := 0
       
   390 	// NOTE: Peek 4kb at a time.
       
   391 	currentPeekSize := minReaderBufferSize
       
   392 
       
   393 	if f.options.AllowPythonMultilineValues {
       
   394 		for {
       
   395 			peekBytes, _ := p.buf.Peek(currentPeekSize)
       
   396 			peekBytesLength := len(peekBytes)
       
   397 
       
   398 			if parserBufferSize >= peekBytesLength {
       
   399 				break
       
   400 			}
       
   401 
       
   402 			currentPeekSize *= 2
       
   403 			parserBufferSize = peekBytesLength
       
   404 		}
       
   405 	}
       
   406 
       
   407 	for !p.isEOF {
       
   408 		line, err = p.readUntil('\n')
       
   409 		if err != nil {
       
   410 			return err
       
   411 		}
       
   412 
       
   413 		if f.options.AllowNestedValues &&
       
   414 			isLastValueEmpty && len(line) > 0 {
       
   415 			if line[0] == ' ' || line[0] == '\t' {
       
   416 				lastRegularKey.addNestedValue(string(bytes.TrimSpace(line)))
       
   417 				continue
       
   418 			}
       
   419 		}
       
   420 
       
   421 		line = bytes.TrimLeftFunc(line, unicode.IsSpace)
       
   422 		if len(line) == 0 {
       
   423 			continue
       
   424 		}
       
   425 
       
   426 		// Comments
       
   427 		if line[0] == '#' || line[0] == ';' {
       
   428 			// Note: we do not care ending line break,
       
   429 			// it is needed for adding second line,
       
   430 			// so just clean it once at the end when set to value.
       
   431 			p.comment.Write(line)
       
   432 			continue
       
   433 		}
       
   434 
       
   435 		// Section
       
   436 		if line[0] == '[' {
       
   437 			// Read to the next ']' (TODO: support quoted strings)
       
   438 			closeIdx := bytes.LastIndexByte(line, ']')
       
   439 			if closeIdx == -1 {
       
   440 				return fmt.Errorf("unclosed section: %s", line)
       
   441 			}
       
   442 
       
   443 			name := string(line[1:closeIdx])
       
   444 			section, err = f.NewSection(name)
       
   445 			if err != nil {
       
   446 				return err
       
   447 			}
       
   448 
       
   449 			comment, has := cleanComment(line[closeIdx+1:])
       
   450 			if has {
       
   451 				p.comment.Write(comment)
       
   452 			}
       
   453 
       
   454 			section.Comment = strings.TrimSpace(p.comment.String())
       
   455 
       
   456 			// Reset auto-counter and comments
       
   457 			p.comment.Reset()
       
   458 			p.count = 1
       
   459 
       
   460 			inUnparseableSection = false
       
   461 			for i := range f.options.UnparseableSections {
       
   462 				if f.options.UnparseableSections[i] == name ||
       
   463 					(f.options.Insensitive && strings.ToLower(f.options.UnparseableSections[i]) == strings.ToLower(name)) {
       
   464 					inUnparseableSection = true
       
   465 					continue
       
   466 				}
       
   467 			}
       
   468 			continue
       
   469 		}
       
   470 
       
   471 		if inUnparseableSection {
       
   472 			section.isRawSection = true
       
   473 			section.rawBody += string(line)
       
   474 			continue
       
   475 		}
       
   476 
       
   477 		kname, offset, err := readKeyName(f.options.KeyValueDelimiters, line)
       
   478 		if err != nil {
       
   479 			// Treat as boolean key when desired, and whole line is key name.
       
   480 			if IsErrDelimiterNotFound(err) {
       
   481 				switch {
       
   482 				case f.options.AllowBooleanKeys:
       
   483 					kname, err := p.readValue(line, parserBufferSize)
       
   484 					if err != nil {
       
   485 						return err
       
   486 					}
       
   487 					key, err := section.NewBooleanKey(kname)
       
   488 					if err != nil {
       
   489 						return err
       
   490 					}
       
   491 					key.Comment = strings.TrimSpace(p.comment.String())
       
   492 					p.comment.Reset()
       
   493 					continue
       
   494 
       
   495 				case f.options.SkipUnrecognizableLines:
       
   496 					continue
       
   497 				}
       
   498 			}
       
   499 			return err
       
   500 		}
       
   501 
       
   502 		// Auto increment.
       
   503 		isAutoIncr := false
       
   504 		if kname == "-" {
       
   505 			isAutoIncr = true
       
   506 			kname = "#" + strconv.Itoa(p.count)
       
   507 			p.count++
       
   508 		}
       
   509 
       
   510 		value, err := p.readValue(line[offset:], parserBufferSize)
       
   511 		if err != nil {
       
   512 			return err
       
   513 		}
       
   514 		isLastValueEmpty = len(value) == 0
       
   515 
       
   516 		key, err := section.NewKey(kname, value)
       
   517 		if err != nil {
       
   518 			return err
       
   519 		}
       
   520 		key.isAutoIncrement = isAutoIncr
       
   521 		key.Comment = strings.TrimSpace(p.comment.String())
       
   522 		p.comment.Reset()
       
   523 		lastRegularKey = key
       
   524 	}
       
   525 	return nil
       
   526 }