vendor/github.com/magiconair/properties/lex.go
changeset 242 2a9ec03fe5a1
child 260 445e01aede7e
equal deleted inserted replaced
241:e77dad242f4c 242:2a9ec03fe5a1
       
     1 // Copyright 2018 Frank Schroeder. All rights reserved.
       
     2 // Use of this source code is governed by a BSD-style
       
     3 // license that can be found in the LICENSE file.
       
     4 //
       
     5 // Parts of the lexer are from the template/text/parser package
       
     6 // For these parts the following applies:
       
     7 //
       
     8 // Copyright 2011 The Go Authors. All rights reserved.
       
     9 // Use of this source code is governed by a BSD-style
       
    10 // license that can be found in the LICENSE file of the go 1.2
       
    11 // distribution.
       
    12 
       
    13 package properties
       
    14 
       
    15 import (
       
    16 	"fmt"
       
    17 	"strconv"
       
    18 	"strings"
       
    19 	"unicode/utf8"
       
    20 )
       
    21 
       
    22 // item represents a token or text string returned from the scanner.
       
    23 type item struct {
       
    24 	typ itemType // The type of this item.
       
    25 	pos int      // The starting position, in bytes, of this item in the input string.
       
    26 	val string   // The value of this item.
       
    27 }
       
    28 
       
    29 func (i item) String() string {
       
    30 	switch {
       
    31 	case i.typ == itemEOF:
       
    32 		return "EOF"
       
    33 	case i.typ == itemError:
       
    34 		return i.val
       
    35 	case len(i.val) > 10:
       
    36 		return fmt.Sprintf("%.10q...", i.val)
       
    37 	}
       
    38 	return fmt.Sprintf("%q", i.val)
       
    39 }
       
    40 
       
    // itemType enumerates the kinds of tokens the lexer can emit.
    type itemType int

    // Token kinds, numbered from zero in declaration order.
    const (
    	// itemError reports a lexing failure; the item value holds the error text.
    	itemError itemType = iota
    	// itemEOF marks the end of the input.
    	itemEOF
    	// itemKey is a property key.
    	itemKey
    	// itemValue is a property value.
    	itemValue
    	// itemComment is a comment.
    	itemComment
    )
       
    51 
       
    const (
    	// eof is the sentinel rune value used to signal the end of the input.
    	eof = -1

    	// whitespace lists the blank characters permitted inside a line:
    	// space, form feed and tab.
    	whitespace = " \f\t"
    )
       
    57 
       
    58 // stateFn represents the state of the scanner as a function that returns the next state.
       
    59 type stateFn func(*lexer) stateFn
       
    60 
       
    61 // lexer holds the state of the scanner.
       
    62 type lexer struct {
       
    63 	input   string    // the string being scanned
       
    64 	state   stateFn   // the next lexing function to enter
       
    65 	pos     int       // current position in the input
       
    66 	start   int       // start position of this item
       
    67 	width   int       // width of last rune read from input
       
    68 	lastPos int       // position of most recent item returned by nextItem
       
    69 	runes   []rune    // scanned runes for this item
       
    70 	items   chan item // channel of scanned items
       
    71 }
       
    72 
       
    73 // next returns the next rune in the input.
       
    74 func (l *lexer) next() rune {
       
    75 	if l.pos >= len(l.input) {
       
    76 		l.width = 0
       
    77 		return eof
       
    78 	}
       
    79 	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
       
    80 	l.width = w
       
    81 	l.pos += l.width
       
    82 	return r
       
    83 }
       
    84 
       
    85 // peek returns but does not consume the next rune in the input.
       
    86 func (l *lexer) peek() rune {
       
    87 	r := l.next()
       
    88 	l.backup()
       
    89 	return r
       
    90 }
       
    91 
       
    92 // backup steps back one rune. Can only be called once per call of next.
       
    93 func (l *lexer) backup() {
       
    94 	l.pos -= l.width
       
    95 }
       
    96 
       
    97 // emit passes an item back to the client.
       
    98 func (l *lexer) emit(t itemType) {
       
    99 	i := item{t, l.start, string(l.runes)}
       
   100 	l.items <- i
       
   101 	l.start = l.pos
       
   102 	l.runes = l.runes[:0]
       
   103 }
       
   104 
       
   105 // ignore skips over the pending input before this point.
       
   106 func (l *lexer) ignore() {
       
   107 	l.start = l.pos
       
   108 }
       
   109 
       
   110 // appends the rune to the current value
       
   111 func (l *lexer) appendRune(r rune) {
       
   112 	l.runes = append(l.runes, r)
       
   113 }
       
   114 
       
   115 // accept consumes the next rune if it's from the valid set.
       
   116 func (l *lexer) accept(valid string) bool {
       
   117 	if strings.ContainsRune(valid, l.next()) {
       
   118 		return true
       
   119 	}
       
   120 	l.backup()
       
   121 	return false
       
   122 }
       
   123 
       
   124 // acceptRun consumes a run of runes from the valid set.
       
   125 func (l *lexer) acceptRun(valid string) {
       
   126 	for strings.ContainsRune(valid, l.next()) {
       
   127 	}
       
   128 	l.backup()
       
   129 }
       
   130 
       
   131 // acceptRunUntil consumes a run of runes up to a terminator.
       
   132 func (l *lexer) acceptRunUntil(term rune) {
       
   133 	for term != l.next() {
       
   134 	}
       
   135 	l.backup()
       
   136 }
       
   137 
       
   138 // hasText returns true if the current parsed text is not empty.
       
   139 func (l *lexer) isNotEmpty() bool {
       
   140 	return l.pos > l.start
       
   141 }
       
   142 
       
   143 // lineNumber reports which line we're on, based on the position of
       
   144 // the previous item returned by nextItem. Doing it this way
       
   145 // means we don't have to worry about peek double counting.
       
   146 func (l *lexer) lineNumber() int {
       
   147 	return 1 + strings.Count(l.input[:l.lastPos], "\n")
       
   148 }
       
   149 
       
   150 // errorf returns an error token and terminates the scan by passing
       
   151 // back a nil pointer that will be the next state, terminating l.nextItem.
       
   152 func (l *lexer) errorf(format string, args ...interface{}) stateFn {
       
   153 	l.items <- item{itemError, l.start, fmt.Sprintf(format, args...)}
       
   154 	return nil
       
   155 }
       
   156 
       
   157 // nextItem returns the next item from the input.
       
   158 func (l *lexer) nextItem() item {
       
   159 	i := <-l.items
       
   160 	l.lastPos = i.pos
       
   161 	return i
       
   162 }
       
   163 
       
   164 // lex creates a new scanner for the input string.
       
   165 func lex(input string) *lexer {
       
   166 	l := &lexer{
       
   167 		input: input,
       
   168 		items: make(chan item),
       
   169 		runes: make([]rune, 0, 32),
       
   170 	}
       
   171 	go l.run()
       
   172 	return l
       
   173 }
       
   174 
       
   175 // run runs the state machine for the lexer.
       
   176 func (l *lexer) run() {
       
   177 	for l.state = lexBeforeKey(l); l.state != nil; {
       
   178 		l.state = l.state(l)
       
   179 	}
       
   180 }
       
   181 
       
   182 // state functions
       
   183 
       
   184 // lexBeforeKey scans until a key begins.
       
   185 func lexBeforeKey(l *lexer) stateFn {
       
   186 	switch r := l.next(); {
       
   187 	case isEOF(r):
       
   188 		l.emit(itemEOF)
       
   189 		return nil
       
   190 
       
   191 	case isEOL(r):
       
   192 		l.ignore()
       
   193 		return lexBeforeKey
       
   194 
       
   195 	case isComment(r):
       
   196 		return lexComment
       
   197 
       
   198 	case isWhitespace(r):
       
   199 		l.ignore()
       
   200 		return lexBeforeKey
       
   201 
       
   202 	default:
       
   203 		l.backup()
       
   204 		return lexKey
       
   205 	}
       
   206 }
       
   207 
       
   208 // lexComment scans a comment line. The comment character has already been scanned.
       
   209 func lexComment(l *lexer) stateFn {
       
   210 	l.acceptRun(whitespace)
       
   211 	l.ignore()
       
   212 	for {
       
   213 		switch r := l.next(); {
       
   214 		case isEOF(r):
       
   215 			l.ignore()
       
   216 			l.emit(itemEOF)
       
   217 			return nil
       
   218 		case isEOL(r):
       
   219 			l.emit(itemComment)
       
   220 			return lexBeforeKey
       
   221 		default:
       
   222 			l.appendRune(r)
       
   223 		}
       
   224 	}
       
   225 }
       
   226 
       
   227 // lexKey scans the key up to a delimiter
       
   228 func lexKey(l *lexer) stateFn {
       
   229 	var r rune
       
   230 
       
   231 Loop:
       
   232 	for {
       
   233 		switch r = l.next(); {
       
   234 
       
   235 		case isEscape(r):
       
   236 			err := l.scanEscapeSequence()
       
   237 			if err != nil {
       
   238 				return l.errorf(err.Error())
       
   239 			}
       
   240 
       
   241 		case isEndOfKey(r):
       
   242 			l.backup()
       
   243 			break Loop
       
   244 
       
   245 		case isEOF(r):
       
   246 			break Loop
       
   247 
       
   248 		default:
       
   249 			l.appendRune(r)
       
   250 		}
       
   251 	}
       
   252 
       
   253 	if len(l.runes) > 0 {
       
   254 		l.emit(itemKey)
       
   255 	}
       
   256 
       
   257 	if isEOF(r) {
       
   258 		l.emit(itemEOF)
       
   259 		return nil
       
   260 	}
       
   261 
       
   262 	return lexBeforeValue
       
   263 }
       
   264 
       
   265 // lexBeforeValue scans the delimiter between key and value.
       
   266 // Leading and trailing whitespace is ignored.
       
   267 // We expect to be just after the key.
       
   268 func lexBeforeValue(l *lexer) stateFn {
       
   269 	l.acceptRun(whitespace)
       
   270 	l.accept(":=")
       
   271 	l.acceptRun(whitespace)
       
   272 	l.ignore()
       
   273 	return lexValue
       
   274 }
       
   275 
       
   276 // lexValue scans text until the end of the line. We expect to be just after the delimiter.
       
   277 func lexValue(l *lexer) stateFn {
       
   278 	for {
       
   279 		switch r := l.next(); {
       
   280 		case isEscape(r):
       
   281 			if isEOL(l.peek()) {
       
   282 				l.next()
       
   283 				l.acceptRun(whitespace)
       
   284 			} else {
       
   285 				err := l.scanEscapeSequence()
       
   286 				if err != nil {
       
   287 					return l.errorf(err.Error())
       
   288 				}
       
   289 			}
       
   290 
       
   291 		case isEOL(r):
       
   292 			l.emit(itemValue)
       
   293 			l.ignore()
       
   294 			return lexBeforeKey
       
   295 
       
   296 		case isEOF(r):
       
   297 			l.emit(itemValue)
       
   298 			l.emit(itemEOF)
       
   299 			return nil
       
   300 
       
   301 		default:
       
   302 			l.appendRune(r)
       
   303 		}
       
   304 	}
       
   305 }
       
   306 
       
   307 // scanEscapeSequence scans either one of the escaped characters
       
   308 // or a unicode literal. We expect to be after the escape character.
       
   309 func (l *lexer) scanEscapeSequence() error {
       
   310 	switch r := l.next(); {
       
   311 
       
   312 	case isEscapedCharacter(r):
       
   313 		l.appendRune(decodeEscapedCharacter(r))
       
   314 		return nil
       
   315 
       
   316 	case atUnicodeLiteral(r):
       
   317 		return l.scanUnicodeLiteral()
       
   318 
       
   319 	case isEOF(r):
       
   320 		return fmt.Errorf("premature EOF")
       
   321 
       
   322 	// silently drop the escape character and append the rune as is
       
   323 	default:
       
   324 		l.appendRune(r)
       
   325 		return nil
       
   326 	}
       
   327 }
       
   328 
       
   329 // scans a unicode literal in the form \uXXXX. We expect to be after the \u.
       
   330 func (l *lexer) scanUnicodeLiteral() error {
       
   331 	// scan the digits
       
   332 	d := make([]rune, 4)
       
   333 	for i := 0; i < 4; i++ {
       
   334 		d[i] = l.next()
       
   335 		if d[i] == eof || !strings.ContainsRune("0123456789abcdefABCDEF", d[i]) {
       
   336 			return fmt.Errorf("invalid unicode literal")
       
   337 		}
       
   338 	}
       
   339 
       
   340 	// decode the digits into a rune
       
   341 	r, err := strconv.ParseInt(string(d), 16, 0)
       
   342 	if err != nil {
       
   343 		return err
       
   344 	}
       
   345 
       
   346 	l.appendRune(rune(r))
       
   347 	return nil
       
   348 }
       
   349 
       
   // decodeEscapedCharacter maps the rune following an escape character to the
   // character it stands for: 'f', 'n', 'r' and 't' become form feed, newline,
   // carriage return and tab. Every other rune is returned unchanged.
   func decodeEscapedCharacter(r rune) rune {
   	if r == 'f' {
   		return '\f'
   	}
   	if r == 'n' {
   		return '\n'
   	}
   	if r == 'r' {
   		return '\r'
   	}
   	if r == 't' {
   		return '\t'
   	}
   	return r
   }
       
   365 
       
   // atUnicodeLiteral reports whether r, the rune following an already
   // consumed escape character, introduces a unicode literal.
   func atUnicodeLiteral(r rune) bool {
   	const marker = 'u'
   	return r == marker
   }
       
   371 
       
   // isComment reports whether r is a comment marker, '#' or '!'.
   func isComment(r rune) bool {
   	switch r {
   	case '#', '!':
   		return true
   	}
   	return false
   }
       
   376 
       
   // isEndOfKey reports whether r ends a key: whitespace (space, form feed,
   // tab), a line break, or one of the delimiters ':' and '='.
   func isEndOfKey(r rune) bool {
   	const terminators = " \f\t\r\n:="
   	return strings.IndexRune(terminators, r) >= 0
   }
       
   381 
       
   382 // isEOF reports whether we are at EOF.
       
   383 func isEOF(r rune) bool {
       
   384 	return r == eof
       
   385 }
       
   386 
       
   // isEOL reports whether r is a line break character: line feed or
   // carriage return.
   func isEOL(r rune) bool {
   	switch r {
   	case '\n', '\r':
   		return true
   	}
   	return false
   }
       
   391 
       
   // isEscape reports whether r is the backslash, the escape character that
   // introduces unicode literals and other escaped characters.
   func isEscape(r rune) bool {
   	const backslash = '\\'
   	return r == backslash
   }
       
   397 
       
   // isEscapedCharacter reports whether r, the rune following an already
   // consumed escape character, is one of the characters with a dedicated
   // escape: space, ':', '=', 'f', 'n', 'r' or 't'.
   func isEscapedCharacter(r rune) bool {
   	const escapable = " :=fnrt"
   	return strings.IndexRune(escapable, r) >= 0
   }
       
   403 
       
   404 // isWhitespace reports whether the rune is a whitespace character.
       
   405 func isWhitespace(r rune) bool {
       
   406 	return strings.ContainsRune(whitespace, r)
       
   407 }