vendor/github.com/magiconair/properties/lex.go
author rjp <zimpenfish@gmail.com>
Mon, 23 Jan 2023 16:39:02 +0000
changeset 267 5b91a65ba95a
parent 260 445e01aede7e
child 265 05c40b36d3b2
permissions -rw-r--r--
Update to handle non-int64 IDs Pleroma/Akkoma and GotoSocial use opaque IDs rather than `int64`s like Mastodon which means that `madon` can't talk to either of those. This commit updates everything that can be an ID to `madon.ActivityID` which is an alias for `string` - can't create a specific type for it since there's more than a few places where they're concatenated directly to strings for URLs, etc. Which means it could just as easily be a direct `string` type itself but I find that having distinct types can often make the code more readable and understandable. One extra bit is that `statusOpts` has grown a `_hasReplyTo` boolean to indicate whether the `--in-reply-to` flag was given or not because we can't distinguish because "empty because default" or "empty because given and empty". Another way around this would be to set the default to some theoretically impossible or unlikely string but you never know when someone might spin up an instance where, e.g., admin posts have negative integer IDs.

// Copyright 2018 Frank Schroeder. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//
// Parts of the lexer are from the template/text/parser package
// For these parts the following applies:
//
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file of the go 1.2
// distribution.

package properties

import (
	"fmt"
	"strconv"
	"strings"
	"unicode/utf8"
)

// item represents a token or text string returned from the scanner.
type item struct {
	typ itemType // The type of this item.
	pos int      // The starting position, in bytes, of this item in the input string.
	val string   // The value of this item.
}

func (i item) String() string {
	switch {
	case i.typ == itemEOF:
		return "EOF"
	case i.typ == itemError:
		return i.val
	case len(i.val) > 10:
		return fmt.Sprintf("%.10q...", i.val)
	}
	return fmt.Sprintf("%q", i.val)
}

// itemType identifies the type of lex items.
type itemType int

const (
	itemError itemType = iota // error occurred; value is text of error
	itemEOF
	itemKey     // a key
	itemValue   // a value
	itemComment // a comment
)

// defines a constant for EOF
const eof = -1

// permitted whitespace characters space, FF and TAB
const whitespace = " \f\t"

// stateFn represents the state of the scanner as a function that returns the next state.
type stateFn func(*lexer) stateFn

// lexer holds the state of the scanner.
type lexer struct {
	input   string    // the string being scanned
	state   stateFn   // the next lexing function to enter
	pos     int       // current position in the input
	start   int       // start position of this item
	width   int       // width of last rune read from input
	lastPos int       // position of most recent item returned by nextItem
	runes   []rune    // scanned runes for this item
	items   chan item // channel of scanned items
}

// next returns the next rune in the input.
func (l *lexer) next() rune {
	if l.pos >= len(l.input) {
		l.width = 0
		return eof
	}
	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
	l.width = w
	l.pos += l.width
	return r
}

// peek returns but does not consume the next rune in the input.
func (l *lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}

// backup steps back one rune. Can only be called once per call of next.
func (l *lexer) backup() {
	l.pos -= l.width
}

// emit passes an item back to the client.
func (l *lexer) emit(t itemType) {
	i := item{t, l.start, string(l.runes)}
	l.items <- i
	l.start = l.pos
	l.runes = l.runes[:0]
}

// ignore skips over the pending input before this point.
func (l *lexer) ignore() {
	l.start = l.pos
}

// appends the rune to the current value
func (l *lexer) appendRune(r rune) {
	l.runes = append(l.runes, r)
}

// accept consumes the next rune if it's from the valid set.
func (l *lexer) accept(valid string) bool {
	if strings.ContainsRune(valid, l.next()) {
		return true
	}
	l.backup()
	return false
}

// acceptRun consumes a run of runes from the valid set.
func (l *lexer) acceptRun(valid string) {
	for strings.ContainsRune(valid, l.next()) {
	}
	l.backup()
}

// lineNumber reports which line we're on, based on the position of
// the previous item returned by nextItem. Doing it this way
// means we don't have to worry about peek double counting.
func (l *lexer) lineNumber() int {
	return 1 + strings.Count(l.input[:l.lastPos], "\n")
}

// errorf returns an error token and terminates the scan by passing
// back a nil pointer that will be the next state, terminating l.nextItem.
func (l *lexer) errorf(format string, args ...interface{}) stateFn {
	l.items <- item{itemError, l.start, fmt.Sprintf(format, args...)}
	return nil
}

// nextItem returns the next item from the input.
func (l *lexer) nextItem() item {
	i := <-l.items
	l.lastPos = i.pos
	return i
}

// lex creates a new scanner for the input string.
func lex(input string) *lexer {
	l := &lexer{
		input: input,
		items: make(chan item),
		runes: make([]rune, 0, 32),
	}
	go l.run()
	return l
}

// run runs the state machine for the lexer.
func (l *lexer) run() {
	for l.state = lexBeforeKey(l); l.state != nil; {
		l.state = l.state(l)
	}
}

// state functions

// lexBeforeKey scans until a key begins.
func lexBeforeKey(l *lexer) stateFn {
	switch r := l.next(); {
	case isEOF(r):
		l.emit(itemEOF)
		return nil

	case isEOL(r):
		l.ignore()
		return lexBeforeKey

	case isComment(r):
		return lexComment

	case isWhitespace(r):
		l.ignore()
		return lexBeforeKey

	default:
		l.backup()
		return lexKey
	}
}

// lexComment scans a comment line. The comment character has already been scanned.
func lexComment(l *lexer) stateFn {
	l.acceptRun(whitespace)
	l.ignore()
	for {
		switch r := l.next(); {
		case isEOF(r):
			l.ignore()
			l.emit(itemEOF)
			return nil
		case isEOL(r):
			l.emit(itemComment)
			return lexBeforeKey
		default:
			l.appendRune(r)
		}
	}
}

// lexKey scans the key up to a delimiter
func lexKey(l *lexer) stateFn {
	var r rune

Loop:
	for {
		switch r = l.next(); {

		case isEscape(r):
			err := l.scanEscapeSequence()
			if err != nil {
				return l.errorf(err.Error())
			}

		case isEndOfKey(r):
			l.backup()
			break Loop

		case isEOF(r):
			break Loop

		default:
			l.appendRune(r)
		}
	}

	if len(l.runes) > 0 {
		l.emit(itemKey)
	}

	if isEOF(r) {
		l.emit(itemEOF)
		return nil
	}

	return lexBeforeValue
}

// lexBeforeValue scans the delimiter between key and value.
// Leading and trailing whitespace is ignored.
// We expect to be just after the key.
func lexBeforeValue(l *lexer) stateFn {
	l.acceptRun(whitespace)
	l.accept(":=")
	l.acceptRun(whitespace)
	l.ignore()
	return lexValue
}

// lexValue scans text until the end of the line. We expect to be just after the delimiter.
func lexValue(l *lexer) stateFn {
	for {
		switch r := l.next(); {
		case isEscape(r):
			if isEOL(l.peek()) {
				l.next()
				l.acceptRun(whitespace)
			} else {
				err := l.scanEscapeSequence()
				if err != nil {
					return l.errorf(err.Error())
				}
			}

		case isEOL(r):
			l.emit(itemValue)
			l.ignore()
			return lexBeforeKey

		case isEOF(r):
			l.emit(itemValue)
			l.emit(itemEOF)
			return nil

		default:
			l.appendRune(r)
		}
	}
}

// scanEscapeSequence scans either one of the escaped characters
// or a unicode literal. We expect to be after the escape character.
func (l *lexer) scanEscapeSequence() error {
	switch r := l.next(); {

	case isEscapedCharacter(r):
		l.appendRune(decodeEscapedCharacter(r))
		return nil

	case atUnicodeLiteral(r):
		return l.scanUnicodeLiteral()

	case isEOF(r):
		return fmt.Errorf("premature EOF")

	// silently drop the escape character and append the rune as is
	default:
		l.appendRune(r)
		return nil
	}
}

// scans a unicode literal in the form \uXXXX. We expect to be after the \u.
func (l *lexer) scanUnicodeLiteral() error {
	// scan the digits
	d := make([]rune, 4)
	for i := 0; i < 4; i++ {
		d[i] = l.next()
		if d[i] == eof || !strings.ContainsRune("0123456789abcdefABCDEF", d[i]) {
			return fmt.Errorf("invalid unicode literal")
		}
	}

	// decode the digits into a rune
	r, err := strconv.ParseInt(string(d), 16, 0)
	if err != nil {
		return err
	}

	l.appendRune(rune(r))
	return nil
}

// decodeEscapedCharacter returns the unescaped rune. We expect to be after the escape character.
func decodeEscapedCharacter(r rune) rune {
	switch r {
	case 'f':
		return '\f'
	case 'n':
		return '\n'
	case 'r':
		return '\r'
	case 't':
		return '\t'
	default:
		return r
	}
}

// atUnicodeLiteral reports whether we are at a unicode literal.
// The escape character has already been consumed.
func atUnicodeLiteral(r rune) bool {
	return r == 'u'
}

// isComment reports whether we are at the start of a comment.
func isComment(r rune) bool {
	return r == '#' || r == '!'
}

// isEndOfKey reports whether the rune terminates the current key.
func isEndOfKey(r rune) bool {
	return strings.ContainsRune(" \f\t\r\n:=", r)
}

// isEOF reports whether we are at EOF.
func isEOF(r rune) bool {
	return r == eof
}

// isEOL reports whether we are at a new line character.
func isEOL(r rune) bool {
	return r == '\n' || r == '\r'
}

// isEscape reports whether the rune is the escape character which
// prefixes unicode literals and other escaped characters.
func isEscape(r rune) bool {
	return r == '\\'
}

// isEscapedCharacter reports whether we are at one of the characters that need escaping.
// The escape character has already been consumed.
func isEscapedCharacter(r rune) bool {
	return strings.ContainsRune(" :=fnrt", r)
}

// isWhitespace reports whether the rune is a whitespace character.
func isWhitespace(r rune) bool {
	return strings.ContainsRune(whitespace, r)
}