golang/mikael/madonctl: comparison vendor/golang.org/x/text/unicode/norm/normalize.go

equal deleted inserted replaced

-:e77dad242f4c
+:2a9ec03fe5a1
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+// Note: the file data_test.go that is generated should not be checked in.
+//go:generate go run maketables.go triegen.go
+//go:generate go test -tags test
+// Package norm contains types and functions for normalizing Unicode strings.
+package norm // import "golang.org/x/text/unicode/norm"
+import (
+	"unicode/utf8"
+	"golang.org/x/text/transform"
+)
+// A Form denotes a canonical representation of Unicode code points.
+// The Unicode-defined normalization and equivalence forms are:
+//
+//   NFC   Unicode Normalization Form C
+//   NFD   Unicode Normalization Form D
+//   NFKC  Unicode Normalization Form KC
+//   NFKD  Unicode Normalization Form KD
+//
+// For a Form f, this documentation uses the notation f(x) to mean
+// the bytes or string x converted to the given form.
+// A position n in x is called a boundary if conversion to the form can
+// proceed independently on both sides:
+//   f(x) == append(f(x[0:n]), f(x[n:])...)
+//
+// References: http://unicode.org/reports/tr15/ and
+// http://unicode.org/notes/tn5/.
+type Form int
+const (
+	NFC Form = iota
+	NFD
+	NFKC
+	NFKD
+)
+// Bytes returns f(b). May return b if f(b) = b.
+func (f Form) Bytes(b []byte) []byte {
+	src := inputBytes(b)
+	ft := formTable[f]
+	n, ok := ft.quickSpan(src, 0, len(b), true)
+	if ok {
+		return b
+	}
+	out := make([]byte, n, len(b))
+	copy(out, b[0:n])
+	rb := reorderBuffer{f: *ft, src: src, nsrc: len(b), out: out, flushF: appendFlush}
+	return doAppendInner(&rb, n)
+}
+// String returns f(s).
+func (f Form) String(s string) string {
+	src := inputString(s)
+	ft := formTable[f]
+	n, ok := ft.quickSpan(src, 0, len(s), true)
+	if ok {
+		return s
+	}
+	out := make([]byte, n, len(s))
+	copy(out, s[0:n])
+	rb := reorderBuffer{f: *ft, src: src, nsrc: len(s), out: out, flushF: appendFlush}
+	return string(doAppendInner(&rb, n))
+}
+// IsNormal returns true if b == f(b).
+func (f Form) IsNormal(b []byte) bool {
+	src := inputBytes(b)
+	ft := formTable[f]
+	bp, ok := ft.quickSpan(src, 0, len(b), true)
+	if ok {
+		return true
+	}
+	rb := reorderBuffer{f: *ft, src: src, nsrc: len(b)}
+	rb.setFlusher(nil, cmpNormalBytes)
+	for bp < len(b) {
+		rb.out = b[bp:]
+		if bp = decomposeSegment(&rb, bp, true); bp < 0 {
+			return false
+		}
+		bp, _ = rb.f.quickSpan(rb.src, bp, len(b), true)
+	}
+	return true
+}
+func cmpNormalBytes(rb *reorderBuffer) bool {
+	b := rb.out
+	for i := 0; i < rb.nrune; i++ {
+		info := rb.rune[i]
+		if int(info.size) > len(b) {
+			return false
+		}
+		p := info.pos
+		pe := p + info.size
+		for ; p < pe; p++ {
+			if b[0] != rb.byte[p] {
+				return false
+			}
+			b = b[1:]
+		}
+	}
+	return true
+}
+// IsNormalString returns true if s == f(s).
+func (f Form) IsNormalString(s string) bool {
+	src := inputString(s)
+	ft := formTable[f]
+	bp, ok := ft.quickSpan(src, 0, len(s), true)
+	if ok {
+		return true
+	}
+	rb := reorderBuffer{f: *ft, src: src, nsrc: len(s)}
+	rb.setFlusher(nil, func(rb *reorderBuffer) bool {
+		for i := 0; i < rb.nrune; i++ {
+			info := rb.rune[i]
+			if bp+int(info.size) > len(s) {
+				return false
+			}
+			p := info.pos
+			pe := p + info.size
+			for ; p < pe; p++ {
+				if s[bp] != rb.byte[p] {
+					return false
+				}
+				bp++
+			}
+		}
+		return true
+	})
+	for bp < len(s) {
+		if bp = decomposeSegment(&rb, bp, true); bp < 0 {
+			return false
+		}
+		bp, _ = rb.f.quickSpan(rb.src, bp, len(s), true)
+	}
+	return true
+}
+// patchTail fixes a case where a rune may be incorrectly normalized
+// if it is followed by illegal continuation bytes. It returns the
+// patched buffer and whether the decomposition is still in progress.
+func patchTail(rb *reorderBuffer) bool {
+	info, p := lastRuneStart(&rb.f, rb.out)
+	if p == -1 || info.size == 0 {
+		return true
+	}
+	end := p + int(info.size)
+	extra := len(rb.out) - end
+	if extra > 0 {
+		// Potentially allocating memory. However, this only
+		// happens with ill-formed UTF-8.
+		x := make([]byte, 0)
+		x = append(x, rb.out[len(rb.out)-extra:]...)
+		rb.out = rb.out[:end]
+		decomposeToLastBoundary(rb)
+		rb.doFlush()
+		rb.out = append(rb.out, x...)
+		return false
+	}
+	buf := rb.out[p:]
+	rb.out = rb.out[:p]
+	decomposeToLastBoundary(rb)
+	if s := rb.ss.next(info); s == ssStarter {
+		rb.doFlush()
+		rb.ss.first(info)
+	} else if s == ssOverflow {
+		rb.doFlush()
+		rb.insertCGJ()
+		rb.ss = 0
+	}
+	rb.insertUnsafe(inputBytes(buf), 0, info)
+	return true
+}
+func appendQuick(rb *reorderBuffer, i int) int {
+	if rb.nsrc == i {
+		return i
+	}
+	end, _ := rb.f.quickSpan(rb.src, i, rb.nsrc, true)
+	rb.out = rb.src.appendSlice(rb.out, i, end)
+	return end
+}
+// Append returns f(append(out, b...)).
+// The buffer out must be nil, empty, or equal to f(out).
+func (f Form) Append(out []byte, src ...byte) []byte {
+	return f.doAppend(out, inputBytes(src), len(src))
+}
+func (f Form) doAppend(out []byte, src input, n int) []byte {
+	if n == 0 {
+		return out
+	}
+	ft := formTable[f]
+	// Attempt to do a quickSpan first so we can avoid initializing the reorderBuffer.
+	if len(out) == 0 {
+		p, _ := ft.quickSpan(src, 0, n, true)
+		out = src.appendSlice(out, 0, p)
+		if p == n {
+			return out
+		}
+		rb := reorderBuffer{f: *ft, src: src, nsrc: n, out: out, flushF: appendFlush}
+		return doAppendInner(&rb, p)
+	}
+	rb := reorderBuffer{f: *ft, src: src, nsrc: n}
+	return doAppend(&rb, out, 0)
+}
+func doAppend(rb *reorderBuffer, out []byte, p int) []byte {
+	rb.setFlusher(out, appendFlush)
+	src, n := rb.src, rb.nsrc
+	doMerge := len(out) > 0
+	if q := src.skipContinuationBytes(p); q > p {
+		// Move leading non-starters to destination.
+		rb.out = src.appendSlice(rb.out, p, q)
+		p = q
+		doMerge = patchTail(rb)
+	}
+	fd := &rb.f
+	if doMerge {
+		var info Properties
+		if p < n {
+			info = fd.info(src, p)
+			if !info.BoundaryBefore() || info.nLeadingNonStarters() > 0 {
+				if p == 0 {
+					decomposeToLastBoundary(rb)
+				}
+				p = decomposeSegment(rb, p, true)
+			}
+		}
+		if info.size == 0 {
+			rb.doFlush()
+			// Append incomplete UTF-8 encoding.
+			return src.appendSlice(rb.out, p, n)
+		}
+		if rb.nrune > 0 {
+			return doAppendInner(rb, p)
+		}
+	}
+	p = appendQuick(rb, p)
+	return doAppendInner(rb, p)
+}
+func doAppendInner(rb *reorderBuffer, p int) []byte {
+	for n := rb.nsrc; p < n; {
+		p = decomposeSegment(rb, p, true)
+		p = appendQuick(rb, p)
+	}
+	return rb.out
+}
+// AppendString returns f(append(out, []byte(s))).
+// The buffer out must be nil, empty, or equal to f(out).
+func (f Form) AppendString(out []byte, src string) []byte {
+	return f.doAppend(out, inputString(src), len(src))
+}
+// QuickSpan returns a boundary n such that b[0:n] == f(b[0:n]).
+// It is not guaranteed to return the largest such n.
+func (f Form) QuickSpan(b []byte) int {
+	n, _ := formTable[f].quickSpan(inputBytes(b), 0, len(b), true)
+	return n
+}
+// Span implements transform.SpanningTransformer. It returns a boundary n such
+// that b[0:n] == f(b[0:n]). It is not guaranteed to return the largest such n.
+func (f Form) Span(b []byte, atEOF bool) (n int, err error) {
+	n, ok := formTable[f].quickSpan(inputBytes(b), 0, len(b), atEOF)
+	if n < len(b) {
+		if !ok {
+			err = transform.ErrEndOfSpan
+		} else {
+			err = transform.ErrShortSrc
+		}
+	}
+	return n, err
+}
+// SpanString returns a boundary n such that s[0:n] == f(s[0:n]).
+// It is not guaranteed to return the largest such n.
+func (f Form) SpanString(s string, atEOF bool) (n int, err error) {
+	n, ok := formTable[f].quickSpan(inputString(s), 0, len(s), atEOF)
+	if n < len(s) {
+		if !ok {
+			err = transform.ErrEndOfSpan
+		} else {
+			err = transform.ErrShortSrc
+		}
+	}
+	return n, err
+}
+// quickSpan returns a boundary n such that src[0:n] == f(src[0:n]) and
+// whether any non-normalized parts were found. If atEOF is false, n will
+// not point past the last segment if this segment might be become
+// non-normalized by appending other runes.
+func (f *formInfo) quickSpan(src input, i, end int, atEOF bool) (n int, ok bool) {
+	var lastCC uint8
+	ss := streamSafe(0)
+	lastSegStart := i
+	for n = end; i < n; {
+		if j := src.skipASCII(i, n); i != j {
+			i = j
+			lastSegStart = i - 1
+			lastCC = 0
+			ss = 0
+			continue
+		}
+		info := f.info(src, i)
+		if info.size == 0 {
+			if atEOF {
+				// include incomplete runes
+				return n, true
+			}
+			return lastSegStart, true
+		}
+		// This block needs to be before the next, because it is possible to
+		// have an overflow for runes that are starters (e.g. with U+FF9E).
+		switch ss.next(info) {
+		case ssStarter:
+			lastSegStart = i
+		case ssOverflow:
+			return lastSegStart, false
+		case ssSuccess:
+			if lastCC > info.ccc {
+				return lastSegStart, false
+			}
+		}
+		if f.composing {
+			if !info.isYesC() {
+				break
+			}
+		} else {
+			if !info.isYesD() {
+				break
+			}
+		}
+		lastCC = info.ccc
+		i += int(info.size)
+	}
+	if i == n {
+		if !atEOF {
+			n = lastSegStart
+		}
+		return n, true
+	}
+	return lastSegStart, false
+}
+// QuickSpanString returns a boundary n such that s[0:n] == f(s[0:n]).
+// It is not guaranteed to return the largest such n.
+func (f Form) QuickSpanString(s string) int {
+	n, _ := formTable[f].quickSpan(inputString(s), 0, len(s), true)
+	return n
+}
+// FirstBoundary returns the position i of the first boundary in b
+// or -1 if b contains no boundary.
+func (f Form) FirstBoundary(b []byte) int {
+	return f.firstBoundary(inputBytes(b), len(b))
+}
+func (f Form) firstBoundary(src input, nsrc int) int {
+	i := src.skipContinuationBytes(0)
+	if i >= nsrc {
+		return -1
+	}
+	fd := formTable[f]
+	ss := streamSafe(0)
+	// We should call ss.first here, but we can't as the first rune is
+	// skipped already. This means FirstBoundary can't really determine
+	// CGJ insertion points correctly. Luckily it doesn't have to.
+	for {
+		info := fd.info(src, i)
+		if info.size == 0 {
+			return -1
+		}
+		if s := ss.next(info); s != ssSuccess {
+			return i
+		}
+		i += int(info.size)
+		if i >= nsrc {
+			if !info.BoundaryAfter() && !ss.isMax() {
+				return -1
+			}
+			return nsrc
+		}
+	}
+}
+// FirstBoundaryInString returns the position i of the first boundary in s
+// or -1 if s contains no boundary.
+func (f Form) FirstBoundaryInString(s string) int {
+	return f.firstBoundary(inputString(s), len(s))
+}
+// NextBoundary reports the index of the boundary between the first and next
+// segment in b or -1 if atEOF is false and there are not enough bytes to
+// determine this boundary.
+func (f Form) NextBoundary(b []byte, atEOF bool) int {
+	return f.nextBoundary(inputBytes(b), len(b), atEOF)
+}
+// NextBoundaryInString reports the index of the boundary between the first and
+// next segment in b or -1 if atEOF is false and there are not enough bytes to
+// determine this boundary.
+func (f Form) NextBoundaryInString(s string, atEOF bool) int {
+	return f.nextBoundary(inputString(s), len(s), atEOF)
+}
+func (f Form) nextBoundary(src input, nsrc int, atEOF bool) int {
+	if nsrc == 0 {
+		if atEOF {
+			return 0
+		}
+		return -1
+	}
+	fd := formTable[f]
+	info := fd.info(src, 0)
+	if info.size == 0 {
+		if atEOF {
+			return 1
+		}
+		return -1
+	}
+	ss := streamSafe(0)
+	ss.first(info)
+	for i := int(info.size); i < nsrc; i += int(info.size) {
+		info = fd.info(src, i)
+		if info.size == 0 {
+			if atEOF {
+				return i
+			}
+			return -1
+		}
+		// TODO: Using streamSafe to determine the boundary isn't the same as
+		// using BoundaryBefore. Determine which should be used.
+		if s := ss.next(info); s != ssSuccess {
+			return i
+		}
+	}
+	if !atEOF && !info.BoundaryAfter() && !ss.isMax() {
+		return -1
+	}
+	return nsrc
+}
+// LastBoundary returns the position i of the last boundary in b
+// or -1 if b contains no boundary.
+func (f Form) LastBoundary(b []byte) int {
+	return lastBoundary(formTable[f], b)
+}
+func lastBoundary(fd *formInfo, b []byte) int {
+	i := len(b)
+	info, p := lastRuneStart(fd, b)
+	if p == -1 {
+		return -1
+	}
+	if info.size == 0 { // ends with incomplete rune
+		if p == 0 { // starts with incomplete rune
+			return -1
+		}
+		i = p
+		info, p = lastRuneStart(fd, b[:i])
+		if p == -1 { // incomplete UTF-8 encoding or non-starter bytes without a starter
+			return i
+		}
+	}
+	if p+int(info.size) != i { // trailing non-starter bytes: illegal UTF-8
+		return i
+	}
+	if info.BoundaryAfter() {
+		return i
+	}
+	ss := streamSafe(0)
+	v := ss.backwards(info)
+	for i = p; i >= 0 && v != ssStarter; i = p {
+		info, p = lastRuneStart(fd, b[:i])
+		if v = ss.backwards(info); v == ssOverflow {
+			break
+		}
+		if p+int(info.size) != i {
+			if p == -1 { // no boundary found
+				return -1
+			}
+			return i // boundary after an illegal UTF-8 encoding
+		}
+	}
+	return i
+}
+// decomposeSegment scans the first segment in src into rb. It inserts 0x034f
+// (Grapheme Joiner) when it encounters a sequence of more than 30 non-starters
+// and returns the number of bytes consumed from src or iShortDst or iShortSrc.
+func decomposeSegment(rb *reorderBuffer, sp int, atEOF bool) int {
+	// Force one character to be consumed.
+	info := rb.f.info(rb.src, sp)
+	if info.size == 0 {
+		return 0
+	}
+	if s := rb.ss.next(info); s == ssStarter {
+		// TODO: this could be removed if we don't support merging.
+		if rb.nrune > 0 {
+			goto end
+		}
+	} else if s == ssOverflow {
+		rb.insertCGJ()
+		goto end
+	}
+	if err := rb.insertFlush(rb.src, sp, info); err != iSuccess {
+		return int(err)
+	}
+	for {
+		sp += int(info.size)
+		if sp >= rb.nsrc {
+			if !atEOF && !info.BoundaryAfter() {
+				return int(iShortSrc)
+			}
+			break
+		}
+		info = rb.f.info(rb.src, sp)
+		if info.size == 0 {
+			if !atEOF {
+				return int(iShortSrc)
+			}
+			break
+		}
+		if s := rb.ss.next(info); s == ssStarter {
+			break
+		} else if s == ssOverflow {
+			rb.insertCGJ()
+			break
+		}
+		if err := rb.insertFlush(rb.src, sp, info); err != iSuccess {
+			return int(err)
+		}
+	}
+end:
+	if !rb.doFlush() {
+		return int(iShortDst)
+	}
+	return sp
+}
+// lastRuneStart returns the runeInfo and position of the last
+// rune in buf or the zero runeInfo and -1 if no rune was found.
+func lastRuneStart(fd *formInfo, buf []byte) (Properties, int) {
+	p := len(buf) - 1
+	for ; p >= 0 && !utf8.RuneStart(buf[p]); p-- {
+	}
+	if p < 0 {
+		return Properties{}, -1
+	}
+	return fd.info(inputBytes(buf), p), p
+}
+// decomposeToLastBoundary finds an open segment at the end of the buffer
+// and scans it into rb. Returns the buffer minus the last segment.
+func decomposeToLastBoundary(rb *reorderBuffer) {
+	fd := &rb.f
+	info, i := lastRuneStart(fd, rb.out)
+	if int(info.size) != len(rb.out)-i {
+		// illegal trailing continuation bytes
+		return
+	}
+	if info.BoundaryAfter() {
+		return
+	}
+	var add [maxNonStarters + 1]Properties // stores runeInfo in reverse order
+	padd := 0
+	ss := streamSafe(0)
+	p := len(rb.out)
+	for {
+		add[padd] = info
+		v := ss.backwards(info)
+		if v == ssOverflow {
+			// Note that if we have an overflow, it the string we are appending to
+			// is not correctly normalized. In this case the behavior is undefined.
+			break
+		}
+		padd++
+		p -= int(info.size)
+		if v == ssStarter || p < 0 {
+			break
+		}
+		info, i = lastRuneStart(fd, rb.out[:p])
+		if int(info.size) != p-i {
+			break
+		}
+	}
+	rb.ss = ss
+	// Copy bytes for insertion as we may need to overwrite rb.out.
+	var buf [maxBufferSize * utf8.UTFMax]byte
+	cp := buf[:copy(buf[:], rb.out[p:])]
+	rb.out = rb.out[:p]
+	for padd--; padd >= 0; padd-- {
+		info = add[padd]
+		rb.insertUnsafe(inputBytes(cp), 0, info)
+		cp = cp[info.size:]
+	}
+}

changeset 242	2a9ec03fe5a1
child 251	1c52a0eeb952