rclone/vendor/golang.org/x/text/internal/colltab/contract.go

// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package colltab

import "unicode/utf8"

// For a description of ContractTrieSet, see text/collate/build/contract.go.

// ContractTrieSet is a flat table of contraction trie entries. As used by the
// scanners in this file, the fields of each entry are interpreted as follows:
//   - L is the byte the entry matches exactly, or the low end of a byte range.
//   - H is, for a non-final entry, added to the size of the current block to
//     locate the next block of entries; for a final entry it is the high end
//     of the byte range.
//   - N is the number of entries in the next block, or final if there is none.
//   - I is the result index for a match, or noIndex if the entry has none.
type ContractTrieSet []struct{ L, H, N, I uint8 }

// ctScanner is used to match a trie to an input sequence.
// A contraction may match a non-contiguous sequence of bytes in an input string.
// For example, if there is a contraction for <a, combining_ring>, it should match
// the sequence <a, combining_cedilla, combining_ring>, as combining_cedilla does
// not block combining_ring.
// ctScanner does not automatically skip over non-blocking non-starters, but rather
// retains the state of the last match and leaves it up to the user to continue
// the match at the appropriate points.
type ctScanner struct {
	states ContractTrieSet // trie entries for the current state; advanced by scan at rune boundaries
	s      []byte          // input being matched
	n      int             // number of entries in the current block of states
	index  int             // result index of the last match recorded by scan; 0 if none
	pindex int             // position in s just past the last recorded match; 0 if none
	done   bool            // set by scan once a final entry has been matched
}

// ctScannerString has the same fields as ctScanner, but matches against a
// string input instead of a byte slice.
type ctScannerString struct {
	states ContractTrieSet // trie entries for the current state; advanced by scan at rune boundaries
	s      string          // input being matched
	n      int             // number of entries in the current block of states
	index  int             // result index of the last match recorded by scan; 0 if none
	pindex int             // position in s just past the last recorded match; 0 if none
	done   bool            // set by scan once a final entry has been matched
}

// scanner returns a ctScanner over the byte slice b whose initial state is the
// part of t starting at entry index, with n entries in the first block.
func (t ContractTrieSet) scanner(index, n int, b []byte) ctScanner {
	return ctScanner{
		states: t[index:],
		s:      b,
		n:      n,
	}
}

// scannerString returns a ctScannerString over str whose initial state is the
// part of t starting at entry index, with n entries in the first block.
func (t ContractTrieSet) scannerString(index, n int, str string) ctScannerString {
	return ctScannerString{
		states: t[index:],
		s:      str,
		n:      n,
	}
}

// result reports the outcome of the matching done so far: i is the offset
// recorded for the last match and p is the number of bytes consumed by it.
// Both are 0 if no suffix matched.
func (s *ctScanner) result() (i, p int) {
	i, p = s.index, s.pindex
	return i, p
}

// result reports the outcome of the matching done so far: i is the offset
// recorded for the last match and p is the number of bytes consumed by it.
// Both are 0 if no suffix matched.
func (s *ctScannerString) result() (i, p int) {
	i, p = s.index, s.pindex
	return i, p
}

// Sentinel values for the N and I fields of a ContractTrieSet entry, as
// checked by the scan methods.
const (
	final   = 0    // N value marking an entry that terminates a match
	noIndex = 0xFF // I value marking an entry that carries no result index
)

// scan matches the longest suffix at position p of the input and returns the
// position reached. When the match ends in a final entry, that is the position
// just past the last matched byte and s.done is set. Otherwise it is the last
// rune boundary reached by a successful match (p itself if there was none),
// and the scanner state saved at that boundary is retained so that matching
// can be resumed later.
func (s *ctScanner) scan(p int) int {
	input := s.s
	table, count := s.states, s.n
	runeStart := p // position of the most recent rune boundary reached by a match

	i := 0
	for i < count && p < len(input) {
		entry, b := table[i], input[p]
		// TODO: a significant number of contractions are of a form that
		// cannot match discontiguous UTF-8 in a normalized string. We could let
		// a negative value of entry.N mean that we can set s.done = true and
		// avoid the need for additional matches.
		switch {
		case b == entry.L:
			// Exact byte match: consume it and record a result if there is one.
			p++
			if entry.I != noIndex {
				s.index, s.pindex = int(entry.I), p
			}
			if entry.N == final {
				s.done = true
				return p
			}
			// Descend into the next block. The offset must be computed with
			// the size of the current block, so update table before count.
			table = table[int(entry.H)+count:]
			count = int(entry.N)
			i = 0
			// Only commit the new state when the input is at a rune boundary,
			// so that a partially matched rune is never retained.
			if p >= len(input) || utf8.RuneStart(input[p]) {
				s.states, s.n, runeStart = table, count, p
			}
		case b > entry.L && entry.N == final && b <= entry.H:
			// Final range entry: the index is offset by the position of b
			// within the range [L, H].
			p++
			s.done = true
			s.index = int(b-entry.L) + int(entry.I)
			s.pindex = p
			return p
		default:
			i++
		}
	}
	return runeStart
}

// scan implements the same matching algorithm as ctScanner.scan, operating on
// a string input. It matches the longest suffix at position p and returns the
// position reached: just past the last matched byte when a final entry matched
// (s.done is then set), or otherwise the last rune boundary reached by a
// successful match (p itself if there was none).
func (s *ctScannerString) scan(p int) int {
	input := s.s
	table, count := s.states, s.n
	runeStart := p // position of the most recent rune boundary reached by a match

	i := 0
	for i < count && p < len(input) {
		entry, b := table[i], input[p]
		// TODO: a significant number of contractions are of a form that
		// cannot match discontiguous UTF-8 in a normalized string. We could let
		// a negative value of entry.N mean that we can set s.done = true and
		// avoid the need for additional matches.
		switch {
		case b == entry.L:
			// Exact byte match: consume it and record a result if there is one.
			p++
			if entry.I != noIndex {
				s.index, s.pindex = int(entry.I), p
			}
			if entry.N == final {
				s.done = true
				return p
			}
			// Descend into the next block. The offset must be computed with
			// the size of the current block, so update table before count.
			table = table[int(entry.H)+count:]
			count = int(entry.N)
			i = 0
			// Only commit the new state when the input is at a rune boundary,
			// so that a partially matched rune is never retained.
			if p >= len(input) || utf8.RuneStart(input[p]) {
				s.states, s.n, runeStart = table, count, p
			}
		case b > entry.L && entry.N == final && b <= entry.H:
			// Final range entry: the index is offset by the position of b
			// within the range [L, H].
			p++
			s.done = true
			s.index = int(b-entry.L) + int(entry.I)
			s.pindex = p
			return p
		default:
			i++
		}
	}
	return runeStart
}
Switch to using the dep tool and update all the dependencies 2017-05-11 16:39:54 +02:00			`// Copyright 2012 The Go Authors. All rights reserved.`
			`// Use of this source code is governed by a BSD-style`
			`// license that can be found in the LICENSE file.`

			`package colltab`

			`import "unicode/utf8"`

			`// For a description of ContractTrieSet, see text/collate/build/contract.go.`

			`type ContractTrieSet []struct{ L, H, N, I uint8 }`

			`// ctScanner is used to match a trie to an input sequence.`
			`// A contraction may match a non-contiguous sequence of bytes in an input string.`
			`// For example, if there is a contraction for <a, combining_ring>, it should match`
			`// the sequence <a, combining_cedilla, combining_ring>, as combining_cedilla does`
			`// not block combining_ring.`
			`// ctScanner does not automatically skip over non-blocking non-starters, but rather`
			`// retains the state of the last match and leaves it up to the user to continue`
			`// the match at the appropriate points.`
			`type ctScanner struct {`
			`states ContractTrieSet`
			`s []byte`
			`n int`
			`index int`
			`pindex int`
			`done bool`
			`}`

			`type ctScannerString struct {`
			`states ContractTrieSet`
			`s string`
			`n int`
			`index int`
			`pindex int`
			`done bool`
			`}`

			`func (t ContractTrieSet) scanner(index, n int, b []byte) ctScanner {`
			`return ctScanner{s: b, states: t[index:], n: n}`
			`}`

			`func (t ContractTrieSet) scannerString(index, n int, str string) ctScannerString {`
			`return ctScannerString{s: str, states: t[index:], n: n}`
			`}`

			`// result returns the offset i and bytes consumed p so far. If no suffix`
			`// matched, i and p will be 0.`
			`func (s *ctScanner) result() (i, p int) {`
			`return s.index, s.pindex`
			`}`

			`func (s *ctScannerString) result() (i, p int) {`
			`return s.index, s.pindex`
			`}`

			`const (`
			`final = 0`
			`noIndex = 0xFF`
			`)`

			`// scan matches the longest suffix at the current location in the input`
			`// and returns the number of bytes consumed.`
			`func (s *ctScanner) scan(p int) int {`
			`pr := p // the p at the rune start`
			`str := s.s`
			`states, n := s.states, s.n`
			`for i := 0; i < n && p < len(str); {`
			`e := states[i]`
			`c := str[p]`
			`// TODO: a significant number of contractions are of a form that`
			`// cannot match discontiguous UTF-8 in a normalized string. We could let`
			`// a negative value of e.n mean that we can set s.done = true and avoid`
			`// the need for additional matches.`
			`if c >= e.L {`
			`if e.L == c {`
			`p++`
			`if e.I != noIndex {`
			`s.index = int(e.I)`
			`s.pindex = p`
			`}`
			`if e.N != final {`
			`i, states, n = 0, states[int(e.H)+n:], int(e.N)`
			`if p >= len(str) \|\| utf8.RuneStart(str[p]) {`
			`s.states, s.n, pr = states, n, p`
			`}`
			`} else {`
			`s.done = true`
			`return p`
			`}`
			`continue`
			`} else if e.N == final && c <= e.H {`
			`p++`
			`s.done = true`
			`s.index = int(c-e.L) + int(e.I)`
			`s.pindex = p`
			`return p`
			`}`
			`}`
			`i++`
			`}`
			`return pr`
			`}`

			`// scan is a verbatim copy of ctScanner.scan.`
			`func (s *ctScannerString) scan(p int) int {`
			`pr := p // the p at the rune start`
			`str := s.s`
			`states, n := s.states, s.n`
			`for i := 0; i < n && p < len(str); {`
			`e := states[i]`
			`c := str[p]`
			`// TODO: a significant number of contractions are of a form that`
			`// cannot match discontiguous UTF-8 in a normalized string. We could let`
			`// a negative value of e.n mean that we can set s.done = true and avoid`
			`// the need for additional matches.`
			`if c >= e.L {`
			`if e.L == c {`
			`p++`
			`if e.I != noIndex {`
			`s.index = int(e.I)`
			`s.pindex = p`
			`}`
			`if e.N != final {`
			`i, states, n = 0, states[int(e.H)+n:], int(e.N)`
			`if p >= len(str) \|\| utf8.RuneStart(str[p]) {`
			`s.states, s.n, pr = states, n, p`
			`}`
			`} else {`
			`s.done = true`
			`return p`
			`}`
			`continue`
			`} else if e.N == final && c <= e.H {`
			`p++`
			`s.done = true`
			`s.index = int(c-e.L) + int(e.I)`
			`s.pindex = p`
			`return p`
			`}`
			`}`
			`i++`
			`}`
			`return pr`
			`}`