2018-11-02 13:12:31 +01:00
|
|
|
|
/*
|
|
|
|
|
Translate file names for usage on restrictive storage systems
|
|
|
|
|
|
|
|
|
|
The restricted set of characters are mapped to a unicode equivalent version
|
|
|
|
|
(most to their FULLWIDTH variant) to increase compatability with other
|
|
|
|
|
storage systems.
|
|
|
|
|
See: http://unicode-search.net/unicode-namesearch.pl?term=FULLWIDTH
|
|
|
|
|
|
|
|
|
|
Encoders will also quote reserved characters to differentiate between
|
|
|
|
|
the raw and encoded forms.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
package encoder
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"bytes"
|
|
|
|
|
"fmt"
|
|
|
|
|
"io"
|
|
|
|
|
"strconv"
|
|
|
|
|
"strings"
|
|
|
|
|
"unicode/utf8"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
const (
|
|
|
|
|
// adding this to any printable ASCII character turns it into the
|
|
|
|
|
// FULLWIDTH variant
|
|
|
|
|
fullOffset = 0xFEE0
|
|
|
|
|
// the first rune of the SYMBOL FOR block for control characters
|
|
|
|
|
symbolOffset = '␀' // SYMBOL FOR NULL
|
|
|
|
|
// QuoteRune is the rune used for quoting reserved characters
|
|
|
|
|
QuoteRune = '‛' // SINGLE HIGH-REVERSED-9 QUOTATION MARK
|
|
|
|
|
)
|
|
|
|
|
|
2019-07-25 16:16:39 +02:00
|
|
|
|
// NB keep the tests in fstests/fstests/fstests.go FsEncoding up to date with this
|
|
|
|
|
|
2018-11-02 13:12:31 +01:00
|
|
|
|
// Possible flags for the MultiEncoder
|
|
|
|
|
const (
|
2019-04-29 12:23:57 +02:00
|
|
|
|
EncodeZero uint = 0 // NUL(0x00)
|
|
|
|
|
EncodeSlash uint = 1 << iota // /
|
|
|
|
|
EncodeWin // :?"*<>|
|
|
|
|
|
EncodeBackSlash // \
|
2019-09-21 00:04:31 +02:00
|
|
|
|
EncodeCrLf // CR(0x0D), LF(0x0A)
|
2019-04-29 12:23:57 +02:00
|
|
|
|
EncodeHashPercent // #%
|
|
|
|
|
EncodeDel // DEL(0x7F)
|
|
|
|
|
EncodeCtl // CTRL(0x01-0x1F)
|
|
|
|
|
EncodeLeftSpace // Leading SPACE
|
|
|
|
|
EncodeLeftTilde // Leading ~
|
|
|
|
|
EncodeLeftCrLfHtVt // Leading CR LF HT VT
|
|
|
|
|
EncodeRightSpace // Trailing SPACE
|
|
|
|
|
EncodeRightPeriod // Trailing .
|
|
|
|
|
EncodeRightCrLfHtVt // Trailing CR LF HT VT
|
|
|
|
|
EncodeInvalidUtf8 // Invalid UTF-8 bytes
|
|
|
|
|
EncodeDot // . and .. names
|
2018-11-02 13:12:31 +01:00
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// Encoder can transform names to and from the original and translated version.
|
|
|
|
|
type Encoder interface {
|
|
|
|
|
// Encode takes a raw name and substitutes any reserved characters and
|
|
|
|
|
// patterns in it
|
|
|
|
|
Encode(string) string
|
|
|
|
|
// Decode takes a name and undoes any substitutions made by Encode
|
|
|
|
|
Decode(string) string
|
|
|
|
|
|
|
|
|
|
// FromStandardPath takes a / separated path in Standard encoding
|
|
|
|
|
// and converts it to a / separated path in this encoding.
|
|
|
|
|
FromStandardPath(string) string
|
|
|
|
|
// FromStandardName takes name in Standard encoding and converts
|
|
|
|
|
// it in this encoding.
|
|
|
|
|
FromStandardName(string) string
|
|
|
|
|
// ToStandardPath takes a / separated path in this encoding
|
|
|
|
|
// and converts it to a / separated path in Standard encoding.
|
|
|
|
|
ToStandardPath(string) string
|
|
|
|
|
// ToStandardName takes name in this encoding and converts
|
|
|
|
|
// it in Standard encoding.
|
|
|
|
|
ToStandardName(string) string
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// MultiEncoder is a configurable Encoder. The Encode* constants in this
|
|
|
|
|
// package can be combined using bitwise or (|) to enable handling of multiple
|
|
|
|
|
// character classes
|
|
|
|
|
type MultiEncoder uint
|
|
|
|
|
|
|
|
|
|
// Encode takes a raw name and substitutes any reserved characters and
|
|
|
|
|
// patterns in it
|
|
|
|
|
func (mask MultiEncoder) Encode(in string) string {
|
|
|
|
|
var (
|
|
|
|
|
encodeWin = uint(mask)&EncodeWin != 0
|
|
|
|
|
encodeSlash = uint(mask)&EncodeSlash != 0
|
|
|
|
|
encodeBackSlash = uint(mask)&EncodeBackSlash != 0
|
2019-09-21 00:04:31 +02:00
|
|
|
|
encodeCrLf = uint(mask)&EncodeCrLf != 0
|
2018-11-02 13:12:31 +01:00
|
|
|
|
encodeHashPercent = uint(mask)&EncodeHashPercent != 0
|
|
|
|
|
encodeDel = uint(mask)&EncodeDel != 0
|
|
|
|
|
encodeCtl = uint(mask)&EncodeCtl != 0
|
|
|
|
|
encodeLeftSpace = uint(mask)&EncodeLeftSpace != 0
|
|
|
|
|
encodeLeftTilde = uint(mask)&EncodeLeftTilde != 0
|
2019-05-17 15:41:38 +02:00
|
|
|
|
encodeLeftCrLfHtVt = uint(mask)&EncodeLeftCrLfHtVt != 0
|
2018-11-02 13:12:31 +01:00
|
|
|
|
encodeRightSpace = uint(mask)&EncodeRightSpace != 0
|
|
|
|
|
encodeRightPeriod = uint(mask)&EncodeRightPeriod != 0
|
2019-05-17 15:41:38 +02:00
|
|
|
|
encodeRightCrLfHtVt = uint(mask)&EncodeRightCrLfHtVt != 0
|
2018-11-02 13:12:31 +01:00
|
|
|
|
encodeInvalidUnicode = uint(mask)&EncodeInvalidUtf8 != 0
|
2019-04-30 11:07:02 +02:00
|
|
|
|
encodeDot = uint(mask)&EncodeDot != 0
|
2018-11-02 13:12:31 +01:00
|
|
|
|
)
|
|
|
|
|
|
2019-05-17 15:41:38 +02:00
|
|
|
|
if in == "" {
|
|
|
|
|
return ""
|
|
|
|
|
}
|
|
|
|
|
|
2019-04-30 11:07:02 +02:00
|
|
|
|
if encodeDot {
|
|
|
|
|
switch in {
|
|
|
|
|
case ".":
|
|
|
|
|
return "."
|
|
|
|
|
case "..":
|
|
|
|
|
return ".."
|
|
|
|
|
case ".":
|
|
|
|
|
return string(QuoteRune) + "."
|
|
|
|
|
case "..":
|
|
|
|
|
return string(QuoteRune) + "." + string(QuoteRune) + "."
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-11-02 13:12:31 +01:00
|
|
|
|
// handle prefix only replacements
|
|
|
|
|
prefix := ""
|
2019-05-17 15:41:38 +02:00
|
|
|
|
if encodeLeftSpace { // Leading SPACE
|
2018-11-02 13:12:31 +01:00
|
|
|
|
if in[0] == ' ' {
|
|
|
|
|
prefix, in = "␠", in[1:] // SYMBOL FOR SPACE
|
|
|
|
|
} else if r, l := utf8.DecodeRuneInString(in); r == '␠' { // SYMBOL FOR SPACE
|
|
|
|
|
prefix, in = string(QuoteRune)+"␠", in[l:] // SYMBOL FOR SPACE
|
|
|
|
|
}
|
|
|
|
|
}
|
2019-05-17 15:41:38 +02:00
|
|
|
|
if encodeLeftTilde && prefix == "" { // Leading ~
|
2018-11-02 13:12:31 +01:00
|
|
|
|
if in[0] == '~' {
|
|
|
|
|
prefix, in = string('~'+fullOffset), in[1:] // FULLWIDTH TILDE
|
|
|
|
|
} else if r, l := utf8.DecodeRuneInString(in); r == '~'+fullOffset {
|
|
|
|
|
prefix, in = string(QuoteRune)+string('~'+fullOffset), in[l:] // FULLWIDTH TILDE
|
|
|
|
|
}
|
|
|
|
|
}
|
2019-05-17 15:41:38 +02:00
|
|
|
|
if encodeLeftCrLfHtVt && prefix == "" { // Leading CR LF HT VT
|
|
|
|
|
switch c := in[0]; c {
|
|
|
|
|
case '\t', '\n', '\v', '\r':
|
|
|
|
|
prefix, in = string('␀'+rune(c)), in[1:] // SYMBOL FOR NULL
|
|
|
|
|
default:
|
|
|
|
|
switch r, l := utf8.DecodeRuneInString(in); r {
|
|
|
|
|
case '␀' + '\t', '␀' + '\n', '␀' + '\v', '␀' + '\r':
|
|
|
|
|
prefix, in = string(QuoteRune)+string(r), in[l:]
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2018-11-02 13:12:31 +01:00
|
|
|
|
// handle suffix only replacements
|
|
|
|
|
suffix := ""
|
2019-05-17 15:41:38 +02:00
|
|
|
|
if in != "" {
|
|
|
|
|
if encodeRightSpace { // Trailing SPACE
|
|
|
|
|
if in[len(in)-1] == ' ' {
|
|
|
|
|
suffix, in = "␠", in[:len(in)-1] // SYMBOL FOR SPACE
|
|
|
|
|
} else if r, l := utf8.DecodeLastRuneInString(in); r == '␠' {
|
|
|
|
|
suffix, in = string(QuoteRune)+"␠", in[:len(in)-l] // SYMBOL FOR SPACE
|
|
|
|
|
}
|
2018-11-02 13:12:31 +01:00
|
|
|
|
}
|
2019-05-17 15:41:38 +02:00
|
|
|
|
if encodeRightPeriod && suffix == "" { // Trailing .
|
|
|
|
|
if in[len(in)-1] == '.' {
|
|
|
|
|
suffix, in = ".", in[:len(in)-1] // FULLWIDTH FULL STOP
|
|
|
|
|
} else if r, l := utf8.DecodeLastRuneInString(in); r == '.' {
|
|
|
|
|
suffix, in = string(QuoteRune)+".", in[:len(in)-l] // FULLWIDTH FULL STOP
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if encodeRightCrLfHtVt && suffix == "" { // Trailing .
|
|
|
|
|
switch c := in[len(in)-1]; c {
|
|
|
|
|
case '\t', '\n', '\v', '\r':
|
|
|
|
|
suffix, in = string('␀'+rune(c)), in[:len(in)-1] // FULLWIDTH FULL STOP
|
|
|
|
|
default:
|
|
|
|
|
switch r, l := utf8.DecodeLastRuneInString(in); r {
|
|
|
|
|
case '␀' + '\t', '␀' + '\n', '␀' + '\v', '␀' + '\r':
|
|
|
|
|
suffix, in = string(QuoteRune)+string(r), in[:len(in)-l]
|
|
|
|
|
}
|
|
|
|
|
}
|
2018-11-02 13:12:31 +01:00
|
|
|
|
}
|
|
|
|
|
}
|
2019-05-17 15:41:38 +02:00
|
|
|
|
|
2018-11-02 13:12:31 +01:00
|
|
|
|
index := 0
|
|
|
|
|
if prefix == "" && suffix == "" {
|
|
|
|
|
// find the first rune which (most likely) needs to be replaced
|
|
|
|
|
index = strings.IndexFunc(in, func(r rune) bool {
|
|
|
|
|
switch r {
|
|
|
|
|
case 0, '␀', QuoteRune, utf8.RuneError:
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
if encodeWin { // :?"*<>|
|
|
|
|
|
switch r {
|
|
|
|
|
case '*', '<', '>', '?', ':', '|', '"',
|
|
|
|
|
'*', '<', '>', '?', ':', '|', '"':
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if encodeSlash { // /
|
|
|
|
|
switch r {
|
|
|
|
|
case '/',
|
|
|
|
|
'/':
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if encodeBackSlash { // \
|
|
|
|
|
switch r {
|
|
|
|
|
case '\\',
|
|
|
|
|
'\':
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2019-09-21 00:04:31 +02:00
|
|
|
|
if encodeCrLf { // CR LF
|
|
|
|
|
switch r {
|
|
|
|
|
case rune(0x0D), rune(0x0A),
|
|
|
|
|
'␍', '␊':
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2018-11-02 13:12:31 +01:00
|
|
|
|
if encodeHashPercent { // #%
|
|
|
|
|
switch r {
|
|
|
|
|
case '#', '%',
|
|
|
|
|
'#', '%':
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if encodeDel { // DEL(0x7F)
|
|
|
|
|
switch r {
|
|
|
|
|
case rune(0x7F), '␡':
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if encodeCtl { // CTRL(0x01-0x1F)
|
|
|
|
|
if r >= 1 && r <= 0x1F {
|
|
|
|
|
return true
|
|
|
|
|
} else if r > symbolOffset && r <= symbolOffset+0x1F {
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
// nothing to replace, return input
|
|
|
|
|
if index == -1 {
|
|
|
|
|
return in
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var out bytes.Buffer
|
|
|
|
|
out.Grow(len(in) + len(prefix) + len(suffix))
|
|
|
|
|
out.WriteString(prefix)
|
|
|
|
|
// copy the clean part of the input and skip it
|
|
|
|
|
out.WriteString(in[:index])
|
|
|
|
|
in = in[index:]
|
|
|
|
|
|
|
|
|
|
for i, r := range in {
|
|
|
|
|
switch r {
|
|
|
|
|
case 0:
|
|
|
|
|
out.WriteRune(symbolOffset)
|
|
|
|
|
continue
|
|
|
|
|
case '␀', QuoteRune:
|
|
|
|
|
out.WriteRune(QuoteRune)
|
|
|
|
|
out.WriteRune(r)
|
|
|
|
|
continue
|
|
|
|
|
case utf8.RuneError:
|
|
|
|
|
if encodeInvalidUnicode {
|
|
|
|
|
// only encode invalid sequences and not utf8.RuneError
|
|
|
|
|
if i+3 > len(in) || in[i:i+3] != string(utf8.RuneError) {
|
|
|
|
|
_, l := utf8.DecodeRuneInString(in[i:])
|
|
|
|
|
appendQuotedBytes(&out, in[i:i+l])
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
// append the real bytes instead of utf8.RuneError
|
|
|
|
|
_, l := utf8.DecodeRuneInString(in[i:])
|
|
|
|
|
out.WriteString(in[i : i+l])
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if encodeWin { // :?"*<>|
|
|
|
|
|
switch r {
|
|
|
|
|
case '*', '<', '>', '?', ':', '|', '"':
|
|
|
|
|
out.WriteRune(r + fullOffset)
|
|
|
|
|
continue
|
|
|
|
|
case '*', '<', '>', '?', ':', '|', '"':
|
|
|
|
|
out.WriteRune(QuoteRune)
|
|
|
|
|
out.WriteRune(r)
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if encodeSlash { // /
|
|
|
|
|
switch r {
|
|
|
|
|
case '/':
|
|
|
|
|
out.WriteRune(r + fullOffset)
|
|
|
|
|
continue
|
|
|
|
|
case '/':
|
|
|
|
|
out.WriteRune(QuoteRune)
|
|
|
|
|
out.WriteRune(r)
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if encodeBackSlash { // \
|
|
|
|
|
switch r {
|
|
|
|
|
case '\\':
|
|
|
|
|
out.WriteRune(r + fullOffset)
|
|
|
|
|
continue
|
|
|
|
|
case '\':
|
|
|
|
|
out.WriteRune(QuoteRune)
|
|
|
|
|
out.WriteRune(r)
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
}
|
2019-09-21 00:04:31 +02:00
|
|
|
|
if encodeCrLf { // CR LF
|
|
|
|
|
switch r {
|
|
|
|
|
case rune(0x0D), rune(0x0A):
|
|
|
|
|
out.WriteRune(r + symbolOffset)
|
|
|
|
|
continue
|
|
|
|
|
case '␍', '␊':
|
|
|
|
|
out.WriteRune(QuoteRune)
|
|
|
|
|
out.WriteRune(r)
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
}
|
2018-11-02 13:12:31 +01:00
|
|
|
|
if encodeHashPercent { // #%
|
|
|
|
|
switch r {
|
|
|
|
|
case '#', '%':
|
|
|
|
|
out.WriteRune(r + fullOffset)
|
|
|
|
|
continue
|
|
|
|
|
case '#', '%':
|
|
|
|
|
out.WriteRune(QuoteRune)
|
|
|
|
|
out.WriteRune(r)
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if encodeDel { // DEL(0x7F)
|
|
|
|
|
switch r {
|
|
|
|
|
case rune(0x7F):
|
|
|
|
|
out.WriteRune('␡') // SYMBOL FOR DELETE
|
|
|
|
|
continue
|
|
|
|
|
case '␡':
|
|
|
|
|
out.WriteRune(QuoteRune)
|
|
|
|
|
out.WriteRune(r)
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if encodeCtl { // CTRL(0x01-0x1F)
|
|
|
|
|
if r >= 1 && r <= 0x1F {
|
|
|
|
|
out.WriteRune('␀' + r) // SYMBOL FOR NULL
|
|
|
|
|
continue
|
|
|
|
|
} else if r > symbolOffset && r <= symbolOffset+0x1F {
|
|
|
|
|
out.WriteRune(QuoteRune)
|
|
|
|
|
out.WriteRune(r)
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
out.WriteRune(r)
|
|
|
|
|
}
|
|
|
|
|
out.WriteString(suffix)
|
|
|
|
|
return out.String()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Decode takes a name and undoes any substitutions made by Encode
|
|
|
|
|
func (mask MultiEncoder) Decode(in string) string {
|
|
|
|
|
var (
|
|
|
|
|
encodeWin = uint(mask)&EncodeWin != 0
|
|
|
|
|
encodeSlash = uint(mask)&EncodeSlash != 0
|
|
|
|
|
encodeBackSlash = uint(mask)&EncodeBackSlash != 0
|
2019-09-21 00:04:31 +02:00
|
|
|
|
encodeCrLf = uint(mask)&EncodeCrLf != 0
|
2018-11-02 13:12:31 +01:00
|
|
|
|
encodeHashPercent = uint(mask)&EncodeHashPercent != 0
|
|
|
|
|
encodeDel = uint(mask)&EncodeDel != 0
|
|
|
|
|
encodeCtl = uint(mask)&EncodeCtl != 0
|
|
|
|
|
encodeLeftSpace = uint(mask)&EncodeLeftSpace != 0
|
|
|
|
|
encodeLeftTilde = uint(mask)&EncodeLeftTilde != 0
|
2019-05-17 15:41:38 +02:00
|
|
|
|
encodeLeftCrLfHtVt = uint(mask)&EncodeLeftCrLfHtVt != 0
|
2018-11-02 13:12:31 +01:00
|
|
|
|
encodeRightSpace = uint(mask)&EncodeRightSpace != 0
|
|
|
|
|
encodeRightPeriod = uint(mask)&EncodeRightPeriod != 0
|
2019-05-17 15:41:38 +02:00
|
|
|
|
encodeRightCrLfHtVt = uint(mask)&EncodeRightCrLfHtVt != 0
|
2018-11-02 13:12:31 +01:00
|
|
|
|
encodeInvalidUnicode = uint(mask)&EncodeInvalidUtf8 != 0
|
2019-04-30 11:07:02 +02:00
|
|
|
|
encodeDot = uint(mask)&EncodeDot != 0
|
2018-11-02 13:12:31 +01:00
|
|
|
|
)
|
|
|
|
|
|
2019-04-30 11:07:02 +02:00
|
|
|
|
if encodeDot {
|
|
|
|
|
switch in {
|
|
|
|
|
case ".":
|
|
|
|
|
return "."
|
|
|
|
|
case "..":
|
|
|
|
|
return ".."
|
|
|
|
|
case string(QuoteRune) + ".":
|
|
|
|
|
return "."
|
|
|
|
|
case string(QuoteRune) + "." + string(QuoteRune) + ".":
|
|
|
|
|
return ".."
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-11-02 13:12:31 +01:00
|
|
|
|
// handle prefix only replacements
|
|
|
|
|
prefix := ""
|
|
|
|
|
if r, l1 := utf8.DecodeRuneInString(in); encodeLeftSpace && r == '␠' { // SYMBOL FOR SPACE
|
|
|
|
|
prefix, in = " ", in[l1:]
|
|
|
|
|
} else if encodeLeftTilde && r == '~' { // FULLWIDTH TILDE
|
|
|
|
|
prefix, in = "~", in[l1:]
|
2019-05-17 15:41:38 +02:00
|
|
|
|
} else if encodeLeftCrLfHtVt && (r == '␀'+'\t' || r == '␀'+'\n' || r == '␀'+'\v' || r == '␀'+'\r') {
|
|
|
|
|
prefix, in = string(r-'␀'), in[l1:]
|
2018-11-02 13:12:31 +01:00
|
|
|
|
} else if r == QuoteRune {
|
|
|
|
|
if r, l2 := utf8.DecodeRuneInString(in[l1:]); encodeLeftSpace && r == '␠' { // SYMBOL FOR SPACE
|
|
|
|
|
prefix, in = "␠", in[l1+l2:]
|
|
|
|
|
} else if encodeLeftTilde && r == '~' { // FULLWIDTH TILDE
|
|
|
|
|
prefix, in = "~", in[l1+l2:]
|
2019-05-17 15:41:38 +02:00
|
|
|
|
} else if encodeLeftCrLfHtVt && (r == '␀'+'\t' || r == '␀'+'\n' || r == '␀'+'\v' || r == '␀'+'\r') {
|
|
|
|
|
prefix, in = string(r), in[l1+l2:]
|
2018-11-02 13:12:31 +01:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// handle suffix only replacements
|
|
|
|
|
suffix := ""
|
|
|
|
|
if r, l := utf8.DecodeLastRuneInString(in); encodeRightSpace && r == '␠' { // SYMBOL FOR SPACE
|
|
|
|
|
in = in[:len(in)-l]
|
2019-05-17 15:41:38 +02:00
|
|
|
|
if q, l2 := utf8.DecodeLastRuneInString(in); q == QuoteRune {
|
2018-11-02 13:12:31 +01:00
|
|
|
|
suffix, in = "␠", in[:len(in)-l2]
|
|
|
|
|
} else {
|
|
|
|
|
suffix = " "
|
|
|
|
|
}
|
|
|
|
|
} else if encodeRightPeriod && r == '.' { // FULLWIDTH FULL STOP
|
|
|
|
|
in = in[:len(in)-l]
|
2019-05-17 15:41:38 +02:00
|
|
|
|
if q, l2 := utf8.DecodeLastRuneInString(in); q == QuoteRune {
|
2018-11-02 13:12:31 +01:00
|
|
|
|
suffix, in = ".", in[:len(in)-l2]
|
|
|
|
|
} else {
|
|
|
|
|
suffix = "."
|
|
|
|
|
}
|
2019-05-17 15:41:38 +02:00
|
|
|
|
} else if encodeRightCrLfHtVt && (r == '␀'+'\t' || r == '␀'+'\n' || r == '␀'+'\v' || r == '␀'+'\r') {
|
|
|
|
|
in = in[:len(in)-l]
|
|
|
|
|
if q, l2 := utf8.DecodeLastRuneInString(in); q == QuoteRune {
|
|
|
|
|
suffix, in = string(r), in[:len(in)-l2]
|
|
|
|
|
} else {
|
|
|
|
|
suffix = string(r - '␀')
|
|
|
|
|
}
|
2018-11-02 13:12:31 +01:00
|
|
|
|
}
|
|
|
|
|
index := 0
|
|
|
|
|
if prefix == "" && suffix == "" {
|
|
|
|
|
// find the first rune which (most likely) needs to be replaced
|
|
|
|
|
index = strings.IndexFunc(in, func(r rune) bool {
|
|
|
|
|
switch r {
|
|
|
|
|
case '␀', QuoteRune:
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
if encodeWin { // :?"*<>|
|
|
|
|
|
switch r {
|
|
|
|
|
case '*', '<', '>', '?', ':', '|', '"':
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if encodeSlash { // /
|
|
|
|
|
switch r {
|
|
|
|
|
case '/':
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if encodeBackSlash { // \
|
|
|
|
|
switch r {
|
|
|
|
|
case '\':
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2019-09-21 00:04:31 +02:00
|
|
|
|
if encodeCrLf { // CR LF
|
|
|
|
|
switch r {
|
|
|
|
|
case '␍', '␊':
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
2018-11-02 13:12:31 +01:00
|
|
|
|
if encodeHashPercent { // #%
|
|
|
|
|
switch r {
|
|
|
|
|
case '#', '%':
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if encodeDel { // DEL(0x7F)
|
|
|
|
|
switch r {
|
|
|
|
|
case '␡':
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if encodeCtl { // CTRL(0x01-0x1F)
|
|
|
|
|
if r > symbolOffset && r <= symbolOffset+0x1F {
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return false
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
// nothing to replace, return input
|
|
|
|
|
if index == -1 {
|
|
|
|
|
return in
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var out bytes.Buffer
|
|
|
|
|
out.Grow(len(in))
|
|
|
|
|
out.WriteString(prefix)
|
|
|
|
|
// copy the clean part of the input and skip it
|
|
|
|
|
out.WriteString(in[:index])
|
|
|
|
|
in = in[index:]
|
|
|
|
|
var unquote, unquoteNext, skipNext bool
|
|
|
|
|
|
|
|
|
|
for i, r := range in {
|
|
|
|
|
if skipNext {
|
|
|
|
|
skipNext = false
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
unquote, unquoteNext = unquoteNext, false
|
|
|
|
|
switch r {
|
|
|
|
|
case '␀': // SYMBOL FOR NULL
|
|
|
|
|
if unquote {
|
|
|
|
|
out.WriteRune(r)
|
|
|
|
|
} else {
|
|
|
|
|
out.WriteRune(0)
|
|
|
|
|
}
|
|
|
|
|
continue
|
|
|
|
|
case QuoteRune:
|
|
|
|
|
if unquote {
|
|
|
|
|
out.WriteRune(r)
|
|
|
|
|
} else {
|
|
|
|
|
unquoteNext = true
|
|
|
|
|
}
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
if encodeWin { // :?"*<>|
|
|
|
|
|
switch r {
|
|
|
|
|
case '*', '<', '>', '?', ':', '|', '"':
|
|
|
|
|
if unquote {
|
|
|
|
|
out.WriteRune(r)
|
|
|
|
|
} else {
|
|
|
|
|
out.WriteRune(r - fullOffset)
|
|
|
|
|
}
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if encodeSlash { // /
|
|
|
|
|
switch r {
|
|
|
|
|
case '/': // FULLWIDTH SOLIDUS
|
|
|
|
|
if unquote {
|
|
|
|
|
out.WriteRune(r)
|
|
|
|
|
} else {
|
|
|
|
|
out.WriteRune(r - fullOffset)
|
|
|
|
|
}
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if encodeBackSlash { // \
|
|
|
|
|
switch r {
|
|
|
|
|
case '\': // FULLWIDTH REVERSE SOLIDUS
|
|
|
|
|
if unquote {
|
|
|
|
|
out.WriteRune(r)
|
|
|
|
|
} else {
|
|
|
|
|
out.WriteRune(r - fullOffset)
|
|
|
|
|
}
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
}
|
2019-09-21 00:04:31 +02:00
|
|
|
|
if encodeCrLf { // CR LF
|
|
|
|
|
switch r {
|
|
|
|
|
case '␍', '␊':
|
|
|
|
|
if unquote {
|
|
|
|
|
out.WriteRune(r)
|
|
|
|
|
} else {
|
|
|
|
|
out.WriteRune(r - symbolOffset)
|
|
|
|
|
}
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
}
|
2018-11-02 13:12:31 +01:00
|
|
|
|
if encodeHashPercent { // #%
|
|
|
|
|
switch r {
|
|
|
|
|
case '#', '%':
|
|
|
|
|
if unquote {
|
|
|
|
|
out.WriteRune(r)
|
|
|
|
|
} else {
|
|
|
|
|
out.WriteRune(r - fullOffset)
|
|
|
|
|
}
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if encodeDel { // DEL(0x7F)
|
|
|
|
|
switch r {
|
|
|
|
|
case '␡': // SYMBOL FOR DELETE
|
|
|
|
|
if unquote {
|
|
|
|
|
out.WriteRune(r)
|
|
|
|
|
} else {
|
|
|
|
|
out.WriteRune(0x7F)
|
|
|
|
|
}
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if encodeCtl { // CTRL(0x01-0x1F)
|
|
|
|
|
if r > symbolOffset && r <= symbolOffset+0x1F {
|
|
|
|
|
if unquote {
|
|
|
|
|
out.WriteRune(r)
|
|
|
|
|
} else {
|
|
|
|
|
out.WriteRune(r - symbolOffset)
|
|
|
|
|
}
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if unquote {
|
|
|
|
|
if encodeInvalidUnicode {
|
|
|
|
|
skipNext = appendUnquotedByte(&out, in[i:])
|
|
|
|
|
if skipNext {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
out.WriteRune(QuoteRune)
|
|
|
|
|
}
|
|
|
|
|
switch r {
|
|
|
|
|
case utf8.RuneError:
|
|
|
|
|
// append the real bytes instead of utf8.RuneError
|
|
|
|
|
_, l := utf8.DecodeRuneInString(in[i:])
|
|
|
|
|
out.WriteString(in[i : i+l])
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
out.WriteRune(r)
|
|
|
|
|
}
|
|
|
|
|
if unquoteNext {
|
|
|
|
|
out.WriteRune(QuoteRune)
|
|
|
|
|
}
|
|
|
|
|
out.WriteString(suffix)
|
|
|
|
|
return out.String()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// FromStandardPath takes a / separated path in Standard encoding
|
|
|
|
|
// and converts it to a / separated path in this encoding.
|
|
|
|
|
func (mask MultiEncoder) FromStandardPath(s string) string {
|
|
|
|
|
return FromStandardPath(mask, s)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// FromStandardName takes name in Standard encoding and converts
|
|
|
|
|
// it in this encoding.
|
|
|
|
|
func (mask MultiEncoder) FromStandardName(s string) string {
|
|
|
|
|
return FromStandardName(mask, s)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ToStandardPath takes a / separated path in this encoding
|
|
|
|
|
// and converts it to a / separated path in Standard encoding.
|
|
|
|
|
func (mask MultiEncoder) ToStandardPath(s string) string {
|
|
|
|
|
return ToStandardPath(mask, s)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ToStandardName takes name in this encoding and converts
|
|
|
|
|
// it in Standard encoding.
|
|
|
|
|
func (mask MultiEncoder) ToStandardName(s string) string {
|
|
|
|
|
return ToStandardName(mask, s)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func appendQuotedBytes(w io.Writer, s string) {
|
|
|
|
|
for _, b := range []byte(s) {
|
|
|
|
|
_, _ = fmt.Fprintf(w, string(QuoteRune)+"%02X", b)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
func appendUnquotedByte(w io.Writer, s string) bool {
|
|
|
|
|
if len(s) < 2 {
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
u, err := strconv.ParseUint(s[:2], 16, 8)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
n, _ := w.Write([]byte{byte(u)})
|
|
|
|
|
return n == 1
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
type identity struct{}
|
|
|
|
|
|
|
|
|
|
func (identity) Encode(in string) string { return in }
|
|
|
|
|
func (identity) Decode(in string) string { return in }
|
|
|
|
|
|
|
|
|
|
func (i identity) FromStandardPath(s string) string {
|
|
|
|
|
return FromStandardPath(i, s)
|
|
|
|
|
}
|
|
|
|
|
func (i identity) FromStandardName(s string) string {
|
|
|
|
|
return FromStandardName(i, s)
|
|
|
|
|
}
|
|
|
|
|
func (i identity) ToStandardPath(s string) string {
|
|
|
|
|
return ToStandardPath(i, s)
|
|
|
|
|
}
|
|
|
|
|
func (i identity) ToStandardName(s string) string {
|
|
|
|
|
return ToStandardName(i, s)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Identity returns a Encoder that always returns the input value
|
|
|
|
|
func Identity() Encoder {
|
|
|
|
|
return identity{}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// FromStandardPath takes a / separated path in Standard encoding
|
|
|
|
|
// and converts it to a / separated path in the given encoding.
|
|
|
|
|
func FromStandardPath(e Encoder, s string) string {
|
|
|
|
|
if e == Standard {
|
|
|
|
|
return s
|
|
|
|
|
}
|
|
|
|
|
parts := strings.Split(s, "/")
|
|
|
|
|
encoded := make([]string, len(parts))
|
|
|
|
|
changed := false
|
|
|
|
|
for i, p := range parts {
|
|
|
|
|
enc := FromStandardName(e, p)
|
|
|
|
|
changed = changed || enc != p
|
|
|
|
|
encoded[i] = enc
|
|
|
|
|
}
|
|
|
|
|
if !changed {
|
|
|
|
|
return s
|
|
|
|
|
}
|
|
|
|
|
return strings.Join(encoded, "/")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// FromStandardName takes name in Standard encoding and converts
|
|
|
|
|
// it in the given encoding.
|
|
|
|
|
func FromStandardName(e Encoder, s string) string {
|
|
|
|
|
if e == Standard {
|
|
|
|
|
return s
|
|
|
|
|
}
|
|
|
|
|
return e.Encode(Standard.Decode(s))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ToStandardPath takes a / separated path in the given encoding
|
|
|
|
|
// and converts it to a / separated path in Standard encoding.
|
|
|
|
|
func ToStandardPath(e Encoder, s string) string {
|
|
|
|
|
if e == Standard {
|
|
|
|
|
return s
|
|
|
|
|
}
|
|
|
|
|
parts := strings.Split(s, "/")
|
|
|
|
|
encoded := make([]string, len(parts))
|
|
|
|
|
changed := false
|
|
|
|
|
for i, p := range parts {
|
|
|
|
|
dec := ToStandardName(e, p)
|
|
|
|
|
changed = changed || dec != p
|
|
|
|
|
encoded[i] = dec
|
|
|
|
|
}
|
|
|
|
|
if !changed {
|
|
|
|
|
return s
|
|
|
|
|
}
|
|
|
|
|
return strings.Join(encoded, "/")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ToStandardName takes name in the given encoding and converts
|
|
|
|
|
// it in Standard encoding.
|
|
|
|
|
func ToStandardName(e Encoder, s string) string {
|
|
|
|
|
if e == Standard {
|
|
|
|
|
return s
|
|
|
|
|
}
|
|
|
|
|
return Standard.Encode(e.Decode(s))
|
|
|
|
|
}
|