encoder: add edge control characters and fix edge test generation

This commit is contained in:
Fabian Möller 2019-05-17 15:41:38 +02:00 committed by Nick Craig-Wood
parent 6ba08b8612
commit f0c2249086
4 changed files with 10706 additions and 880 deletions

View File

@ -89,12 +89,18 @@ func (mask MultiEncoder) Encode(in string) string {
encodeCtl = uint(mask)&EncodeCtl != 0
encodeLeftSpace = uint(mask)&EncodeLeftSpace != 0
encodeLeftTilde = uint(mask)&EncodeLeftTilde != 0
encodeLeftCrLfHtVt = uint(mask)&EncodeLeftCrLfHtVt != 0
encodeRightSpace = uint(mask)&EncodeRightSpace != 0
encodeRightPeriod = uint(mask)&EncodeRightPeriod != 0
encodeRightCrLfHtVt = uint(mask)&EncodeRightCrLfHtVt != 0
encodeInvalidUnicode = uint(mask)&EncodeInvalidUtf8 != 0
encodeDot = uint(mask)&EncodeDot != 0
)
if in == "" {
return ""
}
if encodeDot {
switch in {
case ".":
@ -110,36 +116,61 @@ func (mask MultiEncoder) Encode(in string) string {
// handle prefix only replacements
prefix := ""
if encodeLeftSpace && len(in) > 0 { // Leading SPACE
if encodeLeftSpace { // Leading SPACE
if in[0] == ' ' {
prefix, in = "␠", in[1:] // SYMBOL FOR SPACE
} else if r, l := utf8.DecodeRuneInString(in); r == '␠' { // SYMBOL FOR SPACE
prefix, in = string(QuoteRune)+"␠", in[l:] // SYMBOL FOR SPACE
}
}
if encodeLeftTilde && len(in) > 0 { // Leading ~
if encodeLeftTilde && prefix == "" { // Leading ~
if in[0] == '~' {
prefix, in = string('~'+fullOffset), in[1:] // FULLWIDTH TILDE
} else if r, l := utf8.DecodeRuneInString(in); r == '~'+fullOffset {
prefix, in = string(QuoteRune)+string('~'+fullOffset), in[l:] // FULLWIDTH TILDE
}
}
if encodeLeftCrLfHtVt && prefix == "" { // Leading CR LF HT VT
switch c := in[0]; c {
case '\t', '\n', '\v', '\r':
prefix, in = string('␀'+rune(c)), in[1:] // SYMBOL FOR NULL
default:
switch r, l := utf8.DecodeRuneInString(in); r {
case '␀' + '\t', '␀' + '\n', '␀' + '\v', '␀' + '\r':
prefix, in = string(QuoteRune)+string(r), in[l:]
}
}
}
// handle suffix only replacements
suffix := ""
if encodeRightSpace && len(in) > 0 { // Trailing SPACE
if in != "" {
if encodeRightSpace { // Trailing SPACE
if in[len(in)-1] == ' ' {
suffix, in = "␠", in[:len(in)-1] // SYMBOL FOR SPACE
} else if r, l := utf8.DecodeLastRuneInString(in); r == '␠' {
suffix, in = string(QuoteRune)+"␠", in[:len(in)-l] // SYMBOL FOR SPACE
}
}
if encodeRightPeriod && len(in) > 0 { // Trailing .
if encodeRightPeriod && suffix == "" { // Trailing .
if in[len(in)-1] == '.' {
suffix, in = "", in[:len(in)-1] // FULLWIDTH FULL STOP
} else if r, l := utf8.DecodeLastRuneInString(in); r == '' {
suffix, in = string(QuoteRune)+"", in[:len(in)-l] // FULLWIDTH FULL STOP
}
}
if encodeRightCrLfHtVt && suffix == "" { // Trailing .
switch c := in[len(in)-1]; c {
case '\t', '\n', '\v', '\r':
suffix, in = string('␀'+rune(c)), in[:len(in)-1] // FULLWIDTH FULL STOP
default:
switch r, l := utf8.DecodeLastRuneInString(in); r {
case '␀' + '\t', '␀' + '\n', '␀' + '\v', '␀' + '\r':
suffix, in = string(QuoteRune)+string(r), in[:len(in)-l]
}
}
}
}
index := 0
if prefix == "" && suffix == "" {
// find the first rune which (most likely) needs to be replaced
@ -310,8 +341,10 @@ func (mask MultiEncoder) Decode(in string) string {
encodeCtl = uint(mask)&EncodeCtl != 0
encodeLeftSpace = uint(mask)&EncodeLeftSpace != 0
encodeLeftTilde = uint(mask)&EncodeLeftTilde != 0
encodeLeftCrLfHtVt = uint(mask)&EncodeLeftCrLfHtVt != 0
encodeRightSpace = uint(mask)&EncodeRightSpace != 0
encodeRightPeriod = uint(mask)&EncodeRightPeriod != 0
encodeRightCrLfHtVt = uint(mask)&EncodeRightCrLfHtVt != 0
encodeInvalidUnicode = uint(mask)&EncodeInvalidUtf8 != 0
encodeDot = uint(mask)&EncodeDot != 0
)
@ -335,11 +368,15 @@ func (mask MultiEncoder) Decode(in string) string {
prefix, in = " ", in[l1:]
} else if encodeLeftTilde && r == '' { // FULLWIDTH TILDE
prefix, in = "~", in[l1:]
} else if encodeLeftCrLfHtVt && (r == '␀'+'\t' || r == '␀'+'\n' || r == '␀'+'\v' || r == '␀'+'\r') {
prefix, in = string(r-'␀'), in[l1:]
} else if r == QuoteRune {
if r, l2 := utf8.DecodeRuneInString(in[l1:]); encodeLeftSpace && r == '␠' { // SYMBOL FOR SPACE
prefix, in = "␠", in[l1+l2:]
} else if encodeLeftTilde && r == '' { // FULLWIDTH TILDE
prefix, in = "", in[l1+l2:]
} else if encodeLeftCrLfHtVt && (r == '␀'+'\t' || r == '␀'+'\n' || r == '␀'+'\v' || r == '␀'+'\r') {
prefix, in = string(r), in[l1+l2:]
}
}
@ -347,18 +384,25 @@ func (mask MultiEncoder) Decode(in string) string {
suffix := ""
if r, l := utf8.DecodeLastRuneInString(in); encodeRightSpace && r == '␠' { // SYMBOL FOR SPACE
in = in[:len(in)-l]
if r, l2 := utf8.DecodeLastRuneInString(in); r == QuoteRune {
if q, l2 := utf8.DecodeLastRuneInString(in); q == QuoteRune {
suffix, in = "␠", in[:len(in)-l2]
} else {
suffix = " "
}
} else if encodeRightPeriod && r == '' { // FULLWIDTH FULL STOP
in = in[:len(in)-l]
if r, l2 := utf8.DecodeLastRuneInString(in); r == QuoteRune {
if q, l2 := utf8.DecodeLastRuneInString(in); q == QuoteRune {
suffix, in = "", in[:len(in)-l2]
} else {
suffix = "."
}
} else if encodeRightCrLfHtVt && (r == '␀'+'\t' || r == '␀'+'\n' || r == '␀'+'\v' || r == '␀'+'\r') {
in = in[:len(in)-l]
if q, l2 := utf8.DecodeLastRuneInString(in); q == QuoteRune {
suffix, in = string(r), in[:len(in)-l2]
} else {
suffix = string(r - '␀')
}
}
index := 0
if prefix == "" && suffix == "" {

File diff suppressed because it is too large Load Diff

View File

@ -45,6 +45,22 @@ func TestEncodeSingleMaskEdge(t *testing.T) {
}
}
func TestEncodeDoubleMaskEdge(t *testing.T) {
for i, tc := range testCasesDoubleEdge {
e := MultiEncoder(tc.mask)
t.Run(strconv.FormatInt(int64(i), 10), func(t *testing.T) {
got := e.Encode(tc.in)
if got != tc.out {
t.Errorf("Encode(%q) want %q got %q", tc.in, tc.out, got)
}
got2 := e.Decode(got)
if got2 != tc.in {
t.Errorf("Decode(%q) want %q got %q", got, tc.in, got2)
}
})
}
}
func TestEncodeInvalidUnicode(t *testing.T) {
for i, tc := range []testCase{
{

View File

@ -3,6 +3,7 @@
package main
import (
"flag"
"fmt"
"log"
"math/rand"
@ -47,21 +48,34 @@ var maskBits = []struct {
{encoder.EncodeCtl, "EncodeCtl"},
{encoder.EncodeLeftSpace, "EncodeLeftSpace"},
{encoder.EncodeLeftTilde, "EncodeLeftTilde"},
{encoder.EncodeLeftCrLfHtVt, "EncodeLeftCrLfHtVt"},
{encoder.EncodeRightSpace, "EncodeRightSpace"},
{encoder.EncodeRightPeriod, "EncodeRightPeriod"},
{encoder.EncodeRightCrLfHtVt, "EncodeRightCrLfHtVt"},
{encoder.EncodeInvalidUtf8, "EncodeInvalidUtf8"},
}
var edges = []struct {
type edge struct {
mask uint
name string
edge int
orig rune
replace rune
}{
{encoder.EncodeLeftSpace, "EncodeLeftSpace", edgeLeft, ' ', '␠'},
{encoder.EncodeLeftTilde, "EncodeLeftTilde", edgeLeft, '~', ''},
{encoder.EncodeRightSpace, "EncodeRightSpace", edgeRight, ' ', '␠'},
{encoder.EncodeRightPeriod, "EncodeRightPeriod", edgeRight, '.', ''},
orig []rune
replace []rune
}
var allEdges = []edge{
{encoder.EncodeLeftSpace, "EncodeLeftSpace", edgeLeft, []rune{' '}, []rune{'␠'}},
{encoder.EncodeLeftTilde, "EncodeLeftTilde", edgeLeft, []rune{'~'}, []rune{''}},
{encoder.EncodeLeftCrLfHtVt, "EncodeLeftCrLfHtVt", edgeLeft,
[]rune{'\t', '\n', '\v', '\r'},
[]rune{'␀' + '\t', '␀' + '\n', '␀' + '\v', '␀' + '\r'},
},
{encoder.EncodeRightSpace, "EncodeRightSpace", edgeRight, []rune{' '}, []rune{'␠'}},
{encoder.EncodeRightPeriod, "EncodeRightPeriod", edgeRight, []rune{'.'}, []rune{''}},
{encoder.EncodeRightCrLfHtVt, "EncodeRightCrLfHtVt", edgeRight,
[]rune{'\t', '\n', '\v', '\r'},
[]rune{'␀' + '\t', '␀' + '\n', '␀' + '\v', '␀' + '\r'},
},
}
var allMappings = []mapping{{
@ -101,7 +115,7 @@ var allMappings = []mapping{{
}}
var (
rng = rand.New(rand.NewSource(42))
rng *rand.Rand
printables = runeRange(0x20, 0x7E)
fullwidthPrintables = runeRange(0xFF00, 0xFF5E)
@ -111,6 +125,10 @@ var (
)
func main() {
seed := flag.Int64("s", 42, "random seed")
flag.Parse()
rng = rand.New(rand.NewSource(*seed))
fd, err := os.Create("encoder_cases_test.go")
fatal(err, "Unable to open encoder_cases_test.go:")
defer func() {
@ -147,7 +165,8 @@ func main() {
var testCasesSingleEdge = []testCase{
`))("Write:")
_i = 0
for _, e := range edges {
for _, e := range allEdges {
for idx, orig := range e.orig {
if _i != 0 {
fatalW(fd.WriteString(" "))("Write:")
}
@ -155,15 +174,63 @@ var testCasesSingleEdge = []testCase{
mask: %s,
in: %s,
out: %s,
},`, i(), e.name, strconv.Quote(string(e.orig)), strconv.Quote(string(e.replace))))("Error writing test case:")
},`, i(), e.name, strconv.Quote(string(orig)), strconv.Quote(string(e.replace[idx]))))("Error writing test case:")
}
for _, m := range maskBits {
if len(getMapping(m.mask).src) == 0 {
if len(getMapping(m.mask).src) == 0 || invalidMask(e.mask|m.mask) {
continue
}
for idx, orig := range e.orig {
replace := e.replace[idx]
pairs := buildEdgeTestString(
e.edge, e.orig, e.replace,
[]mapping{getMapping(0), getMapping(m.mask)}, // quote
printables, fullwidthPrintables, encodables, encoded, greek) // fill
[]edge{e}, []mapping{getMapping(0), getMapping(m.mask)}, // quote
[][]rune{printables, fullwidthPrintables, encodables, encoded, greek}, // fill
func(rIn, rOut []rune, quoteOut []bool, testMappings []mapping) (out []stringPair) {
testL := len(rIn)
skipOrig := false
for _, m := range testMappings {
if runePos(orig, m.src) != -1 || runePos(orig, m.dst) != -1 {
skipOrig = true
break
}
}
if !skipOrig {
rIn[10], rOut[10], quoteOut[10] = orig, orig, false
}
out = append(out, stringPair{string(rIn), quotedToString(rOut, quoteOut)})
for _, i := range []int{0, 1, testL - 2, testL - 1} {
for _, j := range []int{1, testL - 2, testL - 1} {
if j < i {
continue
}
rIn := append([]rune{}, rIn...)
rOut := append([]rune{}, rOut...)
quoteOut := append([]bool{}, quoteOut...)
for _, in := range []rune{orig, replace} {
expect, quote := in, false
if i == 0 && e.edge == edgeLeft ||
i == testL-1 && e.edge == edgeRight {
expect, quote = replace, in == replace
}
rIn[i], rOut[i], quoteOut[i] = in, expect, quote
if i != j {
for _, in := range []rune{orig, replace} {
expect, quote = in, false
if j == testL-1 && e.edge == edgeRight {
expect, quote = replace, in == replace
}
rIn[j], rOut[j], quoteOut[j] = in, expect, quote
}
}
out = append(out, stringPair{string(rIn), quotedToString(rOut, quoteOut)})
}
}
}
return
})
for _, p := range pairs {
fatalW(fmt.Fprintf(fd, ` { // %d
mask: %s | %s,
@ -173,6 +240,7 @@ var testCasesSingleEdge = []testCase{
}
}
}
}
fatalW(fmt.Fprintf(fd, ` { // %d
mask: EncodeLeftSpace,
in: " ",
@ -205,9 +273,70 @@ var testCasesSingleEdge = []testCase{
mask: EncodeLeftSpace | EncodeRightSpace,
in: " ",
out: "␠ ␠",
}, { // %d
mask: EncodeRightPeriod | EncodeRightSpace,
in: "a. ",
out: "a.␠",
}, { // %d
mask: EncodeRightPeriod | EncodeRightSpace,
in: "a .",
out: "a ",
},
}
`, i(), i(), i(), i(), i(), i(), i(), i()))("Error writing test case:")
var testCasesDoubleEdge = []testCase{
`, i(), i(), i(), i(), i(), i(), i(), i(), i(), i()))("Error writing test case:")
_i = 0
for _, e1 := range allEdges {
for _, e2 := range allEdges {
if e1.mask == e2.mask {
continue
}
for _, m := range maskBits {
if len(getMapping(m.mask).src) == 0 || invalidMask(m.mask|e1.mask|e2.mask) {
continue
}
orig, replace := e1.orig[0], e1.replace[0]
edges := []edge{e1, e2}
pairs := buildEdgeTestString(
edges, []mapping{getMapping(0), getMapping(m.mask)}, // quote
[][]rune{printables, fullwidthPrintables, encodables, encoded, greek}, // fill
func(rIn, rOut []rune, quoteOut []bool, testMappings []mapping) (out []stringPair) {
testL := len(rIn)
for _, i := range []int{0, testL - 1} {
for _, secondOrig := range e2.orig {
rIn := append([]rune{}, rIn...)
rOut := append([]rune{}, rOut...)
quoteOut := append([]bool{}, quoteOut...)
rIn[1], rOut[1], quoteOut[1] = secondOrig, secondOrig, false
rIn[testL-2], rOut[testL-2], quoteOut[testL-2] = secondOrig, secondOrig, false
for _, in := range []rune{orig, replace} {
rIn[i], rOut[i], quoteOut[i] = in, in, false
fixEdges(rIn, rOut, quoteOut, edges)
out = append(out, stringPair{string(rIn), quotedToString(rOut, quoteOut)})
}
}
}
return
})
for _, p := range pairs {
if _i != 0 {
fatalW(fd.WriteString(" "))("Write:")
}
fatalW(fmt.Fprintf(fd, `{ // %d
mask: %s | %s | %s,
in: %s,
out: %s,
},`, i(), m.name, e1.name, e2.name, strconv.Quote(p.a), strconv.Quote(p.b)))("Error writing test case:")
}
}
}
}
fatalW(fmt.Fprint(fd, "\n}\n"))("Error writing test case:")
}
func fatal(err error, s ...interface{}) {
@ -224,6 +353,10 @@ func fatalW(_ int, err error) func(...interface{}) {
return func(s ...interface{}) {}
}
func invalidMask(mask uint) bool {
return mask&encoder.EncodeCtl != 0 && mask&(encoder.EncodeLeftCrLfHtVt|encoder.EncodeRightCrLfHtVt) != 0
}
// construct a slice containing the runes between (l)ow (inclusive) and (h)igh (inclusive)
func runeRange(l, h rune) []rune {
if h < l {
@ -325,11 +458,13 @@ outer:
return string(rIn), bOut.String()
}
func buildEdgeTestString(edge int, orig, replace rune, testMappings []mapping, fill ...[]rune) (out []stringPair) {
func buildEdgeTestString(edges []edge, testMappings []mapping, fill [][]rune,
gen func(rIn, rOut []rune, quoteOut []bool, testMappings []mapping) []stringPair,
) []stringPair {
testL := 30
rIn := make([]rune, testL)
rOut := make([]rune, testL)
quoteOut := make([]bool, testL)
rIn := make([]rune, testL) // test input string
rOut := make([]rune, testL) // test output string without quote runes
quoteOut := make([]bool, testL) // if true insert quote rune before the output rune
set := func(i int, in, out rune, quote bool) {
rIn[i] = in
@ -337,6 +472,7 @@ func buildEdgeTestString(edge int, orig, replace rune, testMappings []mapping, f
quoteOut[i] = quote
}
// populate test strings with values from the `fill` set
outer:
for pos := 0; pos < testL; pos++ {
m := pos % len(fill)
@ -359,40 +495,26 @@ outer:
rOut[i], rOut[j] = rOut[j], rOut[i]
quoteOut[i], quoteOut[j] = quoteOut[j], quoteOut[i]
})
set(10, orig, orig, false)
fixEdges(rIn, rOut, quoteOut, edges)
return gen(rIn, rOut, quoteOut, testMappings)
}
out = append(out, stringPair{string(rIn), quotedToString(rOut, quoteOut)})
for _, i := range []int{0, 1, testL - 2, testL - 1} {
for _, j := range []int{1, testL - 2, testL - 1} {
if j < i {
continue
}
rIn := append([]rune{}, rIn...)
rOut := append([]rune{}, rOut...)
quoteOut := append([]bool{}, quoteOut...)
for _, in := range []rune{orig, replace} {
expect, quote := in, false
if i == 0 && edge == edgeLeft ||
i == testL-1 && edge == edgeRight {
expect, quote = replace, in == replace
}
rIn[i], rOut[i], quoteOut[i] = in, expect, quote
if i != j {
for _, in := range []rune{orig, replace} {
expect, quote = in, false
if j == testL-1 && edge == edgeRight {
expect, quote = replace, in == replace
}
rIn[j], rOut[j], quoteOut[j] = in, expect, quote
}
}
out = append(out, stringPair{string(rIn), quotedToString(rOut, quoteOut)})
func fixEdges(rIn, rOut []rune, quoteOut []bool, edges []edge) {
testL := len(rIn)
for _, e := range edges {
for idx, o := range e.orig {
r := e.replace[idx]
if e.edge == edgeLeft && rIn[0] == o {
rOut[0], quoteOut[0] = r, false
} else if e.edge == edgeLeft && rIn[0] == r {
quoteOut[0] = true
} else if e.edge == edgeRight && rIn[testL-1] == o {
rOut[testL-1], quoteOut[testL-1] = r, false
} else if e.edge == edgeRight && rIn[testL-1] == r {
quoteOut[testL-1] = true
}
}
}
return
}
func runePos(r rune, s []rune) int {