2015-09-27 17:13:20 +02:00
|
|
|
// rsync style glob parser
|
|
|
|
|
2018-01-12 17:30:54 +01:00
|
|
|
package filter
|
2015-09-27 17:13:20 +02:00
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
2021-11-04 11:12:57 +01:00
|
|
|
"fmt"
|
2015-09-27 17:13:20 +02:00
|
|
|
"regexp"
|
|
|
|
"strings"
|
2023-02-01 15:21:45 +01:00
|
|
|
|
|
|
|
"github.com/rclone/rclone/fs"
|
2015-09-27 17:13:20 +02:00
|
|
|
)
|
|
|
|
|
2023-11-03 20:45:37 +01:00
|
|
|
// GlobPathToRegexp converts an rsync style glob path to a regexp
|
|
|
|
func GlobPathToRegexp(glob string, ignoreCase bool) (*regexp.Regexp, error) {
|
|
|
|
return globToRegexp(glob, true, true, ignoreCase)
|
|
|
|
}
|
|
|
|
|
|
|
|
// GlobStringToRegexp converts an rsync style glob string to a regexp
|
2024-08-10 17:41:12 +02:00
|
|
|
//
|
|
|
|
// Without adding of anchors but with ignoring of case, i.e. called
|
|
|
|
// `GlobStringToRegexp(glob, false, true)`, it takes a lenient approach
|
|
|
|
// where the glob "sum" would match "CheckSum", more similar to text
|
|
|
|
// search functions than strict glob filtering.
|
|
|
|
//
|
|
|
|
// With adding of anchors and not ignoring case, i.e. called
|
|
|
|
// `GlobStringToRegexp(glob, true, false)`, it uses a strict glob
|
|
|
|
// interpretation where the previous example would have to be changed to
|
|
|
|
// "*Sum" to match "CheckSum".
|
|
|
|
func GlobStringToRegexp(glob string, addAnchors bool, ignoreCase bool) (*regexp.Regexp, error) {
|
|
|
|
return globToRegexp(glob, false, addAnchors, ignoreCase)
|
2023-11-03 20:45:37 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// globToRegexp converts an rsync style glob to a regexp
|
2015-09-27 17:13:20 +02:00
|
|
|
//
|
2023-11-03 20:45:37 +01:00
|
|
|
// Set pathMode true for matching of path/file names, e.g.
|
|
|
|
// special treatment of path separator `/` and double asterisk `**`,
|
|
|
|
// see filtering.md for details.
|
|
|
|
//
|
|
|
|
// Set addAnchors true to add start of string `^` and end of string `$` anchors.
|
|
|
|
func globToRegexp(glob string, pathMode bool, addAnchors bool, ignoreCase bool) (*regexp.Regexp, error) {
|
2015-09-27 17:13:20 +02:00
|
|
|
var re bytes.Buffer
|
2018-11-12 15:29:37 +01:00
|
|
|
if ignoreCase {
|
|
|
|
_, _ = re.WriteString("(?i)")
|
|
|
|
}
|
2023-11-03 20:45:37 +01:00
|
|
|
if addAnchors {
|
|
|
|
if pathMode {
|
|
|
|
if strings.HasPrefix(glob, "/") {
|
|
|
|
glob = glob[1:]
|
|
|
|
_ = re.WriteByte('^')
|
|
|
|
} else {
|
|
|
|
_, _ = re.WriteString("(^|/)")
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
_, _ = re.WriteString("^")
|
|
|
|
}
|
2015-09-27 17:13:20 +02:00
|
|
|
}
|
|
|
|
consecutiveStars := 0
|
|
|
|
insertStars := func() error {
|
|
|
|
if consecutiveStars > 0 {
|
2023-11-03 20:45:37 +01:00
|
|
|
if pathMode {
|
|
|
|
switch consecutiveStars {
|
|
|
|
case 1:
|
|
|
|
_, _ = re.WriteString(`[^/]*`)
|
|
|
|
case 2:
|
|
|
|
_, _ = re.WriteString(`.*`)
|
|
|
|
default:
|
|
|
|
return fmt.Errorf("too many stars in %q", glob)
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
switch consecutiveStars {
|
|
|
|
case 1:
|
|
|
|
_, _ = re.WriteString(`.*`)
|
|
|
|
default:
|
|
|
|
return fmt.Errorf("too many stars in %q", glob)
|
|
|
|
}
|
2015-09-27 17:13:20 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
consecutiveStars = 0
|
|
|
|
return nil
|
|
|
|
}
|
2021-10-09 13:56:23 +02:00
|
|
|
overwriteLastChar := func(c byte) {
|
|
|
|
buf := re.Bytes()
|
|
|
|
buf[len(buf)-1] = c
|
|
|
|
}
|
2015-09-27 17:13:20 +02:00
|
|
|
inBraces := false
|
|
|
|
inBrackets := 0
|
|
|
|
slashed := false
|
2021-10-09 13:56:23 +02:00
|
|
|
inRegexp := false // inside {{ ... }}
|
|
|
|
inRegexpEnd := false // have received }} waiting for more
|
|
|
|
var next, last rune
|
2015-09-27 17:13:20 +02:00
|
|
|
for _, c := range glob {
|
2021-10-09 13:56:23 +02:00
|
|
|
next, last = c, next
|
2015-09-27 17:13:20 +02:00
|
|
|
if slashed {
|
|
|
|
_, _ = re.WriteRune(c)
|
|
|
|
slashed = false
|
|
|
|
continue
|
|
|
|
}
|
2021-10-09 13:56:23 +02:00
|
|
|
if inRegexpEnd {
|
|
|
|
if c == '}' {
|
|
|
|
// Regexp is ending with }} choose longest segment
|
|
|
|
// Replace final ) with }
|
|
|
|
overwriteLastChar('}')
|
|
|
|
_ = re.WriteByte(')')
|
|
|
|
continue
|
|
|
|
} else {
|
|
|
|
inRegexpEnd = false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if inRegexp {
|
|
|
|
if c == '}' && last == '}' {
|
|
|
|
inRegexp = false
|
|
|
|
inRegexpEnd = true
|
|
|
|
// Replace final } with )
|
|
|
|
overwriteLastChar(')')
|
|
|
|
} else {
|
|
|
|
_, _ = re.WriteRune(c)
|
|
|
|
}
|
|
|
|
continue
|
|
|
|
}
|
2015-09-27 17:13:20 +02:00
|
|
|
if c != '*' {
|
|
|
|
err := insertStars()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if inBrackets > 0 {
|
|
|
|
_, _ = re.WriteRune(c)
|
|
|
|
if c == '[' {
|
|
|
|
inBrackets++
|
|
|
|
}
|
|
|
|
if c == ']' {
|
|
|
|
inBrackets--
|
|
|
|
}
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
switch c {
|
|
|
|
case '\\':
|
|
|
|
_, _ = re.WriteRune(c)
|
|
|
|
slashed = true
|
|
|
|
case '*':
|
|
|
|
consecutiveStars++
|
|
|
|
case '?':
|
2023-11-03 20:45:37 +01:00
|
|
|
if pathMode {
|
|
|
|
_, _ = re.WriteString(`[^/]`)
|
|
|
|
} else {
|
|
|
|
_, _ = re.WriteString(`.`)
|
|
|
|
}
|
2015-09-27 17:13:20 +02:00
|
|
|
case '[':
|
|
|
|
_, _ = re.WriteRune(c)
|
|
|
|
inBrackets++
|
|
|
|
case ']':
|
2021-11-04 11:12:57 +01:00
|
|
|
return nil, fmt.Errorf("mismatched ']' in glob %q", glob)
|
2015-09-27 17:13:20 +02:00
|
|
|
case '{':
|
|
|
|
if inBraces {
|
2021-10-09 13:56:23 +02:00
|
|
|
if last == '{' {
|
|
|
|
inRegexp = true
|
|
|
|
inBraces = false
|
|
|
|
} else {
|
|
|
|
return nil, fmt.Errorf("can't nest '{' '}' in glob %q", glob)
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
inBraces = true
|
|
|
|
_ = re.WriteByte('(')
|
2015-09-27 17:13:20 +02:00
|
|
|
}
|
|
|
|
case '}':
|
|
|
|
if !inBraces {
|
2021-11-04 11:12:57 +01:00
|
|
|
return nil, fmt.Errorf("mismatched '{' and '}' in glob %q", glob)
|
2015-09-27 17:13:20 +02:00
|
|
|
}
|
2021-10-09 13:56:23 +02:00
|
|
|
_ = re.WriteByte(')')
|
2015-09-27 17:13:20 +02:00
|
|
|
inBraces = false
|
|
|
|
case ',':
|
|
|
|
if inBraces {
|
2021-10-09 13:56:23 +02:00
|
|
|
_ = re.WriteByte('|')
|
2015-09-27 17:13:20 +02:00
|
|
|
} else {
|
|
|
|
_, _ = re.WriteRune(c)
|
|
|
|
}
|
|
|
|
case '.', '+', '(', ')', '|', '^', '$': // regexp meta characters not dealt with above
|
2021-10-09 13:56:23 +02:00
|
|
|
_ = re.WriteByte('\\')
|
2015-09-27 17:13:20 +02:00
|
|
|
_, _ = re.WriteRune(c)
|
|
|
|
default:
|
|
|
|
_, _ = re.WriteRune(c)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
err := insertStars()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
if inBrackets > 0 {
|
2021-11-04 11:12:57 +01:00
|
|
|
return nil, fmt.Errorf("mismatched '[' and ']' in glob %q", glob)
|
2015-09-27 17:13:20 +02:00
|
|
|
}
|
|
|
|
if inBraces {
|
2021-11-04 11:12:57 +01:00
|
|
|
return nil, fmt.Errorf("mismatched '{' and '}' in glob %q", glob)
|
2015-09-27 17:13:20 +02:00
|
|
|
}
|
2021-10-09 13:56:23 +02:00
|
|
|
if inRegexp {
|
|
|
|
return nil, fmt.Errorf("mismatched '{{' and '}}' in glob %q", glob)
|
|
|
|
}
|
2023-11-03 20:45:37 +01:00
|
|
|
if addAnchors {
|
|
|
|
_ = re.WriteByte('$')
|
|
|
|
}
|
2015-09-27 17:13:20 +02:00
|
|
|
result, err := regexp.Compile(re.String())
|
|
|
|
if err != nil {
|
2021-11-04 11:12:57 +01:00
|
|
|
return nil, fmt.Errorf("bad glob pattern %q (regexp %q): %w", glob, re.String(), err)
|
2015-09-27 17:13:20 +02:00
|
|
|
}
|
|
|
|
return result, nil
|
|
|
|
}
|
2016-05-16 18:14:04 +02:00
|
|
|
|
|
|
|
var (
	// squashSlash matches any run of two or more consecutive /
	squashSlash = regexp.MustCompile(`/{2,}`)

	// tooHardRe matches the globs which can't be split into
	// directory globs, namely those containing
	//   - a / or a ** inside { }
	//   - a {{ regexp }}
	tooHardRe = regexp.MustCompile(`({[^{}]*(\*\*|/)[^{}]*})|\{\{|\}\}`)
)
|
|
|
|
|
|
|
|
// globToDirGlobs takes a file glob and turns it into a series of
|
|
|
|
// directory globs. When matched with a directory (with a trailing /)
|
|
|
|
// this should answer the question as to whether this glob could be in
|
|
|
|
// this directory.
|
|
|
|
func globToDirGlobs(glob string) (out []string) {
|
|
|
|
if tooHardRe.MatchString(glob) {
|
|
|
|
// Can't figure this one out so return any directory might match
|
2023-02-01 15:21:45 +01:00
|
|
|
fs.Infof(nil, "Can't figure out directory filters from %q: looking in all directories", glob)
|
2016-05-16 18:14:04 +02:00
|
|
|
out = append(out, "/**")
|
|
|
|
return out
|
|
|
|
}
|
|
|
|
|
|
|
|
// Get rid of multiple /s
|
|
|
|
glob = squashSlash.ReplaceAllString(glob, "/")
|
|
|
|
|
|
|
|
// Split on / or **
|
|
|
|
// (** can contain /)
|
|
|
|
for {
|
|
|
|
i := strings.LastIndex(glob, "/")
|
|
|
|
j := strings.LastIndex(glob, "**")
|
|
|
|
what := ""
|
|
|
|
if j > i {
|
|
|
|
i = j
|
|
|
|
what = "**"
|
|
|
|
}
|
|
|
|
if i < 0 {
|
|
|
|
if len(out) == 0 {
|
|
|
|
out = append(out, "/**")
|
|
|
|
}
|
|
|
|
break
|
|
|
|
}
|
|
|
|
glob = glob[:i]
|
|
|
|
newGlob := glob + what + "/"
|
|
|
|
if len(out) == 0 || out[len(out)-1] != newGlob {
|
|
|
|
out = append(out, newGlob)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return out
|
|
|
|
}
|