filter: add metadata filters FIXME needs docs and tests

Fixes #6353
This commit is contained in:
Nick Craig-Wood 2022-08-04 18:19:05 +01:00
parent 918bd6d3c3
commit c520e52696
5 changed files with 322 additions and 192 deletions

View File

@ -2,14 +2,11 @@
package filter
import (
"bufio"
"context"
"errors"
"fmt"
"log"
"os"
"path"
"regexp"
"strings"
"time"
@ -22,73 +19,12 @@ import (
// This is accessed through GetConfig and AddConfig
var globalConfig = mustNewFilter(nil)
// rule is one filter rule
type rule struct {
Include bool
Regexp *regexp.Regexp
}
// Match returns true if rule matches path
func (r *rule) Match(path string) bool {
return r.Regexp.MatchString(path)
}
// String the rule
func (r *rule) String() string {
c := "-"
if r.Include {
c = "+"
}
return fmt.Sprintf("%s %s", c, r.Regexp.String())
}
// rules is a slice of rules
type rules struct {
rules []rule
existing map[string]struct{}
}
// add adds a rule if it doesn't exist already
func (rs *rules) add(Include bool, re *regexp.Regexp) {
if rs.existing == nil {
rs.existing = make(map[string]struct{})
}
newRule := rule{
Include: Include,
Regexp: re,
}
newRuleString := newRule.String()
if _, ok := rs.existing[newRuleString]; ok {
return // rule already exists
}
rs.rules = append(rs.rules, newRule)
rs.existing[newRuleString] = struct{}{}
}
// clear clears all the rules
func (rs *rules) clear() {
rs.rules = nil
rs.existing = nil
}
// len returns the number of rules
func (rs *rules) len() int {
return len(rs.rules)
}
// FilesMap describes the map of files to transfer
type FilesMap map[string]struct{}
// Opt configures the filter
type Opt struct {
DeleteExcluded bool
FilterRule []string
FilterFrom []string
ExcludeRule []string
ExcludeFrom []string
RulesOpt
ExcludeFile []string
IncludeRule []string
IncludeFrom []string
MetaRules RulesOpt
FilesFrom []string
FilesFromRaw []string
MinAge fs.Duration
@ -106,6 +42,9 @@ var DefaultOpt = Opt{
MaxSize: fs.SizeSuffix(-1),
}
// FilesMap describes the map of files to transfer
type FilesMap map[string]struct{}
// Filter describes any filtering in operation
type Filter struct {
Opt Opt
@ -113,6 +52,7 @@ type Filter struct {
ModTimeTo time.Time
fileRules rules
dirRules rules
metaRules rules
files FilesMap // files if filesFrom
dirs FilesMap // dirs from filesFrom
}
@ -142,58 +82,15 @@ func NewFilter(opt *Opt) (f *Filter, err error) {
fs.Debugf(nil, "--max-age %v to %v", f.Opt.MaxAge, f.ModTimeFrom)
}
addImplicitExclude := false
foundExcludeRule := false
for _, rule := range f.Opt.IncludeRule {
err = f.Add(true, rule)
err = parseRules(&f.Opt.RulesOpt, f.Add, f.Clear)
if err != nil {
return nil, err
}
addImplicitExclude = true
}
for _, rule := range f.Opt.IncludeFrom {
err := forEachLine(rule, false, func(line string) error {
return f.Add(true, line)
})
if err != nil {
return nil, err
}
addImplicitExclude = true
}
for _, rule := range f.Opt.ExcludeRule {
err = f.Add(false, rule)
if err != nil {
return nil, err
}
foundExcludeRule = true
}
for _, rule := range f.Opt.ExcludeFrom {
err := forEachLine(rule, false, func(line string) error {
return f.Add(false, line)
})
if err != nil {
return nil, err
}
foundExcludeRule = true
}
if addImplicitExclude && foundExcludeRule {
fs.Errorf(nil, "Using --filter is recommended instead of both --include and --exclude as the order they are parsed in is indeterminate")
}
for _, rule := range f.Opt.FilterRule {
err = f.AddRule(rule)
err = parseRules(&f.Opt.MetaRules, f.metaRules.Add, f.metaRules.clear)
if err != nil {
return nil, err
}
}
for _, rule := range f.Opt.FilterFrom {
err := forEachLine(rule, false, f.AddRule)
if err != nil {
return nil, err
}
}
inActive := f.InActive()
@ -225,12 +122,6 @@ func NewFilter(opt *Opt) (f *Filter, err error) {
}
}
if addImplicitExclude {
err = f.Add(false, "/**")
if err != nil {
return nil, err
}
}
if fs.GetConfig(context.Background()).Dump&fs.DumpFilters != 0 {
fmt.Println("--- start filters ---")
fmt.Println(f.DumpFilters())
@ -309,16 +200,7 @@ func (f *Filter) Add(Include bool, glob string) error {
//
// Line comments may be introduced with '#' or ';'
func (f *Filter) AddRule(rule string) error {
switch {
case rule == "!":
f.Clear()
return nil
case strings.HasPrefix(rule, "- "):
return f.Add(false, rule[2:])
case strings.HasPrefix(rule, "+ "):
return f.Add(true, rule[2:])
}
return fmt.Errorf("malformed rule %q", rule)
return addRule(rule, f.Add, f.Clear)
}
// initAddFile creates f.files and f.dirs
@ -359,6 +241,7 @@ func (f *Filter) Files() FilesMap {
func (f *Filter) Clear() {
f.fileRules.clear()
f.dirRules.clear()
f.metaRules.clear()
}
// InActive returns false if any filters are active
@ -370,17 +253,13 @@ func (f *Filter) InActive() bool {
f.Opt.MaxSize < 0 &&
f.fileRules.len() == 0 &&
f.dirRules.len() == 0 &&
f.metaRules.len() == 0 &&
len(f.Opt.ExcludeFile) == 0)
}
// IncludeRemote returns whether this remote passes the filter rules.
func (f *Filter) IncludeRemote(remote string) bool {
for _, rule := range f.fileRules.rules {
if rule.Match(remote) {
return rule.Include
}
}
return true
return f.fileRules.include(remote)
}
// ListContainsExcludeFile checks if exclude file is present in the list.
@ -423,13 +302,7 @@ func (f *Filter) IncludeDirectory(ctx context.Context, fs fs.Fs) func(string) (b
return include, nil
}
remote += "/"
for _, rule := range f.dirRules.rules {
if rule.Match(remote) {
return rule.Include, nil
}
}
return true, nil
return f.dirRules.include(remote), nil
}
}
@ -453,7 +326,7 @@ func (f *Filter) DirContainsExcludeFile(ctx context.Context, fremote fs.Fs, remo
// Include returns whether this object should be included into the
// sync or not
func (f *Filter) Include(remote string, size int64, modTime time.Time) bool {
func (f *Filter) Include(remote string, size int64, modTime time.Time, metadata fs.Metadata) bool {
// filesFrom takes precedence
if f.files != nil {
_, include := f.files[remote]
@ -471,6 +344,15 @@ func (f *Filter) Include(remote string, size int64, modTime time.Time) bool {
if f.Opt.MaxSize >= 0 && size > int64(f.Opt.MaxSize) {
return false
}
if f.metaRules.len() > 0 && len(metadata) > 0 {
metadatas := make([]string, 0, len(metadata))
for key, value := range metadata {
metadatas = append(metadatas, fmt.Sprintf("%s=%s", key, value))
}
if !f.metaRules.includeMany(metadatas) {
return false
}
}
return f.IncludeRemote(remote)
}
@ -485,39 +367,17 @@ func (f *Filter) IncludeObject(ctx context.Context, o fs.Object) bool {
} else {
modTime = time.Unix(0, 0)
}
return f.Include(o.Remote(), o.Size(), modTime)
var metadata fs.Metadata
if f.metaRules.len() > 0 {
var err error
metadata, err = fs.GetMetadata(ctx, o)
if err != nil {
fs.Errorf(o, "Failed to read metadata: %v", err)
metadata = nil
}
// forEachLine calls fn on every line in the file pointed to by path
//
// It ignores empty lines and lines starting with '#' or ';' if raw is false
func forEachLine(path string, raw bool, fn func(string) error) (err error) {
var scanner *bufio.Scanner
if path == "-" {
scanner = bufio.NewScanner(os.Stdin)
} else {
in, err := os.Open(path)
if err != nil {
return err
}
scanner = bufio.NewScanner(in)
defer fs.CheckClose(in, &err)
}
for scanner.Scan() {
line := scanner.Text()
if !raw {
line = strings.TrimSpace(line)
if len(line) == 0 || line[0] == '#' || line[0] == ';' {
continue
}
}
err := fn(line)
if err != nil {
return err
}
}
return scanner.Err()
return f.Include(o.Remote(), o.Size(), modTime, metadata)
}
// DumpFilters dumps the filters in textual form, 1 per line
@ -537,6 +397,12 @@ func (f *Filter) DumpFilters() string {
for _, dirRule := range f.dirRules.rules {
rules = append(rules, dirRule.String())
}
if f.metaRules.len() > 0 {
rules = append(rules, "--- Metadata filter rules ---")
for _, metaRule := range f.metaRules.rules {
rules = append(rules, metaRule.String())
}
}
return strings.Join(rules, "\n")
}

View File

@ -208,7 +208,7 @@ type includeTest struct {
func testInclude(t *testing.T, f *Filter, tests []includeTest) {
for _, test := range tests {
got := f.Include(test.in, test.size, time.Unix(test.modTime, 0))
got := f.Include(test.in, test.size, time.Unix(test.modTime, 0), nil)
assert.Equal(t, test.want, got, fmt.Sprintf("in=%q, size=%v, modTime=%v", test.in, test.size, time.Unix(test.modTime, 0)))
}
}
@ -714,7 +714,7 @@ func TestFilterMatchesFromDocs(t *testing.T) {
require.NoError(t, err)
err = f.Add(false, "*")
require.NoError(t, err)
included := f.Include(test.file, 0, time.Unix(0, 0))
included := f.Include(test.file, 0, time.Unix(0, 0), nil)
if included != test.included {
t.Errorf("%q match %q: want %v got %v", test.glob, test.file, test.included, included)
}

View File

@ -3,6 +3,7 @@ package filterflags
import (
"context"
"fmt"
"github.com/rclone/rclone/fs/config/flags"
"github.com/rclone/rclone/fs/filter"
@ -26,17 +27,27 @@ func Reload(ctx context.Context) (err error) {
return nil
}
// AddRuleFlags add a set of rules flags with prefix
func AddRuleFlags(flagSet *pflag.FlagSet, Opt *filter.RulesOpt, what, prefix string) {
shortFilter := ""
if prefix == "" {
shortFilter = "f"
}
flags.StringArrayVarP(flagSet, &Opt.FilterRule, prefix+"filter", shortFilter, nil, fmt.Sprintf("Add a %s filtering rule", what))
flags.StringArrayVarP(flagSet, &Opt.FilterFrom, prefix+"filter-from", "", nil, fmt.Sprintf("Read %s filtering patterns from a file (use - to read from stdin)", what))
flags.StringArrayVarP(flagSet, &Opt.ExcludeRule, prefix+"exclude", "", nil, fmt.Sprintf("Exclude %ss matching pattern", what))
flags.StringArrayVarP(flagSet, &Opt.ExcludeFrom, prefix+"exclude-from", "", nil, fmt.Sprintf("Read %s exclude patterns from file (use - to read from stdin)", what))
flags.StringArrayVarP(flagSet, &Opt.IncludeRule, prefix+"include", "", nil, fmt.Sprintf("Include %ss matching pattern", what))
flags.StringArrayVarP(flagSet, &Opt.IncludeFrom, prefix+"include-from", "", nil, fmt.Sprintf("Read %s include patterns from file (use - to read from stdin)", what))
}
// AddFlags adds the non filing system specific flags to the command
func AddFlags(flagSet *pflag.FlagSet) {
rc.AddOptionReload("filter", &Opt, Reload)
flags.BoolVarP(flagSet, &Opt.DeleteExcluded, "delete-excluded", "", false, "Delete files on dest excluded from sync")
flags.StringArrayVarP(flagSet, &Opt.FilterRule, "filter", "f", nil, "Add a file-filtering rule")
flags.StringArrayVarP(flagSet, &Opt.FilterFrom, "filter-from", "", nil, "Read filtering patterns from a file (use - to read from stdin)")
flags.StringArrayVarP(flagSet, &Opt.ExcludeRule, "exclude", "", nil, "Exclude files matching pattern")
flags.StringArrayVarP(flagSet, &Opt.ExcludeFrom, "exclude-from", "", nil, "Read exclude patterns from file (use - to read from stdin)")
AddRuleFlags(flagSet, &Opt.RulesOpt, "file", "")
AddRuleFlags(flagSet, &Opt.MetaRules, "metadata", "metadata-")
flags.StringArrayVarP(flagSet, &Opt.ExcludeFile, "exclude-if-present", "", nil, "Exclude directories if filename is present")
flags.StringArrayVarP(flagSet, &Opt.IncludeRule, "include", "", nil, "Include files matching pattern")
flags.StringArrayVarP(flagSet, &Opt.IncludeFrom, "include-from", "", nil, "Read include patterns from file (use - to read from stdin)")
flags.StringArrayVarP(flagSet, &Opt.FilesFrom, "files-from", "", nil, "Read list of source-file names from file (use - to read from stdin)")
flags.StringArrayVarP(flagSet, &Opt.FilesFromRaw, "files-from-raw", "", nil, "Read list of source-file names from file without any processing of lines (use - to read from stdin)")
flags.FVarP(flagSet, &Opt.MinAge, "min-age", "", "Only transfer files older than this in s or suffix ms|s|m|h|d|w|M|y")

253
fs/filter/rules.go Normal file
View File

@ -0,0 +1,253 @@
package filter
import (
"bufio"
"fmt"
"os"
"regexp"
"strings"
"github.com/rclone/rclone/fs"
)
// RulesOpt is configuration for a rule set
type RulesOpt struct {
FilterRule []string
FilterFrom []string
ExcludeRule []string
ExcludeFrom []string
IncludeRule []string
IncludeFrom []string
}
// rule is one filter rule
type rule struct {
Include bool
Regexp *regexp.Regexp
}
// Match returns true if rule matches path
func (r *rule) Match(path string) bool {
return r.Regexp.MatchString(path)
}
// String the rule
func (r *rule) String() string {
c := "-"
if r.Include {
c = "+"
}
return fmt.Sprintf("%s %s", c, r.Regexp.String())
}
// rules is a slice of rules
type rules struct {
rules []rule
existing map[string]struct{}
}
type addFn func(Include bool, glob string) error
// add adds a rule if it doesn't exist already
func (rs *rules) add(Include bool, re *regexp.Regexp) {
if rs.existing == nil {
rs.existing = make(map[string]struct{})
}
newRule := rule{
Include: Include,
Regexp: re,
}
newRuleString := newRule.String()
if _, ok := rs.existing[newRuleString]; ok {
return // rule already exists
}
rs.rules = append(rs.rules, newRule)
rs.existing[newRuleString] = struct{}{}
}
// Add adds a filter rule with include or exclude status indicated
func (rs *rules) Add(Include bool, glob string) error {
re, err := GlobToRegexp(glob, false /* f.Opt.IgnoreCase */)
if err != nil {
return err
}
rs.add(Include, re)
return nil
}
type clearFn func()
// clear clears all the rules
func (rs *rules) clear() {
rs.rules = nil
rs.existing = nil
}
// len returns the number of rules
func (rs *rules) len() int {
return len(rs.rules)
}
// include returns whether this remote passes the filter rules.
func (rs *rules) include(remote string) bool {
for _, rule := range rs.rules {
if rule.Match(remote) {
return rule.Include
}
}
return true
}
// include returns whether this collection of strings remote passes
// the filter rules.
//
// the first rule is evaluated on all the remotes and if it matches
// then the result is returned. If not the next rule is tested and so
// on.
func (rs *rules) includeMany(remotes []string) bool {
for _, rule := range rs.rules {
for _, remote := range remotes {
if rule.Match(remote) {
return rule.Include
}
}
}
return true
}
// forEachLine calls fn on every line in the file pointed to by path
//
// It ignores empty lines and lines starting with '#' or ';' if raw is false
func forEachLine(path string, raw bool, fn func(string) error) (err error) {
var scanner *bufio.Scanner
if path == "-" {
scanner = bufio.NewScanner(os.Stdin)
} else {
in, err := os.Open(path)
if err != nil {
return err
}
scanner = bufio.NewScanner(in)
defer fs.CheckClose(in, &err)
}
for scanner.Scan() {
line := scanner.Text()
if !raw {
line = strings.TrimSpace(line)
if len(line) == 0 || line[0] == '#' || line[0] == ';' {
continue
}
}
err := fn(line)
if err != nil {
return err
}
}
return scanner.Err()
}
// AddRule adds a filter rule with include/exclude indicated by the prefix
//
// These are
//
// + glob
// - glob
// !
//
// '+' includes the glob, '-' excludes it and '!' resets the filter list
//
// Line comments may be introduced with '#' or ';'
func addRule(rule string, add addFn, clear clearFn) error {
switch {
case rule == "!":
clear()
return nil
case strings.HasPrefix(rule, "- "):
return add(false, rule[2:])
case strings.HasPrefix(rule, "+ "):
return add(true, rule[2:])
}
return fmt.Errorf("malformed rule %q", rule)
}
// AddRule adds a filter rule with include/exclude indicated by the prefix
//
// These are
//
// + glob
// - glob
// !
//
// '+' includes the glob, '-' excludes it and '!' resets the filter list
//
// Line comments may be introduced with '#' or ';'
func (rs *rules) AddRule(rule string) error {
return addRule(rule, rs.Add, rs.clear)
}
// Parse the rules passed in and add them to the function
func parseRules(opt *RulesOpt, add addFn, clear clearFn) (err error) {
addImplicitExclude := false
foundExcludeRule := false
for _, rule := range opt.IncludeRule {
err = add(true, rule)
if err != nil {
return err
}
addImplicitExclude = true
}
for _, rule := range opt.IncludeFrom {
err := forEachLine(rule, false, func(line string) error {
return add(true, line)
})
if err != nil {
return err
}
addImplicitExclude = true
}
for _, rule := range opt.ExcludeRule {
err = add(false, rule)
if err != nil {
return err
}
foundExcludeRule = true
}
for _, rule := range opt.ExcludeFrom {
err := forEachLine(rule, false, func(line string) error {
return add(false, line)
})
if err != nil {
return err
}
foundExcludeRule = true
}
if addImplicitExclude && foundExcludeRule {
fs.Errorf(nil, "Using --filter is recommended instead of both --include and --exclude as the order they are parsed in is indeterminate")
}
for _, rule := range opt.FilterRule {
err = addRule(rule, add, clear)
if err != nil {
return err
}
}
for _, rule := range opt.FilterFrom {
err := forEachLine(rule, false, func(rule string) error {
return addRule(rule, add, clear)
})
if err != nil {
return err
}
}
if addImplicitExclude {
err = add(false, "/**")
if err != nil {
return err
}
}
return nil
}

View File

@ -1515,7 +1515,7 @@ func Rmdirs(ctx context.Context, f fs.Fs, dir string, leaveRoot bool) error {
dir := toDelete[i]
// If a filter matches the directory then that
// directory is a candidate for deletion
if !fi.Include(dir+"/", 0, time.Now()) {
if !fi.IncludeRemote(dir + "/") {
continue
}
err = TryRmdir(ctx, f, dir)