From c520e52696350658de66445cf063c15a7b2e4b1d Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood <nick@craig-wood.com> Date: Thu, 4 Aug 2022 18:19:05 +0100 Subject: [PATCH] filter: add metadata filters FIXME needs docs and tests Fixes #6353 --- fs/filter/filter.go | 232 ++++++------------------ fs/filter/filter_test.go | 4 +- fs/filter/filterflags/filterflags.go | 23 ++- fs/filter/rules.go | 253 +++++++++++++++++++++++++++ fs/operations/operations.go | 2 +- 5 files changed, 322 insertions(+), 192 deletions(-) create mode 100644 fs/filter/rules.go diff --git a/fs/filter/filter.go b/fs/filter/filter.go index 741580fff..db7bedcb0 100644 --- a/fs/filter/filter.go +++ b/fs/filter/filter.go @@ -2,14 +2,11 @@ package filter import ( - "bufio" "context" "errors" "fmt" "log" - "os" "path" - "regexp" "strings" "time" @@ -22,80 +19,19 @@ import ( // This is accessed through GetConfig and AddConfig var globalConfig = mustNewFilter(nil) -// rule is one filter rule -type rule struct { - Include bool - Regexp *regexp.Regexp -} - -// Match returns true if rule matches path -func (r *rule) Match(path string) bool { - return r.Regexp.MatchString(path) -} - -// String the rule -func (r *rule) String() string { - c := "-" - if r.Include { - c = "+" - } - return fmt.Sprintf("%s %s", c, r.Regexp.String()) -} - -// rules is a slice of rules -type rules struct { - rules []rule - existing map[string]struct{} -} - -// add adds a rule if it doesn't exist already -func (rs *rules) add(Include bool, re *regexp.Regexp) { - if rs.existing == nil { - rs.existing = make(map[string]struct{}) - } - newRule := rule{ - Include: Include, - Regexp: re, - } - newRuleString := newRule.String() - if _, ok := rs.existing[newRuleString]; ok { - return // rule already exists - } - rs.rules = append(rs.rules, newRule) - rs.existing[newRuleString] = struct{}{} -} - -// clear clears all the rules -func (rs *rules) clear() { - rs.rules = nil - rs.existing = nil -} - -// len returns the number of rules -func (rs 
*rules) len() int { - return len(rs.rules) -} - -// FilesMap describes the map of files to transfer -type FilesMap map[string]struct{} - // Opt configures the filter type Opt struct { DeleteExcluded bool - FilterRule []string - FilterFrom []string - ExcludeRule []string - ExcludeFrom []string - ExcludeFile []string - IncludeRule []string - IncludeFrom []string - FilesFrom []string - FilesFromRaw []string - MinAge fs.Duration - MaxAge fs.Duration - MinSize fs.SizeSuffix - MaxSize fs.SizeSuffix - IgnoreCase bool + RulesOpt + ExcludeFile []string + MetaRules RulesOpt + FilesFrom []string + FilesFromRaw []string + MinAge fs.Duration + MaxAge fs.Duration + MinSize fs.SizeSuffix + MaxSize fs.SizeSuffix + IgnoreCase bool } // DefaultOpt is the default config for the filter @@ -106,6 +42,9 @@ var DefaultOpt = Opt{ MaxSize: fs.SizeSuffix(-1), } +// FilesMap describes the map of files to transfer +type FilesMap map[string]struct{} + // Filter describes any filtering in operation type Filter struct { Opt Opt @@ -113,6 +52,7 @@ type Filter struct { ModTimeTo time.Time fileRules rules dirRules rules + metaRules rules files FilesMap // files if filesFrom dirs FilesMap // dirs from filesFrom } @@ -142,57 +82,14 @@ func NewFilter(opt *Opt) (f *Filter, err error) { fs.Debugf(nil, "--max-age %v to %v", f.Opt.MaxAge, f.ModTimeFrom) } - addImplicitExclude := false - foundExcludeRule := false - - for _, rule := range f.Opt.IncludeRule { - err = f.Add(true, rule) - if err != nil { - return nil, err - } - addImplicitExclude = true - } - for _, rule := range f.Opt.IncludeFrom { - err := forEachLine(rule, false, func(line string) error { - return f.Add(true, line) - }) - if err != nil { - return nil, err - } - addImplicitExclude = true - } - for _, rule := range f.Opt.ExcludeRule { - err = f.Add(false, rule) - if err != nil { - return nil, err - } - foundExcludeRule = true - } - for _, rule := range f.Opt.ExcludeFrom { - err := forEachLine(rule, false, func(line string) error { - return 
f.Add(false, line) - }) - if err != nil { - return nil, err - } - foundExcludeRule = true + err = parseRules(&f.Opt.RulesOpt, f.Add, f.Clear) + if err != nil { + return nil, err } - if addImplicitExclude && foundExcludeRule { - fs.Errorf(nil, "Using --filter is recommended instead of both --include and --exclude as the order they are parsed in is indeterminate") - } - - for _, rule := range f.Opt.FilterRule { - err = f.AddRule(rule) - if err != nil { - return nil, err - } - } - for _, rule := range f.Opt.FilterFrom { - err := forEachLine(rule, false, f.AddRule) - if err != nil { - return nil, err - } + err = parseRules(&f.Opt.MetaRules, f.metaRules.Add, f.metaRules.clear) + if err != nil { + return nil, err } inActive := f.InActive() @@ -225,12 +122,6 @@ func NewFilter(opt *Opt) (f *Filter, err error) { } } - if addImplicitExclude { - err = f.Add(false, "/**") - if err != nil { - return nil, err - } - } if fs.GetConfig(context.Background()).Dump&fs.DumpFilters != 0 { fmt.Println("--- start filters ---") fmt.Println(f.DumpFilters()) @@ -309,16 +200,7 @@ func (f *Filter) Add(Include bool, glob string) error { // // Line comments may be introduced with '#' or ';' func (f *Filter) AddRule(rule string) error { - switch { - case rule == "!": - f.Clear() - return nil - case strings.HasPrefix(rule, "- "): - return f.Add(false, rule[2:]) - case strings.HasPrefix(rule, "+ "): - return f.Add(true, rule[2:]) - } - return fmt.Errorf("malformed rule %q", rule) + return addRule(rule, f.Add, f.Clear) } // initAddFile creates f.files and f.dirs @@ -359,6 +241,7 @@ func (f *Filter) Files() FilesMap { func (f *Filter) Clear() { f.fileRules.clear() f.dirRules.clear() + f.metaRules.clear() } // InActive returns false if any filters are active @@ -370,17 +253,13 @@ func (f *Filter) InActive() bool { f.Opt.MaxSize < 0 && f.fileRules.len() == 0 && f.dirRules.len() == 0 && + f.metaRules.len() == 0 && len(f.Opt.ExcludeFile) == 0) } // IncludeRemote returns whether this remote passes the 
filter rules. func (f *Filter) IncludeRemote(remote string) bool { - for _, rule := range f.fileRules.rules { - if rule.Match(remote) { - return rule.Include - } - } - return true + return f.fileRules.include(remote) } // ListContainsExcludeFile checks if exclude file is present in the list. @@ -423,13 +302,7 @@ func (f *Filter) IncludeDirectory(ctx context.Context, fs fs.Fs) func(string) (b return include, nil } remote += "/" - for _, rule := range f.dirRules.rules { - if rule.Match(remote) { - return rule.Include, nil - } - } - - return true, nil + return f.dirRules.include(remote), nil } } @@ -453,7 +326,7 @@ func (f *Filter) DirContainsExcludeFile(ctx context.Context, fremote fs.Fs, remo // Include returns whether this object should be included into the // sync or not -func (f *Filter) Include(remote string, size int64, modTime time.Time) bool { +func (f *Filter) Include(remote string, size int64, modTime time.Time, metadata fs.Metadata) bool { // filesFrom takes precedence if f.files != nil { _, include := f.files[remote] @@ -471,6 +344,15 @@ func (f *Filter) Include(remote string, size int64, modTime time.Time) bool { if f.Opt.MaxSize >= 0 && size > int64(f.Opt.MaxSize) { return false } + if f.metaRules.len() > 0 && len(metadata) > 0 { + metadatas := make([]string, 0, len(metadata)) + for key, value := range metadata { + metadatas = append(metadatas, fmt.Sprintf("%s=%s", key, value)) + } + if !f.metaRules.includeMany(metadatas) { + return false + } + } return f.IncludeRemote(remote) } @@ -485,39 +367,17 @@ func (f *Filter) IncludeObject(ctx context.Context, o fs.Object) bool { } else { modTime = time.Unix(0, 0) } - - return f.Include(o.Remote(), o.Size(), modTime) -} - -// forEachLine calls fn on every line in the file pointed to by path -// -// It ignores empty lines and lines starting with '#' or ';' if raw is false -func forEachLine(path string, raw bool, fn func(string) error) (err error) { - var scanner *bufio.Scanner - if path == "-" { - scanner = 
bufio.NewScanner(os.Stdin) - } else { - in, err := os.Open(path) + var metadata fs.Metadata + if f.metaRules.len() > 0 { + var err error + metadata, err = fs.GetMetadata(ctx, o) if err != nil { - return err + fs.Errorf(o, "Failed to read metadata: %v", err) + metadata = nil } - scanner = bufio.NewScanner(in) - defer fs.CheckClose(in, &err) + } - for scanner.Scan() { - line := scanner.Text() - if !raw { - line = strings.TrimSpace(line) - if len(line) == 0 || line[0] == '#' || line[0] == ';' { - continue - } - } - err := fn(line) - if err != nil { - return err - } - } - return scanner.Err() + return f.Include(o.Remote(), o.Size(), modTime, metadata) } // DumpFilters dumps the filters in textual form, 1 per line @@ -537,6 +397,12 @@ func (f *Filter) DumpFilters() string { for _, dirRule := range f.dirRules.rules { rules = append(rules, dirRule.String()) } + if f.metaRules.len() > 0 { + rules = append(rules, "--- Metadata filter rules ---") + for _, metaRule := range f.metaRules.rules { + rules = append(rules, metaRule.String()) + } + } return strings.Join(rules, "\n") } diff --git a/fs/filter/filter_test.go b/fs/filter/filter_test.go index aada638fc..4f17d63fb 100644 --- a/fs/filter/filter_test.go +++ b/fs/filter/filter_test.go @@ -208,7 +208,7 @@ type includeTest struct { func testInclude(t *testing.T, f *Filter, tests []includeTest) { for _, test := range tests { - got := f.Include(test.in, test.size, time.Unix(test.modTime, 0)) + got := f.Include(test.in, test.size, time.Unix(test.modTime, 0), nil) assert.Equal(t, test.want, got, fmt.Sprintf("in=%q, size=%v, modTime=%v", test.in, test.size, time.Unix(test.modTime, 0))) } } @@ -714,7 +714,7 @@ func TestFilterMatchesFromDocs(t *testing.T) { require.NoError(t, err) err = f.Add(false, "*") require.NoError(t, err) - included := f.Include(test.file, 0, time.Unix(0, 0)) + included := f.Include(test.file, 0, time.Unix(0, 0), nil) if included != test.included { t.Errorf("%q match %q: want %v got %v", test.glob, test.file, 
test.included, included) } diff --git a/fs/filter/filterflags/filterflags.go b/fs/filter/filterflags/filterflags.go index 3963e0c04..5bb17b8a3 100644 --- a/fs/filter/filterflags/filterflags.go +++ b/fs/filter/filterflags/filterflags.go @@ -3,6 +3,7 @@ package filterflags import ( "context" + "fmt" "github.com/rclone/rclone/fs/config/flags" "github.com/rclone/rclone/fs/filter" @@ -26,17 +27,27 @@ func Reload(ctx context.Context) (err error) { return nil } +// AddRuleFlags add a set of rules flags with prefix +func AddRuleFlags(flagSet *pflag.FlagSet, Opt *filter.RulesOpt, what, prefix string) { + shortFilter := "" + if prefix == "" { + shortFilter = "f" + } + flags.StringArrayVarP(flagSet, &Opt.FilterRule, prefix+"filter", shortFilter, nil, fmt.Sprintf("Add a %s filtering rule", what)) + flags.StringArrayVarP(flagSet, &Opt.FilterFrom, prefix+"filter-from", "", nil, fmt.Sprintf("Read %s filtering patterns from a file (use - to read from stdin)", what)) + flags.StringArrayVarP(flagSet, &Opt.ExcludeRule, prefix+"exclude", "", nil, fmt.Sprintf("Exclude %ss matching pattern", what)) + flags.StringArrayVarP(flagSet, &Opt.ExcludeFrom, prefix+"exclude-from", "", nil, fmt.Sprintf("Read %s exclude patterns from file (use - to read from stdin)", what)) + flags.StringArrayVarP(flagSet, &Opt.IncludeRule, prefix+"include", "", nil, fmt.Sprintf("Include %ss matching pattern", what)) + flags.StringArrayVarP(flagSet, &Opt.IncludeFrom, prefix+"include-from", "", nil, fmt.Sprintf("Read %s include patterns from file (use - to read from stdin)", what)) +} + // AddFlags adds the non filing system specific flags to the command func AddFlags(flagSet *pflag.FlagSet) { rc.AddOptionReload("filter", &Opt, Reload) flags.BoolVarP(flagSet, &Opt.DeleteExcluded, "delete-excluded", "", false, "Delete files on dest excluded from sync") - flags.StringArrayVarP(flagSet, &Opt.FilterRule, "filter", "f", nil, "Add a file-filtering rule") - flags.StringArrayVarP(flagSet, &Opt.FilterFrom, "filter-from", "", 
nil, "Read filtering patterns from a file (use - to read from stdin)") - flags.StringArrayVarP(flagSet, &Opt.ExcludeRule, "exclude", "", nil, "Exclude files matching pattern") - flags.StringArrayVarP(flagSet, &Opt.ExcludeFrom, "exclude-from", "", nil, "Read exclude patterns from file (use - to read from stdin)") + AddRuleFlags(flagSet, &Opt.RulesOpt, "file", "") + AddRuleFlags(flagSet, &Opt.MetaRules, "metadata", "metadata-") flags.StringArrayVarP(flagSet, &Opt.ExcludeFile, "exclude-if-present", "", nil, "Exclude directories if filename is present") - flags.StringArrayVarP(flagSet, &Opt.IncludeRule, "include", "", nil, "Include files matching pattern") - flags.StringArrayVarP(flagSet, &Opt.IncludeFrom, "include-from", "", nil, "Read include patterns from file (use - to read from stdin)") flags.StringArrayVarP(flagSet, &Opt.FilesFrom, "files-from", "", nil, "Read list of source-file names from file (use - to read from stdin)") flags.StringArrayVarP(flagSet, &Opt.FilesFromRaw, "files-from-raw", "", nil, "Read list of source-file names from file without any processing of lines (use - to read from stdin)") flags.FVarP(flagSet, &Opt.MinAge, "min-age", "", "Only transfer files older than this in s or suffix ms|s|m|h|d|w|M|y") diff --git a/fs/filter/rules.go b/fs/filter/rules.go new file mode 100644 index 000000000..010f26522 --- /dev/null +++ b/fs/filter/rules.go @@ -0,0 +1,253 @@ +package filter + +import ( + "bufio" + "fmt" + "os" + "regexp" + "strings" + + "github.com/rclone/rclone/fs" +) + +// RulesOpt is configuration for a rule set +type RulesOpt struct { + FilterRule []string + FilterFrom []string + ExcludeRule []string + ExcludeFrom []string + IncludeRule []string + IncludeFrom []string +} + +// rule is one filter rule +type rule struct { + Include bool + Regexp *regexp.Regexp +} + +// Match returns true if rule matches path +func (r *rule) Match(path string) bool { + return r.Regexp.MatchString(path) +} + +// String the rule +func (r *rule) String() string { + c 
:= "-" + if r.Include { + c = "+" + } + return fmt.Sprintf("%s %s", c, r.Regexp.String()) +} + +// rules is a slice of rules +type rules struct { + rules []rule + existing map[string]struct{} +} + +type addFn func(Include bool, glob string) error + +// add adds a rule if it doesn't exist already +func (rs *rules) add(Include bool, re *regexp.Regexp) { + if rs.existing == nil { + rs.existing = make(map[string]struct{}) + } + newRule := rule{ + Include: Include, + Regexp: re, + } + newRuleString := newRule.String() + if _, ok := rs.existing[newRuleString]; ok { + return // rule already exists + } + rs.rules = append(rs.rules, newRule) + rs.existing[newRuleString] = struct{}{} +} + +// Add adds a filter rule with include or exclude status indicated +func (rs *rules) Add(Include bool, glob string) error { + re, err := GlobToRegexp(glob, false /* FIXME(review): hard-coded rather than f.Opt.IgnoreCase; presumably always case-sensitive, confirm */) + if err != nil { + return err + } + rs.add(Include, re) + return nil +} + +type clearFn func() + +// clear clears all the rules +func (rs *rules) clear() { + rs.rules = nil + rs.existing = nil +} + +// len returns the number of rules +func (rs *rules) len() int { + return len(rs.rules) +} + +// include returns whether this remote passes the filter rules. +func (rs *rules) include(remote string) bool { + for _, rule := range rs.rules { + if rule.Match(remote) { + return rule.Include + } + } + return true +} + +// includeMany returns whether this collection of strings passes +// the filter rules. +// +// Rules are tried in order: each rule is tested against every string +// and the first rule matching any of them decides the result. If no +// rule matches, the collection is included.
+func (rs *rules) includeMany(remotes []string) bool { + for _, rule := range rs.rules { + for _, remote := range remotes { + if rule.Match(remote) { + return rule.Include + } + } + } + return true +} + +// forEachLine calls fn on every line in the file pointed to by path +// +// It ignores empty lines and lines starting with '#' or ';' if raw is false +func forEachLine(path string, raw bool, fn func(string) error) (err error) { + var scanner *bufio.Scanner + if path == "-" { + scanner = bufio.NewScanner(os.Stdin) + } else { + in, err := os.Open(path) + if err != nil { + return err + } + scanner = bufio.NewScanner(in) + defer fs.CheckClose(in, &err) /* NOTE(review): this err is the variable declared by := in this else block, not the named result */ + } + for scanner.Scan() { + line := scanner.Text() + if !raw { + line = strings.TrimSpace(line) + if len(line) == 0 || line[0] == '#' || line[0] == ';' { + continue + } + } + err := fn(line) + if err != nil { + return err + } + } + return scanner.Err() +} + +// addRule adds a filter rule with include/exclude indicated by the prefix +// +// These are +// +// + glob +// - glob +// ! +// +// '+' includes the glob, '-' excludes it and '!' resets the filter list +// +// Line comments may be introduced with '#' or ';' +func addRule(rule string, add addFn, clear clearFn) error { + switch { + case rule == "!": + clear() + return nil + case strings.HasPrefix(rule, "- "): + return add(false, rule[2:]) + case strings.HasPrefix(rule, "+ "): + return add(true, rule[2:]) + } + return fmt.Errorf("malformed rule %q", rule) +} + +// AddRule adds a filter rule with include/exclude indicated by the prefix +// +// These are +// +// + glob +// - glob +// ! +// +// '+' includes the glob, '-' excludes it and '!'
resets the filter list +// +// Line comments may be introduced with '#' or ';' +func (rs *rules) AddRule(rule string) error { + return addRule(rule, rs.Add, rs.clear) +} + +// parseRules feeds the rules in opt to add, calling clear on a "!" filter rule; if any include rule was given it finishes by excluding "/**" +func parseRules(opt *RulesOpt, add addFn, clear clearFn) (err error) { + addImplicitExclude := false + foundExcludeRule := false + + for _, rule := range opt.IncludeRule { + err = add(true, rule) + if err != nil { + return err + } + addImplicitExclude = true + } + for _, rule := range opt.IncludeFrom { + err := forEachLine(rule, false, func(line string) error { + return add(true, line) + }) + if err != nil { + return err + } + addImplicitExclude = true + } + for _, rule := range opt.ExcludeRule { + err = add(false, rule) + if err != nil { + return err + } + foundExcludeRule = true + } + for _, rule := range opt.ExcludeFrom { + err := forEachLine(rule, false, func(line string) error { + return add(false, line) + }) + if err != nil { + return err + } + foundExcludeRule = true + } + + if addImplicitExclude && foundExcludeRule { + fs.Errorf(nil, "Using --filter is recommended instead of both --include and --exclude as the order they are parsed in is indeterminate") + } + + for _, rule := range opt.FilterRule { + err = addRule(rule, add, clear) + if err != nil { + return err + } + } + for _, rule := range opt.FilterFrom { + err := forEachLine(rule, false, func(rule string) error { + return addRule(rule, add, clear) + }) + if err != nil { + return err + } + } + + if addImplicitExclude { + err = add(false, "/**") + if err != nil { + return err + } + } + + return nil +} diff --git a/fs/operations/operations.go b/fs/operations/operations.go index 081ceb7a7..4429ac354 100644 --- a/fs/operations/operations.go +++ b/fs/operations/operations.go @@ -1515,7 +1515,7 @@ func Rmdirs(ctx context.Context, f fs.Fs, dir string, leaveRoot bool) error { dir := toDelete[i] // If a filter matches the directory then that // directory is a candidate for deletion -
if !fi.Include(dir+"/", 0, time.Now()) { + if !fi.IncludeRemote(dir + "/") { continue } err = TryRmdir(ctx, f, dir)