From c520e52696350658de66445cf063c15a7b2e4b1d Mon Sep 17 00:00:00 2001
From: Nick Craig-Wood <nick@craig-wood.com>
Date: Thu, 4 Aug 2022 18:19:05 +0100
Subject: [PATCH] filter: add metadata filters FIXME needs docs and tests

Fixes #6353
---
 fs/filter/filter.go                  | 232 ++++++------------------
 fs/filter/filter_test.go             |   4 +-
 fs/filter/filterflags/filterflags.go |  23 ++-
 fs/filter/rules.go                   | 253 +++++++++++++++++++++++++++
 fs/operations/operations.go          |   2 +-
 5 files changed, 322 insertions(+), 192 deletions(-)
 create mode 100644 fs/filter/rules.go

diff --git a/fs/filter/filter.go b/fs/filter/filter.go
index 741580fff..db7bedcb0 100644
--- a/fs/filter/filter.go
+++ b/fs/filter/filter.go
@@ -2,14 +2,11 @@
 package filter
 
 import (
-	"bufio"
 	"context"
 	"errors"
 	"fmt"
 	"log"
-	"os"
 	"path"
-	"regexp"
 	"strings"
 	"time"
 
@@ -22,80 +19,19 @@ import (
 // This is accessed through GetConfig and AddConfig
 var globalConfig = mustNewFilter(nil)
 
-// rule is one filter rule
-type rule struct {
-	Include bool
-	Regexp  *regexp.Regexp
-}
-
-// Match returns true if rule matches path
-func (r *rule) Match(path string) bool {
-	return r.Regexp.MatchString(path)
-}
-
-// String the rule
-func (r *rule) String() string {
-	c := "-"
-	if r.Include {
-		c = "+"
-	}
-	return fmt.Sprintf("%s %s", c, r.Regexp.String())
-}
-
-// rules is a slice of rules
-type rules struct {
-	rules    []rule
-	existing map[string]struct{}
-}
-
-// add adds a rule if it doesn't exist already
-func (rs *rules) add(Include bool, re *regexp.Regexp) {
-	if rs.existing == nil {
-		rs.existing = make(map[string]struct{})
-	}
-	newRule := rule{
-		Include: Include,
-		Regexp:  re,
-	}
-	newRuleString := newRule.String()
-	if _, ok := rs.existing[newRuleString]; ok {
-		return // rule already exists
-	}
-	rs.rules = append(rs.rules, newRule)
-	rs.existing[newRuleString] = struct{}{}
-}
-
-// clear clears all the rules
-func (rs *rules) clear() {
-	rs.rules = nil
-	rs.existing = nil
-}
-
-// len returns the number of rules
-func (rs *rules) len() int {
-	return len(rs.rules)
-}
-
-// FilesMap describes the map of files to transfer
-type FilesMap map[string]struct{}
-
 // Opt configures the filter
 type Opt struct {
 	DeleteExcluded bool
-	FilterRule     []string
-	FilterFrom     []string
-	ExcludeRule    []string
-	ExcludeFrom    []string
-	ExcludeFile    []string
-	IncludeRule    []string
-	IncludeFrom    []string
-	FilesFrom      []string
-	FilesFromRaw   []string
-	MinAge         fs.Duration
-	MaxAge         fs.Duration
-	MinSize        fs.SizeSuffix
-	MaxSize        fs.SizeSuffix
-	IgnoreCase     bool
+	RulesOpt
+	ExcludeFile  []string
+	MetaRules    RulesOpt
+	FilesFrom    []string
+	FilesFromRaw []string
+	MinAge       fs.Duration
+	MaxAge       fs.Duration
+	MinSize      fs.SizeSuffix
+	MaxSize      fs.SizeSuffix
+	IgnoreCase   bool
 }
 
 // DefaultOpt is the default config for the filter
@@ -106,6 +42,9 @@ var DefaultOpt = Opt{
 	MaxSize: fs.SizeSuffix(-1),
 }
 
+// FilesMap describes the map of files to transfer
+type FilesMap map[string]struct{}
+
 // Filter describes any filtering in operation
 type Filter struct {
 	Opt         Opt
@@ -113,6 +52,7 @@ type Filter struct {
 	ModTimeTo   time.Time
 	fileRules   rules
 	dirRules    rules
+	metaRules   rules
 	files       FilesMap // files if filesFrom
 	dirs        FilesMap // dirs from filesFrom
 }
@@ -142,57 +82,14 @@ func NewFilter(opt *Opt) (f *Filter, err error) {
 		fs.Debugf(nil, "--max-age %v to %v", f.Opt.MaxAge, f.ModTimeFrom)
 	}
 
-	addImplicitExclude := false
-	foundExcludeRule := false
-
-	for _, rule := range f.Opt.IncludeRule {
-		err = f.Add(true, rule)
-		if err != nil {
-			return nil, err
-		}
-		addImplicitExclude = true
-	}
-	for _, rule := range f.Opt.IncludeFrom {
-		err := forEachLine(rule, false, func(line string) error {
-			return f.Add(true, line)
-		})
-		if err != nil {
-			return nil, err
-		}
-		addImplicitExclude = true
-	}
-	for _, rule := range f.Opt.ExcludeRule {
-		err = f.Add(false, rule)
-		if err != nil {
-			return nil, err
-		}
-		foundExcludeRule = true
-	}
-	for _, rule := range f.Opt.ExcludeFrom {
-		err := forEachLine(rule, false, func(line string) error {
-			return f.Add(false, line)
-		})
-		if err != nil {
-			return nil, err
-		}
-		foundExcludeRule = true
+	err = parseRules(&f.Opt.RulesOpt, f.Add, f.Clear)
+	if err != nil {
+		return nil, err
 	}
 
-	if addImplicitExclude && foundExcludeRule {
-		fs.Errorf(nil, "Using --filter is recommended instead of both --include and --exclude as the order they are parsed in is indeterminate")
-	}
-
-	for _, rule := range f.Opt.FilterRule {
-		err = f.AddRule(rule)
-		if err != nil {
-			return nil, err
-		}
-	}
-	for _, rule := range f.Opt.FilterFrom {
-		err := forEachLine(rule, false, f.AddRule)
-		if err != nil {
-			return nil, err
-		}
+	err = parseRules(&f.Opt.MetaRules, f.metaRules.Add, f.metaRules.clear)
+	if err != nil {
+		return nil, err
 	}
 
 	inActive := f.InActive()
@@ -225,12 +122,6 @@ func NewFilter(opt *Opt) (f *Filter, err error) {
 		}
 	}
 
-	if addImplicitExclude {
-		err = f.Add(false, "/**")
-		if err != nil {
-			return nil, err
-		}
-	}
 	if fs.GetConfig(context.Background()).Dump&fs.DumpFilters != 0 {
 		fmt.Println("--- start filters ---")
 		fmt.Println(f.DumpFilters())
@@ -309,16 +200,7 @@ func (f *Filter) Add(Include bool, glob string) error {
 //
 // Line comments may be introduced with '#' or ';'
 func (f *Filter) AddRule(rule string) error {
-	switch {
-	case rule == "!":
-		f.Clear()
-		return nil
-	case strings.HasPrefix(rule, "- "):
-		return f.Add(false, rule[2:])
-	case strings.HasPrefix(rule, "+ "):
-		return f.Add(true, rule[2:])
-	}
-	return fmt.Errorf("malformed rule %q", rule)
+	return addRule(rule, f.Add, f.Clear)
 }
 
 // initAddFile creates f.files and f.dirs
@@ -359,6 +241,7 @@ func (f *Filter) Files() FilesMap {
 func (f *Filter) Clear() {
 	f.fileRules.clear()
 	f.dirRules.clear()
+	f.metaRules.clear()
 }
 
 // InActive returns false if any filters are active
@@ -370,17 +253,13 @@ func (f *Filter) InActive() bool {
 		f.Opt.MaxSize < 0 &&
 		f.fileRules.len() == 0 &&
 		f.dirRules.len() == 0 &&
+		f.metaRules.len() == 0 &&
 		len(f.Opt.ExcludeFile) == 0)
 }
 
 // IncludeRemote returns whether this remote passes the filter rules.
 func (f *Filter) IncludeRemote(remote string) bool {
-	for _, rule := range f.fileRules.rules {
-		if rule.Match(remote) {
-			return rule.Include
-		}
-	}
-	return true
+	return f.fileRules.include(remote)
 }
 
 // ListContainsExcludeFile checks if exclude file is present in the list.
@@ -423,13 +302,7 @@ func (f *Filter) IncludeDirectory(ctx context.Context, fs fs.Fs) func(string) (b
 			return include, nil
 		}
 		remote += "/"
-		for _, rule := range f.dirRules.rules {
-			if rule.Match(remote) {
-				return rule.Include, nil
-			}
-		}
-
-		return true, nil
+		return f.dirRules.include(remote), nil
 	}
 }
 
@@ -453,7 +326,7 @@ func (f *Filter) DirContainsExcludeFile(ctx context.Context, fremote fs.Fs, remo
 
 // Include returns whether this object should be included into the
 // sync or not
-func (f *Filter) Include(remote string, size int64, modTime time.Time) bool {
+func (f *Filter) Include(remote string, size int64, modTime time.Time, metadata fs.Metadata) bool {
 	// filesFrom takes precedence
 	if f.files != nil {
 		_, include := f.files[remote]
@@ -471,6 +344,18 @@ func (f *Filter) Include(remote string, size int64, modTime time.Time) bool {
 	if f.Opt.MaxSize >= 0 && size > int64(f.Opt.MaxSize) {
 		return false
 	}
+	if f.metaRules.len() > 0 {
+		metadatas := make([]string, 0, len(metadata)+1)
+		for key, value := range metadata {
+			metadatas = append(metadatas, fmt.Sprintf("%s=%s", key, value))
+		}
+		if len(metadata) == 0 {
+			// Test a null entry when there is no metadata, otherwise
+			// the rules are skipped and the object is always included
+			metadatas = append(metadatas, "\x00=\x00")
+		}
+		if !f.metaRules.includeMany(metadatas) {
+			return false
+		}
+	}
 	return f.IncludeRemote(remote)
 }
 
@@ -485,39 +367,17 @@ func (f *Filter) IncludeObject(ctx context.Context, o fs.Object) bool {
 	} else {
 		modTime = time.Unix(0, 0)
 	}
-
-	return f.Include(o.Remote(), o.Size(), modTime)
-}
-
-// forEachLine calls fn on every line in the file pointed to by path
-//
-// It ignores empty lines and lines starting with '#' or ';' if raw is false
-func forEachLine(path string, raw bool, fn func(string) error) (err error) {
-	var scanner *bufio.Scanner
-	if path == "-" {
-		scanner = bufio.NewScanner(os.Stdin)
-	} else {
-		in, err := os.Open(path)
+	var metadata fs.Metadata
+	if f.metaRules.len() > 0 {
+		var err error
+		metadata, err = fs.GetMetadata(ctx, o)
 		if err != nil {
-			return err
+			fs.Errorf(o, "Failed to read metadata: %v", err)
+			metadata = nil
 		}
-		scanner = bufio.NewScanner(in)
-		defer fs.CheckClose(in, &err)
+
 	}
-	for scanner.Scan() {
-		line := scanner.Text()
-		if !raw {
-			line = strings.TrimSpace(line)
-			if len(line) == 0 || line[0] == '#' || line[0] == ';' {
-				continue
-			}
-		}
-		err := fn(line)
-		if err != nil {
-			return err
-		}
-	}
-	return scanner.Err()
+	return f.Include(o.Remote(), o.Size(), modTime, metadata)
 }
 
 // DumpFilters dumps the filters in textual form, 1 per line
@@ -537,6 +397,12 @@ func (f *Filter) DumpFilters() string {
 	for _, dirRule := range f.dirRules.rules {
 		rules = append(rules, dirRule.String())
 	}
+	if f.metaRules.len() > 0 {
+		rules = append(rules, "--- Metadata filter rules ---")
+		for _, metaRule := range f.metaRules.rules {
+			rules = append(rules, metaRule.String())
+		}
+	}
 	return strings.Join(rules, "\n")
 }
 
diff --git a/fs/filter/filter_test.go b/fs/filter/filter_test.go
index aada638fc..4f17d63fb 100644
--- a/fs/filter/filter_test.go
+++ b/fs/filter/filter_test.go
@@ -208,7 +208,7 @@ type includeTest struct {
 
 func testInclude(t *testing.T, f *Filter, tests []includeTest) {
 	for _, test := range tests {
-		got := f.Include(test.in, test.size, time.Unix(test.modTime, 0))
+		got := f.Include(test.in, test.size, time.Unix(test.modTime, 0), nil)
 		assert.Equal(t, test.want, got, fmt.Sprintf("in=%q, size=%v, modTime=%v", test.in, test.size, time.Unix(test.modTime, 0)))
 	}
 }
@@ -714,7 +714,7 @@ func TestFilterMatchesFromDocs(t *testing.T) {
 		require.NoError(t, err)
 		err = f.Add(false, "*")
 		require.NoError(t, err)
-		included := f.Include(test.file, 0, time.Unix(0, 0))
+		included := f.Include(test.file, 0, time.Unix(0, 0), nil)
 		if included != test.included {
 			t.Errorf("%q match %q: want %v got %v", test.glob, test.file, test.included, included)
 		}
diff --git a/fs/filter/filterflags/filterflags.go b/fs/filter/filterflags/filterflags.go
index 3963e0c04..5bb17b8a3 100644
--- a/fs/filter/filterflags/filterflags.go
+++ b/fs/filter/filterflags/filterflags.go
@@ -3,6 +3,7 @@ package filterflags
 
 import (
 	"context"
+	"fmt"
 
 	"github.com/rclone/rclone/fs/config/flags"
 	"github.com/rclone/rclone/fs/filter"
@@ -26,17 +27,27 @@ func Reload(ctx context.Context) (err error) {
 	return nil
 }
 
+// AddRuleFlags adds a set of rule flags named with prefix and described in the help using what
+func AddRuleFlags(flagSet *pflag.FlagSet, Opt *filter.RulesOpt, what, prefix string) {
+	shortFilter := ""
+	if prefix == "" {
+		shortFilter = "f"
+	}
+	flags.StringArrayVarP(flagSet, &Opt.FilterRule, prefix+"filter", shortFilter, nil, fmt.Sprintf("Add a %s filtering rule", what))
+	flags.StringArrayVarP(flagSet, &Opt.FilterFrom, prefix+"filter-from", "", nil, fmt.Sprintf("Read %s filtering patterns from a file (use - to read from stdin)", what))
+	flags.StringArrayVarP(flagSet, &Opt.ExcludeRule, prefix+"exclude", "", nil, fmt.Sprintf("Exclude %ss matching pattern", what))
+	flags.StringArrayVarP(flagSet, &Opt.ExcludeFrom, prefix+"exclude-from", "", nil, fmt.Sprintf("Read %s exclude patterns from file (use - to read from stdin)", what))
+	flags.StringArrayVarP(flagSet, &Opt.IncludeRule, prefix+"include", "", nil, fmt.Sprintf("Include %ss matching pattern", what))
+	flags.StringArrayVarP(flagSet, &Opt.IncludeFrom, prefix+"include-from", "", nil, fmt.Sprintf("Read %s include patterns from file (use - to read from stdin)", what))
+}
+
 // AddFlags adds the non filing system specific flags to the command
 func AddFlags(flagSet *pflag.FlagSet) {
 	rc.AddOptionReload("filter", &Opt, Reload)
 	flags.BoolVarP(flagSet, &Opt.DeleteExcluded, "delete-excluded", "", false, "Delete files on dest excluded from sync")
-	flags.StringArrayVarP(flagSet, &Opt.FilterRule, "filter", "f", nil, "Add a file-filtering rule")
-	flags.StringArrayVarP(flagSet, &Opt.FilterFrom, "filter-from", "", nil, "Read filtering patterns from a file (use - to read from stdin)")
-	flags.StringArrayVarP(flagSet, &Opt.ExcludeRule, "exclude", "", nil, "Exclude files matching pattern")
-	flags.StringArrayVarP(flagSet, &Opt.ExcludeFrom, "exclude-from", "", nil, "Read exclude patterns from file (use - to read from stdin)")
+	AddRuleFlags(flagSet, &Opt.RulesOpt, "file", "")
+	AddRuleFlags(flagSet, &Opt.MetaRules, "metadata", "metadata-")
 	flags.StringArrayVarP(flagSet, &Opt.ExcludeFile, "exclude-if-present", "", nil, "Exclude directories if filename is present")
-	flags.StringArrayVarP(flagSet, &Opt.IncludeRule, "include", "", nil, "Include files matching pattern")
-	flags.StringArrayVarP(flagSet, &Opt.IncludeFrom, "include-from", "", nil, "Read include patterns from file (use - to read from stdin)")
 	flags.StringArrayVarP(flagSet, &Opt.FilesFrom, "files-from", "", nil, "Read list of source-file names from file (use - to read from stdin)")
 	flags.StringArrayVarP(flagSet, &Opt.FilesFromRaw, "files-from-raw", "", nil, "Read list of source-file names from file without any processing of lines (use - to read from stdin)")
 	flags.FVarP(flagSet, &Opt.MinAge, "min-age", "", "Only transfer files older than this in s or suffix ms|s|m|h|d|w|M|y")
diff --git a/fs/filter/rules.go b/fs/filter/rules.go
new file mode 100644
index 000000000..010f26522
--- /dev/null
+++ b/fs/filter/rules.go
@@ -0,0 +1,253 @@
+package filter
+
+import (
+	"bufio"
+	"fmt"
+	"os"
+	"regexp"
+	"strings"
+
+	"github.com/rclone/rclone/fs"
+)
+
+// RulesOpt is configuration for a rule set
+type RulesOpt struct {
+	FilterRule  []string
+	FilterFrom  []string
+	ExcludeRule []string
+	ExcludeFrom []string
+	IncludeRule []string
+	IncludeFrom []string
+}
+
+// rule is one filter rule
+type rule struct {
+	Include bool
+	Regexp  *regexp.Regexp
+}
+
+// Match returns true if rule matches path
+func (r *rule) Match(path string) bool {
+	return r.Regexp.MatchString(path)
+}
+
+// String the rule
+func (r *rule) String() string {
+	c := "-"
+	if r.Include {
+		c = "+"
+	}
+	return fmt.Sprintf("%s %s", c, r.Regexp.String())
+}
+
+// rules is a slice of rules
+type rules struct {
+	rules    []rule
+	existing map[string]struct{}
+}
+
+type addFn func(Include bool, glob string) error
+
+// add adds a rule if it doesn't exist already
+func (rs *rules) add(Include bool, re *regexp.Regexp) {
+	if rs.existing == nil {
+		rs.existing = make(map[string]struct{})
+	}
+	newRule := rule{
+		Include: Include,
+		Regexp:  re,
+	}
+	newRuleString := newRule.String()
+	if _, ok := rs.existing[newRuleString]; ok {
+		return // rule already exists
+	}
+	rs.rules = append(rs.rules, newRule)
+	rs.existing[newRuleString] = struct{}{}
+}
+
+// Add adds a filter rule with include or exclude status indicated
+func (rs *rules) Add(Include bool, glob string) error {
+	re, err := GlobToRegexp(glob, false /* f.Opt.IgnoreCase */)
+	if err != nil {
+		return err
+	}
+	rs.add(Include, re)
+	return nil
+}
+
+type clearFn func()
+
+// clear clears all the rules
+func (rs *rules) clear() {
+	rs.rules = nil
+	rs.existing = nil
+}
+
+// len returns the number of rules
+func (rs *rules) len() int {
+	return len(rs.rules)
+}
+
+// include returns whether this remote passes the filter rules.
+func (rs *rules) include(remote string) bool {
+	for _, rule := range rs.rules {
+		if rule.Match(remote) {
+			return rule.Include
+		}
+	}
+	return true
+}
+
+// includeMany returns whether this collection of strings passes the
+// filter rules.
+//
+// The first rule is evaluated against all the remotes and if it matches
+// any of them its Include value is returned. If not the next rule is
+// tested and so on. If no rule matches at all, true is returned.
+func (rs *rules) includeMany(remotes []string) bool {
+	for _, rule := range rs.rules {
+		for _, remote := range remotes {
+			if rule.Match(remote) {
+				return rule.Include
+			}
+		}
+	}
+	return true
+}
+
+// forEachLine calls fn on every line in the file pointed to by path, or
+// on stdin if path is "-". It stops at the first error returned by fn.
+// It ignores empty lines and lines starting with '#' or ';' if raw is false
+func forEachLine(path string, raw bool, fn func(string) error) (err error) {
+	var scanner *bufio.Scanner
+	if path == "-" {
+		scanner = bufio.NewScanner(os.Stdin)
+	} else {
+		in, openErr := os.Open(path) // not named err, which would shadow the result used below
+		if openErr != nil {
+			return openErr
+		}
+		scanner = bufio.NewScanner(in)
+		defer fs.CheckClose(in, &err) // &err is the named result, not a block-local copy
+	}
+	for scanner.Scan() {
+		line := scanner.Text()
+		if !raw {
+			line = strings.TrimSpace(line)
+			if len(line) == 0 || line[0] == '#' || line[0] == ';' {
+				continue
+			}
+		}
+		err := fn(line)
+		if err != nil {
+			return err
+		}
+	}
+	return scanner.Err()
+}
+
+// addRule adds a filter rule with include/exclude indicated by the prefix
+//
+// These are
+//
+//   + glob
+//   - glob
+//   !
+//
+// '+' passes the glob to add as an include, '-' passes it as an exclude
+// and '!' calls clear. Any other rule returns a "malformed rule" error,
+// including '#' or ';' comment lines, which are not stripped here.
+func addRule(rule string, add addFn, clear clearFn) error {
+	switch {
+	case rule == "!":
+		clear()
+		return nil
+	case strings.HasPrefix(rule, "- "):
+		return add(false, rule[2:])
+	case strings.HasPrefix(rule, "+ "):
+		return add(true, rule[2:])
+	}
+	return fmt.Errorf("malformed rule %q", rule)
+}
+
+// AddRule adds a filter rule with include/exclude indicated by the prefix
+//
+// These are
+//
+//   + glob
+//   - glob
+//   !
+//
+// '+' includes the glob, '-' excludes it and '!' resets the filter list
+//
+// Any other rule, including a '#' or ';' comment, is a "malformed rule" error
+func (rs *rules) AddRule(rule string) error {
+	return addRule(rule, rs.Add, rs.clear)
+}
+
+// parseRules passes the rules in opt to add and clear: includes, then excludes, then filters, then an implicit exclude if any includes were given
+func parseRules(opt *RulesOpt, add addFn, clear clearFn) (err error) {
+	addImplicitExclude := false
+	foundExcludeRule := false
+
+	for _, rule := range opt.IncludeRule {
+		err = add(true, rule)
+		if err != nil {
+			return err
+		}
+		addImplicitExclude = true
+	}
+	for _, rule := range opt.IncludeFrom {
+		err := forEachLine(rule, false, func(line string) error {
+			return add(true, line)
+		})
+		if err != nil {
+			return err
+		}
+		addImplicitExclude = true
+	}
+	for _, rule := range opt.ExcludeRule {
+		err = add(false, rule)
+		if err != nil {
+			return err
+		}
+		foundExcludeRule = true
+	}
+	for _, rule := range opt.ExcludeFrom {
+		err := forEachLine(rule, false, func(line string) error {
+			return add(false, line)
+		})
+		if err != nil {
+			return err
+		}
+		foundExcludeRule = true
+	}
+
+	if addImplicitExclude && foundExcludeRule {
+		fs.Errorf(nil, "Using --filter is recommended instead of both --include and --exclude as the order they are parsed in is indeterminate")
+	}
+
+	for _, rule := range opt.FilterRule {
+		err = addRule(rule, add, clear)
+		if err != nil {
+			return err
+		}
+	}
+	for _, rule := range opt.FilterFrom {
+		err := forEachLine(rule, false, func(rule string) error {
+			return addRule(rule, add, clear)
+		})
+		if err != nil {
+			return err
+		}
+	}
+
+	if addImplicitExclude {
+		err = add(false, "/**")
+		if err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
diff --git a/fs/operations/operations.go b/fs/operations/operations.go
index 081ceb7a7..4429ac354 100644
--- a/fs/operations/operations.go
+++ b/fs/operations/operations.go
@@ -1515,7 +1515,7 @@ func Rmdirs(ctx context.Context, f fs.Fs, dir string, leaveRoot bool) error {
 		dir := toDelete[i]
 		// If a filter matches the directory then that
 		// directory is a candidate for deletion
-		if !fi.Include(dir+"/", 0, time.Now()) {
+		if !fi.IncludeRemote(dir + "/") {
 			continue
 		}
 		err = TryRmdir(ctx, f, dir)