filter: add metadata filters --metadata-include/exclude/filter and friends

Fixes #6353
This commit is contained in:
Nick Craig-Wood 2022-08-04 18:19:05 +01:00
parent 4a31961c4f
commit 3a6f1f5cd7
7 changed files with 173 additions and 13 deletions

View File

@ -2355,6 +2355,12 @@ For the filtering options
* `--min-age` * `--min-age`
* `--max-age` * `--max-age`
* `--dump filters` * `--dump filters`
* `--metadata-include`
* `--metadata-include-from`
* `--metadata-exclude`
* `--metadata-exclude-from`
* `--metadata-filter`
* `--metadata-filter-from`
See the [filtering section](/filtering/). See the [filtering section](/filtering/).

View File

@ -32,7 +32,7 @@ you expect. Instead use a `--filter...` flag.
## Patterns for matching path/file names ## Patterns for matching path/file names
### Pattern syntax ### Pattern syntax {#patterns}
Here is a formal definition of the pattern syntax, Here is a formal definition of the pattern syntax,
[examples](#examples) are below. [examples](#examples) are below.
@ -194,7 +194,7 @@ them into regular expressions.
| Rooted Regexp | `/{{.*\.jpe?g}}` | `/file.jpeg` | `/file.png` | | Rooted Regexp | `/{{.*\.jpe?g}}` | `/file.jpeg` | `/file.png` |
| | | `/file.jpg` | `/dir/file.jpg` | | | | `/file.jpg` | `/dir/file.jpg` |
## How filter rules are applied to files ## How filter rules are applied to files {#how-filter-rules-work}
Rclone path/file name filters are made up of one or more of the following flags: Rclone path/file name filters are made up of one or more of the following flags:
@ -757,6 +757,43 @@ E.g. for the following directory structure:
The command `rclone ls --exclude-if-present .ignore dir1` does The command `rclone ls --exclude-if-present .ignore dir1` does
not list `dir3`, `file3` or `.ignore`. not list `dir3`, `file3` or `.ignore`.
## Metadata filters {#metadata}
The metadata filters work in a very similar way to the normal file
name filters, except they match [metadata](/docs/#metadata) on the
object.
The metadata should be specified as `key=value` patterns. These may be
wildcarded using the normal [filter patterns](#patterns) or [regular
expressions](#regexp).
For example if you wished to list only local files with a mode of
`100664` you could do that with:
rclone lsf -M --files-only --metadata-include "mode=100664" .
Or if you wished to show files with an `atime`, `mtime` or `btime` on a given date:
rclone lsf -M --files-only --metadata-include "[abm]time=2022-12-16*" .
Like file filtering, metadata filtering only applies to files, not to
directories.
The filters can be applied using these flags.
- `--metadata-include` - Include metadata matching pattern
- `--metadata-include-from` - Read metadata include patterns from file (use - to read from stdin)
- `--metadata-exclude` - Exclude metadata matching pattern
- `--metadata-exclude-from` - Read metadata exclude patterns from file (use - to read from stdin)
- `--metadata-filter` - Add a metadata filtering rule
- `--metadata-filter-from` - Read metadata filtering patterns from a file (use - to read from stdin)
Each flag can be repeated. See the section on [how filter rules are
applied](#how-filter-rules-work) for more details - these flags work
in an identical way to the file name filtering flags, but instead of
file name patterns have metadata patterns.
## Common pitfalls ## Common pitfalls
The most frequent filter support issues on The most frequent filter support issues on

View File

@ -66,6 +66,22 @@ It can be triggered when you did a server-side copy.
Reading metadata will also provide custom (non-standard nor reserved) ones. Reading metadata will also provide custom (non-standard nor reserved) ones.
## Filtering auto generated files
The Internet Archive automatically creates metadata files after
upload. These can cause problems when doing an `rclone sync` as rclone
will try, and fail, to delete them. These metadata files are not
changeable, as they are created by the Internet Archive automatically.
These auto-created files can be excluded from the sync using [metadata
filtering](/filtering/#metadata).
rclone sync ... --metadata-exclude "source=metadata" --metadata-exclude "format=Metadata"
This excludes from the sync any files which have the
`source=metadata` or `format=Metadata` metadata, which is added to
Internet Archive auto-created files.
## Configuration ## Configuration
Here is an example of making an internetarchive configuration. Here is an example of making an internetarchive configuration.

View File

@ -26,6 +26,7 @@ type Opt struct {
ExcludeFile []string ExcludeFile []string
FilesFrom []string FilesFrom []string
FilesFromRaw []string FilesFromRaw []string
MetaRules RulesOpt
MinAge fs.Duration MinAge fs.Duration
MaxAge fs.Duration MaxAge fs.Duration
MinSize fs.SizeSuffix MinSize fs.SizeSuffix
@ -51,6 +52,7 @@ type Filter struct {
ModTimeTo time.Time ModTimeTo time.Time
fileRules rules fileRules rules
dirRules rules dirRules rules
metaRules rules
files FilesMap // files if filesFrom files FilesMap // files if filesFrom
dirs FilesMap // dirs from filesFrom dirs FilesMap // dirs from filesFrom
} }
@ -85,6 +87,11 @@ func NewFilter(opt *Opt) (f *Filter, err error) {
return nil, err return nil, err
} }
err = parseRules(&f.Opt.MetaRules, f.metaRules.Add, f.metaRules.clear)
if err != nil {
return nil, err
}
inActive := f.InActive() inActive := f.InActive()
for _, rule := range f.Opt.FilesFrom { for _, rule := range f.Opt.FilesFrom {
@ -234,6 +241,7 @@ func (f *Filter) Files() FilesMap {
func (f *Filter) Clear() { func (f *Filter) Clear() {
f.fileRules.clear() f.fileRules.clear()
f.dirRules.clear() f.dirRules.clear()
f.metaRules.clear()
} }
// InActive returns false if any filters are active // InActive returns false if any filters are active
@ -245,6 +253,7 @@ func (f *Filter) InActive() bool {
f.Opt.MaxSize < 0 && f.Opt.MaxSize < 0 &&
f.fileRules.len() == 0 && f.fileRules.len() == 0 &&
f.dirRules.len() == 0 && f.dirRules.len() == 0 &&
f.metaRules.len() == 0 &&
len(f.Opt.ExcludeFile) == 0) len(f.Opt.ExcludeFile) == 0)
} }
@ -322,7 +331,7 @@ func (f *Filter) DirContainsExcludeFile(ctx context.Context, fremote fs.Fs, remo
// Include returns whether this object should be included into the // Include returns whether this object should be included into the
// sync or not // sync or not
func (f *Filter) Include(remote string, size int64, modTime time.Time) bool { func (f *Filter) Include(remote string, size int64, modTime time.Time, metadata fs.Metadata) bool {
// filesFrom takes precedence // filesFrom takes precedence
if f.files != nil { if f.files != nil {
_, include := f.files[remote] _, include := f.files[remote]
@ -340,6 +349,20 @@ func (f *Filter) Include(remote string, size int64, modTime time.Time) bool {
if f.Opt.MaxSize >= 0 && size > int64(f.Opt.MaxSize) { if f.Opt.MaxSize >= 0 && size > int64(f.Opt.MaxSize) {
return false return false
} }
if f.metaRules.len() > 0 {
metadatas := make([]string, 0, len(metadata)+1)
for key, value := range metadata {
metadatas = append(metadatas, fmt.Sprintf("%s=%s", key, value))
}
if len(metadata) == 0 {
// If there is no metadata, add a null one
// otherwise the default action isn't taken
metadatas = append(metadatas, "\x00=\x00")
}
if !f.metaRules.includeMany(metadatas) {
return false
}
}
return f.IncludeRemote(remote) return f.IncludeRemote(remote)
} }
@ -354,7 +377,17 @@ func (f *Filter) IncludeObject(ctx context.Context, o fs.Object) bool {
} else { } else {
modTime = time.Unix(0, 0) modTime = time.Unix(0, 0)
} }
return f.Include(o.Remote(), o.Size(), modTime) var metadata fs.Metadata
if f.metaRules.len() > 0 {
var err error
metadata, err = fs.GetMetadata(ctx, o)
if err != nil {
fs.Errorf(o, "Failed to read metadata: %v", err)
metadata = nil
}
}
return f.Include(o.Remote(), o.Size(), modTime, metadata)
} }
// DumpFilters dumps the filters in textual form, 1 per line // DumpFilters dumps the filters in textual form, 1 per line
@ -374,6 +407,12 @@ func (f *Filter) DumpFilters() string {
for _, dirRule := range f.dirRules.rules { for _, dirRule := range f.dirRules.rules {
rules = append(rules, dirRule.String()) rules = append(rules, dirRule.String())
} }
if f.metaRules.len() > 0 {
rules = append(rules, "--- Metadata filter rules ---")
for _, metaRule := range f.metaRules.rules {
rules = append(rules, metaRule.String())
}
}
return strings.Join(rules, "\n") return strings.Join(rules, "\n")
} }

View File

@ -23,6 +23,7 @@ func TestNewFilterDefault(t *testing.T) {
assert.Equal(t, fs.SizeSuffix(-1), f.Opt.MaxSize) assert.Equal(t, fs.SizeSuffix(-1), f.Opt.MaxSize)
assert.Len(t, f.fileRules.rules, 0) assert.Len(t, f.fileRules.rules, 0)
assert.Len(t, f.dirRules.rules, 0) assert.Len(t, f.dirRules.rules, 0)
assert.Len(t, f.metaRules.rules, 0)
assert.Nil(t, f.files) assert.Nil(t, f.files)
assert.True(t, f.InActive()) assert.True(t, f.InActive())
} }
@ -207,7 +208,7 @@ type includeTest struct {
func testInclude(t *testing.T, f *Filter, tests []includeTest) { func testInclude(t *testing.T, f *Filter, tests []includeTest) {
for _, test := range tests { for _, test := range tests {
got := f.Include(test.in, test.size, time.Unix(test.modTime, 0)) got := f.Include(test.in, test.size, time.Unix(test.modTime, 0), nil)
assert.Equal(t, test.want, got, fmt.Sprintf("in=%q, size=%v, modTime=%v", test.in, test.size, time.Unix(test.modTime, 0))) assert.Equal(t, test.want, got, fmt.Sprintf("in=%q, size=%v, modTime=%v", test.in, test.size, time.Unix(test.modTime, 0)))
} }
} }
@ -527,6 +528,56 @@ func TestNewFilterMatchesRegexp(t *testing.T) {
assert.False(t, f.InActive()) assert.False(t, f.InActive())
} }
// includeTestMetadata is a single test case for testIncludeMetadata.
type includeTestMetadata struct {
in string // remote name passed to Filter.Include; also shown in the failure message
metadata fs.Metadata // metadata passed to Filter.Include
want bool // expected return value of Filter.Include
}
// testIncludeMetadata feeds every case in tests through f.Include, using a
// size of 0 and the zero time so that only the remote name and the metadata
// vary between cases, and asserts that the result equals the case's want.
func testIncludeMetadata(t *testing.T, f *Filter, tests []includeTestMetadata) {
	var zeroTime time.Time
	for i := range tests {
		tc := tests[i]
		included := f.Include(tc.in, 0, zeroTime, tc.metadata)
		// Identify the failing case by its name and metadata
		msg := fmt.Sprintf("in=%q, metadata=%+v", tc.in, tc.metadata)
		assert.Equal(t, tc.want, included, msg)
	}
}
// TestNewFilterMetadataInclude installs the metadata rules `+ t*=t*` and
// `- *` on a default filter and checks that only objects carrying a matching
// key=value pair are included. Objects with nil or empty metadata are
// expected to be excluded, as is one whose key and value start with an
// upper-case "T".
func TestNewFilterMetadataInclude(t *testing.T) {
	f, err := NewFilter(nil)
	require.NoError(t, err)

	// Rules are added in order: the include rule first, then the catch-all exclude
	for _, rule := range []string{
		`+ t*=t*`,
		`- *`,
	} {
		require.NoError(t, f.metaRules.AddRule(rule))
	}

	cases := []includeTestMetadata{
		{"nil", nil, false},
		{"empty", fs.Metadata{}, false},
		{"ok1", fs.Metadata{"thing": "thang"}, true},
		{"ok2", fs.Metadata{"thing1": "thang1"}, true},
		{"missing", fs.Metadata{"Thing1": "Thang1"}, false},
	}
	testIncludeMetadata(t, f, cases)

	// Having metadata rules must make the filter active
	assert.False(t, f.InActive())
}
// TestNewFilterMetadataExclude installs the metadata rules `- thing=thang`
// and `+ *` on a default filter and checks that only an object carrying
// exactly thing=thang is excluded. Objects with nil or empty metadata, or
// with other metadata, are expected to be included.
func TestNewFilterMetadataExclude(t *testing.T) {
	f, err := NewFilter(nil)
	require.NoError(t, err)

	// Rules are added in order: the exclude rule first, then the catch-all include
	for _, rule := range []string{
		`- thing=thang`,
		`+ *`,
	} {
		require.NoError(t, f.metaRules.AddRule(rule))
	}

	cases := []includeTestMetadata{
		{"nil", nil, true},
		{"empty", fs.Metadata{}, true},
		{"ok1", fs.Metadata{"thing": "thang"}, false},
		{"missing1", fs.Metadata{"thing1": "thang1"}, true},
	}
	testIncludeMetadata(t, f, cases)

	// Having metadata rules must make the filter active
	assert.False(t, f.InActive())
}
func TestFilterAddDirRuleOrFileRule(t *testing.T) { func TestFilterAddDirRuleOrFileRule(t *testing.T) {
for _, test := range []struct { for _, test := range []struct {
included bool included bool
@ -713,7 +764,7 @@ func TestFilterMatchesFromDocs(t *testing.T) {
require.NoError(t, err) require.NoError(t, err)
err = f.Add(false, "*") err = f.Add(false, "*")
require.NoError(t, err) require.NoError(t, err)
included := f.Include(test.file, 0, time.Unix(0, 0)) included := f.Include(test.file, 0, time.Unix(0, 0), nil)
if included != test.included { if included != test.included {
t.Errorf("%q match %q: want %v got %v", test.glob, test.file, test.included, included) t.Errorf("%q match %q: want %v got %v", test.glob, test.file, test.included, included)
} }

View File

@ -3,6 +3,7 @@ package filterflags
import ( import (
"context" "context"
"fmt"
"github.com/rclone/rclone/fs/config/flags" "github.com/rclone/rclone/fs/config/flags"
"github.com/rclone/rclone/fs/filter" "github.com/rclone/rclone/fs/filter"
@ -26,17 +27,27 @@ func Reload(ctx context.Context) (err error) {
return nil return nil
} }
// AddRuleFlags registers the six rule flags (filter, filter-from, exclude,
// exclude-from, include and include-from) on flagSet.
//
// Each flag is named prefix+name and stores its values in the matching
// field of Opt. what is substituted into the help text of every flag. The
// "f" shorthand is attached to the filter flag only when prefix is empty.
func AddRuleFlags(flagSet *pflag.FlagSet, Opt *filter.RulesOpt, what, prefix string) {
	filterShorthand := ""
	if prefix == "" {
		filterShorthand = "f"
	}
	// Table of flags, listed in the order they are registered
	defs := []struct {
		target    *[]string
		name      string
		shorthand string
		usage     string
	}{
		{&Opt.FilterRule, "filter", filterShorthand, fmt.Sprintf("Add a %s filtering rule", what)},
		{&Opt.FilterFrom, "filter-from", "", fmt.Sprintf("Read %s filtering patterns from a file (use - to read from stdin)", what)},
		{&Opt.ExcludeRule, "exclude", "", fmt.Sprintf("Exclude %ss matching pattern", what)},
		{&Opt.ExcludeFrom, "exclude-from", "", fmt.Sprintf("Read %s exclude patterns from file (use - to read from stdin)", what)},
		{&Opt.IncludeRule, "include", "", fmt.Sprintf("Include %ss matching pattern", what)},
		{&Opt.IncludeFrom, "include-from", "", fmt.Sprintf("Read %s include patterns from file (use - to read from stdin)", what)},
	}
	for _, def := range defs {
		flags.StringArrayVarP(flagSet, def.target, prefix+def.name, def.shorthand, nil, def.usage)
	}
}
// AddFlags adds the non filing system specific flags to the command // AddFlags adds the non filing system specific flags to the command
func AddFlags(flagSet *pflag.FlagSet) { func AddFlags(flagSet *pflag.FlagSet) {
rc.AddOptionReload("filter", &Opt, Reload) rc.AddOptionReload("filter", &Opt, Reload)
flags.BoolVarP(flagSet, &Opt.DeleteExcluded, "delete-excluded", "", false, "Delete files on dest excluded from sync") flags.BoolVarP(flagSet, &Opt.DeleteExcluded, "delete-excluded", "", false, "Delete files on dest excluded from sync")
flags.StringArrayVarP(flagSet, &Opt.FilterRule, "filter", "f", nil, "Add a file-filtering rule") AddRuleFlags(flagSet, &Opt.RulesOpt, "file", "")
flags.StringArrayVarP(flagSet, &Opt.FilterFrom, "filter-from", "", nil, "Read filtering patterns from a file (use - to read from stdin)") AddRuleFlags(flagSet, &Opt.MetaRules, "metadata", "metadata-")
flags.StringArrayVarP(flagSet, &Opt.ExcludeRule, "exclude", "", nil, "Exclude files matching pattern")
flags.StringArrayVarP(flagSet, &Opt.ExcludeFrom, "exclude-from", "", nil, "Read exclude patterns from file (use - to read from stdin)")
flags.StringArrayVarP(flagSet, &Opt.ExcludeFile, "exclude-if-present", "", nil, "Exclude directories if filename is present") flags.StringArrayVarP(flagSet, &Opt.ExcludeFile, "exclude-if-present", "", nil, "Exclude directories if filename is present")
flags.StringArrayVarP(flagSet, &Opt.IncludeRule, "include", "", nil, "Include files matching pattern")
flags.StringArrayVarP(flagSet, &Opt.IncludeFrom, "include-from", "", nil, "Read include patterns from file (use - to read from stdin)")
flags.StringArrayVarP(flagSet, &Opt.FilesFrom, "files-from", "", nil, "Read list of source-file names from file (use - to read from stdin)") flags.StringArrayVarP(flagSet, &Opt.FilesFrom, "files-from", "", nil, "Read list of source-file names from file (use - to read from stdin)")
flags.StringArrayVarP(flagSet, &Opt.FilesFromRaw, "files-from-raw", "", nil, "Read list of source-file names from file without any processing of lines (use - to read from stdin)") flags.StringArrayVarP(flagSet, &Opt.FilesFromRaw, "files-from-raw", "", nil, "Read list of source-file names from file without any processing of lines (use - to read from stdin)")
flags.FVarP(flagSet, &Opt.MinAge, "min-age", "", "Only transfer files older than this in s or suffix ms|s|m|h|d|w|M|y") flags.FVarP(flagSet, &Opt.MinAge, "min-age", "", "Only transfer files older than this in s or suffix ms|s|m|h|d|w|M|y")

View File

@ -1469,7 +1469,7 @@ func Rmdirs(ctx context.Context, f fs.Fs, dir string, leaveRoot bool) error {
dir := toDelete[i] dir := toDelete[i]
// If a filter matches the directory then that // If a filter matches the directory then that
// directory is a candidate for deletion // directory is a candidate for deletion
if !fi.Include(dir+"/", 0, time.Now()) { if !fi.IncludeRemote(dir + "/") {
continue continue
} }
err = TryRmdir(ctx, f, dir) err = TryRmdir(ctx, f, dir)