mirror of
https://github.com/rclone/rclone.git
synced 2025-01-21 21:58:58 +01:00
filter: add metadata filters --metadata-include/exclude/filter and friends
Fixes #6353
This commit is contained in:
parent
4a31961c4f
commit
3a6f1f5cd7
@ -2355,6 +2355,12 @@ For the filtering options
|
||||
* `--min-age`
|
||||
* `--max-age`
|
||||
* `--dump filters`
|
||||
* `--metadata-include`
|
||||
* `--metadata-include-from`
|
||||
* `--metadata-exclude`
|
||||
* `--metadata-exclude-from`
|
||||
* `--metadata-filter`
|
||||
* `--metadata-filter-from`
|
||||
|
||||
See the [filtering section](/filtering/).
|
||||
|
||||
|
@ -32,7 +32,7 @@ you expect. Instead use a `--filter...` flag.
|
||||
|
||||
## Patterns for matching path/file names
|
||||
|
||||
### Pattern syntax
|
||||
### Pattern syntax {#patterns}
|
||||
|
||||
Here is a formal definition of the pattern syntax,
|
||||
[examples](#examples) are below.
|
||||
@ -194,7 +194,7 @@ them into regular expressions.
|
||||
| Rooted Regexp | `/{{.*\.jpe?g}}` | `/file.jpeg` | `/file.png` |
|
||||
| | | `/file.jpg` | `/dir/file.jpg` |
|
||||
|
||||
## How filter rules are applied to files
|
||||
## How filter rules are applied to files {#how-filter-rules-work}
|
||||
|
||||
Rclone path/file name filters are made up of one or more of the following flags:
|
||||
|
||||
@ -757,6 +757,43 @@ E.g. for the following directory structure:
|
||||
The command `rclone ls --exclude-if-present .ignore dir1` does
|
||||
not list `dir3`, `file3` or `.ignore`.
|
||||
|
||||
## Metadata filters {#metadata}
|
||||
|
||||
The metadata filters work in a very similar way to the normal file
|
||||
name filters, except they match [metadata](/docs/#metadata) on the
|
||||
object.
|
||||
|
||||
The metadata should be specified as `key=value` patterns. These may be
|
||||
wildcarded using the normal [filter patterns](#patterns) or [regular
|
||||
expressions](#regexp).
|
||||
|
||||
For example if you wished to list only local files with a mode of
|
||||
`100664` you could do that with:
|
||||
|
||||
rclone lsf -M --files-only --metadata-include "mode=100664" .
|
||||
|
||||
Or if you wished to show files with an `atime`, `mtime` or `btime` at a given date:
|
||||
|
||||
rclone lsf -M --files-only --metadata-include "[abm]time=2022-12-16*" .
|
||||
|
||||
Like file filtering, metadata filtering only applies to files, not to
|
||||
directories.
|
||||
|
||||
The filters can be applied using these flags.
|
||||
|
||||
- `--metadata-include` - Include metadata matching pattern
|
||||
- `--metadata-include-from` - Read metadata include patterns from file (use - to read from stdin)
|
||||
- `--metadata-exclude` - Exclude metadata matching pattern
|
||||
- `--metadata-exclude-from` - Read metadata exclude patterns from file (use - to read from stdin)
|
||||
- `--metadata-filter` - Add a metadata filtering rule
|
||||
- `--metadata-filter-from` - Read metadata filtering patterns from a file (use - to read from stdin)
|
||||
|
||||
Each flag can be repeated. See the section on [how filter rules are
|
||||
applied](#how-filter-rules-work) for more details - these flags work
|
||||
in an identical way to the file name filtering flags, but instead of
|
||||
file name patterns they take metadata patterns.
|
||||
|
||||
|
||||
## Common pitfalls
|
||||
|
||||
The most frequent filter support issues on
|
||||
|
@ -66,6 +66,22 @@ It can be triggered when you did a server-side copy.
|
||||
|
||||
Reading metadata will also provide custom (neither standard nor reserved) ones.
|
||||
|
||||
## Filtering auto generated files
|
||||
|
||||
The Internet Archive automatically creates metadata files after
|
||||
upload. These can cause problems when doing an `rclone sync` as rclone
|
||||
will try, and fail, to delete them. These metadata files are not
|
||||
changeable, as they are created by the Internet Archive automatically.
|
||||
|
||||
These auto-created files can be excluded from the sync using [metadata
|
||||
filtering](/filtering/#metadata).
|
||||
|
||||
rclone sync ... --metadata-exclude "source=metadata" --metadata-exclude "format=Metadata"
|
||||
|
||||
This excludes from the sync any files which have the
|
||||
`source=metadata` or `format=Metadata` flags which are added to
|
||||
Internet Archive auto-created files.
|
||||
|
||||
## Configuration
|
||||
|
||||
Here is an example of making an internetarchive configuration.
|
||||
|
@ -26,6 +26,7 @@ type Opt struct {
|
||||
ExcludeFile []string
|
||||
FilesFrom []string
|
||||
FilesFromRaw []string
|
||||
MetaRules RulesOpt
|
||||
MinAge fs.Duration
|
||||
MaxAge fs.Duration
|
||||
MinSize fs.SizeSuffix
|
||||
@ -51,6 +52,7 @@ type Filter struct {
|
||||
ModTimeTo time.Time
|
||||
fileRules rules
|
||||
dirRules rules
|
||||
metaRules rules
|
||||
files FilesMap // files if filesFrom
|
||||
dirs FilesMap // dirs from filesFrom
|
||||
}
|
||||
@ -85,6 +87,11 @@ func NewFilter(opt *Opt) (f *Filter, err error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
err = parseRules(&f.Opt.MetaRules, f.metaRules.Add, f.metaRules.clear)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
inActive := f.InActive()
|
||||
|
||||
for _, rule := range f.Opt.FilesFrom {
|
||||
@ -234,6 +241,7 @@ func (f *Filter) Files() FilesMap {
|
||||
func (f *Filter) Clear() {
|
||||
// Empty all three rule sets held by the filter: file rules,
// directory rules and metadata rules.
f.fileRules.clear()
|
||||
f.dirRules.clear()
|
||||
f.metaRules.clear()
|
||||
}
|
||||
|
||||
// InActive returns false if any filters are active
|
||||
@ -245,6 +253,7 @@ func (f *Filter) InActive() bool {
|
||||
f.Opt.MaxSize < 0 &&
|
||||
f.fileRules.len() == 0 &&
|
||||
f.dirRules.len() == 0 &&
|
||||
f.metaRules.len() == 0 &&
|
||||
len(f.Opt.ExcludeFile) == 0)
|
||||
}
|
||||
|
||||
@ -322,7 +331,7 @@ func (f *Filter) DirContainsExcludeFile(ctx context.Context, fremote fs.Fs, remo
|
||||
|
||||
// Include returns whether this object should be included into the
|
||||
// sync or not
|
||||
func (f *Filter) Include(remote string, size int64, modTime time.Time) bool {
|
||||
func (f *Filter) Include(remote string, size int64, modTime time.Time, metadata fs.Metadata) bool {
|
||||
// filesFrom takes precedence
|
||||
if f.files != nil {
|
||||
_, include := f.files[remote]
|
||||
@ -340,6 +349,20 @@ func (f *Filter) Include(remote string, size int64, modTime time.Time) bool {
|
||||
if f.Opt.MaxSize >= 0 && size > int64(f.Opt.MaxSize) {
|
||||
return false
|
||||
}
|
||||
if f.metaRules.len() > 0 {
|
||||
metadatas := make([]string, 0, len(metadata)+1)
|
||||
for key, value := range metadata {
|
||||
metadatas = append(metadatas, fmt.Sprintf("%s=%s", key, value))
|
||||
}
|
||||
if len(metadata) == 0 {
|
||||
// If there is no metadata, add a null one
|
||||
// otherwise the default action isn't taken
|
||||
metadatas = append(metadatas, "\x00=\x00")
|
||||
}
|
||||
if !f.metaRules.includeMany(metadatas) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return f.IncludeRemote(remote)
|
||||
}
|
||||
|
||||
@ -354,7 +377,17 @@ func (f *Filter) IncludeObject(ctx context.Context, o fs.Object) bool {
|
||||
} else {
|
||||
modTime = time.Unix(0, 0)
|
||||
}
|
||||
return f.Include(o.Remote(), o.Size(), modTime)
|
||||
var metadata fs.Metadata
|
||||
if f.metaRules.len() > 0 {
|
||||
var err error
|
||||
metadata, err = fs.GetMetadata(ctx, o)
|
||||
if err != nil {
|
||||
fs.Errorf(o, "Failed to read metadata: %v", err)
|
||||
metadata = nil
|
||||
}
|
||||
|
||||
}
|
||||
return f.Include(o.Remote(), o.Size(), modTime, metadata)
|
||||
}
|
||||
|
||||
// DumpFilters dumps the filters in textual form, 1 per line
|
||||
@ -374,6 +407,12 @@ func (f *Filter) DumpFilters() string {
|
||||
for _, dirRule := range f.dirRules.rules {
|
||||
rules = append(rules, dirRule.String())
|
||||
}
|
||||
if f.metaRules.len() > 0 {
|
||||
rules = append(rules, "--- Metadata filter rules ---")
|
||||
for _, metaRule := range f.metaRules.rules {
|
||||
rules = append(rules, metaRule.String())
|
||||
}
|
||||
}
|
||||
return strings.Join(rules, "\n")
|
||||
}
|
||||
|
||||
|
@ -23,6 +23,7 @@ func TestNewFilterDefault(t *testing.T) {
|
||||
assert.Equal(t, fs.SizeSuffix(-1), f.Opt.MaxSize)
|
||||
assert.Len(t, f.fileRules.rules, 0)
|
||||
assert.Len(t, f.dirRules.rules, 0)
|
||||
assert.Len(t, f.metaRules.rules, 0)
|
||||
assert.Nil(t, f.files)
|
||||
assert.True(t, f.InActive())
|
||||
}
|
||||
@ -207,7 +208,7 @@ type includeTest struct {
|
||||
|
||||
func testInclude(t *testing.T, f *Filter, tests []includeTest) {
|
||||
for _, test := range tests {
|
||||
got := f.Include(test.in, test.size, time.Unix(test.modTime, 0))
|
||||
got := f.Include(test.in, test.size, time.Unix(test.modTime, 0), nil)
|
||||
assert.Equal(t, test.want, got, fmt.Sprintf("in=%q, size=%v, modTime=%v", test.in, test.size, time.Unix(test.modTime, 0)))
|
||||
}
|
||||
}
|
||||
@ -527,6 +528,56 @@ func TestNewFilterMatchesRegexp(t *testing.T) {
|
||||
assert.False(t, f.InActive())
|
||||
}
|
||||
|
||||
// includeTestMetadata is a single table-driven case for
// testIncludeMetadata.
type includeTestMetadata struct {
|
||||
// in is passed to Filter.Include as the remote name and is also used
// to label the case in the assertion message.
in string
|
||||
// metadata is the object metadata handed to Filter.Include; it may be nil.
metadata fs.Metadata
|
||||
// want is the expected return value of Filter.Include.
want bool
|
||||
}
|
||||
|
||||
// testIncludeMetadata runs every case in tests through f.Include and
// asserts that the result equals the case's want field.
//
// Size is fixed at 0 and the modification time at the zero time.Time, so
// the only inputs that vary between cases are the remote name and the
// metadata.
func testIncludeMetadata(t *testing.T, f *Filter, tests []includeTestMetadata) {
|
||||
for _, test := range tests {
|
||||
got := f.Include(test.in, 0, time.Time{}, test.metadata)
|
||||
// The message identifies the failing case by its name and metadata.
assert.Equal(t, test.want, got, fmt.Sprintf("in=%q, metadata=%+v", test.in, test.metadata))
|
||||
}
|
||||
}
|
||||
|
||||
// TestNewFilterMetadataInclude checks Filter.Include against a metadata
// rule set made of an include rule (`+ t*=t*`) followed by a catch-all
// exclude rule (`- *`).
func TestNewFilterMetadataInclude(t *testing.T) {
|
||||
f, err := NewFilter(nil)
|
||||
require.NoError(t, err)
|
||||
// add appends a rule to the filter's metadata rules and fails the test
// immediately if AddRule returns an error.
add := func(s string) {
|
||||
err := f.metaRules.AddRule(s)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
add(`+ t*=t*`)
|
||||
add(`- *`)
|
||||
testIncludeMetadata(t, f, []includeTestMetadata{
|
||||
// nil and empty metadata are both expected to be excluded
// (presumably via the catch-all `- *` rule).
{"nil", nil, false},
|
||||
{"empty", fs.Metadata{}, false},
|
||||
// Key and value both start with a lower case "t", so these are
// expected to be included.
{"ok1", fs.Metadata{"thing": "thang"}, true},
|
||||
{"ok2", fs.Metadata{"thing1": "thang1"}, true},
|
||||
// Same pair with a capital "T": expected to be excluded, i.e. the
// match is expected to be case sensitive here.
{"missing", fs.Metadata{"Thing1": "Thang1"}, false},
|
||||
})
|
||||
// Having metadata rules must make the filter report itself as active.
assert.False(t, f.InActive())
|
||||
}
|
||||
|
||||
// TestNewFilterMetadataExclude checks Filter.Include against a metadata
// rule set made of an exclude rule (`- thing=thang`) followed by a
// catch-all include rule (`+ *`).
func TestNewFilterMetadataExclude(t *testing.T) {
|
||||
f, err := NewFilter(nil)
|
||||
require.NoError(t, err)
|
||||
// add appends a rule to the filter's metadata rules and fails the test
// immediately if AddRule returns an error.
add := func(s string) {
|
||||
err := f.metaRules.AddRule(s)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
add(`- thing=thang`)
|
||||
add(`+ *`)
|
||||
testIncludeMetadata(t, f, []includeTestMetadata{
|
||||
// nil and empty metadata are both expected to be included
// (presumably via the catch-all `+ *` rule).
{"nil", nil, true},
|
||||
{"empty", fs.Metadata{}, true},
|
||||
// The exact pair named by the exclude rule is expected to be rejected.
{"ok1", fs.Metadata{"thing": "thang"}, false},
|
||||
// A different key/value pair is expected to be included.
{"missing1", fs.Metadata{"thing1": "thang1"}, true},
|
||||
})
|
||||
// Having metadata rules must make the filter report itself as active.
assert.False(t, f.InActive())
|
||||
}
|
||||
|
||||
func TestFilterAddDirRuleOrFileRule(t *testing.T) {
|
||||
for _, test := range []struct {
|
||||
included bool
|
||||
@ -713,7 +764,7 @@ func TestFilterMatchesFromDocs(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
err = f.Add(false, "*")
|
||||
require.NoError(t, err)
|
||||
included := f.Include(test.file, 0, time.Unix(0, 0))
|
||||
included := f.Include(test.file, 0, time.Unix(0, 0), nil)
|
||||
if included != test.included {
|
||||
t.Errorf("%q match %q: want %v got %v", test.glob, test.file, test.included, included)
|
||||
}
|
||||
|
@ -3,6 +3,7 @@ package filterflags
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"github.com/rclone/rclone/fs/config/flags"
|
||||
"github.com/rclone/rclone/fs/filter"
|
||||
@ -26,17 +27,27 @@ func Reload(ctx context.Context) (err error) {
|
||||
return nil
|
||||
}
|
||||
|
||||
// AddRuleFlags adds a set of filter rule flags to flagSet, storing the
// parsed values in the fields of Opt.
//
// Six flags are registered, each named prefix followed by one of
// "filter", "filter-from", "exclude", "exclude-from", "include" or
// "include-from". what is interpolated into the help text of each flag
// (AddFlags passes "file" and "metadata").
//
// NOTE(review): the Opt parameter shadows the Opt variable that AddFlags
// refers to; inside this function Opt always means the parameter.
|
||||
func AddRuleFlags(flagSet *pflag.FlagSet, Opt *filter.RulesOpt, what, prefix string) {
|
||||
// Only the unprefixed "filter" flag gets the "-f" shorthand; prefixed
// variants are registered with an empty shorthand.
shortFilter := ""
|
||||
if prefix == "" {
|
||||
shortFilter = "f"
|
||||
}
|
||||
flags.StringArrayVarP(flagSet, &Opt.FilterRule, prefix+"filter", shortFilter, nil, fmt.Sprintf("Add a %s filtering rule", what))
|
||||
flags.StringArrayVarP(flagSet, &Opt.FilterFrom, prefix+"filter-from", "", nil, fmt.Sprintf("Read %s filtering patterns from a file (use - to read from stdin)", what))
|
||||
flags.StringArrayVarP(flagSet, &Opt.ExcludeRule, prefix+"exclude", "", nil, fmt.Sprintf("Exclude %ss matching pattern", what))
|
||||
flags.StringArrayVarP(flagSet, &Opt.ExcludeFrom, prefix+"exclude-from", "", nil, fmt.Sprintf("Read %s exclude patterns from file (use - to read from stdin)", what))
|
||||
flags.StringArrayVarP(flagSet, &Opt.IncludeRule, prefix+"include", "", nil, fmt.Sprintf("Include %ss matching pattern", what))
|
||||
flags.StringArrayVarP(flagSet, &Opt.IncludeFrom, prefix+"include-from", "", nil, fmt.Sprintf("Read %s include patterns from file (use - to read from stdin)", what))
|
||||
}
|
||||
|
||||
// AddFlags adds the non filing system specific flags to the command
|
||||
func AddFlags(flagSet *pflag.FlagSet) {
|
||||
rc.AddOptionReload("filter", &Opt, Reload)
|
||||
flags.BoolVarP(flagSet, &Opt.DeleteExcluded, "delete-excluded", "", false, "Delete files on dest excluded from sync")
|
||||
flags.StringArrayVarP(flagSet, &Opt.FilterRule, "filter", "f", nil, "Add a file-filtering rule")
|
||||
flags.StringArrayVarP(flagSet, &Opt.FilterFrom, "filter-from", "", nil, "Read filtering patterns from a file (use - to read from stdin)")
|
||||
flags.StringArrayVarP(flagSet, &Opt.ExcludeRule, "exclude", "", nil, "Exclude files matching pattern")
|
||||
flags.StringArrayVarP(flagSet, &Opt.ExcludeFrom, "exclude-from", "", nil, "Read exclude patterns from file (use - to read from stdin)")
|
||||
AddRuleFlags(flagSet, &Opt.RulesOpt, "file", "")
|
||||
AddRuleFlags(flagSet, &Opt.MetaRules, "metadata", "metadata-")
|
||||
flags.StringArrayVarP(flagSet, &Opt.ExcludeFile, "exclude-if-present", "", nil, "Exclude directories if filename is present")
|
||||
flags.StringArrayVarP(flagSet, &Opt.IncludeRule, "include", "", nil, "Include files matching pattern")
|
||||
flags.StringArrayVarP(flagSet, &Opt.IncludeFrom, "include-from", "", nil, "Read include patterns from file (use - to read from stdin)")
|
||||
flags.StringArrayVarP(flagSet, &Opt.FilesFrom, "files-from", "", nil, "Read list of source-file names from file (use - to read from stdin)")
|
||||
flags.StringArrayVarP(flagSet, &Opt.FilesFromRaw, "files-from-raw", "", nil, "Read list of source-file names from file without any processing of lines (use - to read from stdin)")
|
||||
flags.FVarP(flagSet, &Opt.MinAge, "min-age", "", "Only transfer files older than this in s or suffix ms|s|m|h|d|w|M|y")
|
||||
|
@ -1469,7 +1469,7 @@ func Rmdirs(ctx context.Context, f fs.Fs, dir string, leaveRoot bool) error {
|
||||
dir := toDelete[i]
|
||||
// If a filter matches the directory then that
|
||||
// directory is a candidate for deletion
|
||||
if !fi.Include(dir+"/", 0, time.Now()) {
|
||||
if !fi.IncludeRemote(dir + "/") {
|
||||
continue
|
||||
}
|
||||
err = TryRmdir(ctx, f, dir)
|
||||
|
Loading…
Reference in New Issue
Block a user