mirror of
https://github.com/rclone/rclone.git
synced 2025-01-22 06:09:21 +01:00
filter: add metadata filters --metadata-include/exclude/filter and friends
Fixes #6353
This commit is contained in:
parent
4a31961c4f
commit
3a6f1f5cd7
@ -2355,6 +2355,12 @@ For the filtering options
|
|||||||
* `--min-age`
|
* `--min-age`
|
||||||
* `--max-age`
|
* `--max-age`
|
||||||
* `--dump filters`
|
* `--dump filters`
|
||||||
|
* `--metadata-include`
|
||||||
|
* `--metadata-include-from`
|
||||||
|
* `--metadata-exclude`
|
||||||
|
* `--metadata-exclude-from`
|
||||||
|
* `--metadata-filter`
|
||||||
|
* `--metadata-filter-from`
|
||||||
|
|
||||||
See the [filtering section](/filtering/).
|
See the [filtering section](/filtering/).
|
||||||
|
|
||||||
|
@ -32,7 +32,7 @@ you expect. Instead use a `--filter...` flag.
|
|||||||
|
|
||||||
## Patterns for matching path/file names
|
## Patterns for matching path/file names
|
||||||
|
|
||||||
### Pattern syntax
|
### Pattern syntax {#patterns}
|
||||||
|
|
||||||
Here is a formal definition of the pattern syntax,
|
Here is a formal definition of the pattern syntax,
|
||||||
[examples](#examples) are below.
|
[examples](#examples) are below.
|
||||||
@ -194,7 +194,7 @@ them into regular expressions.
|
|||||||
| Rooted Regexp | `/{{.*\.jpe?g}}` | `/file.jpeg` | `/file.png` |
|
| Rooted Regexp | `/{{.*\.jpe?g}}` | `/file.jpeg` | `/file.png` |
|
||||||
| | | `/file.jpg` | `/dir/file.jpg` |
|
| | | `/file.jpg` | `/dir/file.jpg` |
|
||||||
|
|
||||||
## How filter rules are applied to files
|
## How filter rules are applied to files {#how-filter-rules-work}
|
||||||
|
|
||||||
Rclone path/file name filters are made up of one or more of the following flags:
|
Rclone path/file name filters are made up of one or more of the following flags:
|
||||||
|
|
||||||
@ -757,6 +757,43 @@ E.g. for the following directory structure:
|
|||||||
The command `rclone ls --exclude-if-present .ignore dir1` does
|
The command `rclone ls --exclude-if-present .ignore dir1` does
|
||||||
not list `dir3`, `file3` or `.ignore`.
|
not list `dir3`, `file3` or `.ignore`.
|
||||||
|
|
||||||
|
## Metadata filters {#metadata}
|
||||||
|
|
||||||
|
The metadata filters work in a very similar way to the normal file
|
||||||
|
name filters, except they match [metadata](/docs/#metadata) on the
|
||||||
|
object.
|
||||||
|
|
||||||
|
The metadata should be specified as `key=value` patterns. These may be
|
||||||
|
wildcarded using the normal [filter patterns](#patterns) or [regular
|
||||||
|
expressions](#regexp).
|
||||||
|
|
||||||
|
For example if you wished to list only local files with a mode of
|
||||||
|
`100664` you could do that with:
|
||||||
|
|
||||||
|
rclone lsf -M --files-only --metadata-include "mode=100664" .
|
||||||
|
|
||||||
|
Or if you wished to show files with an `atime`, `mtime` or `btime` on a given date:
|
||||||
|
|
||||||
|
rclone lsf -M --files-only --metadata-include "[abm]time=2022-12-16*" .
|
||||||
|
|
||||||
|
Like file filtering, metadata filtering only applies to files, not to
|
||||||
|
directories.
|
||||||
|
|
||||||
|
The filters can be applied using these flags.
|
||||||
|
|
||||||
|
- `--metadata-include` - Include metadata matching pattern
|
||||||
|
- `--metadata-include-from` - Read metadata include patterns from file (use - to read from stdin)
|
||||||
|
- `--metadata-exclude` - Exclude metadata matching pattern
|
||||||
|
- `--metadata-exclude-from` - Read metadata exclude patterns from file (use - to read from stdin)
|
||||||
|
- `--metadata-filter` - Add a metadata filtering rule
|
||||||
|
- `--metadata-filter-from` - Read metadata filtering patterns from a file (use - to read from stdin)
|
||||||
|
|
||||||
|
Each flag can be repeated. See the section on [how filter rules are
|
||||||
|
applied](#how-filter-rules-work) for more details - these flags work
|
||||||
|
in an identical way to the file name filtering flags, but instead of
|
||||||
|
file name patterns have metadata patterns.
|
||||||
|
|
||||||
|
|
||||||
## Common pitfalls
|
## Common pitfalls
|
||||||
|
|
||||||
The most frequent filter support issues on
|
The most frequent filter support issues on
|
||||||
|
@ -66,6 +66,22 @@ It can be triggered when you did a server-side copy.
|
|||||||
|
|
||||||
Reading metadata will also provide custom (neither standard nor reserved) ones.
|
Reading metadata will also provide custom (neither standard nor reserved) ones.
|
||||||
|
|
||||||
|
## Filtering auto generated files
|
||||||
|
|
||||||
|
The Internet Archive automatically creates metadata files after
|
||||||
|
upload. These can cause problems when doing an `rclone sync` as rclone
|
||||||
|
will try, and fail, to delete them. These metadata files are not
|
||||||
|
changeable, as they are created by the Internet Archive automatically.
|
||||||
|
|
||||||
|
These auto-created files can be excluded from the sync using [metadata
|
||||||
|
filtering](/filtering/#metadata).
|
||||||
|
|
||||||
|
rclone sync ... --metadata-exclude "source=metadata" --metadata-exclude "format=Metadata"
|
||||||
|
|
||||||
|
This excludes from the sync any files which have the
|
||||||
|
`source=metadata` or `format=Metadata` flags which are added to
|
||||||
|
Internet Archive auto-created files.
|
||||||
|
|
||||||
## Configuration
|
## Configuration
|
||||||
|
|
||||||
Here is an example of making an internetarchive configuration.
|
Here is an example of making an internetarchive configuration.
|
||||||
|
@ -26,6 +26,7 @@ type Opt struct {
|
|||||||
ExcludeFile []string
|
ExcludeFile []string
|
||||||
FilesFrom []string
|
FilesFrom []string
|
||||||
FilesFromRaw []string
|
FilesFromRaw []string
|
||||||
|
MetaRules RulesOpt
|
||||||
MinAge fs.Duration
|
MinAge fs.Duration
|
||||||
MaxAge fs.Duration
|
MaxAge fs.Duration
|
||||||
MinSize fs.SizeSuffix
|
MinSize fs.SizeSuffix
|
||||||
@ -51,6 +52,7 @@ type Filter struct {
|
|||||||
ModTimeTo time.Time
|
ModTimeTo time.Time
|
||||||
fileRules rules
|
fileRules rules
|
||||||
dirRules rules
|
dirRules rules
|
||||||
|
metaRules rules
|
||||||
files FilesMap // files if filesFrom
|
files FilesMap // files if filesFrom
|
||||||
dirs FilesMap // dirs from filesFrom
|
dirs FilesMap // dirs from filesFrom
|
||||||
}
|
}
|
||||||
@ -85,6 +87,11 @@ func NewFilter(opt *Opt) (f *Filter, err error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
err = parseRules(&f.Opt.MetaRules, f.metaRules.Add, f.metaRules.clear)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
inActive := f.InActive()
|
inActive := f.InActive()
|
||||||
|
|
||||||
for _, rule := range f.Opt.FilesFrom {
|
for _, rule := range f.Opt.FilesFrom {
|
||||||
@ -234,6 +241,7 @@ func (f *Filter) Files() FilesMap {
|
|||||||
func (f *Filter) Clear() {
|
func (f *Filter) Clear() {
|
||||||
f.fileRules.clear()
|
f.fileRules.clear()
|
||||||
f.dirRules.clear()
|
f.dirRules.clear()
|
||||||
|
f.metaRules.clear()
|
||||||
}
|
}
|
||||||
|
|
||||||
// InActive returns false if any filters are active
|
// InActive returns false if any filters are active
|
||||||
@ -245,6 +253,7 @@ func (f *Filter) InActive() bool {
|
|||||||
f.Opt.MaxSize < 0 &&
|
f.Opt.MaxSize < 0 &&
|
||||||
f.fileRules.len() == 0 &&
|
f.fileRules.len() == 0 &&
|
||||||
f.dirRules.len() == 0 &&
|
f.dirRules.len() == 0 &&
|
||||||
|
f.metaRules.len() == 0 &&
|
||||||
len(f.Opt.ExcludeFile) == 0)
|
len(f.Opt.ExcludeFile) == 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -322,7 +331,7 @@ func (f *Filter) DirContainsExcludeFile(ctx context.Context, fremote fs.Fs, remo
|
|||||||
|
|
||||||
// Include returns whether this object should be included into the
|
// Include returns whether this object should be included into the
|
||||||
// sync or not
|
// sync or not
|
||||||
func (f *Filter) Include(remote string, size int64, modTime time.Time) bool {
|
func (f *Filter) Include(remote string, size int64, modTime time.Time, metadata fs.Metadata) bool {
|
||||||
// filesFrom takes precedence
|
// filesFrom takes precedence
|
||||||
if f.files != nil {
|
if f.files != nil {
|
||||||
_, include := f.files[remote]
|
_, include := f.files[remote]
|
||||||
@ -340,6 +349,20 @@ func (f *Filter) Include(remote string, size int64, modTime time.Time) bool {
|
|||||||
if f.Opt.MaxSize >= 0 && size > int64(f.Opt.MaxSize) {
|
if f.Opt.MaxSize >= 0 && size > int64(f.Opt.MaxSize) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
if f.metaRules.len() > 0 {
|
||||||
|
metadatas := make([]string, 0, len(metadata)+1)
|
||||||
|
for key, value := range metadata {
|
||||||
|
metadatas = append(metadatas, fmt.Sprintf("%s=%s", key, value))
|
||||||
|
}
|
||||||
|
if len(metadata) == 0 {
|
||||||
|
// If there is no metadata, add a null one
|
||||||
|
// otherwise the default action isn't taken
|
||||||
|
metadatas = append(metadatas, "\x00=\x00")
|
||||||
|
}
|
||||||
|
if !f.metaRules.includeMany(metadatas) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
return f.IncludeRemote(remote)
|
return f.IncludeRemote(remote)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -354,7 +377,17 @@ func (f *Filter) IncludeObject(ctx context.Context, o fs.Object) bool {
|
|||||||
} else {
|
} else {
|
||||||
modTime = time.Unix(0, 0)
|
modTime = time.Unix(0, 0)
|
||||||
}
|
}
|
||||||
return f.Include(o.Remote(), o.Size(), modTime)
|
var metadata fs.Metadata
|
||||||
|
if f.metaRules.len() > 0 {
|
||||||
|
var err error
|
||||||
|
metadata, err = fs.GetMetadata(ctx, o)
|
||||||
|
if err != nil {
|
||||||
|
fs.Errorf(o, "Failed to read metadata: %v", err)
|
||||||
|
metadata = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
return f.Include(o.Remote(), o.Size(), modTime, metadata)
|
||||||
}
|
}
|
||||||
|
|
||||||
// DumpFilters dumps the filters in textual form, 1 per line
|
// DumpFilters dumps the filters in textual form, 1 per line
|
||||||
@ -374,6 +407,12 @@ func (f *Filter) DumpFilters() string {
|
|||||||
for _, dirRule := range f.dirRules.rules {
|
for _, dirRule := range f.dirRules.rules {
|
||||||
rules = append(rules, dirRule.String())
|
rules = append(rules, dirRule.String())
|
||||||
}
|
}
|
||||||
|
if f.metaRules.len() > 0 {
|
||||||
|
rules = append(rules, "--- Metadata filter rules ---")
|
||||||
|
for _, metaRule := range f.metaRules.rules {
|
||||||
|
rules = append(rules, metaRule.String())
|
||||||
|
}
|
||||||
|
}
|
||||||
return strings.Join(rules, "\n")
|
return strings.Join(rules, "\n")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -23,6 +23,7 @@ func TestNewFilterDefault(t *testing.T) {
|
|||||||
assert.Equal(t, fs.SizeSuffix(-1), f.Opt.MaxSize)
|
assert.Equal(t, fs.SizeSuffix(-1), f.Opt.MaxSize)
|
||||||
assert.Len(t, f.fileRules.rules, 0)
|
assert.Len(t, f.fileRules.rules, 0)
|
||||||
assert.Len(t, f.dirRules.rules, 0)
|
assert.Len(t, f.dirRules.rules, 0)
|
||||||
|
assert.Len(t, f.metaRules.rules, 0)
|
||||||
assert.Nil(t, f.files)
|
assert.Nil(t, f.files)
|
||||||
assert.True(t, f.InActive())
|
assert.True(t, f.InActive())
|
||||||
}
|
}
|
||||||
@ -207,7 +208,7 @@ type includeTest struct {
|
|||||||
|
|
||||||
func testInclude(t *testing.T, f *Filter, tests []includeTest) {
|
func testInclude(t *testing.T, f *Filter, tests []includeTest) {
|
||||||
for _, test := range tests {
|
for _, test := range tests {
|
||||||
got := f.Include(test.in, test.size, time.Unix(test.modTime, 0))
|
got := f.Include(test.in, test.size, time.Unix(test.modTime, 0), nil)
|
||||||
assert.Equal(t, test.want, got, fmt.Sprintf("in=%q, size=%v, modTime=%v", test.in, test.size, time.Unix(test.modTime, 0)))
|
assert.Equal(t, test.want, got, fmt.Sprintf("in=%q, size=%v, modTime=%v", test.in, test.size, time.Unix(test.modTime, 0)))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -527,6 +528,56 @@ func TestNewFilterMatchesRegexp(t *testing.T) {
|
|||||||
assert.False(t, f.InActive())
|
assert.False(t, f.InActive())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// includeTestMetadata is a single table-driven case for the metadata
// filter tests.
type includeTestMetadata struct {
|
||||||
|
in string // remote name passed to Include; also shown in failure messages
|
||||||
|
metadata fs.Metadata // metadata passed to Include
|
||||||
|
want bool // expected return value of Include
|
||||||
|
}
|
||||||
|
|
||||||
|
// testIncludeMetadata runs every case in tests through f.Include and asserts
// that the result equals the case's want value.
//
// The size is fixed at 0 and the modification time at the zero time.Time, so
// the cases differ only in their remote name and metadata.
func testIncludeMetadata(t *testing.T, f *Filter, tests []includeTestMetadata) {
|
||||||
|
for _, test := range tests {
|
||||||
|
got := f.Include(test.in, 0, time.Time{}, test.metadata)
|
||||||
|
assert.Equal(t, test.want, got, fmt.Sprintf("in=%q, metadata=%+v", test.in, test.metadata))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestNewFilterMetadataInclude checks metadata filtering when an include rule
// (`+ t*=t*`) is followed by a catch-all exclude rule (`- *`).
//
// Objects with nil or empty metadata are expected to be rejected, as is
// metadata whose key and value start with an upper case "T".
func TestNewFilterMetadataInclude(t *testing.T) {
|
||||||
|
f, err := NewFilter(nil)
|
||||||
|
require.NoError(t, err)
|
||||||
|
// add appends a rule to the metadata rules, failing the test on error.
add := func(s string) {
|
||||||
|
err := f.metaRules.AddRule(s)
|
||||||
|
require.NoError(t, err)
|
||||||
|
}
|
||||||
|
add(`+ t*=t*`)
|
||||||
|
add(`- *`)
|
||||||
|
testIncludeMetadata(t, f, []includeTestMetadata{
|
||||||
|
{"nil", nil, false},
|
||||||
|
{"empty", fs.Metadata{}, false},
|
||||||
|
{"ok1", fs.Metadata{"thing": "thang"}, true},
|
||||||
|
{"ok2", fs.Metadata{"thing1": "thang1"}, true},
|
||||||
|
{"missing", fs.Metadata{"Thing1": "Thang1"}, false},
|
||||||
|
})
|
||||||
|
// Having metadata rules must make the filter report itself as active.
assert.False(t, f.InActive())
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestNewFilterMetadataExclude checks metadata filtering when an exclude rule
// (`- thing=thang`) is followed by a catch-all include rule (`+ *`).
//
// Only metadata containing exactly thing=thang is expected to be rejected;
// nil, empty and non-matching metadata are expected to be included.
func TestNewFilterMetadataExclude(t *testing.T) {
|
||||||
|
f, err := NewFilter(nil)
|
||||||
|
require.NoError(t, err)
|
||||||
|
// add appends a rule to the metadata rules, failing the test on error.
add := func(s string) {
|
||||||
|
err := f.metaRules.AddRule(s)
|
||||||
|
require.NoError(t, err)
|
||||||
|
}
|
||||||
|
add(`- thing=thang`)
|
||||||
|
add(`+ *`)
|
||||||
|
testIncludeMetadata(t, f, []includeTestMetadata{
|
||||||
|
{"nil", nil, true},
|
||||||
|
{"empty", fs.Metadata{}, true},
|
||||||
|
{"ok1", fs.Metadata{"thing": "thang"}, false},
|
||||||
|
{"missing1", fs.Metadata{"thing1": "thang1"}, true},
|
||||||
|
})
|
||||||
|
// Having metadata rules must make the filter report itself as active.
assert.False(t, f.InActive())
|
||||||
|
}
|
||||||
|
|
||||||
func TestFilterAddDirRuleOrFileRule(t *testing.T) {
|
func TestFilterAddDirRuleOrFileRule(t *testing.T) {
|
||||||
for _, test := range []struct {
|
for _, test := range []struct {
|
||||||
included bool
|
included bool
|
||||||
@ -713,7 +764,7 @@ func TestFilterMatchesFromDocs(t *testing.T) {
|
|||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
err = f.Add(false, "*")
|
err = f.Add(false, "*")
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
included := f.Include(test.file, 0, time.Unix(0, 0))
|
included := f.Include(test.file, 0, time.Unix(0, 0), nil)
|
||||||
if included != test.included {
|
if included != test.included {
|
||||||
t.Errorf("%q match %q: want %v got %v", test.glob, test.file, test.included, included)
|
t.Errorf("%q match %q: want %v got %v", test.glob, test.file, test.included, included)
|
||||||
}
|
}
|
||||||
|
@ -3,6 +3,7 @@ package filterflags
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
"github.com/rclone/rclone/fs/config/flags"
|
"github.com/rclone/rclone/fs/config/flags"
|
||||||
"github.com/rclone/rclone/fs/filter"
|
"github.com/rclone/rclone/fs/filter"
|
||||||
@ -26,17 +27,27 @@ func Reload(ctx context.Context) (err error) {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// AddRuleFlags adds a set of rule flags to flagSet, each named with prefix.
//
// Six flags are registered: prefix+"filter", prefix+"filter-from",
// prefix+"exclude", prefix+"exclude-from", prefix+"include" and
// prefix+"include-from". Each stores its values in the corresponding field
// of Opt. what is interpolated into the help strings to describe the kind of
// thing the rules apply to.
|
||||||
|
func AddRuleFlags(flagSet *pflag.FlagSet, Opt *filter.RulesOpt, what, prefix string) {
|
||||||
|
shortFilter := ""
|
||||||
|
// Only the unprefixed "filter" flag gets the "f" shorthand.
if prefix == "" {
|
||||||
|
shortFilter = "f"
|
||||||
|
}
|
||||||
|
flags.StringArrayVarP(flagSet, &Opt.FilterRule, prefix+"filter", shortFilter, nil, fmt.Sprintf("Add a %s filtering rule", what))
|
||||||
|
flags.StringArrayVarP(flagSet, &Opt.FilterFrom, prefix+"filter-from", "", nil, fmt.Sprintf("Read %s filtering patterns from a file (use - to read from stdin)", what))
|
||||||
|
flags.StringArrayVarP(flagSet, &Opt.ExcludeRule, prefix+"exclude", "", nil, fmt.Sprintf("Exclude %ss matching pattern", what))
|
||||||
|
flags.StringArrayVarP(flagSet, &Opt.ExcludeFrom, prefix+"exclude-from", "", nil, fmt.Sprintf("Read %s exclude patterns from file (use - to read from stdin)", what))
|
||||||
|
flags.StringArrayVarP(flagSet, &Opt.IncludeRule, prefix+"include", "", nil, fmt.Sprintf("Include %ss matching pattern", what))
|
||||||
|
flags.StringArrayVarP(flagSet, &Opt.IncludeFrom, prefix+"include-from", "", nil, fmt.Sprintf("Read %s include patterns from file (use - to read from stdin)", what))
|
||||||
|
}
|
||||||
|
|
||||||
// AddFlags adds the non filing system specific flags to the command
|
// AddFlags adds the non filing system specific flags to the command
|
||||||
func AddFlags(flagSet *pflag.FlagSet) {
|
func AddFlags(flagSet *pflag.FlagSet) {
|
||||||
rc.AddOptionReload("filter", &Opt, Reload)
|
rc.AddOptionReload("filter", &Opt, Reload)
|
||||||
flags.BoolVarP(flagSet, &Opt.DeleteExcluded, "delete-excluded", "", false, "Delete files on dest excluded from sync")
|
flags.BoolVarP(flagSet, &Opt.DeleteExcluded, "delete-excluded", "", false, "Delete files on dest excluded from sync")
|
||||||
flags.StringArrayVarP(flagSet, &Opt.FilterRule, "filter", "f", nil, "Add a file-filtering rule")
|
AddRuleFlags(flagSet, &Opt.RulesOpt, "file", "")
|
||||||
flags.StringArrayVarP(flagSet, &Opt.FilterFrom, "filter-from", "", nil, "Read filtering patterns from a file (use - to read from stdin)")
|
AddRuleFlags(flagSet, &Opt.MetaRules, "metadata", "metadata-")
|
||||||
flags.StringArrayVarP(flagSet, &Opt.ExcludeRule, "exclude", "", nil, "Exclude files matching pattern")
|
|
||||||
flags.StringArrayVarP(flagSet, &Opt.ExcludeFrom, "exclude-from", "", nil, "Read exclude patterns from file (use - to read from stdin)")
|
|
||||||
flags.StringArrayVarP(flagSet, &Opt.ExcludeFile, "exclude-if-present", "", nil, "Exclude directories if filename is present")
|
flags.StringArrayVarP(flagSet, &Opt.ExcludeFile, "exclude-if-present", "", nil, "Exclude directories if filename is present")
|
||||||
flags.StringArrayVarP(flagSet, &Opt.IncludeRule, "include", "", nil, "Include files matching pattern")
|
|
||||||
flags.StringArrayVarP(flagSet, &Opt.IncludeFrom, "include-from", "", nil, "Read include patterns from file (use - to read from stdin)")
|
|
||||||
flags.StringArrayVarP(flagSet, &Opt.FilesFrom, "files-from", "", nil, "Read list of source-file names from file (use - to read from stdin)")
|
flags.StringArrayVarP(flagSet, &Opt.FilesFrom, "files-from", "", nil, "Read list of source-file names from file (use - to read from stdin)")
|
||||||
flags.StringArrayVarP(flagSet, &Opt.FilesFromRaw, "files-from-raw", "", nil, "Read list of source-file names from file without any processing of lines (use - to read from stdin)")
|
flags.StringArrayVarP(flagSet, &Opt.FilesFromRaw, "files-from-raw", "", nil, "Read list of source-file names from file without any processing of lines (use - to read from stdin)")
|
||||||
flags.FVarP(flagSet, &Opt.MinAge, "min-age", "", "Only transfer files older than this in s or suffix ms|s|m|h|d|w|M|y")
|
flags.FVarP(flagSet, &Opt.MinAge, "min-age", "", "Only transfer files older than this in s or suffix ms|s|m|h|d|w|M|y")
|
||||||
|
@ -1469,7 +1469,7 @@ func Rmdirs(ctx context.Context, f fs.Fs, dir string, leaveRoot bool) error {
|
|||||||
dir := toDelete[i]
|
dir := toDelete[i]
|
||||||
// If a filter matches the directory then that
|
// If a filter matches the directory then that
|
||||||
// directory is a candidate for deletion
|
// directory is a candidate for deletion
|
||||||
if !fi.Include(dir+"/", 0, time.Now()) {
|
if !fi.IncludeRemote(dir + "/") {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
err = TryRmdir(ctx, f, dir)
|
err = TryRmdir(ctx, f, dir)
|
||||||
|
Loading…
Reference in New Issue
Block a user