rclone/fs/filter/filter.go
Nick Craig-Wood 11da2a6c9b Break the fs package up into smaller parts.
The purpose of this is to make it easier to maintain and eventually to
allow the rclone backends to be re-used in other projects without
having to use the rclone configuration system.

The new code layout is documented in CONTRIBUTING.
2018-01-15 17:51:14 +00:00

501 lines
12 KiB
Go

// Package filter controls the filtering of files
package filter
import (
"bufio"
"fmt"
"log"
"os"
"path"
"regexp"
"strings"
"time"
"github.com/ncw/rclone/fs"
"github.com/pkg/errors"
)
// Active is the globally active filter
var Active = mustNewFilter(nil)
// rule is one filter rule
type rule struct {
Include bool
Regexp *regexp.Regexp
}
// Match returns true if rule matches path
func (r *rule) Match(path string) bool {
return r.Regexp.MatchString(path)
}
// String the rule
func (r *rule) String() string {
c := "-"
if r.Include {
c = "+"
}
return fmt.Sprintf("%s %s", c, r.Regexp.String())
}
// rules is a slice of rules
type rules struct {
rules []rule
existing map[string]struct{}
}
// add adds a rule if it doesn't exist already
func (rs *rules) add(Include bool, re *regexp.Regexp) {
if rs.existing == nil {
rs.existing = make(map[string]struct{})
}
newRule := rule{
Include: Include,
Regexp: re,
}
newRuleString := newRule.String()
if _, ok := rs.existing[newRuleString]; ok {
return // rule already exists
}
rs.rules = append(rs.rules, newRule)
rs.existing[newRuleString] = struct{}{}
}
// clear clears all the rules
func (rs *rules) clear() {
rs.rules = nil
rs.existing = nil
}
// len returns the number of rules
func (rs *rules) len() int {
return len(rs.rules)
}
// FilesMap describes the map of files to transfer
type FilesMap map[string]struct{}
// Opt configues the filter
type Opt struct {
DeleteExcluded bool
FilterRule []string
FilterFrom []string
ExcludeRule []string
ExcludeFrom []string
ExcludeFile string
IncludeRule []string
IncludeFrom []string
FilesFrom []string
MinAge fs.Duration
MaxAge fs.Duration
MinSize fs.SizeSuffix
MaxSize fs.SizeSuffix
}
const unusedAge = fs.Duration((1 << 63) - 1)
// DefaultOpt is the default config for the filter
var DefaultOpt = Opt{
MinAge: unusedAge,
MaxAge: unusedAge,
MinSize: fs.SizeSuffix(-1),
MaxSize: fs.SizeSuffix(-1),
}
// Filter describes any filtering in operation
type Filter struct {
Opt Opt
ModTimeFrom time.Time
ModTimeTo time.Time
fileRules rules
dirRules rules
files FilesMap // files if filesFrom
dirs FilesMap // dirs from filesFrom
}
// NewFilter parses the command line options and creates a Filter
// object. If opt is nil, then DefaultOpt will be used
func NewFilter(opt *Opt) (f *Filter, err error) {
f = &Filter{}
// Make a copy of the options
if opt != nil {
f.Opt = *opt
} else {
f.Opt = DefaultOpt
}
// Filter flags
if f.Opt.MinAge != unusedAge {
f.ModTimeTo = time.Now().Add(-time.Duration(f.Opt.MinAge))
fs.Debugf(nil, "--min-age %v to %v", f.Opt.MinAge, f.ModTimeTo)
}
if f.Opt.MaxAge != unusedAge {
f.ModTimeFrom = time.Now().Add(-time.Duration(f.Opt.MaxAge))
if !f.ModTimeTo.IsZero() && f.ModTimeFrom.Before(f.ModTimeTo) {
log.Fatal("filter: --min-age can't be larger than --max-age")
}
fs.Debugf(nil, "--max-age %v to %v", f.Opt.MaxAge, f.ModTimeFrom)
}
addImplicitExclude := false
foundExcludeRule := false
for _, rule := range f.Opt.IncludeRule {
err = f.Add(true, rule)
if err != nil {
return nil, err
}
addImplicitExclude = true
}
for _, rule := range f.Opt.IncludeFrom {
err := forEachLine(rule, func(line string) error {
return f.Add(true, line)
})
if err != nil {
return nil, err
}
addImplicitExclude = true
}
for _, rule := range f.Opt.ExcludeRule {
err = f.Add(false, rule)
if err != nil {
return nil, err
}
foundExcludeRule = true
}
for _, rule := range f.Opt.ExcludeFrom {
err := forEachLine(rule, func(line string) error {
return f.Add(false, line)
})
if err != nil {
return nil, err
}
foundExcludeRule = true
}
if addImplicitExclude && foundExcludeRule {
fs.Infof(nil, "Using --filter is recommended instead of both --include and --exclude as the order they are parsed in is indeterminate")
}
for _, rule := range f.Opt.FilterRule {
err = f.AddRule(rule)
if err != nil {
return nil, err
}
}
for _, rule := range f.Opt.FilterFrom {
err := forEachLine(rule, f.AddRule)
if err != nil {
return nil, err
}
}
for _, rule := range f.Opt.FilesFrom {
f.initAddFile() // init to show --files-from set even if no files within
err := forEachLine(rule, func(line string) error {
return f.AddFile(line)
})
if err != nil {
return nil, err
}
}
if addImplicitExclude {
err = f.Add(false, "/**")
if err != nil {
return nil, err
}
}
if fs.Config.Dump&fs.DumpFilters != 0 {
fmt.Println("--- start filters ---")
fmt.Println(f.DumpFilters())
fmt.Println("--- end filters ---")
}
return f, nil
}
func mustNewFilter(opt *Opt) *Filter {
f, err := NewFilter(opt)
if err != nil {
panic(err)
}
return f
}
// addDirGlobs adds directory globs from the file glob passed in
func (f *Filter) addDirGlobs(Include bool, glob string) error {
for _, dirGlob := range globToDirGlobs(glob) {
// Don't add "/" as we always include the root
if dirGlob == "/" {
continue
}
dirRe, err := globToRegexp(dirGlob)
if err != nil {
return err
}
f.dirRules.add(Include, dirRe)
}
return nil
}
// Add adds a filter rule with include or exclude status indicated
func (f *Filter) Add(Include bool, glob string) error {
isDirRule := strings.HasSuffix(glob, "/")
isFileRule := !isDirRule
if strings.Contains(glob, "**") {
isDirRule, isFileRule = true, true
}
re, err := globToRegexp(glob)
if err != nil {
return err
}
if isFileRule {
f.fileRules.add(Include, re)
// If include rule work out what directories are needed to scan
// if exclude rule, we can't rule anything out
// Unless it is `*` which matches everything
// NB ** and /** are DirRules
if Include || glob == "*" {
err = f.addDirGlobs(Include, glob)
if err != nil {
return err
}
}
}
if isDirRule {
f.dirRules.add(Include, re)
}
return nil
}
// AddRule adds a filter rule with include/exclude indicated by the prefix
//
// These are
//
// + glob
// - glob
// !
//
// '+' includes the glob, '-' excludes it and '!' resets the filter list
//
// Line comments may be introduced with '#' or ';'
func (f *Filter) AddRule(rule string) error {
switch {
case rule == "!":
f.Clear()
return nil
case strings.HasPrefix(rule, "- "):
return f.Add(false, rule[2:])
case strings.HasPrefix(rule, "+ "):
return f.Add(true, rule[2:])
}
return errors.Errorf("malformed rule %q", rule)
}
// initAddFile creates f.files and f.dirs
func (f *Filter) initAddFile() {
if f.files == nil {
f.files = make(FilesMap)
f.dirs = make(FilesMap)
}
}
// AddFile adds a single file to the files from list
func (f *Filter) AddFile(file string) error {
f.initAddFile()
file = strings.Trim(file, "/")
f.files[file] = struct{}{}
// Put all the parent directories into f.dirs
for {
file = path.Dir(file)
if file == "." {
break
}
if _, found := f.dirs[file]; found {
break
}
f.dirs[file] = struct{}{}
}
return nil
}
// Files returns all the files from the `--files-from` list
//
// It may be nil if the list is empty
func (f *Filter) Files() FilesMap {
return f.files
}
// Clear clears all the filter rules
func (f *Filter) Clear() {
f.fileRules.clear()
f.dirRules.clear()
}
// InActive returns false if any filters are active
func (f *Filter) InActive() bool {
return (f.files == nil &&
f.ModTimeFrom.IsZero() &&
f.ModTimeTo.IsZero() &&
f.Opt.MinSize < 0 &&
f.Opt.MaxSize < 0 &&
f.fileRules.len() == 0 &&
f.dirRules.len() == 0 &&
len(f.Opt.ExcludeFile) == 0)
}
// includeRemote returns whether this remote passes the filter rules.
func (f *Filter) includeRemote(remote string) bool {
for _, rule := range f.fileRules.rules {
if rule.Match(remote) {
return rule.Include
}
}
return true
}
// ListContainsExcludeFile checks if exclude file is present in the list.
func (f *Filter) ListContainsExcludeFile(entries fs.DirEntries) bool {
if len(f.Opt.ExcludeFile) == 0 {
return false
}
for _, entry := range entries {
obj, ok := entry.(fs.Object)
if ok {
basename := path.Base(obj.Remote())
if basename == f.Opt.ExcludeFile {
return true
}
}
}
return false
}
// IncludeDirectory returns a function which checks whether this
// directory should be included in the sync or not.
func (f *Filter) IncludeDirectory(fs fs.Fs) func(string) (bool, error) {
return func(remote string) (bool, error) {
remote = strings.Trim(remote, "/")
// first check if we need to remove directory based on
// the exclude file
excl, err := f.DirContainsExcludeFile(fs, remote)
if err != nil {
return false, err
}
if excl {
return false, nil
}
// filesFrom takes precedence
if f.files != nil {
_, include := f.dirs[remote]
return include, nil
}
remote += "/"
for _, rule := range f.dirRules.rules {
if rule.Match(remote) {
return rule.Include, nil
}
}
return true, nil
}
}
// DirContainsExcludeFile checks if exclude file is present in a
// directroy. If fs is nil, it works properly if ExcludeFile is an
// empty string (for testing).
func (f *Filter) DirContainsExcludeFile(fremote fs.Fs, remote string) (bool, error) {
if len(f.Opt.ExcludeFile) > 0 {
exists, err := fs.FileExists(fremote, path.Join(remote, f.Opt.ExcludeFile))
if err != nil {
return false, err
}
if exists {
return true, nil
}
}
return false, nil
}
// Include returns whether this object should be included into the
// sync or not
func (f *Filter) Include(remote string, size int64, modTime time.Time) bool {
// filesFrom takes precedence
if f.files != nil {
_, include := f.files[remote]
return include
}
if !f.ModTimeFrom.IsZero() && modTime.Before(f.ModTimeFrom) {
return false
}
if !f.ModTimeTo.IsZero() && modTime.After(f.ModTimeTo) {
return false
}
if f.Opt.MinSize >= 0 && size < int64(f.Opt.MinSize) {
return false
}
if f.Opt.MaxSize >= 0 && size > int64(f.Opt.MaxSize) {
return false
}
return f.includeRemote(remote)
}
// IncludeObject returns whether this object should be included into
// the sync or not. This is a convenience function to avoid calling
// o.ModTime(), which is an expensive operation.
func (f *Filter) IncludeObject(o fs.Object) bool {
var modTime time.Time
if !f.ModTimeFrom.IsZero() || !f.ModTimeTo.IsZero() {
modTime = o.ModTime()
} else {
modTime = time.Unix(0, 0)
}
return f.Include(o.Remote(), o.Size(), modTime)
}
// forEachLine calls fn on every line in the file pointed to by path
//
// It ignores empty lines and lines starting with '#' or ';'
func forEachLine(path string, fn func(string) error) (err error) {
in, err := os.Open(path)
if err != nil {
return err
}
defer fs.CheckClose(in, &err)
scanner := bufio.NewScanner(in)
for scanner.Scan() {
line := scanner.Text()
line = strings.TrimSpace(line)
if len(line) == 0 || line[0] == '#' || line[0] == ';' {
continue
}
err := fn(line)
if err != nil {
return err
}
}
return scanner.Err()
}
// DumpFilters dumps the filters in textual form, 1 per line
func (f *Filter) DumpFilters() string {
rules := []string{}
if !f.ModTimeFrom.IsZero() {
rules = append(rules, fmt.Sprintf("Last-modified date must be equal or greater than: %s", f.ModTimeFrom.String()))
}
if !f.ModTimeTo.IsZero() {
rules = append(rules, fmt.Sprintf("Last-modified date must be equal or less than: %s", f.ModTimeTo.String()))
}
rules = append(rules, "--- File filter rules ---")
for _, rule := range f.fileRules.rules {
rules = append(rules, rule.String())
}
rules = append(rules, "--- Directory filter rules ---")
for _, dirRule := range f.dirRules.rules {
rules = append(rules, dirRule.String())
}
return strings.Join(rules, "\n")
}