From 0f73129ab79eea29ec1fb18eef040264568d5b89 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Sun, 31 Jan 2016 12:58:41 +0000 Subject: [PATCH] dedupe command to deduplicate a remote. Useful with google drive - fixes #41 --- docs/content/docs.md | 41 ++++++++++++++++++++++++++++++ fs/config.go | 19 ++++++++++++++ fs/operations.go | 59 ++++++++++++++++++++++++++++++++++++++++++++ rclone.go | 13 ++++++++++ 4 files changed, 132 insertions(+) diff --git a/docs/content/docs.md b/docs/content/docs.md index 768b331d8..755015a12 100644 --- a/docs/content/docs.md +++ b/docs/content/docs.md @@ -136,6 +136,47 @@ Checks the files in the source and destination match. It compares sizes and MD5SUMs and prints a report of files which don't match. It doesn't alter the source or destination. +### rclone dedupe remote:path ### + +Interactively find duplicate files and offer to delete all but one or +rename them to be different. Only useful with Google Drive which can +have duplicate file names. + +``` +$ rclone dedupe drive:dupes +2016/01/31 14:13:11 Google drive root 'dupes': Looking for duplicates +two.txt: Found 3 duplicates + 1: 564374 bytes, 2016-01-31 14:07:22.159000000, md5sum 7594e7dc9fc28f727c42ee3e0749de81 + 2: 1744073 bytes, 2016-01-31 14:07:12.490000000, md5sum 851957f7fb6f0bc4ce76be966d336802 + 3: 6048320 bytes, 2016-01-31 14:07:02.111000000, md5sum 1eedaa9fe86fd4b8632e2ac549403b36 +s) Skip and do nothing +k) Keep just one (choose which in next step) +r) Rename all to be different (by changing file.jpg to file-1.jpg) +s/k/r> r +two-1.txt: renamed from: two.txt +two-2.txt: renamed from: two.txt +two-3.txt: renamed from: two.txt +one.txt: Found 2 duplicates + 1: 6579 bytes, 2016-01-31 14:05:01.235000000, md5sum 2b76c776249409d925ae7ccd49aea59b + 2: 6579 bytes, 2016-01-31 12:50:30.318000000, md5sum 2b76c776249409d925ae7ccd49aea59b +s) Skip and do nothing +k) Keep just one (choose which in next step) +r) Rename all to be different (by changing file.jpg to file-1.jpg) +s/k/r> k +Enter the number of the file to keep> 2 +one.txt: Deleted 1 extra copies +``` + +The result being + +``` +$ rclone lsl drive:dupes + 564374 2016-01-31 14:07:22.159000000 two-1.txt + 1744073 2016-01-31 14:07:12.490000000 two-2.txt + 6048320 2016-01-31 14:07:02.111000000 two-3.txt + 6579 2016-01-31 12:50:30.318000000 one.txt +``` + ### rclone config ### Enter an interactive configuration session. diff --git a/fs/config.go b/fs/config.go index 956365b5a..0a1184106 100644 --- a/fs/config.go +++ b/fs/config.go @@ -413,6 +413,25 @@ func Choose(what string, defaults, help []string, newOk bool) string { } } +// ChooseNumber asks the user to enter a number between min and max +// inclusive prompting them with what. +func ChooseNumber(what string, min, max int) int { + for { + fmt.Printf("%s> ", what) + result := ReadLine() + i, err := strconv.Atoi(result) + if err != nil { + fmt.Printf("Bad number: %v\n", err) + continue + } + if i < min || i > max { + fmt.Printf("Out of range - %d to %d inclusive\n", min, max) + continue + } + return i + } +} + // ShowRemote shows the contents of the remote func ShowRemote(name string) { fmt.Printf("--------------------\n") diff --git a/fs/operations.go b/fs/operations.go index a6a0a87d3..d8143178f 100644 --- a/fs/operations.go +++ b/fs/operations.go @@ -908,3 +908,62 @@ func Delete(f Fs) error { close(delete) return err } + +// Deduplicate interactively finds duplicate files and offers to +// delete all but one or rename them to be different. Only useful with +// Google Drive which can have duplicate file names. +func Deduplicate(f Fs) error { + mover, ok := f.(Mover) + if !ok { + return fmt.Errorf("%v can't Move files", f) + } + Log(f, "Looking for duplicates") + files := map[string][]Object{} + for o := range f.List() { + remote := o.Remote() + files[remote] = append(files[remote], o) + } + for remote, objs := range files { + if len(objs) > 1 { + fmt.Printf("%s: Found %d duplicates\n", remote, len(objs)) + for i, o := range objs { + md5sum, err := o.Hash(HashMD5) + if err != nil { + md5sum = err.Error() + } + fmt.Printf(" %d: %12d bytes, %s, md5sum %32s\n", i+1, o.Size(), o.ModTime().Format("2006-01-02 15:04:05.000000000"), md5sum) + } + switch Command([]string{"sSkip and do nothing", "kKeep just one (choose which in next step)", "rRename all to be different (by changing file.jpg to file-1.jpg)"}) { + case 's': + case 'k': + keep := ChooseNumber("Enter the number of the file to keep", 1, len(objs)) + deleted := 0 + for i, o := range objs { + if i+1 == keep { + continue + } + err := o.Remove() + if err != nil { + ErrorLog(o, "Failed to delete: %v", err) + continue + } + deleted++ + } + fmt.Printf("%s: Deleted %d extra copies\n", remote, deleted) + case 'r': + ext := path.Ext(remote) + base := remote[:len(remote)-len(ext)] + for i, o := range objs { + newName := fmt.Sprintf("%s-%d%s", base, i+1, ext) + newObj, err := mover.Move(o, newName) + if err != nil { + ErrorLog(o, "Failed to rename: %v", err) + continue + } + fmt.Printf("%v: renamed from: %v\n", newObj, o) + } + } + } + } + return nil +} diff --git a/rclone.go b/rclone.go index ae8cee399..d51bfa504 100644 --- a/rclone.go +++ b/rclone.go @@ -240,6 +240,19 @@ var Commands = []Command{ MinArgs: 2, MaxArgs: 2, }, + { + Name: "dedupe", + ArgsHelp: "remote:path", + Help: ` + Interactively find duplicate files and offer to delete all + but one or rename them to be different. Only useful with + Google Drive which can have duplicate file names.`, + Run: func(fdst, fsrc fs.Fs) error { + return fs.Deduplicate(fdst) + }, + MinArgs: 1, + MaxArgs: 1, + }, { Name: "config", Help: `