mirror of
https://github.com/rclone/rclone.git
synced 2025-01-09 15:58:28 +01:00
dedupe command to deduplicate a remote. Useful with google drive - fixes #41
This commit is contained in:
parent
1373efaa39
commit
0f73129ab7
@ -136,6 +136,47 @@ Checks the files in the source and destination match. It
|
|||||||
compares sizes and MD5SUMs and prints a report of files which
|
compares sizes and MD5SUMs and prints a report of files which
|
||||||
don't match. It doesn't alter the source or destination.
|
don't match. It doesn't alter the source or destination.
|
||||||
|
|
||||||
|
### rclone dedupe remote:path ###
|
||||||
|
|
||||||
|
Interactively find duplicate files and offer to delete all but one or
|
||||||
|
rename them to be different. Only useful with Google Drive, which can
|
||||||
|
have duplicate file names.
|
||||||
|
|
||||||
|
```
|
||||||
|
$ rclone dedupe drive:dupes
|
||||||
|
2016/01/31 14:13:11 Google drive root 'dupes': Looking for duplicates
|
||||||
|
two.txt: Found 3 duplicates
|
||||||
|
1: 564374 bytes, 2016-01-31 14:07:22.159000000, md5sum 7594e7dc9fc28f727c42ee3e0749de81
|
||||||
|
2: 1744073 bytes, 2016-01-31 14:07:12.490000000, md5sum 851957f7fb6f0bc4ce76be966d336802
|
||||||
|
3: 6048320 bytes, 2016-01-31 14:07:02.111000000, md5sum 1eedaa9fe86fd4b8632e2ac549403b36
|
||||||
|
s) Skip and do nothing
|
||||||
|
k) Keep just one (choose which in next step)
|
||||||
|
r) Rename all to be different (by changing file.jpg to file-1.jpg)
|
||||||
|
s/k/r> r
|
||||||
|
two-1.txt: renamed from: two.txt
|
||||||
|
two-2.txt: renamed from: two.txt
|
||||||
|
two-3.txt: renamed from: two.txt
|
||||||
|
one.txt: Found 2 duplicates
|
||||||
|
1: 6579 bytes, 2016-01-31 14:05:01.235000000, md5sum 2b76c776249409d925ae7ccd49aea59b
|
||||||
|
2: 6579 bytes, 2016-01-31 12:50:30.318000000, md5sum 2b76c776249409d925ae7ccd49aea59b
|
||||||
|
s) Skip and do nothing
|
||||||
|
k) Keep just one (choose which in next step)
|
||||||
|
r) Rename all to be different (by changing file.jpg to file-1.jpg)
|
||||||
|
s/k/r> k
|
||||||
|
Enter the number of the file to keep> 2
|
||||||
|
one.txt: Deleted 1 extra copies
|
||||||
|
```
|
||||||
|
|
||||||
|
The result being:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ rclone lsl drive:dupes
|
||||||
|
564374 2016-01-31 14:07:22.159000000 two-1.txt
|
||||||
|
1744073 2016-01-31 14:07:12.490000000 two-2.txt
|
||||||
|
6048320 2016-01-31 14:07:02.111000000 two-3.txt
|
||||||
|
6579 2016-01-31 12:50:30.318000000 one.txt
|
||||||
|
```
|
||||||
|
|
||||||
### rclone config ###
|
### rclone config ###
|
||||||
|
|
||||||
Enter an interactive configuration session.
|
Enter an interactive configuration session.
|
||||||
|
19
fs/config.go
19
fs/config.go
@ -413,6 +413,25 @@ func Choose(what string, defaults, help []string, newOk bool) string {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ChooseNumber asks the user to enter a number between min and max
|
||||||
|
// inclusive prompting them with what.
|
||||||
|
func ChooseNumber(what string, min, max int) int {
|
||||||
|
for {
|
||||||
|
fmt.Printf("%s> ", what)
|
||||||
|
result := ReadLine()
|
||||||
|
i, err := strconv.Atoi(result)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Printf("Bad number: %v\n", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if i < min || i > max {
|
||||||
|
fmt.Printf("Out of range - %d to %d inclusive\n", min, max)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
return i
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// ShowRemote shows the contents of the remote
|
// ShowRemote shows the contents of the remote
|
||||||
func ShowRemote(name string) {
|
func ShowRemote(name string) {
|
||||||
fmt.Printf("--------------------\n")
|
fmt.Printf("--------------------\n")
|
||||||
|
@ -908,3 +908,62 @@ func Delete(f Fs) error {
|
|||||||
close(delete)
|
close(delete)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Deduplicate interactively finds duplicate files and offers to
|
||||||
|
// delete all but one or rename them to be different. Only useful with
|
||||||
|
// Google Drive which can have duplicate file names.
|
||||||
|
func Deduplicate(f Fs) error {
|
||||||
|
mover, ok := f.(Mover)
|
||||||
|
if !ok {
|
||||||
|
return fmt.Errorf("%v can't Move files", f)
|
||||||
|
}
|
||||||
|
Log(f, "Looking for duplicates")
|
||||||
|
files := map[string][]Object{}
|
||||||
|
for o := range f.List() {
|
||||||
|
remote := o.Remote()
|
||||||
|
files[remote] = append(files[remote], o)
|
||||||
|
}
|
||||||
|
for remote, objs := range files {
|
||||||
|
if len(objs) > 1 {
|
||||||
|
fmt.Printf("%s: Found %d duplicates\n", remote, len(objs))
|
||||||
|
for i, o := range objs {
|
||||||
|
md5sum, err := o.Hash(HashMD5)
|
||||||
|
if err != nil {
|
||||||
|
md5sum = err.Error()
|
||||||
|
}
|
||||||
|
fmt.Printf(" %d: %12d bytes, %s, md5sum %32s\n", i+1, o.Size(), o.ModTime().Format("2006-01-02 15:04:05.000000000"), md5sum)
|
||||||
|
}
|
||||||
|
switch Command([]string{"sSkip and do nothing", "kKeep just one (choose which in next step)", "rRename all to be different (by changing file.jpg to file-1.jpg)"}) {
|
||||||
|
case 's':
|
||||||
|
case 'k':
|
||||||
|
keep := ChooseNumber("Enter the number of the file to keep", 1, len(objs))
|
||||||
|
deleted := 0
|
||||||
|
for i, o := range objs {
|
||||||
|
if i+1 == keep {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
err := o.Remove()
|
||||||
|
if err != nil {
|
||||||
|
ErrorLog(o, "Failed to delete: %v", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
deleted++
|
||||||
|
}
|
||||||
|
fmt.Printf("%s: Deleted %d extra copies\n", remote, deleted)
|
||||||
|
case 'r':
|
||||||
|
ext := path.Ext(remote)
|
||||||
|
base := remote[:len(remote)-len(ext)]
|
||||||
|
for i, o := range objs {
|
||||||
|
newName := fmt.Sprintf("%s-%d%s", base, i+1, ext)
|
||||||
|
newObj, err := mover.Move(o, newName)
|
||||||
|
if err != nil {
|
||||||
|
ErrorLog(o, "Failed to rename: %v", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
fmt.Printf("%v: renamed from: %v\n", newObj, o)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
13
rclone.go
13
rclone.go
@ -240,6 +240,19 @@ var Commands = []Command{
|
|||||||
MinArgs: 2,
|
MinArgs: 2,
|
||||||
MaxArgs: 2,
|
MaxArgs: 2,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
Name: "dedupe",
|
||||||
|
ArgsHelp: "remote:path",
|
||||||
|
Help: `
|
||||||
|
Interactively find duplicate files and offer to delete all
|
||||||
|
but one or rename them to be different. Only useful with
|
||||||
|
Google Drive which can have duplicate file names.`,
|
||||||
|
Run: func(fdst, fsrc fs.Fs) error {
|
||||||
|
return fs.Deduplicate(fdst)
|
||||||
|
},
|
||||||
|
MinArgs: 1,
|
||||||
|
MaxArgs: 1,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
Name: "config",
|
Name: "config",
|
||||||
Help: `
|
Help: `
|
||||||
|
Loading…
Reference in New Issue
Block a user