mirror of
https://github.com/rclone/rclone.git
synced 2025-01-22 06:09:21 +01:00
dedupe command to deduplicate a remote. Useful with google drive - fixes #41
This commit is contained in:
parent
1373efaa39
commit
0f73129ab7
@ -136,6 +136,47 @@ Checks the files in the source and destination match. It
|
||||
compares sizes and MD5SUMs and prints a report of files which
|
||||
don't match. It doesn't alter the source or destination.
|
||||
|
||||
### rclone dedupe remote:path ###
|
||||
|
||||
Interactively find duplicate files and offer to delete all but one or
|
||||
rename them to be different. Only useful with Google Drive which can
|
||||
have duplicate file names.
|
||||
|
||||
```
|
||||
$ rclone dedupe drive:dupes
|
||||
2016/01/31 14:13:11 Google drive root 'dupes': Looking for duplicates
|
||||
two.txt: Found 3 duplicates
|
||||
1: 564374 bytes, 2016-01-31 14:07:22.159000000, md5sum 7594e7dc9fc28f727c42ee3e0749de81
|
||||
2: 1744073 bytes, 2016-01-31 14:07:12.490000000, md5sum 851957f7fb6f0bc4ce76be966d336802
|
||||
3: 6048320 bytes, 2016-01-31 14:07:02.111000000, md5sum 1eedaa9fe86fd4b8632e2ac549403b36
|
||||
s) Skip and do nothing
|
||||
k) Keep just one (choose which in next step)
|
||||
r) Rename all to be different (by changing file.jpg to file-1.jpg)
|
||||
s/k/r> r
|
||||
two-1.txt: renamed from: two.txt
|
||||
two-2.txt: renamed from: two.txt
|
||||
two-3.txt: renamed from: two.txt
|
||||
one.txt: Found 2 duplicates
|
||||
1: 6579 bytes, 2016-01-31 14:05:01.235000000, md5sum 2b76c776249409d925ae7ccd49aea59b
|
||||
2: 6579 bytes, 2016-01-31 12:50:30.318000000, md5sum 2b76c776249409d925ae7ccd49aea59b
|
||||
s) Skip and do nothing
|
||||
k) Keep just one (choose which in next step)
|
||||
r) Rename all to be different (by changing file.jpg to file-1.jpg)
|
||||
s/k/r> k
|
||||
Enter the number of the file to keep> 2
|
||||
one.txt: Deleted 1 extra copies
|
||||
```
|
||||
|
||||
The result being
|
||||
|
||||
```
|
||||
$ rclone lsl drive:dupes
|
||||
564374 2016-01-31 14:07:22.159000000 two-1.txt
|
||||
1744073 2016-01-31 14:07:12.490000000 two-2.txt
|
||||
6048320 2016-01-31 14:07:02.111000000 two-3.txt
|
||||
6579 2016-01-31 12:50:30.318000000 one.txt
|
||||
```
|
||||
|
||||
### rclone config ###
|
||||
|
||||
Enter an interactive configuration session.
|
||||
|
19
fs/config.go
19
fs/config.go
@ -413,6 +413,25 @@ func Choose(what string, defaults, help []string, newOk bool) string {
|
||||
}
|
||||
}
|
||||
|
||||
// ChooseNumber asks the user to enter a number between min and max
|
||||
// inclusive prompting them with what.
|
||||
func ChooseNumber(what string, min, max int) int {
|
||||
for {
|
||||
fmt.Printf("%s> ", what)
|
||||
result := ReadLine()
|
||||
i, err := strconv.Atoi(result)
|
||||
if err != nil {
|
||||
fmt.Printf("Bad number: %v\n", err)
|
||||
continue
|
||||
}
|
||||
if i < min || i > max {
|
||||
fmt.Printf("Out of range - %d to %d inclusive\n", min, max)
|
||||
continue
|
||||
}
|
||||
return i
|
||||
}
|
||||
}
|
||||
|
||||
// ShowRemote shows the contents of the remote
|
||||
func ShowRemote(name string) {
|
||||
fmt.Printf("--------------------\n")
|
||||
|
@ -908,3 +908,62 @@ func Delete(f Fs) error {
|
||||
close(delete)
|
||||
return err
|
||||
}
|
||||
|
||||
// Deduplicate interactively finds duplicate files and offers to
|
||||
// delete all but one or rename them to be different. Only useful with
|
||||
// Google Drive which can have duplicate file names.
|
||||
func Deduplicate(f Fs) error {
|
||||
mover, ok := f.(Mover)
|
||||
if !ok {
|
||||
return fmt.Errorf("%v can't Move files", f)
|
||||
}
|
||||
Log(f, "Looking for duplicates")
|
||||
files := map[string][]Object{}
|
||||
for o := range f.List() {
|
||||
remote := o.Remote()
|
||||
files[remote] = append(files[remote], o)
|
||||
}
|
||||
for remote, objs := range files {
|
||||
if len(objs) > 1 {
|
||||
fmt.Printf("%s: Found %d duplicates\n", remote, len(objs))
|
||||
for i, o := range objs {
|
||||
md5sum, err := o.Hash(HashMD5)
|
||||
if err != nil {
|
||||
md5sum = err.Error()
|
||||
}
|
||||
fmt.Printf(" %d: %12d bytes, %s, md5sum %32s\n", i+1, o.Size(), o.ModTime().Format("2006-01-02 15:04:05.000000000"), md5sum)
|
||||
}
|
||||
switch Command([]string{"sSkip and do nothing", "kKeep just one (choose which in next step)", "rRename all to be different (by changing file.jpg to file-1.jpg)"}) {
|
||||
case 's':
|
||||
case 'k':
|
||||
keep := ChooseNumber("Enter the number of the file to keep", 1, len(objs))
|
||||
deleted := 0
|
||||
for i, o := range objs {
|
||||
if i+1 == keep {
|
||||
continue
|
||||
}
|
||||
err := o.Remove()
|
||||
if err != nil {
|
||||
ErrorLog(o, "Failed to delete: %v", err)
|
||||
continue
|
||||
}
|
||||
deleted++
|
||||
}
|
||||
fmt.Printf("%s: Deleted %d extra copies\n", remote, deleted)
|
||||
case 'r':
|
||||
ext := path.Ext(remote)
|
||||
base := remote[:len(remote)-len(ext)]
|
||||
for i, o := range objs {
|
||||
newName := fmt.Sprintf("%s-%d%s", base, i+1, ext)
|
||||
newObj, err := mover.Move(o, newName)
|
||||
if err != nil {
|
||||
ErrorLog(o, "Failed to rename: %v", err)
|
||||
continue
|
||||
}
|
||||
fmt.Printf("%v: renamed from: %v\n", newObj, o)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
13
rclone.go
13
rclone.go
@ -240,6 +240,19 @@ var Commands = []Command{
|
||||
MinArgs: 2,
|
||||
MaxArgs: 2,
|
||||
},
|
||||
{
|
||||
Name: "dedupe",
|
||||
ArgsHelp: "remote:path",
|
||||
Help: `
|
||||
Interactively find duplicate files and offer to delete all
|
||||
but one or rename them to be different. Only useful with
|
||||
Google Drive which can have duplicate file names.`,
|
||||
Run: func(fdst, fsrc fs.Fs) error {
|
||||
return fs.Deduplicate(fdst)
|
||||
},
|
||||
MinArgs: 1,
|
||||
MaxArgs: 1,
|
||||
},
|
||||
{
|
||||
Name: "config",
|
||||
Help: `
|
||||
|
Loading…
Reference in New Issue
Block a user