2016-08-05 18:12:27 +02:00
|
|
|
package dedupe
|
|
|
|
|
|
|
|
import (
|
2019-06-17 10:34:30 +02:00
|
|
|
"context"
|
2016-08-05 18:12:27 +02:00
|
|
|
"log"
|
|
|
|
|
2019-07-28 19:47:38 +02:00
|
|
|
"github.com/rclone/rclone/cmd"
|
2019-10-11 17:55:04 +02:00
|
|
|
"github.com/rclone/rclone/fs/config/flags"
|
2019-07-28 19:47:38 +02:00
|
|
|
"github.com/rclone/rclone/fs/operations"
|
2016-08-05 18:12:27 +02:00
|
|
|
"github.com/spf13/cobra"
|
|
|
|
)
|
|
|
|
|
|
|
|
var (
|
2018-01-12 17:30:54 +01:00
|
|
|
dedupeMode = operations.DeduplicateInteractive
|
2016-08-05 18:12:27 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
func init() {
|
2019-10-11 17:58:11 +02:00
|
|
|
cmd.Root.AddCommand(commandDefinition)
|
2019-10-11 17:55:04 +02:00
|
|
|
cmdFlag := commandDefinition.Flags()
|
2020-01-19 12:09:45 +01:00
|
|
|
flags.FVarP(cmdFlag, &dedupeMode, "dedupe-mode", "", "Dedupe mode interactive|skip|first|newest|oldest|largest|smallest|rename.")
|
2016-08-05 18:12:27 +02:00
|
|
|
}
|
|
|
|
|
2019-10-11 17:58:11 +02:00
|
|
|
// commandDefinition describes the dedupe command: its usage line, short and
// long help text, and the Run handler that performs the deduplication. It is
// added to cmd.Root in init.
var commandDefinition = &cobra.Command{
|
2016-08-05 18:12:27 +02:00
|
|
|
Use: "dedupe [mode] remote:path",
|
2017-10-16 10:00:50 +02:00
|
|
|
Short: `Interactively find duplicate files and delete/rename them.`,
|
2016-08-05 18:12:27 +02:00
|
|
|
Long: `
|
2017-07-24 12:13:32 +02:00
|
|
|
By default ` + "`" + `dedupe` + "`" + ` interactively finds duplicate files and offers to
|
2016-08-05 18:12:27 +02:00
|
|
|
delete all but one or rename them to be different. Only useful with
|
|
|
|
Google Drive which can have duplicate file names.
|
|
|
|
|
2017-08-02 22:34:22 +02:00
|
|
|
In the first pass it will merge directories with the same name. It
|
|
|
|
will do this iteratively until all the identical directories have been
|
|
|
|
merged.
|
|
|
|
|
2016-08-05 18:12:27 +02:00
|
|
|
The ` + "`" + `dedupe` + "`" + ` command will delete all but one of any identical (same
|
|
|
|
md5sum) files it finds without confirmation. This means that for most
|
2020-06-05 18:04:23 +02:00
|
|
|
duplicated files the ` + "`" + `dedupe` + "`" + ` command will not be interactive.
|
|
|
|
|
|
|
|
**Important**: Since this can cause data loss, test first with the
|
|
|
|
` + "`--dry-run` or the `--interactive`/`-i`" + ` flag.
|
2016-08-05 18:12:27 +02:00
|
|
|
|
|
|
|
Here is an example run.
|
|
|
|
|
|
|
|
Before - with duplicates
|
|
|
|
|
|
|
|
$ rclone lsl drive:dupes
|
|
|
|
6048320 2016-03-05 16:23:16.798000000 one.txt
|
|
|
|
6048320 2016-03-05 16:23:11.775000000 one.txt
|
|
|
|
564374 2016-03-05 16:23:06.731000000 one.txt
|
|
|
|
6048320 2016-03-05 16:18:26.092000000 one.txt
|
|
|
|
6048320 2016-03-05 16:22:46.185000000 two.txt
|
|
|
|
1744073 2016-03-05 16:22:38.104000000 two.txt
|
|
|
|
564374 2016-03-05 16:22:52.118000000 two.txt
|
|
|
|
|
|
|
|
Now the ` + "`" + `dedupe` + "`" + ` session
|
|
|
|
|
|
|
|
$ rclone dedupe drive:dupes
|
|
|
|
2016/03/05 16:24:37 Google drive root 'dupes': Looking for duplicates using interactive mode.
|
|
|
|
one.txt: Found 4 duplicates - deleting identical copies
|
|
|
|
one.txt: Deleting 2/3 identical duplicates (md5sum "1eedaa9fe86fd4b8632e2ac549403b36")
|
|
|
|
one.txt: 2 duplicates remain
|
|
|
|
1: 6048320 bytes, 2016-03-05 16:23:16.798000000, md5sum 1eedaa9fe86fd4b8632e2ac549403b36
|
|
|
|
2: 564374 bytes, 2016-03-05 16:23:06.731000000, md5sum 7594e7dc9fc28f727c42ee3e0749de81
|
|
|
|
s) Skip and do nothing
|
|
|
|
k) Keep just one (choose which in next step)
|
|
|
|
r) Rename all to be different (by changing file.jpg to file-1.jpg)
|
|
|
|
s/k/r> k
|
|
|
|
Enter the number of the file to keep> 1
|
|
|
|
one.txt: Deleted 1 extra copies
|
|
|
|
two.txt: Found 3 duplicates - deleting identical copies
|
|
|
|
two.txt: 3 duplicates remain
|
|
|
|
1: 564374 bytes, 2016-03-05 16:22:52.118000000, md5sum 7594e7dc9fc28f727c42ee3e0749de81
|
|
|
|
2: 6048320 bytes, 2016-03-05 16:22:46.185000000, md5sum 1eedaa9fe86fd4b8632e2ac549403b36
|
|
|
|
3: 1744073 bytes, 2016-03-05 16:22:38.104000000, md5sum 851957f7fb6f0bc4ce76be966d336802
|
|
|
|
s) Skip and do nothing
|
|
|
|
k) Keep just one (choose which in next step)
|
|
|
|
r) Rename all to be different (by changing file.jpg to file-1.jpg)
|
|
|
|
s/k/r> r
|
|
|
|
two-1.txt: renamed from: two.txt
|
|
|
|
two-2.txt: renamed from: two.txt
|
|
|
|
two-3.txt: renamed from: two.txt
|
|
|
|
|
|
|
|
The result being
|
|
|
|
|
|
|
|
$ rclone lsl drive:dupes
|
|
|
|
6048320 2016-03-05 16:23:16.798000000 one.txt
|
|
|
|
564374 2016-03-05 16:22:52.118000000 two-1.txt
|
|
|
|
6048320 2016-03-05 16:22:46.185000000 two-2.txt
|
|
|
|
1744073 2016-03-05 16:22:38.104000000 two-3.txt
|
|
|
|
|
|
|
|
Dedupe can be run non interactively using the ` + "`" + `--dedupe-mode` + "`" + ` flag or by using an extra parameter with the same value
|
|
|
|
|
|
|
|
* ` + "`" + `--dedupe-mode interactive` + "`" + ` - interactive as above.
|
|
|
|
* ` + "`" + `--dedupe-mode skip` + "`" + ` - removes identical files then skips anything left.
|
|
|
|
* ` + "`" + `--dedupe-mode first` + "`" + ` - removes identical files then keeps the first one.
|
|
|
|
* ` + "`" + `--dedupe-mode newest` + "`" + ` - removes identical files then keeps the newest one.
|
|
|
|
* ` + "`" + `--dedupe-mode oldest` + "`" + ` - removes identical files then keeps the oldest one.
|
2018-04-21 23:57:08 +02:00
|
|
|
* ` + "`" + `--dedupe-mode largest` + "`" + ` - removes identical files then keeps the largest one.
|
2020-01-16 14:47:15 +01:00
|
|
|
* ` + "`" + `--dedupe-mode smallest` + "`" + ` - removes identical files then keeps the smallest one.
|
2016-08-05 18:12:27 +02:00
|
|
|
* ` + "`" + `--dedupe-mode rename` + "`" + ` - removes identical files then renames the rest to be different.
|
|
|
|
|
|
|
|
For example to rename all the identically named photos in your Google Photos directory, do
|
|
|
|
|
|
|
|
rclone dedupe --dedupe-mode rename "drive:Google Photos"
|
|
|
|
|
|
|
|
Or
|
|
|
|
|
|
|
|
rclone dedupe rename "drive:Google Photos"
|
|
|
|
`,
|
|
|
|
// Run handles both "dedupe remote:path" and "dedupe mode remote:path".
Run: func(command *cobra.Command, args []string) {
|
|
|
|
// Argument count is checked with bounds 1 and 2; how a violation is
// reported is decided inside cmd.CheckArgs, which is not visible here.
cmd.CheckArgs(1, 2, command, args)
|
|
|
|
// With two arguments the first one is the dedupe mode: parse it into the
// package-level dedupeMode, then drop it so only the remote path remains.
if len(args) > 1 {
|
|
|
|
err := dedupeMode.Set(args[0])
|
|
|
|
if err != nil {
|
|
|
|
// A mode that fails to parse is fatal: log.Fatal logs the error and exits.
log.Fatal(err)
|
|
|
|
}
|
|
|
|
args = args[1:]
|
|
|
|
}
|
|
|
|
// NOTE(review): presumably resolves the remaining argument to the Fs that
// will be deduplicated - confirm against cmd.NewFsSrc.
fdst := cmd.NewFsSrc(args)
|
2016-12-04 17:52:24 +01:00
|
|
|
// NOTE(review): the two false arguments are cmd.Run options whose meaning
// is not visible in this file - confirm before changing them.
cmd.Run(false, false, command, func() error {
|
2019-06-17 10:34:30 +02:00
|
|
|
return operations.Deduplicate(context.Background(), fdst, dedupeMode)
|
2016-08-05 18:12:27 +02:00
|
|
|
})
|
|
|
|
},
|
|
|
|
}
|