operations: add operations/check to the rc API

Fixes #7015
This commit is contained in:
Nick Craig-Wood 2023-05-24 10:51:30 +01:00
parent c12085b265
commit a752563842
3 changed files with 308 additions and 5 deletions

View File

@ -24,15 +24,18 @@ func init() {
} }
var commandDefinition = &cobra.Command{ var commandDefinition = &cobra.Command{
Use: "checksum <hash> sumfile src:path", Use: "checksum <hash> sumfile dst:path",
Short: `Checks the files in the source against a SUM file.`, Short: `Checks the files in the destination against a SUM file.`,
Long: strings.ReplaceAll(` Long: strings.ReplaceAll(`
Checks that hashsums of source files match the SUM file. Checks that hashsums of destination files match the SUM file.
It compares hashes (MD5, SHA1, etc) and logs a report of files which It compares hashes (MD5, SHA1, etc) and logs a report of files which
don't match. It doesn't alter the file system. don't match. It doesn't alter the file system.
If you supply the |--download| flag, it will download the data from remote The sumfile is treated as the source and the dst:path is treated as
and calculate the contents hash on the fly. This can be useful for remotes the destination for the purposes of the output.
If you supply the |--download| flag, it will download the data from the remote
and calculate the content hash on the fly. This can be useful for remotes
that don't support hashes or if you really want to check all the data. that don't support hashes or if you really want to check all the data.
Note that hash values in the SUM file are treated as case insensitive. Note that hash values in the SUM file are treated as case insensitive.

View File

@ -2,6 +2,7 @@ package operations
import ( import (
"context" "context"
"errors"
"fmt" "fmt"
"io" "io"
"mime" "mime"
@ -13,6 +14,7 @@ import (
"github.com/rclone/rclone/fs" "github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/config" "github.com/rclone/rclone/fs/config"
"github.com/rclone/rclone/fs/hash"
"github.com/rclone/rclone/fs/rc" "github.com/rclone/rclone/fs/rc"
"github.com/rclone/rclone/lib/diskusage" "github.com/rclone/rclone/lib/diskusage"
) )
@ -697,3 +699,180 @@ func rcDu(ctx context.Context, in rc.Params) (out rc.Params, err error) {
} }
return out, nil return out, nil
} }
func init() {
rc.Add(rc.Call{
Path: "operations/check",
AuthRequired: true,
Fn: rcCheck,
Title: "check the source and destination are the same",
Help: `Checks the files in the source and destination match. It compares
sizes and hashes and logs a report of files that don't
match. It doesn't alter the source or destination.
This takes the following parameters:
- srcFs - a remote name string e.g. "drive:" for the source, "/" for local filesystem
- dstFs - a remote name string e.g. "drive2:" for the destination, "/" for local filesystem
- download - check by downloading rather than with hash
- checkFileHash - treat checkFileFs:checkFileRemote as a SUM file with hashes of given type
- checkFileFs - treat checkFileFs:checkFileRemote as a SUM file with hashes of given type
- checkFileRemote - treat checkFileFs:checkFileRemote as a SUM file with hashes of given type
- oneWay - check one way only, source files must exist on remote
- combined - make a combined report of changes (default false)
- missingOnSrc - report all files missing from the source (default true)
- missingOnDst - report all files missing from the destination (default true)
- match - report all matching files (default false)
- differ - report all non-matching files (default true)
- error - report all files with errors (hashing or reading) (default true)
If you supply the download flag, it will download the data from
both remotes and check them against each other on the fly. This can
be useful for remotes that don't support hashes or if you really want
to check all the data.
If you supply the size-only global flag, it will only compare the sizes not
the hashes as well. Use this for a quick check.
If you supply the checkFileHash option with a valid hash name, the
checkFileFs:checkFileRemote must point to a text file in the SUM
format. This treats the checksum file as the source and dstFs as the
destination. Note that srcFs is not used and should not be supplied in
this case.
Returns:
- success - true if no error, false otherwise
- status - textual summary of check, OK or text string
- hashType - hash used in check, may be missing
- combined - array of strings of combined report of changes
- missingOnSrc - array of strings of all files missing from the source
- missingOnDst - array of strings of all files missing from the destination
- match - array of strings of all matching files
- differ - array of strings of all non-matching files
- error - array of strings of all files with errors (hashing or reading)
`,
})
}
// Writer which writes into the slice provided
type stringWriter struct {
out *[]string
}
// Write writes len(p) bytes from p to the underlying data stream. It returns
// the number of bytes written from p (0 <= n <= len(p)) and any error
// encountered that caused the write to stop early. Write must return a non-nil
// error if it returns n < len(p). Write must not modify the slice data,
// even temporarily.
//
// Implementations must not retain p.
func (s stringWriter) Write(p []byte) (n int, err error) {
result := string(p)
result = strings.TrimSuffix(result, "\n")
*s.out = append(*s.out, result)
return len(p), nil
}
// Check two directories
func rcCheck(ctx context.Context, in rc.Params) (out rc.Params, err error) {
srcFs, err := rc.GetFsNamed(ctx, in, "srcFs")
if err != nil && !rc.IsErrParamNotFound(err) {
return nil, err
}
dstFs, err := rc.GetFsNamed(ctx, in, "dstFs")
if err != nil {
return nil, err
}
checkFileFs, checkFileRemote, err := rc.GetFsAndRemoteNamed(ctx, in, "checkFileFs", "checkFileRemote")
if err != nil && !rc.IsErrParamNotFound(err) {
return nil, err
}
checkFileHash, err := in.GetString("checkFileHash")
if err != nil && !rc.IsErrParamNotFound(err) {
return nil, err
}
checkFileSet := 0
if checkFileHash != "" {
checkFileSet++
}
if checkFileFs != nil {
checkFileSet++
}
if checkFileRemote != "" {
checkFileSet++
}
if checkFileSet > 0 && checkFileSet < 3 {
return nil, fmt.Errorf("need all of checkFileFs, checkFileRemote, checkFileHash to be set together")
}
var checkFileHashType hash.Type
if checkFileHash != "" {
if err := checkFileHashType.Set(checkFileHash); err != nil {
return nil, err
}
if srcFs != nil {
return nil, rc.NewErrParamInvalid(errors.New("only supply dstFs when using checkFileHash"))
}
} else {
if srcFs == nil {
return nil, rc.NewErrParamInvalid(errors.New("need srcFs parameter when not using checkFileHash"))
}
}
oneway, _ := in.GetBool("oneway")
download, _ := in.GetBool("download")
opt := &CheckOpt{
Fsrc: srcFs,
Fdst: dstFs,
OneWay: oneway,
}
out = rc.Params{}
getOutput := func(name string, Default bool) io.Writer {
active, err := in.GetBool(name)
if err != nil {
active = Default
}
if !active {
return nil
}
result := []string{}
out[name] = &result
return stringWriter{&result}
}
opt.Combined = getOutput("combined", false)
opt.MissingOnSrc = getOutput("missingOnSrc", true)
opt.MissingOnDst = getOutput("missingOnDst", true)
opt.Match = getOutput("match", false)
opt.Differ = getOutput("differ", true)
opt.Error = getOutput("error", true)
if checkFileHash != "" {
out["hashType"] = checkFileHashType.String()
err = CheckSum(context.Background(), dstFs, checkFileFs, checkFileRemote, checkFileHashType, opt, download)
} else {
if download {
err = CheckDownload(context.Background(), opt)
} else {
out["hashType"] = srcFs.Hashes().Overlap(dstFs.Hashes()).GetOne().String()
err = Check(context.Background(), opt)
}
}
if err != nil {
out["status"] = err.Error()
out["success"] = false
} else {
out["status"] = "OK"
out["success"] = true
}
return out, nil
}

View File

@ -7,6 +7,8 @@ import (
"net/url" "net/url"
"os" "os"
"path" "path"
"sort"
"strings"
"testing" "testing"
"time" "time"
@ -658,3 +660,122 @@ func TestRcDu(t *testing.T) {
assert.True(t, info.Total > info.Available) assert.True(t, info.Total > info.Available)
assert.True(t, info.Free >= info.Available) assert.True(t, info.Free >= info.Available)
} }
// operations/check: check the source and destination are the same
func TestRcCheck(t *testing.T) {
ctx := context.Background()
r, call := rcNewRun(t, "operations/check")
r.Mkdir(ctx, r.Fremote)
MD5SUMS := `
0ef726ce9b1a7692357ff70dd321d595 file1
deadbeefcafe00000000000000000000 subdir/file2
0386a8b8fcf672c326845c00ba41b9e2 subdir/subsubdir/file4
`
file1 := r.WriteBoth(ctx, "file1", "file1 contents", t1)
file2 := r.WriteFile("subdir/file2", MD5SUMS, t2)
file3 := r.WriteObject(ctx, "subdir/subsubdir/file3", "file3 contents", t3)
file4a := r.WriteFile("subdir/subsubdir/file4", "file4 contents", t3)
file4b := r.WriteObject(ctx, "subdir/subsubdir/file4", "file4 different contents", t3)
// operations.HashLister(ctx, hash.MD5, false, false, r.Fremote, os.Stdout)
r.CheckLocalItems(t, file1, file2, file4a)
r.CheckRemoteItems(t, file1, file3, file4b)
pstring := func(items ...fstest.Item) *[]string {
xs := make([]string, len(items))
for i, item := range items {
xs[i] = item.Path
}
return &xs
}
for _, testName := range []string{"Normal", "Download"} {
t.Run(testName, func(t *testing.T) {
in := rc.Params{
"srcFs": r.LocalName,
"dstFs": r.FremoteName,
"combined": true,
"missingOnSrc": true,
"missingOnDst": true,
"match": true,
"differ": true,
"error": true,
}
if testName == "Download" {
in["download"] = true
}
out, err := call.Fn(ctx, in)
require.NoError(t, err)
combined := []string{
"= " + file1.Path,
"+ " + file2.Path,
"- " + file3.Path,
"* " + file4a.Path,
}
sort.Strings(combined)
sort.Strings(*out["combined"].(*[]string))
want := rc.Params{
"missingOnSrc": pstring(file3),
"missingOnDst": pstring(file2),
"differ": pstring(file4a),
"error": pstring(),
"match": pstring(file1),
"combined": &combined,
"status": "3 differences found",
"success": false,
}
if testName == "Normal" {
want["hashType"] = "md5"
}
assert.Equal(t, want, out)
})
}
t.Run("CheckFile", func(t *testing.T) {
// The checksum file is treated as the source and srcFs is not used
in := rc.Params{
"dstFs": r.FremoteName,
"combined": true,
"missingOnSrc": true,
"missingOnDst": true,
"match": true,
"differ": true,
"error": true,
"checkFileFs": r.LocalName,
"checkFileRemote": file2.Path,
"checkFileHash": "md5",
}
out, err := call.Fn(ctx, in)
require.NoError(t, err)
combined := []string{
"= " + file1.Path,
"+ " + file2.Path,
"- " + file3.Path,
"* " + file4a.Path,
}
sort.Strings(combined)
sort.Strings(*out["combined"].(*[]string))
if strings.HasPrefix(out["status"].(string), "file not in") {
out["status"] = "file not in"
}
want := rc.Params{
"missingOnSrc": pstring(file3),
"missingOnDst": pstring(file2),
"differ": pstring(file4a),
"error": pstring(),
"match": pstring(file1),
"combined": &combined,
"hashType": "md5",
"status": "file not in",
"success": false,
}
assert.Equal(t, want, out)
})
}