check: Add --download flag to check all the data, not just hashes

This commit is contained in:
Nick Craig-Wood 2017-02-13 10:48:26 +00:00
parent 370f242fa2
commit 541929258b
4 changed files with 245 additions and 34 deletions

View File

@ -6,24 +6,39 @@ import (
"github.com/spf13/cobra"
)
// Command line flag storage for the check command.
var (
	// download is bound to the --download flag in init. When it is
	// set the command runs fs.CheckDownload rather than fs.Check.
	download bool
)
// init attaches the check command to the root command and declares
// its --download flag, using the current value of download as the
// default.
func init() {
	cmd.Root.AddCommand(commandDefintion)
	flags := commandDefintion.Flags()
	flags.BoolVarP(&download, "download", "", download, "Check by downloading rather than with hash.")
}
var commandDefintion = &cobra.Command{
Use: "check source:path dest:path",
Short: `Checks the files in the source and destination match.`,
Long: `
Checks the files in the source and destination match. It
compares sizes and MD5SUMs and prints a report of files which
don't match. It doesn't alter the source or destination.
Checks the files in the source and destination match. It compares
sizes and hashes (MD5 or SHA1) and logs a report of files which don't
match. It doesn't alter the source or destination.
` + "`" + `--size-only` + "`" + ` may be used to only compare the sizes, not the MD5SUMs.
If you supply the --size-only flag, it will only compare the sizes not
the hashes as well. Use this for a quick check.
If you supply the --download flag, it will download the data from
both remotes and check them against each other on the fly. This can
be useful for remotes that don't support hashes or if you really want
to check all the data.
`,
Run: func(command *cobra.Command, args []string) {
cmd.CheckArgs(2, 2, command, args)
fsrc, fdst := cmd.NewFsSrcDst(args)
cmd.Run(false, false, command, func() error {
if download {
return fs.CheckDownload(fdst, fsrc)
}
return fs.Check(fdst, fsrc)
})
},

View File

@ -68,13 +68,6 @@ func cryptCheck(fdst, fsrc fs.Fs) error {
// it returns true if differences were found
// it also returns whether it couldn't be hashed
checkIdentical := func(dst, src fs.Object) (differ bool, noHash bool) {
fs.Stats.Checking(src.Remote())
defer fs.Stats.DoneChecking(src.Remote())
if src.Size() != dst.Size() {
fs.Stats.Error()
fs.Errorf(src, "Sizes differ")
return true, false
}
cryptDst := dst.(*crypt.Object)
underlyingDst := cryptDst.UnWrap()
underlyingHash, err := underlyingDst.Hash(hashType)

View File

@ -3,6 +3,7 @@
package fs
import (
"bytes"
"fmt"
"io"
"log"
@ -669,14 +670,6 @@ func Overlapping(fdst, fsrc Info) bool {
// it returns true if differences were found
// it also returns whether it couldn't be hashed
func checkIdentical(dst, src Object) (differ bool, noHash bool) {
Stats.Checking(src.Remote())
defer Stats.DoneChecking(src.Remote())
if src.Size() != dst.Size() {
Stats.Error()
Errorf(src, "Sizes differ")
return true, false
}
if !Config.SizeOnly {
same, hash, err := CheckHashes(src, dst)
if err != nil {
// CheckHashes will log and count errors
@ -690,8 +683,6 @@ func checkIdentical(dst, src Object) (differ bool, noHash bool) {
Errorf(src, "%v differ", hash)
return true, false
}
}
Debugf(src, "OK")
return false, false
}
@ -746,15 +737,31 @@ func CheckFn(fdst, fsrc Fs, checkFunction func(a, b Object) (differ bool, noHash
close(checks)
}()
checkIdentical := func(dst, src Object) (differ bool, noHash bool) {
Stats.Checking(src.Remote())
defer Stats.DoneChecking(src.Remote())
if src.Size() != dst.Size() {
Stats.Error()
Errorf(src, "Sizes differ")
return true, false
}
if Config.SizeOnly {
return false, false
}
return checkFunction(dst, src)
}
var checkerWg sync.WaitGroup
checkerWg.Add(Config.Checkers)
for i := 0; i < Config.Checkers; i++ {
go func() {
defer checkerWg.Done()
for check := range checks {
differ, noHash := checkFunction(check[0], check[1])
differ, noHash := checkIdentical(check[0], check[1])
if differ {
atomic.AddInt32(&differences, 1)
} else {
Debugf(check[0], "OK")
}
if noHash {
atomic.AddInt32(&noHashes, 1)
@ -780,6 +787,88 @@ func Check(fdst, fsrc Fs) error {
return CheckFn(fdst, fsrc, checkIdentical)
}
// ReadFill fills buf from r, calling r.Read repeatedly until either
// buf is full or a Read reports an error.
//
// It returns the number of bytes read and the error from the final
// Read. err is nil only if buf was filled without any Read reporting
// an error.
//
// This differs from io.ReadFull in that the reader's own error
// (typically io.EOF) is passed through unchanged after a short read,
// so it suits callers that want as much data as is available rather
// than an exact block size.
func ReadFill(r io.Reader, buf []byte) (n int, err error) {
	for n < len(buf) {
		got, readErr := r.Read(buf[n:])
		n += got
		if readErr != nil {
			// stop at the first error, keeping whatever was read with it
			return n, readErr
		}
	}
	return n, nil
}
// CheckEqualReaders reads in1 and in2 to the end, block by block, and
// reports whether their contents are different.
//
// differ is true as soon as a block from one stream does not match
// the corresponding block from the other in length or content. If
// either stream fails with anything other than io.EOF the result is
// (true, that error), with an error from in1 taking precedence over
// one from in2. (false, nil) is returned only when both streams reach
// io.EOF having produced identical data.
func CheckEqualReaders(in1, in2 io.Reader) (differ bool, err error) {
	const blockSize = 64 * 1024
	blockA := make([]byte, blockSize)
	blockB := make([]byte, blockSize)
	for finished := false; !finished; {
		// Always read a block from both streams before looking at errors
		lenA, errA := ReadFill(in1, blockA)
		lenB, errB := ReadFill(in2, blockB)

		// Real read failures win over any data comparison
		switch {
		case errA != nil && errA != io.EOF:
			return true, errA
		case errB != nil && errB != io.EOF:
			return true, errB
		}

		// From here on errA and errB are each either nil or io.EOF
		if lenA != lenB || !bytes.Equal(blockA[:lenA], blockB[:lenB]) {
			return true, nil
		}

		// Stop only once both streams have reported end of file
		finished = errA == io.EOF && errB == io.EOF
	}
	return false, nil
}
// CheckIdentical opens dst and src and compares the data read from
// them with CheckEqualReaders.
//
// differ is true if the contents do not match, or if either object
// could not be opened or read - in which case err says why. Both
// streams are wrapped with NewAccountWithBuffer before being compared
// and are handed to CheckClose, together with a pointer to err, when
// the function returns.
func CheckIdentical(dst, src Object) (differ bool, err error) {
	dstIn, err := dst.Open()
	if err != nil {
		return true, errors.Wrapf(err, "failed to open %q", dst)
	}
	dstIn = NewAccountWithBuffer(dstIn, dst) // account and buffer the transfer
	defer CheckClose(dstIn, &err)

	srcIn, err := src.Open()
	if err != nil {
		return true, errors.Wrapf(err, "failed to open %q", src)
	}
	srcIn = NewAccountWithBuffer(srcIn, src) // account and buffer the transfer
	defer CheckClose(srcIn, &err)

	return CheckEqualReaders(dstIn, srcIn)
}
// CheckDownload checks the files in fsrc and fdst by reading the
// contents of each pair of objects and comparing them.
//
// It runs CheckFn with a comparison built on CheckIdentical. If a
// pair cannot be compared the error is counted with Stats.Error and
// logged against the first object, and the pair is reported as both
// different and not checkable.
func CheckDownload(fdst, fsrc Fs) error {
	return CheckFn(fdst, fsrc, func(dst, src Object) (differ bool, noHash bool) {
		var err error
		if differ, err = CheckIdentical(dst, src); err == nil {
			return differ, false
		}
		Stats.Error()
		Errorf(dst, "Failed to download: %v", err)
		return true, true
	})
}
// ListFn lists the Fs to the supplied function
//
// Lists in parallel which may get them out of order

View File

@ -21,8 +21,10 @@ package fs_test
import (
"bytes"
"errors"
"flag"
"fmt"
"io"
"io/ioutil"
"log"
"os"
@ -469,14 +471,14 @@ func TestDelete(t *testing.T) {
fstest.CheckItems(t, r.fremote, file3)
}
func TestCheck(t *testing.T) {
func testCheck(t *testing.T, checkFunction func(fdst, fsrc fs.Fs) error) {
r := NewRun(t)
defer r.Finalise()
check := func(i int, wantErrors int64) {
fs.Debugf(r.fremote, "%d: Starting check test", i)
oldErrors := fs.Stats.GetErrors()
err := fs.Check(r.flocal, r.fremote)
err := checkFunction(r.flocal, r.fremote)
gotErrors := fs.Stats.GetErrors() - oldErrors
if wantErrors == 0 && err != nil {
t.Errorf("%d: Got error when not expecting one: %v", i, err)
@ -517,6 +519,14 @@ func TestCheck(t *testing.T) {
check(5, 0)
}
// TestCheck runs the shared testCheck scenario with fs.Check as the
// function under test.
func TestCheck(t *testing.T) {
testCheck(t, fs.Check)
}
// TestCheckDownload runs the shared testCheck scenario with
// fs.CheckDownload as the function under test.
func TestCheckDownload(t *testing.T) {
testCheck(t, fs.CheckDownload)
}
func TestCheckSizeOnly(t *testing.T) {
fs.Config.SizeOnly = true
defer func() { fs.Config.SizeOnly = false }()
@ -954,3 +964,107 @@ func TestListDirSorted(t *testing.T) {
require.Len(t, items, 1)
assert.Equal(t, "sub dir/sub sub dir/", str(0))
}
// byteReader is a test reader which produces a descending run of
// bytes - c, c-1, ... 1 - handing out a single byte per Read call.
type byteReader struct {
	c byte // next byte to emit, 0 once the run is exhausted
}

// Read stores the next byte of the run in p[0] and returns 1.
//
// Once the run is exhausted it returns io.EOF. If p has no room and
// the run is not exhausted it returns 0, nil and leaves the reader
// unchanged.
func (br *byteReader) Read(p []byte) (n int, err error) {
	switch {
	case br.c == 0:
		return 0, io.EOF
	case len(p) == 0:
		return 0, nil
	}
	p[0] = br.c
	br.c--
	return 1, nil
}
// TestReadFill checks fs.ReadFill against byteReader sources that are
// empty, shorter than the buffer and longer than the buffer.
//
// The same buffer is reused for every case, so the expected contents
// include bytes left behind by the preceding case.
func TestReadFill(t *testing.T) {
	buf := []byte{9, 9, 9, 9, 9}
	for _, test := range []struct {
		start   byte
		wantErr error
		wantN   int
		wantBuf []byte
	}{
		{start: 0, wantErr: io.EOF, wantN: 0, wantBuf: []byte{9, 9, 9, 9, 9}},
		{start: 3, wantErr: io.EOF, wantN: 3, wantBuf: []byte{3, 2, 1, 9, 9}},
		{start: 8, wantErr: nil, wantN: 5, wantBuf: []byte{8, 7, 6, 5, 4}},
	} {
		n, err := fs.ReadFill(&byteReader{test.start}, buf)
		assert.Equal(t, test.wantErr, err)
		assert.Equal(t, test.wantN, n)
		assert.Equal(t, test.wantBuf, buf)
	}
}
// errorReader is a test reader whose Read never produces data and
// always returns the configured error.
type errorReader struct {
	err error
}

// Read returns zero bytes and the stored error without touching the
// supplied buffer.
func (r errorReader) Read(_ []byte) (int, error) {
	return 0, r.err
}
// TestCheckEqualReaders exercises fs.CheckEqualReaders with inputs
// that are larger than one 64k block.
//
// It first compares plain buffers which are identical, differ in the
// last byte, or differ in length. It then repeats the same pairings
// with a reader that fails with a sentinel error once its data runs
// out - first on the left hand side, then on the right - and expects
// the sentinel back along with differ == true.
func TestCheckEqualReaders(t *testing.T) {
	zeros65 := make([]byte, 65*1024)
	tail65 := make([]byte, 65*1024)
	tail65[len(tail65)-1] = 1
	zeros66 := make([]byte, 66*1024)

	// plain yields a reader which ends with io.EOF
	plain := func(b []byte) io.Reader {
		return bytes.NewBuffer(b)
	}

	for _, test := range []struct {
		a, b []byte
		want bool
	}{
		{zeros65, zeros65, false},
		{zeros65, tail65, true},
		{zeros65, zeros66, true},
		{zeros66, zeros65, true},
	} {
		differ, err := fs.CheckEqualReaders(plain(test.a), plain(test.b))
		assert.NoError(t, err)
		assert.Equal(t, differ, test.want)
	}

	myErr := errors.New("sentinel")
	// failing yields a reader which returns myErr after the data
	failing := func(b []byte) io.Reader {
		return io.MultiReader(bytes.NewBuffer(b), errorReader{myErr})
	}

	pairs := [][2][]byte{
		{zeros65, zeros65},
		{zeros65, tail65},
		{zeros65, zeros66},
		{zeros66, zeros65},
	}

	// error on the first reader
	for _, pair := range pairs {
		differ, err := fs.CheckEqualReaders(failing(pair[0]), plain(pair[1]))
		assert.Equal(t, myErr, err)
		assert.Equal(t, differ, true)
	}

	// error on the second reader
	for _, pair := range pairs {
		differ, err := fs.CheckEqualReaders(plain(pair[0]), failing(pair[1]))
		assert.Equal(t, myErr, err)
		assert.Equal(t, differ, true)
	}
}