mirror of
https://github.com/rclone/rclone.git
synced 2024-11-24 17:34:57 +01:00
411 lines
12 KiB
Go
411 lines
12 KiB
Go
// Package hidrivehash implements the HiDrive hashing algorithm which combines SHA-1 hashes hierarchically to a single top-level hash.
|
|
//
|
|
// Note: This implementation does not grant access to any partial hashes generated.
|
|
//
|
|
// See: https://developer.hidrive.com/wp-content/uploads/2021/07/HiDrive_Synchronization-v3.3-rev28.pdf
|
|
// (link to newest version: https://static.hidrive.com/dev/0001)
|
|
package hidrivehash
|
|
|
|
import (
|
|
"bytes"
|
|
"crypto/sha1"
|
|
"encoding"
|
|
"encoding/binary"
|
|
"errors"
|
|
"fmt"
|
|
"hash"
|
|
"io"
|
|
|
|
"github.com/rclone/rclone/backend/hidrive/hidrivehash/internal"
|
|
)
|
|
|
|
const (
	// BlockSize is the number of data bytes that are hashed together
	// into one block checksum.
	BlockSize = 4 * 1024
	// Size is the length of a checksum in bytes,
	// which is the length of a SHA-1 digest.
	Size = sha1.Size
	// sumsPerLevel is the number of checksums a level accepts
	// before it is considered full.
	sumsPerLevel = 1 << 8
)
|
|
|
|
var (
|
|
// zeroSum is a special hash consisting of 20 null-bytes.
|
|
// This will be the hash of any empty file (or ones containing only null-bytes).
|
|
zeroSum = [Size]byte{}
|
|
// ErrorInvalidEncoding is returned when a hash should be decoded from a binary form that is invalid.
|
|
ErrorInvalidEncoding = errors.New("encoded binary form is invalid for this hash")
|
|
// ErrorHashFull is returned when a hash reached its capacity and cannot accept any more input.
|
|
ErrorHashFull = errors.New("hash reached its capacity")
|
|
)
|
|
|
|
// writeByBlock writes len(p) bytes from p to the io.Writer in blocks of size blockSize.
// A single call to writer.Write never crosses a block boundary.
// It returns the number of bytes written from p (0 <= n <= len(p))
// and any error encountered that caused the write to stop early.
//
// A pointer bytesInBlock to a counter needs to be supplied,
// that is used to keep track how many bytes have been written to the writer already.
// A pointer onlyNullBytesInBlock to a boolean needs to be supplied,
// that is used to keep track whether the block so far only consists of null-bytes.
// Both are updated based on the bytes the writer actually accepted,
// so they stay consistent with n even if the writer stops early.
//
// The callback onBlockWritten is called whenever a full block has been written to the writer
// and is given as input the number of bytes that still need to be written.
// The callback observes the counter and the flag as they are for the completed block;
// they are reset only after the callback returned without an error.
//
// If the writer accepts no bytes without reporting an error,
// io.ErrShortWrite is returned instead of retrying forever.
func writeByBlock(p []byte, writer io.Writer, blockSize uint32, bytesInBlock *uint32, onlyNullBytesInBlock *bool, onBlockWritten func(remaining int) error) (n int, err error) {
	total := len(p)
	nullBytes := make([]byte, blockSize)
	for len(p) > 0 {
		// Never write past the end of the current block.
		toWrite := int(blockSize - *bytesInBlock)
		if toWrite > len(p) {
			toWrite = len(p)
		}
		c, writeErr := writer.Write(p[:toWrite])
		*bytesInBlock += uint32(c)
		// Only the c bytes the writer accepted are part of the block,
		// so only those may influence the null-byte tracking.
		*onlyNullBytesInBlock = *onlyNullBytesInBlock && bytes.Equal(nullBytes[:c], p[:c])
		// Discard data written through a reslice
		p = p[c:]
		if writeErr != nil {
			return total - len(p), writeErr
		}
		if c == 0 && toWrite > 0 {
			// The writer made no progress but did not report an error.
			return total - len(p), io.ErrShortWrite
		}
		if *bytesInBlock == blockSize {
			if callbackErr := onBlockWritten(len(p)); callbackErr != nil {
				return total - len(p), callbackErr
			}
			*bytesInBlock = 0
			*onlyNullBytesInBlock = true
		}
	}
	return total, nil
}
|
|
|
|
// level is a hash.Hash that is used to aggregate the checksums produced by the level hierarchically beneath it.
|
|
// It is used to represent any level-n hash, except for level-0.
|
|
type level struct {
|
|
checksum [Size]byte // aggregated checksum of this level
|
|
sumCount uint32 // number of sums contained in this level so far
|
|
bytesInHasher uint32 // number of bytes written into hasher so far
|
|
onlyNullBytesInHasher bool // whether the hasher only contains null-bytes so far
|
|
hasher hash.Hash
|
|
}
|
|
|
|
// NewLevel returns a new hash.Hash computing any level-n hash, except level-0.
|
|
func NewLevel() hash.Hash {
|
|
l := &level{}
|
|
l.Reset()
|
|
return l
|
|
}
|
|
|
|
// Add takes a position-embedded SHA-1 checksum and adds it to the level.
|
|
func (l *level) Add(sha1sum []byte) {
|
|
var tmp uint
|
|
var carry bool
|
|
for i := Size - 1; i >= 0; i-- {
|
|
tmp = uint(sha1sum[i]) + uint(l.checksum[i])
|
|
if carry {
|
|
tmp++
|
|
}
|
|
carry = tmp > 255
|
|
l.checksum[i] = byte(tmp)
|
|
}
|
|
}
|
|
|
|
// IsFull returns whether the number of checksums added to this level reached its capacity.
|
|
func (l *level) IsFull() bool {
|
|
return l.sumCount >= sumsPerLevel
|
|
}
|
|
|
|
// Write (via the embedded io.Writer interface) adds more data to the running hash.
|
|
// Contrary to the specification from hash.Hash, this DOES return an error,
|
|
// specifically ErrorHashFull if and only if IsFull() returns true.
|
|
func (l *level) Write(p []byte) (n int, err error) {
|
|
if l.IsFull() {
|
|
return 0, ErrorHashFull
|
|
}
|
|
onBlockWritten := func(remaining int) error {
|
|
if !l.onlyNullBytesInHasher {
|
|
c, err := l.hasher.Write([]byte{byte(l.sumCount)})
|
|
l.bytesInHasher += uint32(c)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
l.Add(l.hasher.Sum(nil))
|
|
}
|
|
l.sumCount++
|
|
l.hasher.Reset()
|
|
if remaining > 0 && l.IsFull() {
|
|
return ErrorHashFull
|
|
}
|
|
return nil
|
|
}
|
|
return writeByBlock(p, l.hasher, uint32(l.BlockSize()), &l.bytesInHasher, &l.onlyNullBytesInHasher, onBlockWritten)
|
|
}
|
|
|
|
// Sum appends the current hash to b and returns the resulting slice.
|
|
// It does not change the underlying hash state.
|
|
func (l *level) Sum(b []byte) []byte {
|
|
return append(b, l.checksum[:]...)
|
|
}
|
|
|
|
// Reset resets the Hash to its initial state.
|
|
func (l *level) Reset() {
|
|
l.checksum = zeroSum // clear the current checksum
|
|
l.sumCount = 0
|
|
l.bytesInHasher = 0
|
|
l.onlyNullBytesInHasher = true
|
|
l.hasher = sha1.New()
|
|
}
|
|
|
|
// Size returns the number of bytes Sum will return.
|
|
func (l *level) Size() int {
|
|
return Size
|
|
}
|
|
|
|
// BlockSize returns the hash's underlying block size.
|
|
// The Write method must be able to accept any amount
|
|
// of data, but it may operate more efficiently if all writes
|
|
// are a multiple of the block size.
|
|
func (l *level) BlockSize() int {
|
|
return Size
|
|
}
|
|
|
|
// MarshalBinary encodes the hash into a binary form and returns the result.
|
|
func (l *level) MarshalBinary() ([]byte, error) {
|
|
b := make([]byte, Size+4+4+1)
|
|
copy(b, l.checksum[:])
|
|
binary.BigEndian.PutUint32(b[Size:], l.sumCount)
|
|
binary.BigEndian.PutUint32(b[Size+4:], l.bytesInHasher)
|
|
if l.onlyNullBytesInHasher {
|
|
b[Size+4+4] = 1
|
|
}
|
|
encodedHasher, err := l.hasher.(encoding.BinaryMarshaler).MarshalBinary()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
b = append(b, encodedHasher...)
|
|
return b, nil
|
|
}
|
|
|
|
// UnmarshalBinary decodes the binary form generated by MarshalBinary.
|
|
// The hash will replace its internal state accordingly.
|
|
func (l *level) UnmarshalBinary(b []byte) error {
|
|
if len(b) < Size+4+4+1 {
|
|
return ErrorInvalidEncoding
|
|
}
|
|
copy(l.checksum[:], b)
|
|
l.sumCount = binary.BigEndian.Uint32(b[Size:])
|
|
l.bytesInHasher = binary.BigEndian.Uint32(b[Size+4:])
|
|
switch b[Size+4+4] {
|
|
case 0:
|
|
l.onlyNullBytesInHasher = false
|
|
case 1:
|
|
l.onlyNullBytesInHasher = true
|
|
default:
|
|
return ErrorInvalidEncoding
|
|
}
|
|
err := l.hasher.(encoding.BinaryUnmarshaler).UnmarshalBinary(b[Size+4+4+1:])
|
|
return err
|
|
}
|
|
|
|
// hidriveHash is the hash computing the actual checksum used by HiDrive by combining multiple level-hashes.
|
|
type hidriveHash struct {
|
|
levels []*level // collection of level-hashes, one for each level starting at level-1
|
|
lastSumWritten [Size]byte // the last checksum written to any of the levels
|
|
bytesInBlock uint32 // bytes written into blockHash so far
|
|
onlyNullBytesInBlock bool // whether the hasher only contains null-bytes so far
|
|
blockHash hash.Hash
|
|
}
|
|
|
|
// New returns a new hash.Hash computing the HiDrive checksum.
|
|
func New() hash.Hash {
|
|
h := &hidriveHash{}
|
|
h.Reset()
|
|
return h
|
|
}
|
|
|
|
// aggregateToLevel writes the checksum to the level at the given index
|
|
// and if necessary propagates any changes to levels above.
|
|
func (h *hidriveHash) aggregateToLevel(index int, sum []byte) {
|
|
for i := index; ; i++ {
|
|
if i >= len(h.levels) {
|
|
h.levels = append(h.levels, NewLevel().(*level))
|
|
}
|
|
_, err := h.levels[i].Write(sum)
|
|
copy(h.lastSumWritten[:], sum)
|
|
if err != nil {
|
|
panic(fmt.Errorf("level-hash should not have produced an error: %w", err))
|
|
}
|
|
if !h.levels[i].IsFull() {
|
|
break
|
|
}
|
|
sum = h.levels[i].Sum(nil)
|
|
h.levels[i].Reset()
|
|
}
|
|
}
|
|
|
|
// Write (via the embedded io.Writer interface) adds more data to the running hash.
|
|
// It never returns an error.
|
|
func (h *hidriveHash) Write(p []byte) (n int, err error) {
|
|
onBlockWritten := func(remaining int) error {
|
|
var sum []byte
|
|
if h.onlyNullBytesInBlock {
|
|
sum = zeroSum[:]
|
|
} else {
|
|
sum = h.blockHash.Sum(nil)
|
|
}
|
|
h.blockHash.Reset()
|
|
h.aggregateToLevel(0, sum)
|
|
return nil
|
|
}
|
|
return writeByBlock(p, h.blockHash, uint32(BlockSize), &h.bytesInBlock, &h.onlyNullBytesInBlock, onBlockWritten)
|
|
}
|
|
|
|
// Sum appends the current hash to b and returns the resulting slice.
|
|
// It does not change the underlying hash state.
|
|
func (h *hidriveHash) Sum(b []byte) []byte {
|
|
// Save internal state.
|
|
state, err := h.MarshalBinary()
|
|
if err != nil {
|
|
panic(fmt.Errorf("saving the internal state should not have produced an error: %w", err))
|
|
}
|
|
|
|
if h.bytesInBlock > 0 {
|
|
// Fill remainder of block with null-bytes.
|
|
filler := make([]byte, h.BlockSize()-int(h.bytesInBlock))
|
|
_, err = h.Write(filler)
|
|
if err != nil {
|
|
panic(fmt.Errorf("filling with null-bytes should not have an error: %w", err))
|
|
}
|
|
}
|
|
|
|
checksum := zeroSum
|
|
for i := 0; i < len(h.levels); i++ {
|
|
level := h.levels[i]
|
|
if i < len(h.levels)-1 {
|
|
// Aggregate non-empty non-final levels.
|
|
if level.sumCount >= 1 {
|
|
h.aggregateToLevel(i+1, level.Sum(nil))
|
|
level.Reset()
|
|
}
|
|
} else {
|
|
// Determine sum of final level.
|
|
if level.sumCount > 1 {
|
|
copy(checksum[:], level.Sum(nil))
|
|
} else {
|
|
// This is needed, otherwise there is no way to return
|
|
// the non-position-embedded checksum.
|
|
checksum = h.lastSumWritten
|
|
}
|
|
}
|
|
}
|
|
|
|
// Restore internal state.
|
|
err = h.UnmarshalBinary(state)
|
|
if err != nil {
|
|
panic(fmt.Errorf("restoring the internal state should not have produced an error: %w", err))
|
|
}
|
|
|
|
return append(b, checksum[:]...)
|
|
}
|
|
|
|
// Reset resets the Hash to its initial state.
|
|
func (h *hidriveHash) Reset() {
|
|
h.levels = nil
|
|
h.lastSumWritten = zeroSum // clear the last written checksum
|
|
h.bytesInBlock = 0
|
|
h.onlyNullBytesInBlock = true
|
|
h.blockHash = sha1.New()
|
|
}
|
|
|
|
// Size returns the number of bytes Sum will return.
|
|
func (h *hidriveHash) Size() int {
|
|
return Size
|
|
}
|
|
|
|
// BlockSize returns the hash's underlying block size.
|
|
// The Write method must be able to accept any amount
|
|
// of data, but it may operate more efficiently if all writes
|
|
// are a multiple of the block size.
|
|
func (h *hidriveHash) BlockSize() int {
|
|
return BlockSize
|
|
}
|
|
|
|
// MarshalBinary encodes the hash into a binary form and returns the result.
|
|
func (h *hidriveHash) MarshalBinary() ([]byte, error) {
|
|
b := make([]byte, Size+4+1+8)
|
|
copy(b, h.lastSumWritten[:])
|
|
binary.BigEndian.PutUint32(b[Size:], h.bytesInBlock)
|
|
if h.onlyNullBytesInBlock {
|
|
b[Size+4] = 1
|
|
}
|
|
|
|
binary.BigEndian.PutUint64(b[Size+4+1:], uint64(len(h.levels)))
|
|
for _, level := range h.levels {
|
|
encodedLevel, err := level.MarshalBinary()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
encodedLength := make([]byte, 8)
|
|
binary.BigEndian.PutUint64(encodedLength, uint64(len(encodedLevel)))
|
|
b = append(b, encodedLength...)
|
|
b = append(b, encodedLevel...)
|
|
}
|
|
encodedBlockHash, err := h.blockHash.(encoding.BinaryMarshaler).MarshalBinary()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
b = append(b, encodedBlockHash...)
|
|
return b, nil
|
|
}
|
|
|
|
// UnmarshalBinary decodes the binary form generated by MarshalBinary.
|
|
// The hash will replace its internal state accordingly.
|
|
func (h *hidriveHash) UnmarshalBinary(b []byte) error {
|
|
if len(b) < Size+4+1+8 {
|
|
return ErrorInvalidEncoding
|
|
}
|
|
copy(h.lastSumWritten[:], b)
|
|
h.bytesInBlock = binary.BigEndian.Uint32(b[Size:])
|
|
switch b[Size+4] {
|
|
case 0:
|
|
h.onlyNullBytesInBlock = false
|
|
case 1:
|
|
h.onlyNullBytesInBlock = true
|
|
default:
|
|
return ErrorInvalidEncoding
|
|
}
|
|
|
|
amount := binary.BigEndian.Uint64(b[Size+4+1:])
|
|
h.levels = make([]*level, int(amount))
|
|
offset := Size + 4 + 1 + 8
|
|
for i := range h.levels {
|
|
length := int(binary.BigEndian.Uint64(b[offset:]))
|
|
offset += 8
|
|
h.levels[i] = NewLevel().(*level)
|
|
err := h.levels[i].UnmarshalBinary(b[offset : offset+length])
|
|
if err != nil {
|
|
return err
|
|
}
|
|
offset += length
|
|
}
|
|
err := h.blockHash.(encoding.BinaryUnmarshaler).UnmarshalBinary(b[offset:])
|
|
return err
|
|
}
|
|
|
|
// Sum returns the HiDrive checksum of the data.
|
|
func Sum(data []byte) [Size]byte {
|
|
h := New().(*hidriveHash)
|
|
_, _ = h.Write(data)
|
|
var result [Size]byte
|
|
copy(result[:], h.Sum(nil))
|
|
return result
|
|
}
|
|
|
|
// Check the interfaces are satisfied.
|
|
var (
|
|
_ hash.Hash = (*level)(nil)
|
|
_ encoding.BinaryMarshaler = (*level)(nil)
|
|
_ encoding.BinaryUnmarshaler = (*level)(nil)
|
|
_ internal.LevelHash = (*level)(nil)
|
|
_ hash.Hash = (*hidriveHash)(nil)
|
|
_ encoding.BinaryMarshaler = (*hidriveHash)(nil)
|
|
_ encoding.BinaryUnmarshaler = (*hidriveHash)(nil)
|
|
)
|