rclone/backend/onedrive/quickxorhash/quickxorhash.go

// Package quickxorhash provides the quickXorHash algorithm which is a
// quick, simple non-cryptographic hash algorithm that works by XORing
// the bytes in a circular-shifting fashion.
//
// It is used by Microsoft OneDrive for Business to hash data.
//
// See: https://docs.microsoft.com/en-us/onedrive/developer/code-snippets/quickxorhash
package quickxorhash

// This code was ported from the code snippet linked from
// https://docs.microsoft.com/en-us/onedrive/developer/code-snippets/quickxorhash
// which carries the following copyright notice:

// ------------------------------------------------------------------------------
//  Copyright (c) 2016 Microsoft Corporation
//
//  Permission is hereby granted, free of charge, to any person obtaining a copy
//  of this software and associated documentation files (the "Software"), to deal
//  in the Software without restriction, including without limitation the rights
//  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
//  copies of the Software, and to permit persons to whom the Software is
//  furnished to do so, subject to the following conditions:
//
//  The above copyright notice and this permission notice shall be included in
//  all copies or substantial portions of the Software.
//
//  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
//  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
//  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
//  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
//  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
//  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
//  THE SOFTWARE.
// ------------------------------------------------------------------------------

import (
	"hash"
)

const (
	// BlockSize is the block size reported by the hash's BlockSize method.
	BlockSize = 64

	// Size is the length in bytes of the checksum produced by this package.
	Size = 20

	// widthInBits is the width of the hash state in bits.
	widthInBits = Size * 8

	// shift is how many bits the insertion point advances for each byte
	// that is hashed.
	shift = 11

	// dataSize is the number of 64-bit cells needed to hold widthInBits bits.
	dataSize = (widthInBits + 63) / 64

	// bitsInLastCell is how many bits of the final cell are in use; every
	// cell before it is used in full.
	bitsInLastCell = 32
)

// quickXorHash holds the running state of a quickXorHash computation. New
// hands out a pointer to it as a hash.Hash.
type quickXorHash struct {
	data        [dataSize]uint64 // hash state bits, stored as 64-bit cells
	lengthSoFar uint64           // total number of bytes passed to Write
	shiftSoFar  int              // bit position at which the next written byte is XORed in
}

// New returns a hash.Hash that computes the quickXorHash checksum. The
// returned hash starts out in the zero state, i.e. with no data written.
func New() hash.Hash {
	return new(quickXorHash)
}

// Write XORs the bytes of p into the hash state. It always reports len(p)
// bytes consumed together with a nil error.
//
// Successive bytes are XORed in at bit positions that advance by shift bits
// and wrap around at widthInBits, so a byte may straddle two cells of the
// state. The position reached is remembered so that a later call to Write
// carries on from it. p is only read: it is neither modified nor retained.
func (q *quickXorHash) Write(p []byte) (n int, err error) {
	// Cell, and bit offset inside that cell, where the first byte of p lands.
	cell := q.shiftSoFar / 64
	offset := q.shiftSoFar % 64

	// Bytes that are widthInBits apart in p land on the same bit position,
	// so at most widthInBits distinct positions have to be visited.
	positions := len(p)
	if positions > widthInBits {
		positions = widthInBits
	}
	lastCell := len(q.data) - 1

	for i := 0; i < positions; i++ {
		// Width of the current cell and the cell that follows it (the
		// last cell is narrower and wraps back round to the first).
		atLastCell := cell == lastCell
		cellBits, nextCell := 64, cell+1
		if atLastCell {
			cellBits, nextCell = bitsInLastCell, 0
		}

		// Combine every byte of p which shares this bit position.
		var folded byte
		for j := i; j < len(p); j += widthInBits {
			folded ^= p[j]
		}

		q.data[cell] ^= uint64(folded) << uint(offset)
		if offset > cellBits-8 {
			// The byte does not fit in what is left of this cell so
			// its high bits spill over into the bottom of the next.
			q.data[nextCell] ^= uint64(folded) >> uint(cellBits-offset)
		}

		// Step on to the position for the next byte.
		offset += shift
		for offset >= cellBits {
			if atLastCell {
				cell = 0
			} else {
				cell++
			}
			offset -= cellBits
		}
	}

	// Record where the next Write should start and how much has been hashed.
	q.shiftSoFar = (q.shiftSoFar + shift*(len(p)%widthInBits)) % widthInBits
	q.lengthSoFar += uint64(len(p))

	return len(p), nil
}

// checkSum serialises the current state into a Size byte digest without
// changing the state. The cells are written out in little endian order (only
// the low 32 bits of the last cell are used) and the total number of bytes
// written is then XORed, little endian, into the final 8 bytes.
func (q *quickXorHash) checkSum() (h [Size]byte) {
	last := len(q.data) - 1

	// Every cell but the last contributes all 8 of its bytes.
	for i := 0; i < last; i++ {
		for b := 0; b < 8; b++ {
			h[8*i+b] = byte(q.data[i] >> uint(8*b))
		}
	}

	// The last cell contributes only its low 4 bytes.
	for b := 0; b < 4; b++ {
		h[Size-4+b] = byte(q.data[last] >> uint(8*b))
	}

	// Mix the length of the data into the tail of the digest.
	for b := 0; b < 8; b++ {
		h[Size-8+b] ^= byte(q.lengthSoFar >> uint(8*b))
	}

	return h
}

// Sum appends the checksum of the data written so far to b and returns the
// extended slice. The hash state is left untouched, so more data can be
// written afterwards.
func (q *quickXorHash) Sum(b []byte) []byte {
	digest := q.checkSum()
	return append(b, digest[:]...)
}

// Reset discards everything written so far, putting the hash back into the
// zero state it had when first created.
func (q *quickXorHash) Reset() {
	var blank quickXorHash
	*q = blank
}

// Size returns the number of bytes that Sum appends, which is the package
// level Size constant.
func (q *quickXorHash) Size() int {
	return Size
}

// BlockSize returns the package level BlockSize constant. Write accepts
// input of any length, so callers are not required to write in multiples of
// this size.
func (q *quickXorHash) BlockSize() int {
	return BlockSize
}

// Sum computes the quickXorHash checksum of data in one call, without the
// caller having to create a hash with New.
func Sum(data []byte) [Size]byte {
	state := quickXorHash{}
	// Write always succeeds, so its results are deliberately ignored.
	_, _ = state.Write(data)
	return state.checkSum()
}
onedrive: implement quickXorHash algorithm #2262 2018-04-20 10:54:27 +02:00			`// Package quickxorhash provides the quickXorHash algorithm which is a`
			`// quick, simple non-cryptographic hash algorithm that works by XORing`
			`// the bytes in a circular-shifting fashion.`
			`//`
			`// It is used by Microsoft Onedrive for Business to hash data.`
			`//`
			`// See: https://docs.microsoft.com/en-us/onedrive/developer/code-snippets/quickxorhash`
			`package quickxorhash`

			`// This code was ported from the code snippet linked from`
			`// https://docs.microsoft.com/en-us/onedrive/developer/code-snippets/quickxorhash`
			`// Which has the copyright`

			`// ------------------------------------------------------------------------------`
			`// Copyright (c) 2016 Microsoft Corporation`
			`//`
			`// Permission is hereby granted, free of charge, to any person obtaining a copy`
			`// of this software and associated documentation files (the "Software"), to deal`
			`// in the Software without restriction, including without limitation the rights`
			`// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell`
			`// copies of the Software, and to permit persons to whom the Software is`
			`// furnished to do so, subject to the following conditions:`
			`//`
			`// The above copyright notice and this permission notice shall be included in`
			`// all copies or substantial portions of the Software.`
			`//`
			`// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR`
			`// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,`
			`// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE`
			`// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER`
			`// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,`
			`// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN`
			`// THE SOFTWARE.`
			`// ------------------------------------------------------------------------------`

			`import (`
			`"hash"`
			`)`

			`const (`
			`// BlockSize is the preferred size for hashing`
			`BlockSize = 64`
			`// Size of the output checksum`
			`Size = 20`
			`bitsInLastCell = 32`
			`shift = 11`
			`widthInBits = 8 * Size`
			`dataSize = (widthInBits-1)/64 + 1`
			`)`

			`type quickXorHash struct {`
			`data [dataSize]uint64`
			`lengthSoFar uint64`
			`shiftSoFar int`
			`}`

			`// New returns a new hash.Hash computing the quickXorHash checksum.`
			`func New() hash.Hash {`
			`return &quickXorHash{}`
			`}`

			`// Write (via the embedded io.Writer interface) adds more data to the running hash.`
			`// It never returns an error.`
			`//`
			`// Write writes len(p) bytes from p to the underlying data stream. It returns`
			`// the number of bytes written from p (0 <= n <= len(p)) and any error`
			`// encountered that caused the write to stop early. Write must return a non-nil`
			`// error if it returns n < len(p). Write must not modify the slice data, even`
			`// temporarily.`
			`//`
			`// Implementations must not retain p.`
			`func (q *quickXorHash) Write(p []byte) (n int, err error) {`
			`currentshift := q.shiftSoFar`

			`// The bitvector where we'll start xoring`
			`vectorArrayIndex := currentshift / 64`

			`// The position within the bit vector at which we begin xoring`
			`vectorOffset := currentshift % 64`
			`iterations := len(p)`
			`if iterations > widthInBits {`
			`iterations = widthInBits`
			`}`

			`for i := 0; i < iterations; i++ {`
			`isLastCell := vectorArrayIndex == len(q.data)-1`
			`var bitsInVectorCell int`
			`if isLastCell {`
			`bitsInVectorCell = bitsInLastCell`
			`} else {`
			`bitsInVectorCell = 64`
			`}`

			`// There's at least 2 bitvectors before we reach the end of the array`
			`if vectorOffset <= bitsInVectorCell-8 {`
			`for j := i; j < len(p); j += widthInBits {`
			`q.data[vectorArrayIndex] ^= uint64(p[j]) << uint(vectorOffset)`
			`}`
			`} else {`
			`index1 := vectorArrayIndex`
			`var index2 int`
			`if isLastCell {`
			`index2 = 0`
			`} else {`
			`index2 = vectorArrayIndex + 1`
			`}`
			`low := byte(bitsInVectorCell - vectorOffset)`

			`xoredByte := byte(0)`
			`for j := i; j < len(p); j += widthInBits {`
			`xoredByte ^= p[j]`
			`}`
			`q.data[index1] ^= uint64(xoredByte) << uint(vectorOffset)`
			`q.data[index2] ^= uint64(xoredByte) >> low`
			`}`
			`vectorOffset += shift`
			`for vectorOffset >= bitsInVectorCell {`
			`if isLastCell {`
			`vectorArrayIndex = 0`
			`} else {`
			`vectorArrayIndex = vectorArrayIndex + 1`
			`}`
			`vectorOffset -= bitsInVectorCell`
			`}`
			`}`

			`// Update the starting position in a circular shift pattern`
			`q.shiftSoFar = (q.shiftSoFar + shift*(len(p)%widthInBits)) % widthInBits`

			`q.lengthSoFar += uint64(len(p))`

			`return len(p), nil`
			`}`

			`// Calculate the current checksum`
			`func (q *quickXorHash) checkSum() (h [Size]byte) {`
			`// Output the data as little endian bytes`
			`ph := 0`
onedrive: minor optimization of quickxorhash This patch avoids creating a new slice header in favour of a for loop. This saves a few instructions! 2022-01-14 18:30:56 +01:00			`for i := 0; i < len(q.data)-1; i++ {`
			`d := q.data[i]`
onedrive: implement quickXorHash algorithm #2262 2018-04-20 10:54:27 +02:00			`_ = h[ph+7] // bounds check`
			`h[ph+0] = byte(d >> (8 * 0))`
			`h[ph+1] = byte(d >> (8 * 1))`
			`h[ph+2] = byte(d >> (8 * 2))`
			`h[ph+3] = byte(d >> (8 * 3))`
			`h[ph+4] = byte(d >> (8 * 4))`
			`h[ph+5] = byte(d >> (8 * 5))`
			`h[ph+6] = byte(d >> (8 * 6))`
			`h[ph+7] = byte(d >> (8 * 7))`
			`ph += 8`
			`}`
			`// remaining 32 bits`
			`d := q.data[len(q.data)-1]`
			`h[Size-4] = byte(d >> (8 * 0))`
			`h[Size-3] = byte(d >> (8 * 1))`
			`h[Size-2] = byte(d >> (8 * 2))`
			`h[Size-1] = byte(d >> (8 * 3))`

			`// XOR the file length with the least significant bits in little endian format`
			`d = q.lengthSoFar`
			`h[Size-8] ^= byte(d >> (8 * 0))`
			`h[Size-7] ^= byte(d >> (8 * 1))`
			`h[Size-6] ^= byte(d >> (8 * 2))`
			`h[Size-5] ^= byte(d >> (8 * 3))`
			`h[Size-4] ^= byte(d >> (8 * 4))`
			`h[Size-3] ^= byte(d >> (8 * 5))`
			`h[Size-2] ^= byte(d >> (8 * 6))`
			`h[Size-1] ^= byte(d >> (8 * 7))`

			`return h`
			`}`

			`// Sum appends the current hash to b and returns the resulting slice.`
			`// It does not change the underlying hash state.`
			`func (q *quickXorHash) Sum(b []byte) []byte {`
			`hash := q.checkSum()`
			`return append(b, hash[:]...)`
			`}`

			`// Reset resets the Hash to its initial state.`
			`func (q *quickXorHash) Reset() {`
			`*q = quickXorHash{}`
			`}`

			`// Size returns the number of bytes Sum will return.`
			`func (q *quickXorHash) Size() int {`
			`return Size`
			`}`

			`// BlockSize returns the hash's underlying block size.`
			`// The Write method must be able to accept any amount`
			`// of data, but it may operate more efficiently if all writes`
			`// are a multiple of the block size.`
			`func (q *quickXorHash) BlockSize() int {`
			`return BlockSize`
			`}`

			`// Sum returns the quickXorHash checksum of the data.`
			`func Sum(data []byte) [Size]byte {`
			`var d quickXorHash`
			`_, _ = d.Write(data)`
			`return d.checkSum()`
			`}`