Mirror of https://github.com/rclone/rclone.git
commit ba6b077b30 (parent 5da98499ed)

press: second overhaul

- drop all compression algorithms except gzip
- factor the code that makes gzip seekable out into a new external library
- many small bugfixes
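
The heart of the overhaul: instead of the block-based scheme deleted below, the backend now streams data through the seekable-gzip writer from github.com/buengese/sgzip and stores the metadata that writer reports. A minimal sketch of that path, assuming only the sgzip calls that appear later in this diff (NewWriterLevel, Close, MetaData, DefaultCompression):

// Sketch (not part of the commit): the gzip-only compression path.
package main

import (
	"bytes"
	"fmt"
	"io"
	"strings"

	"github.com/buengese/sgzip"
)

func main() {
	var compressed bytes.Buffer

	// Stream the input through the seekable gzip writer.
	gz, err := sgzip.NewWriterLevel(&compressed, sgzip.DefaultCompression)
	if err != nil {
		panic(err)
	}
	if _, err := io.Copy(gz, strings.NewReader("some file contents")); err != nil {
		panic(err)
	}
	if err := gz.Close(); err != nil {
		panic(err)
	}

	// The writer reports the metadata the backend now stores in the .meta
	// sidecar object (uncompressed size plus the block index used for seeking).
	meta := gz.MetaData()
	fmt.Printf("compressed %d bytes, uncompressed size %d\n", compressed.Len(), meta.Size)
}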
@@ -1,75 +0,0 @@
package press

import (
	"bufio"
	"io"

	"github.com/klauspost/compress/gzip"
)

// AlgGzip represents gzip compression algorithm
type AlgGzip struct {
	level     int
	blockSize uint32
}

// InitializeGzip initializes the gzip compression Algorithm
func InitializeGzip(bs uint32, level int) Algorithm {
	a := new(AlgGzip)
	a.blockSize = bs
	a.level = level
	return a
}

// GetFileExtension returns file extension
func (a *AlgGzip) GetFileExtension() string {
	return ".gz"
}

// GetHeader returns the Lz4 compression header
func (a *AlgGzip) GetHeader() []byte {
	return []byte{}
}

// GetFooter returns
func (a *AlgGzip) GetFooter() []byte {
	return []byte{}
}

// CompressBlock that compresses a block using gzip
func (a *AlgGzip) CompressBlock(in []byte, out io.Writer) (compressedSize uint32, uncompressedSize uint64, err error) {
	// Initialize buffer
	bufw := bufio.NewWriterSize(out, int(a.blockSize+(a.blockSize)>>4))

	// Initialize block writer
	outw, err := gzip.NewWriterLevel(bufw, a.level)
	if err != nil {
		return 0, 0, err
	}

	// Compress block
	_, err = outw.Write(in)
	if err != nil {
		return 0, 0, err
	}

	// Finalize gzip file, flush buffer and return
	err = outw.Close()
	if err != nil {
		return 0, 0, err
	}
	blockSize := uint32(bufw.Buffered())
	err = bufw.Flush()

	return blockSize, uint64(len(in)), err
}

// DecompressBlock decompresses Lz4 compressed block
func (a *AlgGzip) DecompressBlock(in io.Reader, out io.Writer, BlockSize uint32) (n int, err error) {
	gzipReader, err := gzip.NewReader(in)
	if err != nil {
		return 0, err
	}
	written, err := io.Copy(out, gzipReader)
	return int(written), err
}
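
For context, the deleted scheme compressed every fixed-size block as an independent gzip member, which is what made the stream seekable. A round-trip sketch against the deleted Algorithm implementation above (illustrative only, not part of the commit):

package press

import (
	"bytes"
	"testing"
)

// Illustrative sketch: round-trip one block through the deleted per-block
// gzip algorithm defined above.
func TestGzipBlockRoundTrip(t *testing.T) {
	alg := InitializeGzip(131072, 6) // 128 KiB blocks, gzip level 6 (the old preset values)

	in := []byte("The quick brown fox jumps over the lazy dog.")
	var compressed bytes.Buffer
	compressedSize, uncompressedSize, err := alg.CompressBlock(in, &compressed)
	if err != nil {
		t.Fatal(err)
	}
	if uncompressedSize != uint64(len(in)) || compressedSize == 0 {
		t.Fatalf("unexpected sizes: %d compressed, %d uncompressed", compressedSize, uncompressedSize)
	}

	var out bytes.Buffer
	if _, err := alg.DecompressBlock(&compressed, &out, 131072); err != nil {
		t.Fatal(err)
	}
	if !bytes.Equal(out.Bytes(), in) {
		t.Fatal("round trip mismatch")
	}
}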
@@ -1,223 +0,0 @@
package press

// This file implements the LZ4 algorithm.
import (
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
	"math/bits"

	"github.com/buengese/xxh32"
	lz4 "github.com/pierrec/lz4"
)

/*
Structure of LZ4 header:
	Flags:
		Version = 01
		Independent = 1
		Block Checksum = 1
		Content Size = 0
		Content Checksum = 0
		Reserved = 0
		Dictionary ID = 0

	BD byte:
		Reserved = 0
		Block Max Size = 101 (or 5; 256kb)
		Reserved = 0000

	Header checksum byte (xxhash(flags and bd byte) >> 1) & 0xff
*/

// LZ4Header - Header of our LZ4 file
//var LZ4Header = []byte{0x04, 0x22, 0x4d, 0x18, 0x70, 0x50, 0x84}

// LZ4Footer - Footer of our LZ4 file
var LZ4Footer = []byte{0x00, 0x00, 0x00, 0x00} // This is just an empty block

const (
	frameMagic uint32 = 0x184D2204

	compressedBlockFlag = 1 << 31
	compressedBlockMask = compressedBlockFlag - 1
)

// AlgLz4 is the Lz4 Compression algorithm
type AlgLz4 struct {
	Header lz4.Header
	buf    [19]byte // magic number(4) + header(flags(2)+[Size(8)+DictID(4)]+checksum(1)) does not exceed 19 bytes
}

// InitializeLz4 creates an Lz4 compression algorithm
func InitializeLz4(bs uint32, blockChecksum bool) Algorithm {
	a := new(AlgLz4)
	a.Header.Reset()
	a.Header = lz4.Header{
		BlockChecksum: blockChecksum,
		BlockMaxSize:  int(bs),
	}
	return a
}

// GetFileExtension returns file extension
func (a *AlgLz4) GetFileExtension() string {
	return ".lz4"
}

// GetHeader returns the Lz4 compression header
func (a *AlgLz4) GetHeader() []byte {
	// Size is optional.
	buf := a.buf[:]

	// Set the fixed size data: magic number, block max size and flags.
	binary.LittleEndian.PutUint32(buf[0:], frameMagic)
	flg := byte(lz4.Version << 6)
	flg |= 1 << 5 // No block dependency.
	if a.Header.BlockChecksum {
		flg |= 1 << 4
	}
	if a.Header.Size > 0 {
		flg |= 1 << 3
	}
	buf[4] = flg
	buf[5] = blockSizeValueToIndex(a.Header.BlockMaxSize) << 4

	// Current buffer size: magic(4) + flags(1) + block max size (1).
	n := 6
	if a.Header.Size > 0 {
		binary.LittleEndian.PutUint64(buf[n:], a.Header.Size)
		n += 8
	}

	// The header checksum includes the flags, block max size and optional Size.
	buf[n] = byte(xxh32.ChecksumZero(buf[4:n]) >> 8 & 0xFF)

	// Header ready, write it out.
	return buf[0 : n+1]
}

// GetFooter returns
func (a *AlgLz4) GetFooter() []byte {
	return LZ4Footer
}

// CompressBlock that compresses a block using lz4
func (a *AlgLz4) CompressBlock(in []byte, out io.Writer) (compressedSize uint32, uncompressedSize uint64, err error) {
	if len(in) > 0 {
		n, err := a.compressBlock(in, out)
		if err != nil {
			return 0, 0, err
		}
		return n, uint64(len(in)), nil
	}

	return 0, 0, nil
}

// compressBlock compresses a block.
func (a *AlgLz4) compressBlock(data []byte, dst io.Writer) (uint32, error) {
	zdata := make([]byte, a.Header.BlockMaxSize) // The compressed block size cannot exceed the input's.
	var zn int
	if level := a.Header.CompressionLevel; level != 0 {
		zn, _ = lz4.CompressBlockHC(data, zdata, level)
	} else {
		var hashTable [1 << 16]int
		zn, _ = lz4.CompressBlock(data, zdata, hashTable[:])
	}

	var bLen uint32
	if zn > 0 && zn < len(data) {
		// Compressible and compressed size smaller than uncompressed: ok!
		bLen = uint32(zn)
		zdata = zdata[:zn]
	} else {
		// Uncompressed block.
		bLen = uint32(len(data)) | compressedBlockFlag
		zdata = data
	}

	// Write the block.
	if err := a.writeUint32(bLen, dst); err != nil {
		return 0, err
	}
	_, err := dst.Write(zdata)
	if err != nil {
		return 0, err
	}

	if !a.Header.BlockChecksum {
		return bLen, nil
	}
	checksum := xxh32.ChecksumZero(zdata)
	if err := a.writeUint32(checksum, dst); err != nil {
		return 0, err
	}
	return bLen, nil
}

// writeUint32 writes a uint32 to the underlying writer.
func (a *AlgLz4) writeUint32(x uint32, dst io.Writer) error {
	buf := make([]byte, 4)
	binary.LittleEndian.PutUint32(buf, x)
	_, err := dst.Write(buf)
	return err
}

func blockSizeValueToIndex(size int) byte {
	return 4 + byte(bits.TrailingZeros(uint(size)>>16)/2)
}

// DecompressBlock decompresses Lz4 compressed block
func (a *AlgLz4) DecompressBlock(in io.Reader, out io.Writer, BlockSize uint32) (n int, err error) {
	// Get our compressed data
	var b bytes.Buffer
	_, err = io.Copy(&b, in)
	if err != nil {
		return 0, err
	}
	zdata := b.Bytes()
	bLen := binary.LittleEndian.Uint32(zdata[:4])

	if bLen&compressedBlockFlag > 0 {
		// Uncompressed block.
		bLen &= compressedBlockMask

		if bLen > BlockSize {
			return 0, fmt.Errorf("lz4: invalid block size: %d", bLen)
		}
		data := zdata[4 : bLen+4]

		if a.Header.BlockChecksum {
			checksum := binary.LittleEndian.Uint32(zdata[4+bLen:])

			if h := xxh32.ChecksumZero(data); h != checksum {
				return 0, fmt.Errorf("lz4: invalid block checksum: got %x; expected %x", h, checksum)
			}
		}
		_, err := out.Write(data)
		return len(data), err
	}

	// compressed block
	if bLen > BlockSize {
		return 0, fmt.Errorf("lz4: invalid block size: %d", bLen)
	}

	if a.Header.BlockChecksum {
		checksum := binary.LittleEndian.Uint32(zdata[4+bLen:])

		if h := xxh32.ChecksumZero(zdata[4 : bLen+4]); h != checksum {
			return 0, fmt.Errorf("lz4: invalid block checksum: got %x; expected %x", h, checksum)
		}
	}

	data := make([]byte, BlockSize)
	n, err = lz4.UncompressBlock(zdata[4:bLen+4], data)
	if err != nil {
		return 0, err
	}
	_, err = out.Write(data[:n])
	return n, err
}
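
The frame-header layout documented in the comment block above can be checked by hand for the values the old lz4 preset used (256 KiB blocks, block checksums on): the flag and BD bytes work out to 0x70 and 0x50, which agrees with the commented-out LZ4Header. A small sketch of that arithmetic:

package main

import (
	"fmt"
	"math/bits"
)

func main() {
	// Preset used by the deleted lz4 mode: 256 KiB blocks, block checksums on.
	const blockMaxSize = 262144
	const version = 1 // lz4.Version

	flg := byte(version << 6)
	flg |= 1 << 5 // independent blocks
	flg |= 1 << 4 // block checksums enabled
	bd := byte(4+bits.TrailingZeros(uint(blockMaxSize)>>16)/2) << 4

	// Prints flg = 0x70, bd = 0x50 - the fifth and sixth bytes of LZ4Header.
	fmt.Printf("flg = %#02x, bd = %#02x\n", flg, bd)
}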
@@ -1,75 +0,0 @@
package press

import (
	"bufio"
	"io"

	"github.com/ulikunitz/xz"
)

// AlgXZ represents the XZ compression algorithm
type AlgXZ struct {
	blockSize uint32
	config    xz.WriterConfig
}

// InitializeXZ creates an Lz4 compression algorithm
func InitializeXZ(bs uint32) Algorithm {
	a := new(AlgXZ)
	a.blockSize = bs
	a.config = xz.WriterConfig{}
	return a
}

// GetFileExtension returns file extension
func (a *AlgXZ) GetFileExtension() string {
	return ".xz"
}

// GetHeader returns the Lz4 compression header
func (a *AlgXZ) GetHeader() []byte {
	return []byte{}
}

// GetFooter returns
func (a *AlgXZ) GetFooter() []byte {
	return []byte{}
}

// CompressBlock that compresses a block using lz4
func (a *AlgXZ) CompressBlock(in []byte, out io.Writer) (compressedSize uint32, uncompressedSize uint64, err error) {
	// Initialize buffer
	bufw := bufio.NewWriterSize(out, int(a.blockSize+(a.blockSize)>>4))

	// Initialize block writer
	outw, err := a.config.NewWriter(bufw)
	if err != nil {
		return 0, 0, err
	}

	// Compress block
	_, err = outw.Write(in)
	if err != nil {
		return 0, 0, err
	}

	// Finalize gzip file, flush buffer and return
	err = outw.Close()
	if err != nil {
		return 0, 0, err
	}
	blockSize := uint32(bufw.Buffered())
	err = bufw.Flush()

	return blockSize, uint64(len(in)), err
}

// DecompressBlock decompresses Lz4 compressed block
func (a *AlgXZ) DecompressBlock(in io.Reader, out io.Writer, BlockSize uint32) (n int, err error) {
	xzReader, err := xz.NewReader(in)
	if err != nil {
		return 0, err
	}
	written, err := io.Copy(out, xzReader)
	return int(written), err
}
@@ -1,526 +0,0 @@
// Package press provides wrappers for Fs and Object which implement compression.
// This file is the backend implementation for seekable compression.
package press

import (
	"bufio"
	"bytes"
	"errors"
	"fmt"
	"io"
	"io/ioutil"
	"log"
)

// Compression modes
const (
	Uncompressed = -1
	LZ4          = 2
	Gzip         = 4
	XZ           = 8
)

// Errors
var (
	ErrMetadataCorrupted = errors.New("metadata may have been corrupted")
)

// DEBUG - flag for debug mode
const DEBUG = false

// Compression is a struct containing configurable variables (what used to be constants)
type Compression struct {
	CompressionMode int // Compression mode
	Algorithm       Algorithm
	BlockSize       uint32 // Size of blocks. Higher block size means better compression but more download bandwidth needed for small downloads
	// ~1MB is recommended for xz, while ~128KB is recommended for gzip and lz4
	HeuristicBytes      int64   // Bytes to perform gzip heuristic on to determine whether a file should be compressed
	NumThreads          int     // Number of threads to use for compression
	MaxCompressionRatio float64 // Maximum compression ratio for a file to be considered compressible
	BinPath             string  // Path to compression binary. This is used for all non-gzip compression.
}

// Algorithm is the main compression Algorithm Interface
type Algorithm interface {
	GetHeader() []byte

	GetFileExtension() string

	CompressBlock(in []byte, out io.Writer) (compressedSize uint32, uncompressedSize uint64, err error)

	DecompressBlock(in io.Reader, out io.Writer, BlockSize uint32) (n int, err error)

	GetFooter() []byte
}

// NewCompressionPreset creates a Compression object with a preset mode/bs
func NewCompressionPreset(preset string) (*Compression, error) {
	switch preset {
	case "lz4":
		alg := InitializeLz4(262144, true)
		return NewCompression(LZ4, alg, 262144) // LZ4 compression (very fast)
	case "gzip":
		alg := InitializeGzip(131072, 6)
		return NewCompression(Gzip, alg, 131070) // GZIP-default compression (medium)*/
	case "xz":
		alg := InitializeXZ(1048576)
		return NewCompression(XZ, alg, 1048576) // XZ compression (strong compression)*/
	}
	return nil, errors.New("Compression mode doesn't exist")
}

// NewCompressionPresetNumber creates a Compression object with a preset mode/bs
func NewCompressionPresetNumber(preset int) (*Compression, error) {
	switch preset {
	case LZ4:
		alg := InitializeLz4(262144, true)
		return NewCompression(LZ4, alg, 262144) // LZ4 compression (very fast)
	case Gzip:
		alg := InitializeGzip(131072, 6)
		return NewCompression(Gzip, alg, 131070) // GZIP-default compression (medium)*/
	case XZ:
		alg := InitializeXZ(1048576)
		return NewCompression(XZ, alg, 1048576) // XZ compression (strong compression)*/
	}
	return nil, errors.New("Compression mode doesn't exist")
}

// NewCompression creates a Compression object with some default configuration values
func NewCompression(mode int, alg Algorithm, bs uint32) (*Compression, error) {
	return NewCompressionAdvanced(mode, alg, bs, 1048576, 12, 0.9)
}

// NewCompressionAdvanced creates a Compression object
func NewCompressionAdvanced(mode int, alg Algorithm, bs uint32, hb int64, threads int, mcr float64) (c *Compression, err error) {
	// Set vars
	c = new(Compression)
	c.Algorithm = alg
	c.CompressionMode = mode
	c.BlockSize = bs
	c.HeuristicBytes = hb
	c.NumThreads = threads
	c.MaxCompressionRatio = mcr
	return c, err
}

/*** UTILITY FUNCTIONS ***/

// GetFileExtension gets a file extension for current compression mode
func (c *Compression) GetFileExtension() string {
	return c.Algorithm.GetFileExtension()
}

// GetFileCompressionInfo gets a file extension along with compressibility of file
func (c *Compression) GetFileCompressionInfo(reader io.Reader) (compressable bool, extension string, err error) {
	// Use our compression algorithm to do a heuristic on the first few bytes
	var emulatedBlock, emulatedBlockCompressed bytes.Buffer
	_, err = io.CopyN(&emulatedBlock, reader, c.HeuristicBytes)
	if err != nil && err != io.EOF {
		return false, "", err
	}
	compressedSize, uncompressedSize, err := c.Algorithm.CompressBlock(emulatedBlock.Bytes(), &emulatedBlockCompressed)
	if err != nil {
		return false, "", err
	}
	compressionRatio := float64(compressedSize) / float64(uncompressedSize)

	// If the data is not compressible, return so
	if compressionRatio > c.MaxCompressionRatio {
		return false, ".bin", nil
	}

	// If the file is compressible, select file extension based on compression mode
	return true, c.Algorithm.GetFileExtension(), nil
}

/*** MAIN COMPRESSION INTERFACE ***/
// compressionResult represents the result of compression for a single block (gotten by a single thread)
type compressionResult struct {
	buffer *bytes.Buffer
	n      uint64
	err    error
}

// CompressFileReturningBlockData compresses a file returning the block data for that file.
func (c *Compression) CompressFileReturningBlockData(in io.Reader, out io.Writer) (blockData []uint32, err error) {
	// Initialize buffered writer
	bufw := bufio.NewWriterSize(out, int((c.BlockSize+(c.BlockSize)>>4)*uint32(c.NumThreads)))

	// Get blockData, copy over header, add length of header to blockData
	blockData = make([]uint32, 0)
	header := c.Algorithm.GetHeader()
	_, err = bufw.Write(header)
	if err != nil {
		return nil, err
	}
	blockData = append(blockData, uint32(len(header)))

	// Compress blocks
	for {
		// Loop through threads, spawning a go procedure for each thread. If we get eof on one thread, set eofAt to that thread and break
		compressionResults := make([]chan compressionResult, c.NumThreads)
		eofAt := -1
		for i := 0; i < c.NumThreads; i++ {
			// Create thread channel and allocate buffer to pass to thread
			compressionResults[i] = make(chan compressionResult)
			var inputBuffer bytes.Buffer
			_, err = io.CopyN(&inputBuffer, in, int64(c.BlockSize))
			if err == io.EOF {
				eofAt = i
			} else if err != nil {
				return nil, err
			}
			// Run thread
			go func(i int, in []byte) {
				// Initialize thread writer and result struct
				var res compressionResult
				var buffer bytes.Buffer

				// Compress block
				_, n, err := c.Algorithm.CompressBlock(in, &buffer)
				if err != nil && err != io.EOF { // This errored out.
					res.buffer = nil
					res.n = 0
					res.err = err
					compressionResults[i] <- res
					return
				}
				// Pass our data back to the main thread as a compression result
				res.buffer = &buffer
				res.n = n
				res.err = err
				compressionResults[i] <- res
			}(i, inputBuffer.Bytes())
			// If we have reached eof, we don't need more threads
			if eofAt != -1 {
				break
			}
		}

		// Process writers in order
		for i := 0; i < c.NumThreads; i++ {
			if compressionResults[i] != nil {
				// Get current compression result, get buffer, and copy buffer over to output
				res := <-compressionResults[i]
				close(compressionResults[i])
				if res.buffer == nil {
					return nil, res.err
				}
				blockSize := uint32(res.buffer.Len())

				_, err = io.Copy(bufw, res.buffer)
				if err != nil {
					return nil, err
				}
				if DEBUG {
					fmt.Printf("%d %d\n", res.n, blockSize)
				}

				// Append block size to block data
				blockData = append(blockData, blockSize)

				// If this is the last block, add the raw size of the last block to the end of blockData and break
				if eofAt == i {
					if DEBUG {
						log.Printf("%d %d %d\n", res.n, byte(res.n%256), byte(res.n/256))
					}

					blockData = append(blockData, uint32(res.n))
					break
				}
			}
		}

		// Get number of bytes written in this block (they should all be in the bufio buffer), then close gzip and flush buffer
		err = bufw.Flush()
		if err != nil {
			return nil, err
		}

		// If eof happened, break
		if eofAt != -1 {
			if DEBUG {
				log.Printf("%d", eofAt)
				log.Printf("%v", blockData)
			}
			break
		}
	}

	// Write footer and flush
	footer := c.Algorithm.GetFooter()

	_, err = bufw.Write(footer)
	if err != nil {
		return nil, err
	}
	err = bufw.Flush()

	// Return
	return blockData, err
}

/*** BLOCK DECOMPRESSION FUNCTIONS ***/

// Wrapper function for decompressBlock that implements multithreading
// decompressionResult represents the result of decompressing a block
type decompressionResult struct {
	err    error
	buffer *bytes.Buffer
}

func (d *Decompressor) decompressBlockRangeMultithreaded(in io.Reader, out io.Writer, startingBlock uint32) (n int, err error) {
	// First, use bufio.Reader to reduce the number of reads and bufio.Writer to reduce the number of writes
	bufin := bufio.NewReader(in)
	bufout := bufio.NewWriter(out)

	// Decompress each block individually.
	currBatch := startingBlock // Block # of start of current batch of blocks
	totalBytesCopied := 0
	for {
		// Loop through threads
		eofAt := -1
		decompressionResults := make([]chan decompressionResult, d.c.NumThreads)

		for i := 0; i < d.c.NumThreads; i++ {
			// Get currBlock
			currBlock := currBatch + uint32(i)

			// Create channel
			decompressionResults[i] = make(chan decompressionResult)

			// Check if we've reached EOF
			if currBlock >= d.numBlocks {
				eofAt = i
				break
			}

			// Get block to decompress
			var compressedBlock bytes.Buffer
			var err error
			n, err := io.CopyN(&compressedBlock, bufin, d.blockStarts[currBlock+1]-d.blockStarts[currBlock])
			if err != nil || n == 0 { // End of stream
				eofAt = i
				break
			}

			// Spawn thread to decompress block
			if DEBUG {
				log.Printf("Spawning %d", i)
			}
			go func(i int, currBlock uint32, in io.Reader) {
				var block bytes.Buffer
				var res decompressionResult

				// Decompress block
				_, res.err = d.c.Algorithm.DecompressBlock(in, &block, d.c.BlockSize)
				res.buffer = &block
				decompressionResults[i] <- res
			}(i, currBlock, &compressedBlock)
		}
		if DEBUG {
			log.Printf("Eof at %d", eofAt)
		}

		// Process results
		for i := 0; i < d.c.NumThreads; i++ {
			// If we got EOF, return
			if eofAt == i {
				return totalBytesCopied, bufout.Flush() // Flushing bufout is needed to prevent us from getting all nulls
			}

			// Get result and close
			res := <-decompressionResults[i]
			close(decompressionResults[i])
			if res.err != nil {
				return totalBytesCopied, res.err
			}

			// Copy to output and add to total bytes copied
			n, err := io.Copy(bufout, res.buffer)
			totalBytesCopied += int(n)
			if err != nil {
				return totalBytesCopied, err
			}
		}

		// Add NumThreads to currBatch
		currBatch += uint32(d.c.NumThreads)
	}
}

/*** MAIN DECOMPRESSION INTERFACE ***/

// Decompressor is the ReadSeeker implementation for decompression
type Decompressor struct {
	cursorPos        *int64        // The current location we have seeked to
	blockStarts      []int64       // The start of each block. These will be recovered from the block sizes
	numBlocks        uint32        // Number of blocks
	decompressedSize int64         // Decompressed size of the file.
	in               io.ReadSeeker // Input
	c                *Compression  // Compression options
}

// Parses block data. Returns the number of blocks, the block start locations for each block, and the decompressed size of the entire file.
func parseBlockData(blockData []uint32, BlockSize uint32) (numBlocks uint32, blockStarts []int64, decompressedSize int64) {
	// Parse the block data
	blockDataLen := len(blockData)
	numBlocks = uint32(blockDataLen - 1)
	if DEBUG {
		log.Printf("%v\n", blockData)
		log.Printf("metadata len, numblocks = %d, %d", blockDataLen, numBlocks)
	}
	blockStarts = make([]int64, numBlocks+1) // Starts with start of first block (and end of header), ends with end of last block
	currentBlockPosition := int64(0)
	for i := uint32(0); i < numBlocks; i++ { // Loop through block data, getting starts of blocks.
		currentBlockSize := blockData[i]
		currentBlockPosition += int64(currentBlockSize)
		blockStarts[i] = currentBlockPosition
	}
	blockStarts[numBlocks] = currentBlockPosition // End of last block

	//log.Printf("Block Starts: %v\n", d.blockStarts)

	numBlocks-- // Subtract 1 from number of blocks because our header technically isn't a block

	// Get uncompressed size of last block and derive uncompressed size of file
	lastBlockRawSize := blockData[blockDataLen-1]
	decompressedSize = int64(numBlocks-1)*int64(BlockSize) + int64(lastBlockRawSize)
	if DEBUG {
		log.Printf("Decompressed size = %d", decompressedSize)
	}

	return numBlocks, blockStarts, decompressedSize
}

// Initializes decompressor with the block data specified.
func (d *Decompressor) initWithBlockData(c *Compression, in io.ReadSeeker, size int64, blockData []uint32) (err error) {
	// Copy over compression object
	d.c = c

	// Initialize cursor position
	d.cursorPos = new(int64)

	// Parse the block data
	d.numBlocks, d.blockStarts, d.decompressedSize = parseBlockData(blockData, d.c.BlockSize)

	// Initialize cursor position value and copy over reader
	*d.cursorPos = 0
	_, err = in.Seek(0, io.SeekStart)
	d.in = in

	return err
}

// Read reads data using a decompressor
func (d Decompressor) Read(p []byte) (int, error) {
	if DEBUG {
		log.Printf("Cursor pos before: %d\n", *d.cursorPos)
	}
	// Check if we're at the end of the file or before the beginning of the file
	if *d.cursorPos >= d.decompressedSize || *d.cursorPos < 0 {
		if DEBUG {
			log.Println("Out of bounds EOF")
		}
		return 0, io.EOF
	}

	// Get block range to read
	blockNumber := *d.cursorPos / int64(d.c.BlockSize)
	blockStart := d.blockStarts[blockNumber]                                 // Start position of blocks to read
	dataOffset := *d.cursorPos % int64(d.c.BlockSize)                        // Offset of data to read in blocks to read
	bytesToRead := len(p)                                                    // Number of bytes to read
	blocksToRead := (int64(bytesToRead)+dataOffset)/int64(d.c.BlockSize) + 1 // Number of blocks to read
	returnEOF := false
	if blockNumber+blocksToRead > int64(d.numBlocks) { // Overflowed the last block
		blocksToRead = int64(d.numBlocks) - blockNumber
		returnEOF = true
	}
	blockEnd := d.blockStarts[blockNumber+blocksToRead] // Start of the block after the last block we want to get is the end of the last block we want to get
	blockLen := blockEnd - blockStart

	// Read compressed block range into buffer
	var compressedBlocks bytes.Buffer
	_, err := d.in.Seek(blockStart, io.SeekStart)
	if err != nil {
		return 0, err
	}
	n1, err := io.CopyN(&compressedBlocks, d.in, blockLen)
	if DEBUG {
		log.Printf("block # = %d @ %d <- %d, len %d, copied %d bytes", blockNumber, blockStart, *d.cursorPos, blockLen, n1)
	}
	if err != nil {
		if DEBUG {
			log.Println("Copy Error")
		}
		return 0, err
	}

	// Decompress block range
	var b bytes.Buffer
	n, err := d.decompressBlockRangeMultithreaded(&compressedBlocks, &b, uint32(blockNumber))
	if err != nil {
		log.Println("Decompression error")
		return n, err
	}

	// Calculate bytes read
	readOverflow := *d.cursorPos + int64(bytesToRead) - d.decompressedSize
	if readOverflow < 0 {
		readOverflow = 0
	}
	bytesRead := int64(bytesToRead) - readOverflow
	if DEBUG {
		log.Printf("Read offset = %d, overflow = %d", dataOffset, readOverflow)
		log.Printf("Decompressed %d bytes; read %d out of %d bytes\n", n, bytesRead, bytesToRead)
		// log.Printf("%v", b.Bytes())
	}

	// If we read 0 bytes, we reached the end of the file
	if bytesRead == 0 {
		log.Println("EOF")
		return 0, io.EOF
	}

	// Copy from buffer+offset to p
	_, err = io.CopyN(ioutil.Discard, &b, dataOffset)
	if err != nil {
		return 0, err
	}
	n, err = b.Read(p) // Note: everything after bytesToRead bytes will be discarded; we are returning bytesToRead instead of n
	if err != nil {
		return n, err
	}

	// Increment cursor position and return
	*d.cursorPos += bytesRead
	if returnEOF {
		if DEBUG {
			log.Println("EOF")
		}
		return int(bytesRead), io.EOF
	}
	return int(bytesRead), nil
}

// Seek seeks to a location in compressed stream
func (d Decompressor) Seek(offset int64, whence int) (int64, error) {
	// Seek to offset in cursorPos
	if whence == io.SeekStart {
		*d.cursorPos = offset
	} else if whence == io.SeekCurrent {
		*d.cursorPos += offset
	} else if whence == io.SeekEnd {
		*d.cursorPos = d.decompressedSize + offset
	}

	// Return
	return offset, nil
}

// DecompressFileExtData decompresses a file using external block data. Argument "size" is very useful here.
func (c *Compression) DecompressFileExtData(in io.ReadSeeker, size int64, blockData []uint32) (FileHandle io.ReadSeeker, decompressedSize int64, err error) {
	var decompressor Decompressor
	err = decompressor.initWithBlockData(c, in, size, blockData)
	return decompressor, decompressor.decompressedSize, err
}
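
The block index built and consumed by this file is just a list of sizes: the header length, the compressed size of each block, and finally the raw size of the last (partial) block. A worked example against parseBlockData above, using made-up sizes (illustrative only, not part of the commit):

package press

import "testing"

// Illustrative sketch: how the old block index was interpreted. The sizes are
// hypothetical: a 10-byte algorithm header, compressed blocks of 500 and 600
// bytes, and 70000 raw bytes in the final partial block, with 131072-byte blocks.
func TestParseBlockDataExample(t *testing.T) {
	numBlocks, blockStarts, decompressedSize := parseBlockData([]uint32{10, 500, 600, 70000}, 131072)

	if numBlocks != 2 {
		t.Fatalf("numBlocks = %d, want 2", numBlocks)
	}
	// Offsets of each data block in the compressed file, ending with the end of the last block.
	if blockStarts[0] != 10 || blockStarts[1] != 510 || blockStarts[2] != 1110 || blockStarts[3] != 1110 {
		t.Fatalf("unexpected block starts: %v", blockStarts)
	}
	// One full block plus the raw size of the last block.
	if decompressedSize != 1*131072+70000 {
		t.Fatalf("decompressedSize = %d, want 201072", decompressedSize)
	}
}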
@@ -1,131 +0,0 @@
package press

import (
	"bufio"
	"bytes"
	"crypto/md5"
	"encoding/base64"
	"io"
	"io/ioutil"
	"math/rand"
	"os"
	"strings"
	"testing"
)

const TestStringSmall = "The quick brown fox jumps over the lazy dog."
const TestSizeLarge = 2097152 // 2 megabytes

// Tests compression and decompression for a preset
func testCompressDecompress(t *testing.T, preset string, testString string) {
	// Create compression instance
	comp, err := NewCompressionPreset(preset)
	if err != nil {
		t.Fatal(err)
	}

	// Open files and hashers
	testFile := strings.NewReader(testString)
	testFileHasher := md5.New()
	if err != nil {
		t.Fatal(err)
	}
	compressedFile, err := ioutil.TempFile(os.TempDir(), "rclone_compression_test")
	if err != nil {
		t.Fatal(err)
	}
	outHasher := md5.New()

	// Compress file and hash it (size doesn't matter here)
	testFileReader, testFileWriter := io.Pipe()
	go func() {
		_, err := io.Copy(io.MultiWriter(testFileHasher, testFileWriter), testFile)
		if err != nil {
			t.Fatal("Failed to write compressed file")
		}
		err = testFileWriter.Close()
		if err != nil {
			t.Log("Failed to close compressed file")
		}
	}()
	var blockData []uint32
	blockData, err = comp.CompressFileReturningBlockData(testFileReader, compressedFile)
	if err != nil {
		t.Fatalf("Compression failed with error: %v", err)
	}
	testFileHash := testFileHasher.Sum(nil)

	// Get the size, seek to the beginning of the compressed file
	size, err := compressedFile.Seek(0, io.SeekEnd)
	if err != nil {
		t.Fatal(err)
	}
	_, err = compressedFile.Seek(0, io.SeekStart)
	if err != nil {
		t.Fatal(err)
	}
	t.Logf("Compressed size: %d\n", size)

	// Decompress file into a hasher
	var FileHandle io.ReadSeeker
	var decompressedSize int64
	FileHandle, decompressedSize, err = comp.DecompressFileExtData(compressedFile, size, blockData)
	if err != nil {
		t.Fatal(err)
	}
	t.Logf("Decompressed size: %d\n", decompressedSize)
	bufr := bufio.NewReaderSize(FileHandle, 12345678)
	_, err = io.Copy(outHasher, bufr)
	if err != nil && err != io.EOF {
		t.Fatal(err)
	}
	decompressedFileHash := outHasher.Sum(nil)

	// Clean up
	err = compressedFile.Close()
	if err != nil {
		t.Log("Warning: cannot close compressed test file")
	}
	err = os.Remove(compressedFile.Name())
	if err != nil {
		t.Log("Warning: cannot remove compressed test file")
	}

	// Compare hashes
	if !bytes.Equal(testFileHash, decompressedFileHash) {
		t.Logf("Hash of original file: %x\n", testFileHash)
		t.Logf("Hash of recovered file: %x\n", decompressedFileHash)
		t.Fatal("Hashes do not match!")
	}
}

// Tests both small and large strings for a preset
func testSmallLarge(t *testing.T, preset string) {
	testStringLarge := getCompressibleString(TestSizeLarge)
	t.Run("TestSmall", func(t *testing.T) {
		testCompressDecompress(t, preset, TestStringSmall)
	})
	t.Run("TestLarge", func(t *testing.T) {
		testCompressDecompress(t, preset, testStringLarge)
	})
}

// Gets a compressible string
func getCompressibleString(size int) string {
	// Get pseudorandom bytes
	prbytes := make([]byte, size*3/4+16)
	prsource := rand.New(rand.NewSource(0))
	prsource.Read(prbytes)
	// Encode in base64
	encoding := base64.NewEncoding("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/")
	return encoding.EncodeToString(prbytes)[:size]
}

func TestCompression(t *testing.T) {
	testCases := []string{"lz4", "gzip", "xz"}
	for _, tc := range testCases {
		t.Run(tc, func(t *testing.T) {
			testSmallLarge(t, tc)
		})
	}
}
@@ -13,9 +13,11 @@ import (
	"fmt"
	"io"
	"io/ioutil"
	"regexp"
	"strings"
	"time"

	"github.com/buengese/sgzip"
	"github.com/gabriel-vasile/mimetype"

	"github.com/pkg/errors"
@@ -27,23 +29,36 @@ import (
	"github.com/rclone/rclone/fs/fspath"
	"github.com/rclone/rclone/fs/hash"
	"github.com/rclone/rclone/fs/operations"
	// Used for Rcat
)

// Globals
const (
	initialChunkSize = 262144  // Initial and max sizes of chunks when reading parts of the file. Currently
	maxChunkSize     = 8388608 // at 256KB and 8 MB.

	bufferSize     = 8388608
	heuristicBytes = 1048576

	metaFileExt         = ".meta"
	uncompressedFileExt = ".bin"
)

// Compression modes
const (
	Uncompressed = 0
	Gzip         = 2
)

var unconpressibleRegexp = regexp.MustCompile("(^(video|image|audio)/.*)|(^.*?/(x-7z-compressed|zip|gzip|x-rar-compressed|zstd|x-xz|lzip|warc))")

// Register with Fs
func init() {
	// Build compression mode options. Show XZ options only if they're supported on the current system.
	compressionModeOptions := []fs.OptionExample{{ // Default compression mode options
		Value: "lz4",
		Help:  "Fast, real-time compression with reasonable compression ratios.",
	}, {
		Value: "gzip",
		Help:  "Standard gzip compression with fastest parameters.",
	}, {
		Value: "xz",
		Help:  "Standard xz compression with fastest parameters.",
	},
	// Build compression mode options.
	compressionModeOptions := []fs.OptionExample{
		{ // Default compression mode options {
			Value: "gzip",
			Help:  "Standard gzip compression with fastest parameters.",
		},
	}

	// Register our remote
@@ -60,24 +75,33 @@ func init() {
			Help:     "Compression mode.",
			Default:  "gzip",
			Examples: compressionModeOptions,
		}, {
			Name:     "compression_level",
			Help:     "gzip compression level -2 to 9",
			Default:  sgzip.DefaultCompression,
			Advanced: true,
		}},
	})
}

// Constants
const bufferSize = 8388608 // Size of buffer when compressing or decompressing the entire file.
// Larger size means more multithreading with larger block sizes and thread counts.
// Currently at 8MB.
const initialChunkSize = 262144 // Initial and max sizes of chunks when reading parts of the file. Currently
const maxChunkSize = 8388608    // at 256KB and 8 MB.
// Options defines the configuration for this backend
type Options struct {
	Remote           string `config:"remote"`
	CompressionMode  string `config:"compression_mode"`
	CompressionLevel int    `config:"compression_level"`
}

const metaFileExt = ".meta"
const uncompressedFileExt = ".bin"
/*** FILESYSTEM FUNCTIONS ***/

// newCompressionForConfig constructs a Compression object for the given config name
func newCompressionForConfig(opt *Options) (*Compression, error) {
	c, err := NewCompressionPreset(opt.CompressionMode)
	return c, err
// Fs represents a wrapped fs.Fs
type Fs struct {
	fs.Fs
	wrapper  fs.Fs
	name     string
	root     string
	opt      Options
	mode     int          // compression mode id
	features *fs.Features // optional features
}

// NewFs contstructs an Fs from the path, container:path
@@ -99,16 +123,12 @@ func NewFs(name, rpath string, m configmap.Mapper) (fs.Fs, error) {
		return nil, errors.Wrapf(err, "failed to parse remote %q to wrap", remote)
	}

	c, err := newCompressionForConfig(opt)
	if err != nil {
		return nil, err
	}
	// Strip trailing slashes if they exist in rpath
	rpath = strings.TrimRight(rpath, "\\/")

	// First, check for a file
	// If a metadata file was found, return an error. Otherwise, check for a directory
	remotePath := fspath.JoinRootPath(wPath, generateMetadataName(rpath))
	remotePath := fspath.JoinRootPath(wPath, makeMetadataName(rpath))
	wrappedFs, err := wInfo.NewFs(wName, remotePath, wConfig)
	if err != fs.ErrorIsFile {
		remotePath = fspath.JoinRootPath(wPath, rpath)
@@ -124,7 +144,7 @@ func NewFs(name, rpath string, m configmap.Mapper) (fs.Fs, error) {
		name: name,
		root: rpath,
		opt:  *opt,
		c:    c,
		mode: compressionModeFromName(opt.CompressionMode),
	}
	// the features here are ones we could support, and they are
	// ANDed with the ones from wrappedFs
@@ -140,6 +160,7 @@ func NewFs(name, rpath string, m configmap.Mapper) (fs.Fs, error) {
	}).Fill(f).Mask(wrappedFs).WrapsFs(f, wrappedFs)
	// We support reading MIME types no matter the wrapped fs
	f.features.ReadMimeType = true
	// We can only support putstream if we have serverside copy or move
	if wrappedFs.Features().Move == nil && wrappedFs.Features().Copy == nil {
		f.features.PutStream = nil
	}
@@ -147,6 +168,15 @@ func NewFs(name, rpath string, m configmap.Mapper) (fs.Fs, error) {
	return f, err
}

func compressionModeFromName(name string) int {
	switch name {
	case "gzip":
		return Gzip
	default:
		return Uncompressed
	}
}

// Converts an int64 to hex
func int64ToHex(number int64) string {
	intBytes := make([]byte, 8)
@@ -192,7 +222,7 @@ func processFileName(compressedFileName string) (origFileName string, extension
}

// Generates the file name for a metadata file
func generateMetadataName(remote string) (newRemote string) {
func makeMetadataName(remote string) (newRemote string) {
	return remote + metaFileExt
}

@@ -202,43 +232,20 @@ func isMetadataFile(filename string) bool {
}

// Generates the file name for a data file
func (c *Compression) generateDataName(remote string, size int64, compressed bool) (newRemote string) {
	if compressed {
		newRemote = remote + int64ToHex(size) + c.GetFileExtension()
func makeDataName(remote string, size int64, mode int) (newRemote string) {
	if mode > 0 {
		newRemote = remote + int64ToHex(size) + ".gz"
	} else {
		newRemote = remote + uncompressedFileExt
	}
	return newRemote
}

// Generates the file name from a compression mode
func generateDataNameFromCompressionMode(remote string, size int64, mode int) (newRemote string) {
	if mode != Uncompressed {
		c, _ := NewCompressionPresetNumber(mode)
		newRemote = c.generateDataName(remote, size, true)
	} else {
		newRemote = remote + uncompressedFileExt
func (f *Fs) dataName(remote string, size int64, compressed bool) (name string) {
	if !compressed {
		return makeDataName(remote, size, Uncompressed)
	}
	return newRemote
}

// Options defines the configuration for this backend
type Options struct {
	Remote          string `config:"remote"`
	CompressionMode string `config:"compression_mode"`
}

/*** FILESYSTEM FUNCTIONS ***/

// Fs represents a wrapped fs.Fs
type Fs struct {
	fs.Fs
	wrapper  fs.Fs
	name     string
	root     string
	opt      Options
	features *fs.Features // optional features
	c        *Compression
	return makeDataName(remote, size, f.mode)
}

// Get an Object from a data DirEntry
@@ -251,7 +258,7 @@ func (f *Fs) addData(entries *fs.DirEntries, o fs.Object) {
	if size == -2 { // File is uncompressed
		size = o.Size()
	}
	metaName := generateMetadataName(origFileName)
	metaName := makeMetadataName(origFileName)
	*entries = append(*entries, f.newObjectSizeAndNameOnly(o, metaName, size))
}

@@ -271,9 +278,6 @@ func (f *Fs) processEntries(entries fs.DirEntries) (newEntries fs.DirEntries, er
	for _, entry := range entries {
		switch x := entry.(type) {
		case fs.Object:
			// if isMetadataFile(x.Remote()) {
			// f.addMeta(&newEntries, x) // Only care about metadata files; non-metadata files are redundant.
			// }
			if !isMetadataFile(x.Remote()) {
				f.addData(&newEntries, x) // Only care about data files for now; metadata files are redundant.
			}
@@ -333,7 +337,7 @@ func (f *Fs) ListR(ctx context.Context, dir string, callback fs.ListRCallback) (
// NewObject finds the Object at remote.
func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) {
	// Read metadata from metadata object
	mo, err := f.Fs.NewObject(ctx, generateMetadataName(remote))
	mo, err := f.Fs.NewObject(ctx, makeMetadataName(remote))
	if err != nil {
		return nil, err
	}
@@ -342,34 +346,32 @@ func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) {
		return nil, errors.New("error decoding metadata")
	}
	// Create our Object
	o, err := f.Fs.NewObject(ctx, generateDataNameFromCompressionMode(remote, meta.Size, meta.CompressionMode))
	o, err := f.Fs.NewObject(ctx, makeDataName(remote, meta.Size, meta.Mode))
	return f.newObject(o, mo, meta), err
}

// Checks the compressibility and mime type of a file. Returns a rewinded reader, whether the file is compressible, and an error code.
func (c *Compression) checkFileCompressibilityAndType(in io.Reader) (newReader io.Reader, compressible bool, mimeType string, err error) {
	// Unwrap accounting, get compressibility of file, rewind reader, then wrap accounting back on
// findMimeType attempts to find the mime type of the object so we can determine compressibility
// returns a multireader with the bytes that were read to determine mime type
func findMimeType(in io.Reader) (newReader io.Reader, mimeType string, err error) {
	in, wrap := accounting.UnWrap(in)
	var b bytes.Buffer
	_, err = io.CopyN(&b, in, c.HeuristicBytes)
	_, err = io.CopyN(&b, in, heuristicBytes)
	if err != nil && err != io.EOF {
		return nil, false, "", err
	}
	compressible, _, err = c.GetFileCompressionInfo(bytes.NewReader(b.Bytes()))
	if err != nil {
		return nil, false, "", err
		return nil, "", err
	}
	mime := mimetype.Detect(b.Bytes())
	in = io.MultiReader(bytes.NewReader(b.Bytes()), in)
	in = wrap(in)
	return in, compressible, mime.String(), nil
	return wrap(in), mime.String(), nil
}

func isCompressible(mime string) bool {
	return !unconpressibleRegexp.MatchString(mime)
}

// Verifies an object hash
func (f *Fs) verifyObjectHash(ctx context.Context, o fs.Object, hasher *hash.MultiHasher, ht hash.Type) (err error) {
func (f *Fs) verifyObjectHash(ctx context.Context, o fs.Object, hasher *hash.MultiHasher, ht hash.Type) error {
	srcHash := hasher.Sums()[ht]
	var dstHash string
	dstHash, err = o.Hash(ctx, ht)
	dstHash, err := o.Hash(ctx, ht)
	if err != nil {
		return errors.Wrap(err, "failed to read destination hash")
	}
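
With this hunk, compressibility is decided purely from the detected MIME type matched against unconpressibleRegexp, instead of trial-compressing the head of the file. A small sketch of the new check (not part of the commit; the expected values follow from the regexp defined earlier in this diff):

package press

import "fmt"

// Sketch of the new MIME-based check: media types and already-compressed
// archive types are stored uncompressed.
func Example_isCompressible() {
	fmt.Println(isCompressible("text/plain; charset=utf-8"))
	fmt.Println(isCompressible("video/mp4"))
	fmt.Println(isCompressible("application/zip"))
	// Output:
	// true
	// false
	// false
}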
@ -386,9 +388,9 @@ func (f *Fs) verifyObjectHash(ctx context.Context, o fs.Object, hasher *hash.Mul
|
||||
|
||||
type putFn func(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error)
|
||||
|
||||
type blockDataAndError struct {
|
||||
err error
|
||||
blockData []uint32
|
||||
type compressionResult struct {
|
||||
err error
|
||||
meta sgzip.GzipMetadata
|
||||
}
|
||||
|
||||
// Put a compressed version of a file. Returns a wrappable object and metadata.
|
||||
@ -401,21 +403,29 @@ func (f *Fs) putCompress(ctx context.Context, in io.Reader, src fs.ObjectInfo, o
|
||||
in = io.TeeReader(in, metaHasher)
|
||||
|
||||
// Compress the file
|
||||
var wrappedIn io.Reader
|
||||
pipeReader, pipeWriter := io.Pipe()
|
||||
compressionResult := make(chan blockDataAndError)
|
||||
results := make(chan compressionResult)
|
||||
go func() {
|
||||
blockData, err := f.c.CompressFileReturningBlockData(in, pipeWriter)
|
||||
closeErr := pipeWriter.Close()
|
||||
if closeErr != nil {
|
||||
fs.Errorf(nil, "Failed to close compression pipe: %v", err)
|
||||
gz, err := sgzip.NewWriterLevel(pipeWriter, f.opt.CompressionLevel)
|
||||
if err != nil {
|
||||
results <- compressionResult{err: err, meta: sgzip.GzipMetadata{}}
|
||||
return
|
||||
}
|
||||
_, err = io.Copy(gz, in)
|
||||
gzErr := gz.Close()
|
||||
if gzErr != nil {
|
||||
fs.Errorf(nil, "Failed to close compress: %v", gzErr)
|
||||
if err == nil {
|
||||
err = closeErr
|
||||
err = gzErr
|
||||
}
|
||||
}
|
||||
compressionResult <- blockDataAndError{err: err, blockData: blockData}
|
||||
closeErr := pipeWriter.Close()
|
||||
if closeErr != nil {
|
||||
fs.Errorf(nil, "Failed to close pipe: %v", closeErr)
|
||||
}
|
||||
results <- compressionResult{err: err, meta: gz.MetaData()}
|
||||
}()
|
||||
wrappedIn = wrap(bufio.NewReaderSize(pipeReader, bufferSize)) // Bufio required for multithreading
|
||||
wrappedIn := wrap(bufio.NewReaderSize(pipeReader, bufferSize)) // Probably no longer needed as sgzip has it's own buffering
|
||||
|
||||
// If verifyCompressedObject is on, find a hash the destination supports to compute a hash of
|
||||
// the compressed data.
|
||||
@ -435,8 +445,7 @@ func (f *Fs) putCompress(ctx context.Context, in io.Reader, src fs.ObjectInfo, o
|
||||
}
|
||||
|
||||
// Transfer the data
|
||||
//o, err := put(ctx, wrappedIn, f.wrapInfo(src, f.c.generateDataName(src.Remote(), src.Size(), true), src.Size()), options...)
|
||||
o, err := operations.Rcat(ctx, f.Fs, f.c.generateDataName(src.Remote(), src.Size(), true), ioutil.NopCloser(wrappedIn), src.ModTime(ctx))
|
||||
o, err := operations.Rcat(ctx, f.Fs, makeDataName(src.Remote(), src.Size(), f.mode), ioutil.NopCloser(wrappedIn), src.ModTime(ctx))
|
||||
if err != nil {
|
||||
if o != nil {
|
||||
removeErr := o.Remove(ctx)
|
||||
@ -447,7 +456,7 @@ func (f *Fs) putCompress(ctx context.Context, in io.Reader, src fs.ObjectInfo, o
|
||||
return nil, nil, err
|
||||
}
|
||||
// Check whether we got an error during compression
|
||||
result := <-compressionResult
|
||||
result := <-results
|
||||
err = result.err
|
||||
if err != nil {
|
||||
if o != nil {
|
||||
@ -460,13 +469,11 @@ func (f *Fs) putCompress(ctx context.Context, in io.Reader, src fs.ObjectInfo, o
|
||||
}
|
||||
|
||||
// Generate metadata
|
||||
blockData := result.blockData
|
||||
_, _, decompressedSize := parseBlockData(blockData, f.c.BlockSize)
|
||||
meta := newMetadata(decompressedSize, f.c.CompressionMode, blockData, metaHasher.Sum(nil), mimeType)
|
||||
meta := newMetadata(result.meta.Size, f.mode, result.meta, metaHasher.Sum(nil), mimeType)
|
||||
|
||||
// Check the hashes of the compressed data if we were comparing them
|
||||
if ht != hash.None && hasher != nil {
|
||||
err := f.verifyObjectHash(ctx, o, hasher, ht)
|
||||
err = f.verifyObjectHash(ctx, o, hasher, ht)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
@ -479,28 +486,23 @@ func (f *Fs) putCompress(ctx context.Context, in io.Reader, src fs.ObjectInfo, o
|
||||
func (f *Fs) putUncompress(ctx context.Context, in io.Reader, src fs.ObjectInfo, options []fs.OpenOption, put putFn, mimeType string, verifyCompressedObject bool) (fs.Object, *ObjectMetadata, error) {
|
||||
// Unwrap the accounting, add our metadata hasher, then wrap it back on
|
||||
in, wrap := accounting.UnWrap(in)
|
||||
metaHasher := md5.New()
|
||||
|
||||
hs := hash.NewHashSet(hash.MD5)
|
||||
ht := f.Fs.Hashes().GetOne()
|
||||
if verifyCompressedObject {
|
||||
if !hs.Contains(ht) {
|
||||
hs.Add(ht)
|
||||
}
|
||||
}
|
||||
metaHasher, err := hash.NewMultiHasherTypes(hs)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
in = io.TeeReader(in, metaHasher)
|
||||
wrappedIn := wrap(in)
|
||||
// If verifyCompressedObject is on, find a hash the destination supports to compute a hash of
|
||||
// the compressed data.
|
||||
ht := f.Fs.Hashes().GetOne()
|
||||
var hasher *hash.MultiHasher
|
||||
var err error
|
||||
if ht != hash.None && verifyCompressedObject {
|
||||
// unwrap the accounting again
|
||||
wrappedIn, wrap = accounting.UnWrap(wrappedIn)
|
||||
hasher, err = hash.NewMultiHasherTypes(hash.NewHashSet(ht))
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
// add the hasher and re-wrap the accounting
|
||||
wrappedIn = io.TeeReader(wrappedIn, hasher)
|
||||
wrappedIn = wrap(wrappedIn)
|
||||
}
|
||||
|
||||
// Put the object
|
||||
o, err := put(ctx, wrappedIn, f.wrapInfo(src, f.c.generateDataName(src.Remote(), src.Size(), false), src.Size()), options...)
|
||||
//o, err := operations.Rcat(f, f.c.generateDataName(src.Remote(), src.Size(), false), wrappedIn, src.ModTime())
|
||||
o, err := put(ctx, wrappedIn, f.wrapInfo(src, makeDataName(src.Remote(), src.Size(), Uncompressed), src.Size()), options...)
|
||||
if err != nil {
|
||||
if o != nil {
|
||||
removeErr := o.Remove(ctx)
|
||||
@ -511,14 +513,19 @@ func (f *Fs) putUncompress(ctx context.Context, in io.Reader, src fs.ObjectInfo,
|
||||
return nil, nil, err
|
||||
}
|
||||
// Check the hashes of the compressed data if we were comparing them
|
||||
if ht != hash.None && hasher != nil {
|
||||
err := f.verifyObjectHash(ctx, o, hasher, ht)
|
||||
if ht != hash.None && verifyCompressedObject {
|
||||
err := f.verifyObjectHash(ctx, o, metaHasher, ht)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
}
|
||||
|
||||
// Return our object and metadata
|
||||
return o, newMetadata(o.Size(), Uncompressed, []uint32{}, metaHasher.Sum([]byte{}), mimeType), nil
|
||||
sum, err := metaHasher.Sum(hash.MD5)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
return o, newMetadata(o.Size(), Uncompressed, sgzip.GzipMetadata{}, sum, mimeType), nil
|
||||
}
|
||||
|
||||
// This function will write a metadata struct to a metadata Object for an src. Returns a wrappable metadata object.
|
||||
@ -538,7 +545,7 @@ func (f *Fs) putMetadata(ctx context.Context, meta *ObjectMetadata, src fs.Objec
|
||||
metaReader := bytes.NewReader(b.Bytes())
|
||||
|
||||
// Put the data
|
||||
mo, err = put(ctx, metaReader, f.wrapInfo(src, generateMetadataName(src.Remote()), int64(b.Len())), options...)
|
||||
mo, err = put(ctx, metaReader, f.wrapInfo(src, makeMetadataName(src.Remote()), int64(b.Len())), options...)
|
||||
if err != nil {
|
||||
removeErr := mo.Remove(ctx)
|
||||
if removeErr != nil {
|
||||
@ -592,11 +599,11 @@ func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options .
|
||||
o, err := f.NewObject(ctx, src.Remote())
|
||||
if err == fs.ErrorObjectNotFound {
|
||||
// Get our file compressibility
|
||||
in, compressible, mimeType, err := f.c.checkFileCompressibilityAndType(in)
|
||||
in, mimeType, err := findMimeType(in)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return f.putWithCustomFunctions(ctx, in, src, options, f.Fs.Put, f.Fs.Put, compressible, mimeType, true)
|
||||
return f.putWithCustomFunctions(ctx, in, src, options, f.Fs.Put, f.Fs.Put, isCompressible(mimeType), mimeType, true)
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@ -612,10 +619,11 @@ func (f *Fs) PutStream(ctx context.Context, in io.Reader, src fs.ObjectInfo, opt
|
||||
}
|
||||
found := err == nil
|
||||
|
||||
in, compressible, mimeType, err := f.c.checkFileCompressibilityAndType(in)
|
||||
in, mimeType, err := findMimeType(in)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
compressible := isCompressible(mimeType)
|
||||
newObj, err := f.putWithCustomFunctions(ctx, in, src, options, f.Fs.Features().PutStream, f.Fs.Put, compressible, mimeType, true)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@ -633,16 +641,18 @@ func (f *Fs) PutStream(ctx context.Context, in io.Reader, src fs.ObjectInfo, opt
|
||||
moveFs, ok := f.Fs.(fs.Mover)
|
||||
var wrapObj fs.Object
|
||||
if ok {
|
||||
wrapObj, err = moveFs.Move(ctx, newObj.Object, f.c.generateDataName(src.Remote(), newObj.size, compressible))
|
||||
wrapObj, err = moveFs.Move(ctx, newObj.Object, f.dataName(src.Remote(), newObj.size, compressible))
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "Couldn't rename streamed object.")
|
||||
}
|
||||
newObj.Object = wrapObj
|
||||
return newObj, nil
|
||||
}
|
||||
|
||||
// If we don't have move we'll need to resort to serverside copy and remove
|
||||
copyFs, ok := f.Fs.(fs.Copier)
|
||||
if ok {
|
||||
wrapObj, err := copyFs.Copy(ctx, newObj.Object, f.c.generateDataName(src.Remote(), newObj.size, compressible))
|
||||
wrapObj, err := copyFs.Copy(ctx, newObj.Object, f.dataName(src.Remote(), newObj.size, compressible))
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "Could't copy streamed object.")
|
||||
}
|
||||
@ -652,9 +662,7 @@ func (f *Fs) PutStream(ctx context.Context, in io.Reader, src fs.ObjectInfo, opt
|
||||
return wrapObj, errors.Wrap(err, "Couldn't remove original streamed object. Remote may be in an incositent state.")
|
||||
}
|
||||
}
|
||||
|
||||
newObj.Object = wrapObj
|
||||
|
||||
return newObj, nil
|
||||
}
|
||||
|
||||
@ -665,15 +673,6 @@ func (f *Fs) PutStream(ctx context.Context, in io.Reader, src fs.ObjectInfo, opt
|
||||
//
|
||||
// This will create a duplicate if we upload a new file without
|
||||
// checking to see if there is one already - use Put() for that.
|
||||
/*func (f *Fs) PutUnchecked(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
|
||||
// If PutUnchecked is supported, do it.
|
||||
// I'm unsure about this. With the current metadata model this might actually break things. Needs some manual testing.
|
||||
do := f.Fs.Features().PutUnchecked
|
||||
if do == nil {
|
||||
return nil, errors.New("can't PutUnchecked")
|
||||
}
|
||||
return f.putWithCustomFunctions(ctx, in, src, options, do, do, false)
|
||||
}*/
|
||||
|
||||
// Hashes returns the supported hash sets.
|
||||
func (f *Fs) Hashes() hash.Set {
|
||||
@ -744,13 +743,13 @@ func (f *Fs) Copy(ctx context.Context, src fs.Object, remote string) (fs.Object,
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
newFilename := generateMetadataName(remote)
|
||||
newFilename := makeMetadataName(remote)
|
||||
moResult, err := do(ctx, o.mo, newFilename)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// Copy over data
|
||||
newFilename = generateDataNameFromCompressionMode(remote, src.Size(), o.meta.CompressionMode)
|
||||
newFilename = makeDataName(remote, src.Size(), o.meta.Mode)
|
||||
oResult, err := do(ctx, o.Object, newFilename)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@ -794,14 +793,14 @@ func (f *Fs) Move(ctx context.Context, src fs.Object, remote string) (fs.Object,
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
newFilename := generateMetadataName(remote)
|
||||
newFilename := makeMetadataName(remote)
|
||||
moResult, err := do(ctx, o.mo, newFilename)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Move data
|
||||
newFilename = generateDataNameFromCompressionMode(remote, src.Size(), o.meta.CompressionMode)
|
||||
newFilename = makeDataName(remote, src.Size(), o.meta.Mode)
|
||||
oResult, err := do(ctx, o.Object, newFilename)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@ -908,7 +907,7 @@ func (f *Fs) ChangeNotify(ctx context.Context, notifyFunc func(string, fs.EntryT
|
||||
case fs.EntryObject:
|
||||
// Note: All we really need to do to monitor the object is to check whether the metadata changed,
|
||||
// as the metadata contains the hash. This will work unless there's a hash collision and the sizes stay the same.
|
||||
wrappedPath = generateMetadataName(path)
|
||||
wrappedPath = makeMetadataName(path)
|
||||
default:
|
||||
fs.Errorf(path, "press ChangeNotify: ignoring unknown EntryType %d", entryType)
|
||||
return
|
||||
@ -936,11 +935,11 @@ func (f *Fs) PublicLink(ctx context.Context, remote string, duration fs.Duration
|
||||
|
||||
// ObjectMetadata describes the metadata for an Object.
|
||||
type ObjectMetadata struct {
|
||||
Size int64 // Uncompressed size of the file.
|
||||
CompressionMode int // Compression mode of the file.
|
||||
BlockData []uint32 // Block indexing data for the file.
|
||||
Hash []byte // MD5 hash of the file.
|
||||
MimeType string // Mime type of the file
|
||||
Size int64 // Uncompressed size of the file.
|
||||
Mode int // Compression mode of the file.
|
||||
Hash []byte // MD5 hash of the file.
|
||||
MimeType string // Mime type of the file
|
||||
CompressionMetadata sgzip.GzipMetadata
|
||||
}
|
||||
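The reworked ObjectMetadata above replaces the per-block index (BlockData) with a single sgzip.GzipMetadata value from the factored-out seekable-gzip library. How the struct is serialised into the wrapped metadata object is not shown in this section; the sketch below assumes a JSON encoding purely for illustration.

package press

import (
	"bytes"
	"encoding/json"
)

// marshalMetadataSketch shows one plausible way to turn ObjectMetadata into the
// bytes stored in the metadata object. JSON is an assumption here; the backend
// may use a different encoding.
func marshalMetadataSketch(meta *ObjectMetadata) (*bytes.Reader, error) {
	data, err := json.Marshal(meta)
	if err != nil {
		return nil, err
	}
	return bytes.NewReader(data), nil
}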
|
||||
// Object with external metadata
|
||||
@ -954,11 +953,11 @@ type Object struct {
|
||||
}
|
||||
|
||||
// This function generates a metadata object
|
||||
func newMetadata(size int64, compressionMode int, blockData []uint32, hash []byte, mimeType string) *ObjectMetadata {
|
||||
func newMetadata(size int64, mode int, cmeta sgzip.GzipMetadata, hash []byte, mimeType string) *ObjectMetadata {
|
||||
meta := new(ObjectMetadata)
|
||||
meta.Size = size
|
||||
meta.CompressionMode = compressionMode
|
||||
meta.BlockData = blockData
|
||||
meta.Mode = mode
|
||||
meta.CompressionMetadata = cmeta
|
||||
meta.Hash = hash
|
||||
meta.MimeType = mimeType
|
||||
return meta
|
||||
@ -1042,35 +1041,20 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op
|
||||
return o.mo, o.mo.Update(ctx, in, src, options...)
|
||||
}
|
||||
|
||||
// Get our file compressibility
|
||||
in, compressible, mimeType, err := o.f.c.checkFileCompressibilityAndType(in)
|
||||
in, mimeType, err := findMimeType(in)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
compressible := isCompressible(mimeType)
|
||||
|
||||
// Since we're encoding the original filesize in the name we'll need to make sure that this name is updated before the actual update
|
||||
// Since we are storing the file size in the name, the new object may have a different name than the old one
|
||||
// We'll make sure to delete the old object in this case
|
||||
var newObject *Object
|
||||
origName := o.Remote()
|
||||
if o.meta.CompressionMode != Uncompressed || compressible {
|
||||
// If we aren't, we must either move-then-update or reupload-then-remove the object, and update the metadata.
|
||||
// Check if this FS supports moving
|
||||
moveFs, ok := o.f.Fs.(fs.Mover)
|
||||
if ok { // If this fs supports moving, use move-then-update. This may help keep some versioning alive.
|
||||
// First, move the object
|
||||
var movedObject fs.Object
|
||||
movedObject, err = moveFs.Move(ctx, o.Object, o.f.c.generateDataName(o.Remote(), src.Size(), compressible))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Create function that updates moved object, then update
|
||||
update := func(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
|
||||
return movedObject, movedObject.Update(ctx, in, src, options...)
|
||||
}
|
||||
newObject, err = o.f.putWithCustomFunctions(ctx, in, src, options, update, updateMeta, compressible, mimeType, true)
|
||||
} else { // If this fs does not support moving, fall back to reuploading the object then removing the old one.
|
||||
newObject, err = o.f.putWithCustomFunctions(ctx, in, o.f.wrapInfo(src, origName, src.Size()), options, o.f.Fs.Put, updateMeta, compressible, mimeType, true)
|
||||
removeErr := o.Object.Remove(ctx) // Note: We must do remove later so a failed update doesn't destroy data.
|
||||
if removeErr != nil {
|
||||
if o.meta.Mode != Uncompressed || compressible {
|
||||
newObject, err = o.f.putWithCustomFunctions(ctx, in, o.f.wrapInfo(src, origName, src.Size()), options, o.f.Fs.Put, updateMeta, compressible, mimeType, true)
|
||||
if newObject.Object.Remote() != o.Object.Remote() {
|
||||
if removeErr := o.Object.Remove(ctx); removeErr != nil {
|
||||
return removeErr
|
||||
}
|
||||
}
|
||||
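Because the uncompressed size is encoded in the data object's name, an Update that changes the size also changes the wrapped name, which is why the hunk above removes the old data object only when the two remotes differ. A small sketch of that guard follows; removeIfRenamedSketch is a hypothetical helper, not part of the backend.

package press

import (
	"context"

	"github.com/rclone/rclone/fs"
)

// removeIfRenamedSketch mirrors the guard above: the old data object is removed
// only when the update produced an object under a different name, so a failed
// update never deletes the only copy of the data.
func removeIfRenamedSketch(ctx context.Context, oldObj, newObj fs.Object) error {
	if newObj.Remote() == oldObj.Remote() {
		return nil // update overwrote the data in place
	}
	return oldObj.Remove(ctx)
}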
@ -1220,7 +1204,7 @@ func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (rc io.Read
|
||||
return nil, err
|
||||
}
|
||||
// If we're uncompressed, just pass this to the underlying object
|
||||
if o.meta.CompressionMode == Uncompressed {
|
||||
if o.meta.Mode == Uncompressed {
|
||||
return o.Object.Open(ctx, options...)
|
||||
}
|
||||
// Get offset and limit from OpenOptions, pass the rest to the underlying remote
|
||||
@ -1239,27 +1223,21 @@ func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (rc io.Read
|
||||
// Get a chunkedreader for the wrapped object
|
||||
chunkedReader := chunkedreader.New(ctx, o.Object, initialChunkSize, maxChunkSize)
|
||||
// Get file handle
|
||||
c, err := NewCompressionPresetNumber(o.meta.CompressionMode)
|
||||
var file io.Reader
|
||||
if offset != 0 {
|
||||
file, err = sgzip.NewReaderAt(chunkedReader, &o.meta.CompressionMetadata, offset)
|
||||
} else {
|
||||
file, err = sgzip.NewReader(chunkedReader)
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
FileHandle, _, err := c.DecompressFileExtData(chunkedReader, o.Object.Size(), o.meta.BlockData)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// Seek and limit according to the options given
|
||||
// Note: This if statement is not required anymore because all 0-size files will be uncompressed. I'm leaving it here in case I come back to debug this.
|
||||
if offset != 0 { // Note: this if statement is only required because seeking to 0 on a 0-size file makes chunkedReader complain about an "invalid seek position".
|
||||
_, err = FileHandle.Seek(offset, io.SeekStart)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
var fileReader io.Reader
|
||||
if limit != -1 {
|
||||
fileReader = io.LimitReader(FileHandle, limit)
|
||||
fileReader = io.LimitReader(file, limit)
|
||||
} else {
|
||||
fileReader = FileHandle
|
||||
fileReader = file
|
||||
}
|
||||
// Return a ReadCloser
|
||||
return combineReaderAndCloser(fileReader, chunkedReader), nil
|
||||
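The Open path above now serves ranged reads by wrapping the data object in a chunked reader and handing it to the seekable-gzip library: sgzip.NewReaderAt when an offset was requested, sgzip.NewReader otherwise, with io.LimitReader applying any length limit. A condensed sketch of that flow follows; the sgzip calls and the package constants initialChunkSize and maxChunkSize are taken from the hunk above, while the sgzip import path and the helper openCompressedSketch are assumptions.

package press

import (
	"context"
	"io"

	"github.com/buengese/sgzip"
	"github.com/rclone/rclone/fs"
	"github.com/rclone/rclone/fs/chunkedreader"
)

// openCompressedSketch condenses the logic above: chunked reads from the wrapped
// object, a gzip reader positioned at the requested offset, then the limit.
func openCompressedSketch(ctx context.Context, data fs.Object, meta *sgzip.GzipMetadata, offset, limit int64) (io.Reader, error) {
	cr := chunkedreader.New(ctx, data, initialChunkSize, maxChunkSize)
	var r io.Reader
	var err error
	if offset != 0 {
		r, err = sgzip.NewReaderAt(cr, meta, offset)
	} else {
		r, err = sgzip.NewReader(cr)
	}
	if err != nil {
		return nil, err
	}
	if limit != -1 {
		r = io.LimitReader(r, limit)
	}
	return r, nil
}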
|
@ -6,6 +6,7 @@ import (
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
_ "github.com/rclone/rclone/backend/dropbox"
|
||||
_ "github.com/rclone/rclone/backend/local"
|
||||
"github.com/rclone/rclone/fstest"
|
||||
"github.com/rclone/rclone/fstest/fstests"
|
||||
@ -35,37 +36,6 @@ func TestIntegration(t *testing.T) {
|
||||
})
|
||||
}
|
||||
|
||||
// TestRemoteLz4 tests LZ4 compression
|
||||
func TestRemoteLz4(t *testing.T) {
|
||||
if *fstest.RemoteName != "" {
|
||||
t.Skip("Skipping as -remote set")
|
||||
}
|
||||
tempdir := filepath.Join(os.TempDir(), "rclone-press-test-lz4")
|
||||
name := "TestPressLz4"
|
||||
fstests.Run(t, &fstests.Opt{
|
||||
RemoteName: name + ":",
|
||||
NilObject: (*Object)(nil),
|
||||
UnimplementableFsMethods: []string{
|
||||
"OpenWriterAt",
|
||||
"MergeDirs",
|
||||
"DirCacheFlush",
|
||||
"PutUnchecked",
|
||||
"PutStream",
|
||||
"UserInfo",
|
||||
"Disconnect",
|
||||
},
|
||||
UnimplementableObjectMethods: []string{
|
||||
"GetTier",
|
||||
"SetTier",
|
||||
},
|
||||
ExtraConfig: []fstests.ExtraConfigItem{
|
||||
{Name: name, Key: "type", Value: "press"},
|
||||
{Name: name, Key: "remote", Value: tempdir},
|
||||
{Name: name, Key: "compression_mode", Value: "lz4"},
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
// TestRemoteGzip tests GZIP compression
|
||||
func TestRemoteGzip(t *testing.T) {
|
||||
if *fstest.RemoteName != "" {
|
||||
@ -96,34 +66,3 @@ func TestRemoteGzip(t *testing.T) {
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
// TestRemoteXz tests XZ compression
|
||||
func TestRemoteXz(t *testing.T) {
|
||||
if *fstest.RemoteName != "" {
|
||||
t.Skip("Skipping as -remote set")
|
||||
}
|
||||
tempdir := filepath.Join(os.TempDir(), "rclone-press-test-xz")
|
||||
name := "TestPressXz"
|
||||
fstests.Run(t, &fstests.Opt{
|
||||
RemoteName: name + ":",
|
||||
NilObject: (*Object)(nil),
|
||||
UnimplementableFsMethods: []string{
|
||||
"OpenWriterAt",
|
||||
"MergeDirs",
|
||||
"DirCacheFlush",
|
||||
"PutUnchecked",
|
||||
"PutStream",
|
||||
"UserInfo",
|
||||
"Disconnect",
|
||||
},
|
||||
UnimplementableObjectMethods: []string{
|
||||
"GetTier",
|
||||
"SetTier",
|
||||
},
|
||||
ExtraConfig: []fstests.ExtraConfigItem{
|
||||
{Name: name, Key: "type", Value: "press"},
|
||||
{Name: name, Key: "remote", Value: tempdir},
|
||||
{Name: name, Key: "compression_mode", Value: "xz"},
|
||||
},
|
||||
})
|
||||
}
|
||||
|