press: Added experimental compression remote - implements #2098, #1356, #675

This commit is contained in:
id01 2019-06-14 14:00:46 -07:00 committed by buengese
parent 366e0e18cd
commit e41a88fb23
9 changed files with 2480 additions and 0 deletions

1
backend/press/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
test

98
backend/press/alg_exec.go Normal file
View File

@ -0,0 +1,98 @@
package press
// This file implements shell exec algorithms that require binaries.
import (
"bytes"
"io"
"os/exec"
)
// xzcommand is the name of the xz binary looked up on PATH (if available).
const xzcommand = "xz"

// ExecHeader - Header we add to an exec file. We don't need this.
var ExecHeader = []byte{}

// checkXZ reports whether the xz binary can be found on the current PATH.
// It looks up the same xzcommand name that getBinPaths resolves, so the two
// can never disagree about which binary is meant.
func checkXZ() bool {
	_, err := exec.LookPath(xzcommand)
	return err == nil
}
// getBinPaths resolves the external binary a compression mode depends on.
// For the XZ modes it looks up xzcommand on PATH and stores the result in
// c.BinPath, returning the lookup error if there is one. Every other mode
// leaves c untouched and yields a nil error.
func getBinPaths(c *Compression, mode int) (err error) {
	switch mode {
	case XZMin, XZDefault:
		c.BinPath, err = exec.LookPath(xzcommand)
	}
	return err
}
// compressBlockExec compresses a block by piping it through an external
// command, without wrapping the result in gzip. It requires a binary
// corresponding to the command at binaryPath.
//
// All of in is written to the subprocess's stdin and everything the
// subprocess prints on stdout is copied to out. It returns the number of
// compressed bytes copied to out and the number of uncompressed bytes given.
func (c *Compression) compressBlockExec(in []byte, out io.Writer, binaryPath string, args []string) (compressedSize uint32, uncompressedSize int64, err error) {
	// Initialize compression subprocess
	subprocess := exec.Command(binaryPath, args...)
	stdin, err := subprocess.StdinPipe()
	if err != nil {
		return 0, 0, err
	}

	// Feed the input to the subprocess in the background. The channel is
	// buffered so the goroutine can always deliver its result and exit, even
	// when one of the early returns below never reads it.
	stdinError := make(chan error, 1)
	go func() {
		_, err := stdin.Write(in)
		_ = stdin.Close() // the write error, if any, is the one worth reporting
		stdinError <- err
	}()

	// Get output
	output, err := subprocess.Output()
	if err != nil {
		return 0, 0, err
	}

	// Copy over
	n, err := io.Copy(out, bytes.NewReader(output))
	if err != nil {
		return uint32(n), int64(len(in)), err
	}

	// Check if there was an error feeding stdin and return
	err = <-stdinError
	return uint32(n), int64(len(in)), err
}
// decompressBlockRangeExec decompresses a block range by piping it through an
// external command (for data that wasn't wrapped in gzip).
//
// Everything readable from in is streamed to the subprocess's stdin and the
// subprocess's stdout is copied to out. It returns the number of decompressed
// bytes copied to out.
func decompressBlockRangeExec(in io.Reader, out io.Writer, binaryPath string, args []string) (n int, err error) {
	// Initialize decompression subprocess
	subprocess := exec.Command(binaryPath, args...)
	stdin, err := subprocess.StdinPipe()
	if err != nil {
		return 0, err
	}

	// Stream the compressed block to the subprocess in the background. The
	// channel is buffered so the goroutine can always deliver its result and
	// exit, even when one of the early returns below never reads it.
	stdinError := make(chan error, 1)
	go func() {
		_, err := io.Copy(stdin, in)
		_ = stdin.Close() // the copy error, if any, is the one worth reporting
		stdinError <- err
	}()

	// Get output, copy, and return
	output, err := subprocess.Output()
	if err != nil {
		return 0, err
	}
	n64, err := io.Copy(out, bytes.NewReader(output))
	if err != nil {
		return int(n64), err
	}
	err = <-stdinError
	return int(n64), err
}

49
backend/press/alg_gzip.go Normal file
View File

@ -0,0 +1,49 @@
package press
// This file implements the gzip algorithm.
import (
"bufio"
"compress/gzip"
"io"
)
// GzipHeader - Header we add to a gzip file. We're concatenating GZIP files here, so we don't need this.
var GzipHeader = []byte{}
// compressBlockGz gzip-compresses one block at the given compression level and
// writes the resulting gzip stream to out.
//
// The compressed bytes are staged in a bufio.Writer sized by
// maxCompressedBlockSize so that they can be counted (via Buffered) before
// being flushed. It returns that count and the length of in.
func (c *Compression) compressBlockGz(in []byte, out io.Writer, compressionLevel int) (compressedSize uint32, uncompressedSize int64, err error) {
	staged := bufio.NewWriterSize(out, int(c.maxCompressedBlockSize()))
	gz, err := gzip.NewWriterLevel(staged, compressionLevel)
	if err != nil {
		return 0, 0, err
	}
	if _, err = gz.Write(in); err != nil {
		return 0, 0, err
	}
	// Closing finalizes the gzip stream; nothing has reached out yet
	if err = gz.Close(); err != nil {
		return 0, 0, err
	}
	compressedSize = uint32(staged.Buffered())
	uncompressedSize = int64(len(in))
	err = staged.Flush()
	return compressedSize, uncompressedSize, err
}
// decompressBlockRangeGz gunzips everything readable from in into out and
// returns the number of decompressed bytes written.
func decompressBlockRangeGz(in io.Reader, out io.Writer) (n int, err error) {
	zr, err := gzip.NewReader(in)
	if err != nil {
		return 0, err
	}
	copied, err := io.Copy(out, zr)
	n = int(copied)
	return n, err
}

95
backend/press/alg_lz4.go Normal file
View File

@ -0,0 +1,95 @@
package press
// This file implements the LZ4 algorithm.
import (
"bytes"
"encoding/binary"
"errors"
"io"
"github.com/OneOfOne/xxhash"
lz4 "github.com/id01/go-lz4"
)
/*
Structure of LZ4 header:
Flags:
Version = 01
Independent = 1
Block Checksum = 1
Content Size = 0
Content Checksum = 0
Reserved = 0
Dictionary ID = 0
BD byte:
Reserved = 0
Block Max Size = 101 (or 5; 256kb)
Reserved = 0000
Header checksum byte (xxhash(flags and bd byte) >> 1) & 0xff
*/
// LZ4Header - Header of our LZ4 file.
// Following the layout described in the comment above: 0x70 is the flags byte
// (version 01, independent, block checksum), 0x50 is the BD byte (block max size 101)
// and 0x84 is the header checksum byte.
// NOTE(review): the leading 0x04 0x22 0x4d 0x18 is presumably the LZ4 frame magic number — confirm against the LZ4 frame format.
var LZ4Header = []byte{0x04, 0x22, 0x4d, 0x18, 0x70, 0x50, 0x84}
// LZ4Footer - Footer of our LZ4 file. It is what getFooter returns for the LZ4 mode.
var LZ4Footer = []byte{0x00, 0x00, 0x00, 0x00} // This is just an empty block
// compressBlockLz4 encodes one block with lz4 and writes it to out, followed
// by a 4-byte little-endian xxhash32 of the encoded data. The hash skips the
// first four encoded bytes (the size field). It returns the total number of
// bytes written and the length of in.
func (c *Compression) compressBlockLz4(in []byte, out io.Writer) (compressedSize uint32, uncompressedSize int64, err error) {
	encoded, err := lz4.Encode(nil, in)
	if err != nil {
		return 0, 0, err
	}

	// Emit the encoded block first
	payloadLen, err := out.Write(encoded)
	if err != nil {
		return 0, 0, err
	}

	// Then the checksum trailer; the checksum doesn't include the size
	hasher := xxhash.New32()
	if _, err = hasher.Write(encoded[4:]); err != nil {
		return 0, 0, err
	}
	var trailer [4]byte
	binary.LittleEndian.PutUint32(trailer[:], hasher.Sum32())
	trailerLen, err := out.Write(trailer[:])
	if err != nil {
		return 0, 0, err
	}

	return uint32(payloadLen + trailerLen), int64(len(in)), err
}
// decompressBlockLz4 decompresses a single block produced by compressBlockLz4.
//
// The block read from in is the lz4-encoded data (whose first four bytes are
// the size field) followed by a 4-byte little-endian xxhash32 of the encoded
// data without that size field. The checksum is verified before decoding.
// BlockSize is used to size the decode buffer. It returns the number of
// decompressed bytes written to out.
func decompressBlockLz4(in io.Reader, out io.Writer, BlockSize int64) (n int, err error) {
	// Get our compressed data
	var b bytes.Buffer
	_, err = io.Copy(&b, in)
	if err != nil {
		return 0, err
	}

	// A valid block holds at least the 4-byte size field and the 4-byte
	// checksum; anything shorter would make the slicing below panic.
	compressedBytesWithHash := b.Bytes()
	if len(compressedBytesWithHash) < 8 {
		return 0, errors.New("lz4 block too short")
	}

	// Split the encoded data from the trailing checksum
	compressedBytes := compressedBytesWithHash[:len(compressedBytesWithHash)-4]
	hash := compressedBytesWithHash[len(compressedBytesWithHash)-4:]

	// Verify, decode, write, and return
	h := xxhash.New32()
	_, err = h.Write(compressedBytes[4:]) // The checksum doesn't include the size
	if err != nil {
		return 0, err
	}
	if binary.LittleEndian.Uint32(hash) != h.Sum32() {
		return 0, errors.New("XXHash checksum invalid")
	}
	dst := make([]byte, BlockSize*2)
	decompressed, err := lz4.Decode(dst, compressedBytes)
	if err != nil {
		return 0, err
	}
	_, err = out.Write(decompressed)
	return len(decompressed), err
}

View File

@ -0,0 +1,35 @@
package press
// This file implements compression/decompression using snappy.
import (
"bytes"
"io"
"github.com/golang/snappy"
)
// SnappyHeader - Header we add to a snappy file. We don't need this.
var SnappyHeader = []byte{}
// compressBlockSnappy encodes one block with snappy and writes it to out.
// It returns the encoded length and the length of in, together with any
// error from the write.
func (c *Compression) compressBlockSnappy(in []byte, out io.Writer) (compressedSize uint32, uncompressedSize int64, err error) {
	encoded := snappy.Encode(nil, in)
	compressedSize = uint32(len(encoded))
	uncompressedSize = int64(len(in))
	_, err = out.Write(encoded)
	return compressedSize, uncompressedSize, err
}
// decompressBlockSnappy reads one whole snappy-encoded block from in, decodes
// it and writes the result to out. It returns the number of decoded bytes.
func decompressBlockSnappy(in io.Reader, out io.Writer) (n int, err error) {
	// snappy.Decode needs the complete block in memory
	var compressed bytes.Buffer
	if _, err = io.Copy(&compressed, in); err != nil {
		return 0, err
	}
	plain, err := snappy.Decode(nil, compressed.Bytes())
	if err != nil {
		return 0, err
	}
	_, err = out.Write(plain)
	return len(plain), err
}

View File

@ -0,0 +1,626 @@
// Package press provides wrappers for Fs and Object which implement compression.
// This file is the backend implementation for seekable compression.
package press
/*
NOTES:
Structure of the metadata we store is:
gzipExtraify(gzip([4-byte header size][4-byte block size] ... [4-byte block size][4-byte raw size of last block]))
This is appended to any compressed file, and is ignored as trailing garbage in our LZ4 and SNAPPY implementations, and seen as empty archives in our GZIP and XZ_IN_GZ implementations.
There are two possible compression/decompression function pairs to be used:
The two functions that store data internally are:
- Compression.CompressFileAppendingBlockData. Appends block data in extra data fields of empty gzip files at the end.
- DecompressFile. Reads block data from extra fields of these empty gzip files.
The two functions that require externally stored data are:
- Compression.CompressFileReturningBlockData. Returns a []uint32 containing raw (uncompressed and unencoded) block data, which must be externally stored.
- DecompressFileExtData. Takes in the []uint32 that was returned by Compression.CompressFileReturningBlockData
WARNING: These function pairs are incompatible with each other. Don't use CompressFileAppendingBlockData with DecompressFileExtData, or the other way around. It won't work.
*/
import (
"bufio"
"bytes"
"errors"
"io"
"io/ioutil"
"log"
)
// Compression modes. Each value selects the codec used by compressBlock and
// decompressBlock; the gzip levels and xz arguments noted below are the ones
// compressBlock passes.
const (
// Uncompressed has no case in the mode switches visible in this file.
Uncompressed = -1
// GzipStore is gzip at level 0.
GzipStore = 0
// GzipMin is gzip at level 1.
GzipMin = 1
// GzipDefault is gzip at level 6.
GzipDefault = 2
// GzipMax is gzip at level 9.
GzipMax = 3
// LZ4 selects the lz4 block codec.
LZ4 = 4
// Snappy selects the snappy block codec.
Snappy = 5
// XZMin runs the external xz binary with "-c1".
XZMin = 6
// XZDefault runs the external xz binary with "-c".
XZDefault = 7
)
// Errors
var (
// ErrMetadataCorrupted is a sentinel error saying the metadata may have been corrupted.
// NOTE(review): nothing in the code visible here returns it — presumably used elsewhere in the package; confirm.
ErrMetadataCorrupted = errors.New("metadata may have been corrupted")
)
// DEBUG - flag for debug mode. When true, the compression and decompression
// paths in this file emit diagnostic output through the log package.
const DEBUG = false
// Compression is a struct containing configurable variables (what used to be constants).
// Values are created by NewCompressionAdvanced, or through NewCompression and the preset constructors that call it.
type Compression struct {
CompressionMode int // Compression mode (one of the mode constants, e.g. GzipMin, LZ4, XZDefault)
BlockSize uint32 // Size of blocks. Higher block size means better compression but more download bandwidth needed for small downloads
// ~1MB is recommended for xz, while ~128KB is recommended for gzip and lz4
HeuristicBytes int64 // Number of leading bytes GetFileCompressionInfo compresses (with the configured mode) to judge whether a file is compressible
NumThreads int // Number of blocks compressed or decompressed concurrently per batch (one goroutine per block)
MaxCompressionRatio float64 // Maximum compressed/uncompressed size ratio for a file to be considered compressible
BinPath string // Path to compression binary. Filled in by getBinPaths for the XZ modes and passed to the exec-based block functions.
}
// NewCompressionPreset creates a Compression object from a named preset,
// which fixes both the compression mode and the block size. An unknown preset
// name yields a nil Compression and an error.
func NewCompressionPreset(preset string) (*Compression, error) {
	var (
		mode      int
		blockSize uint32
	)
	switch preset {
	case "gzip-store": // GZIP-store (dummy) compression
		mode, blockSize = GzipStore, 131070
	case "lz4": // LZ4 compression (very fast)
		mode, blockSize = LZ4, 262140
	case "snappy": // Snappy compression (like LZ4, but slower and worse)
		mode, blockSize = Snappy, 262140
	case "gzip-min": // GZIP-min compression (fast)
		mode, blockSize = GzipMin, 131070
	case "gzip-default": // GZIP-default compression (medium)
		mode, blockSize = GzipDefault, 131070
	case "xz-min": // XZ-min compression (slow)
		mode, blockSize = XZMin, 524288
	case "xz-default": // XZ-default compression (very slow)
		mode, blockSize = XZDefault, 1048576
	default:
		return nil, errors.New("Compression mode doesn't exist")
	}
	return NewCompression(mode, blockSize)
}
// NewCompressionPresetNumber creates a Compression object from a mode
// constant, pairing it with that mode's preset block size. Modes without a
// preset yield a nil Compression and an error.
func NewCompressionPresetNumber(preset int) (*Compression, error) {
	var blockSize uint32
	switch preset {
	case GzipStore, GzipMin, GzipDefault: // GZIP-store (dummy), GZIP-min (fast), GZIP-default (medium)
		blockSize = 131070
	case LZ4, Snappy: // LZ4 (very fast), Snappy (like LZ4, but slower and worse)
		blockSize = 262140
	case XZMin: // XZ-min compression (slow)
		blockSize = 524288
	case XZDefault: // XZ-default compression (very slow)
		blockSize = 1048576
	default:
		return nil, errors.New("Compression mode doesn't exist")
	}
	return NewCompression(preset, blockSize)
}
// NewCompression creates a Compression object for the given mode and block
// size, filling the remaining settings with default configuration values.
func NewCompression(mode int, bs uint32) (*Compression, error) {
	const (
		defaultHeuristicBytes      = 1048576
		defaultNumThreads          = 12
		defaultMaxCompressionRatio = 0.9
	)
	return NewCompressionAdvanced(mode, bs, defaultHeuristicBytes, defaultNumThreads, defaultMaxCompressionRatio)
}
// NewCompressionAdvanced creates a Compression object with every setting
// given explicitly: mode, block size (bs), heuristic bytes (hb), number of
// threads and maximum compression ratio (mcr).
//
// The binary path needed by the mode, if any, is resolved via getBinPaths.
// The Compression is returned even when that lookup fails, together with the
// lookup error.
func NewCompressionAdvanced(mode int, bs uint32, hb int64, threads int, mcr float64) (c *Compression, err error) {
	c = &Compression{
		CompressionMode:     mode,
		BlockSize:           bs,
		HeuristicBytes:      hb,
		NumThreads:          threads,
		MaxCompressionRatio: mcr,
	}
	err = getBinPaths(c, mode)
	return c, err
}
/*** UTILITY FUNCTIONS ***/
// maxCompressedBlockSize returns an overestimate of the maximum compressed
// block size: the block size plus a quarter of it plus 256 bytes.
func (c *Compression) maxCompressedBlockSize() uint32 {
	const fixedSlack = 256
	quarter := c.BlockSize / 4
	return c.BlockSize + quarter + fixedSlack
}
// GetFileExtension returns the file extension that belongs to the current
// compression mode. It panics if the mode is not one of the known modes.
func (c *Compression) GetFileExtension() string {
	switch c.CompressionMode {
	case LZ4:
		return ".lz4"
	case Snappy:
		return ".snap"
	case XZMin, XZDefault:
		return ".xzgz"
	case GzipStore, GzipMin, GzipDefault, GzipMax:
		return ".gz"
	default:
		panic("Compression mode doesn't exist")
	}
}
// GetFileCompressionInfo gets a file extension along with compressibility of file.
// It is currently not being used but may be usable in the future.
//
// Up to HeuristicBytes bytes are read from reader and compressed as a single
// trial block with the configured mode. If the compressed/uncompressed ratio
// exceeds MaxCompressionRatio the file is reported as not compressible with
// extension ".bin"; otherwise the extension of the current mode is returned.
func (c *Compression) GetFileCompressionInfo(reader io.Reader) (compressable bool, extension string, err error) {
	// Take the sample; a short read (io.EOF) just means the file is smaller than the sample size
	var sample bytes.Buffer
	if _, err = io.CopyN(&sample, reader, c.HeuristicBytes); err != nil && err != io.EOF {
		return false, "", err
	}

	// Trial-compress the sample with our own algorithm
	var trial bytes.Buffer
	compressedSize, uncompressedSize, err := c.compressBlock(sample.Bytes(), &trial)
	if err != nil {
		return false, "", err
	}

	if ratio := float64(compressedSize) / float64(uncompressedSize); ratio > c.MaxCompressionRatio {
		return false, ".bin", nil
	}
	return true, c.GetFileExtension(), nil
}
// getHeader returns the file header written in front of the blocks for the
// current algorithm. Only the LZ4 header is non-empty. It panics if the mode
// is not one of the known modes.
func (c *Compression) getHeader() []byte {
	switch c.CompressionMode {
	case LZ4:
		return LZ4Header
	case Snappy:
		return SnappyHeader
	case XZMin, XZDefault:
		return ExecHeader
	case GzipStore, GzipMin, GzipDefault, GzipMax:
		return GzipHeader
	default:
		panic("Compression mode doesn't exist")
	}
}
// getFooter returns the file footer written after the last block for the
// current algorithm. Only LZ4 has a non-empty footer. It panics if the mode
// is not one of the known modes.
func (c *Compression) getFooter() []byte {
	switch c.CompressionMode {
	case LZ4:
		return LZ4Footer
	case GzipStore, GzipMin, GzipDefault, GzipMax, XZMin, XZDefault, Snappy:
		return []byte{}
	default:
		panic("Compression mode doesn't exist")
	}
}
/*** BLOCK COMPRESSION FUNCTIONS ***/
// compressBlock compresses a single block with the codec (and arguments)
// selected by the compression mode, writing the result to out. It panics if
// the mode is not one of the known modes.
func (c *Compression) compressBlock(in []byte, out io.Writer) (compressedSize uint32, uncompressedSize int64, err error) {
	switch c.CompressionMode {
	case LZ4:
		return c.compressBlockLz4(in, out)
	case Snappy:
		return c.compressBlockSnappy(in, out)
	case XZMin:
		return c.compressBlockExec(in, out, c.BinPath, []string{"-c1"})
	case XZDefault:
		return c.compressBlockExec(in, out, c.BinPath, []string{"-c"})
	case GzipStore:
		return c.compressBlockGz(in, out, 0)
	case GzipMin:
		return c.compressBlockGz(in, out, 1)
	case GzipDefault:
		return c.compressBlockGz(in, out, 6)
	case GzipMax:
		return c.compressBlockGz(in, out, 9)
	default:
		panic("Compression mode doesn't exist")
	}
}
/*** MAIN COMPRESSION INTERFACE ***/
// compressionResult represents the result of compression for a single block (gotten by a single thread).
// Worker goroutines in CompressFileReturningBlockData send one of these per block over a channel.
type compressionResult struct {
buffer *bytes.Buffer // Compressed block; nil when compression failed
n int64 // Uncompressed size of the block as reported by compressBlock
err error // Error from compressBlock, if any
}
// CompressFileReturningBlockData compresses a file returning the block data for that file.
//
// The input is read in BlockSize chunks which are compressed concurrently in
// batches of NumThreads blocks (a non-positive NumThreads is treated as 1)
// and written to out in order, framed by the header and footer of the current
// mode.
//
// The returned blockData has the layout
// [header size][compressed size of block 0]...[compressed size of last block][raw size of last block]
// and must be stored externally to decompress the file later.
func (c *Compression) CompressFileReturningBlockData(in io.Reader, out io.Writer) (blockData []uint32, err error) {
	// A non-positive thread count would make the batch loop below spin
	// forever (or panic when sizing the channel slice), so fall back to one.
	threads := c.NumThreads
	if threads < 1 {
		threads = 1
	}

	// Initialize buffered writer
	bufw := bufio.NewWriterSize(out, int(c.maxCompressedBlockSize()*uint32(threads)))

	// Get blockData, copy over header, add length of header to blockData
	blockData = make([]uint32, 0)
	header := c.getHeader()
	_, err = bufw.Write(header)
	if err != nil {
		return nil, err
	}
	blockData = append(blockData, uint32(len(header)))

	// Compress blocks
	for {
		// Loop through threads, spawning a go procedure for each thread. If we get eof on one thread, set eofAt to that thread and break
		compressionResults := make([]chan compressionResult, threads)
		eofAt := -1
		for i := 0; i < threads; i++ {
			// Create thread channel and allocate buffer to pass to thread.
			// The channel is buffered so a worker can always hand over its
			// result and exit, even if we bail out early and never read it.
			compressionResults[i] = make(chan compressionResult, 1)
			var inputBuffer bytes.Buffer
			_, err = io.CopyN(&inputBuffer, in, int64(c.BlockSize))
			if err == io.EOF {
				eofAt = i
			} else if err != nil {
				return nil, err
			}
			// Run thread
			go func(i int, in []byte) {
				// Initialize thread writer and result struct
				var res compressionResult
				var buffer bytes.Buffer

				// Compress block
				_, n, err := c.compressBlock(in, &buffer)
				if err != nil && err != io.EOF { // This errored out.
					res.buffer = nil
					res.n = 0
					res.err = err
					compressionResults[i] <- res
					return
				}
				// Pass our data back to the main thread as a compression result
				res.buffer = &buffer
				res.n = n
				res.err = err
				compressionResults[i] <- res
			}(i, inputBuffer.Bytes())
			// If we have reached eof, we don't need more threads
			if eofAt != -1 {
				break
			}
		}

		// Process writers in order
		for i := 0; i < threads; i++ {
			if compressionResults[i] != nil {
				// Get current compression result, get buffer, and copy buffer over to output
				res := <-compressionResults[i]
				close(compressionResults[i])
				if res.buffer == nil {
					return nil, res.err
				}
				blockSize := uint32(res.buffer.Len())
				_, err = io.Copy(bufw, res.buffer)
				if err != nil {
					return nil, err
				}
				if DEBUG {
					log.Printf("%d %d\n", res.n, blockSize)
				}

				// Append block size to block data
				blockData = append(blockData, blockSize)

				// If this is the last block, add the raw size of the last block to the end of blockData and break
				if eofAt == i {
					if DEBUG {
						log.Printf("%d %d %d\n", res.n, byte(res.n%256), byte(res.n/256))
					}
					blockData = append(blockData, uint32(res.n))
					break
				}
			}
		}

		// Flush the batch we just wrote
		err = bufw.Flush()
		if err != nil {
			return nil, err
		}

		// If eof happened, break
		if eofAt != -1 {
			if DEBUG {
				log.Printf("%d", eofAt)
				log.Printf("%v", blockData)
			}
			break
		}
	}

	// Write footer and flush
	_, err = bufw.Write(c.getFooter())
	if err != nil {
		return nil, err
	}
	err = bufw.Flush()

	// Return
	return blockData, err
}
/*** BLOCK DECOMPRESSION FUNCTIONS ***/
// decompressBlock decompresses a single block read from in into out using the
// codec selected by the compression mode, and returns the number of
// decompressed bytes. It panics if the mode is not one of the known modes.
func (d *Decompressor) decompressBlock(in io.Reader, out io.Writer) (n int, err error) {
	cfg := d.c
	switch cfg.CompressionMode {
	case LZ4:
		return decompressBlockLz4(in, out, int64(cfg.BlockSize))
	case Snappy:
		return decompressBlockSnappy(in, out)
	case XZMin:
		return decompressBlockRangeExec(in, out, cfg.BinPath, []string{"-dc1"})
	case XZDefault:
		return decompressBlockRangeExec(in, out, cfg.BinPath, []string{"-dc"})
	case GzipStore, GzipMin, GzipDefault, GzipMax:
		return decompressBlockRangeGz(in, out)
	default:
		panic("Compression mode doesn't exist") // If none of the above returned
	}
}
// The wrapper for decompressBlock that implements multithreading is
// decompressBlockRangeMultithreaded, defined after this type.
// decompressionResult represents the result of decompressing a block.
// One is sent over a channel by each worker goroutine of that wrapper.
type decompressionResult struct {
err error // Error returned by decompressBlock, if any
buffer *bytes.Buffer // Decompressed contents of the block
}
// decompressBlockRangeMultithreaded decompresses consecutive blocks read from
// in, starting with block number startingBlock, and writes the decompressed
// data to out in order.
//
// Blocks are decompressed concurrently in batches of NumThreads (a
// non-positive NumThreads is treated as 1). Processing stops at the last
// known block or as soon as the input runs out. It returns the total number
// of decompressed bytes written.
func (d *Decompressor) decompressBlockRangeMultithreaded(in io.Reader, out io.Writer, startingBlock uint32) (n int, err error) {
	// A non-positive thread count would make the batch loop below spin
	// forever, so fall back to a single worker.
	threads := d.c.NumThreads
	if threads < 1 {
		threads = 1
	}

	// First, use bufio.Reader to reduce the number of reads and bufio.Writer to reduce the number of writes
	bufin := bufio.NewReader(in)
	bufout := bufio.NewWriter(out)

	// Decompress each block individually.
	currBatch := startingBlock // Block # of start of current batch of blocks
	totalBytesCopied := 0
	for {
		// Loop through threads
		eofAt := -1
		decompressionResults := make([]chan decompressionResult, threads)
		for i := 0; i < threads; i++ {
			// Get currBlock
			currBlock := currBatch + uint32(i)

			// Create channel. It is buffered so a worker can always hand over
			// its result and exit, even if we return early and never read it.
			decompressionResults[i] = make(chan decompressionResult, 1)

			// Check if we've reached EOF
			if currBlock >= d.numBlocks {
				eofAt = i
				break
			}

			// Get block to decompress
			var compressedBlock bytes.Buffer
			copied, err := io.CopyN(&compressedBlock, bufin, d.blockStarts[currBlock+1]-d.blockStarts[currBlock])
			if err != nil || copied == 0 { // End of stream
				eofAt = i
				break
			}

			// Spawn thread to decompress block
			if DEBUG {
				log.Printf("Spawning %d", i)
			}
			go func(i int, currBlock uint32, in io.Reader) {
				var block bytes.Buffer
				var res decompressionResult

				// Decompress block
				_, res.err = d.decompressBlock(in, &block)
				res.buffer = &block
				decompressionResults[i] <- res
			}(i, currBlock, &compressedBlock)
		}
		if DEBUG {
			log.Printf("Eof at %d", eofAt)
		}

		// Process results
		for i := 0; i < threads; i++ {
			// If we got EOF, return
			if eofAt == i {
				return totalBytesCopied, bufout.Flush() // Flushing bufout is needed to prevent us from getting all nulls
			}

			// Get result and close
			res := <-decompressionResults[i]
			close(decompressionResults[i])
			if res.err != nil {
				return totalBytesCopied, res.err
			}

			// Copy to output and add to total bytes copied
			written, err := io.Copy(bufout, res.buffer)
			totalBytesCopied += int(written)
			if err != nil {
				return totalBytesCopied, err
			}
		}

		// Move on to the next batch
		currBatch += uint32(threads)
	}
}
/*** MAIN DECOMPRESSION INTERFACE ***/
// Decompressor is the ReadSeeker implementation for decompression.
// Read and Seek use value receivers, so the cursor is kept behind a pointer
// to let every copy of a Decompressor share one position.
type Decompressor struct {
cursorPos *int64 // The current location we have seeked to (offset in the decompressed data)
blockStarts []int64 // The start of each block. These will be recovered from the block sizes
numBlocks uint32 // Number of blocks
decompressedSize int64 // Decompressed size of the file.
in io.ReadSeeker // Input (the compressed data)
c *Compression // Compression options
}
// parseBlockData parses block data. It returns the number of blocks, the
// block start locations for each block, and the decompressed size of the
// entire file.
//
// blockData is expected in the layout produced by
// CompressFileReturningBlockData:
// [header size][compressed size of block 0]...[compressed size of last block][raw size of last block].
// blockStarts[i] is the offset in the compressed file where block i starts
// and the final entry is the end of the last block.
//
// Block data with fewer than three entries cannot describe any block; it is
// reported as an empty file (0 blocks, nil starts, size 0) instead of
// panicking or underflowing.
func parseBlockData(blockData []uint32, BlockSize uint32) (numBlocks uint32, blockStarts []int64, decompressedSize int64) {
	// Parse the block data
	blockDataLen := len(blockData)
	if blockDataLen < 3 {
		return 0, nil, 0
	}
	numBlocks = uint32(blockDataLen - 1)
	if DEBUG {
		log.Printf("%v\n", blockData)
		log.Printf("metadata len, numblocks = %d, %d", blockDataLen, numBlocks)
	}
	blockStarts = make([]int64, numBlocks+1) // Starts with start of first block (and end of header), ends with end of last block
	currentBlockPosition := int64(0)
	for i := uint32(0); i < numBlocks; i++ { // Loop through block data, getting starts of blocks.
		currentBlockSize := blockData[i]
		currentBlockPosition += int64(currentBlockSize)
		blockStarts[i] = currentBlockPosition
	}
	blockStarts[numBlocks] = currentBlockPosition // End of last block

	numBlocks-- // Subtract 1 from number of blocks because our header technically isn't a block

	// Get uncompressed size of last block and derive uncompressed size of file
	lastBlockRawSize := blockData[blockDataLen-1]
	decompressedSize = int64(numBlocks-1)*int64(BlockSize) + int64(lastBlockRawSize)
	if DEBUG {
		log.Printf("Decompressed size = %d", decompressedSize)
	}

	return numBlocks, blockStarts, decompressedSize
}
// initWithBlockData prepares the decompressor for reading: it stores the
// compression options, derives the block layout from blockData, places the
// cursor at position 0 and rewinds in to its start. The error of that rewind
// is returned. The size argument is not used.
func (d *Decompressor) initWithBlockData(c *Compression, in io.ReadSeeker, size int64, blockData []uint32) (err error) {
	d.c = c

	// A fresh cursor starts out at zero
	var start int64
	d.cursorPos = &start

	d.numBlocks, d.blockStarts, d.decompressedSize = parseBlockData(blockData, d.c.BlockSize)

	// Rewind the input, then keep hold of it
	_, err = in.Seek(0, io.SeekStart)
	d.in = in
	return err
}
// Read reads data using a decompressor.
//
// It works out which blocks cover the range [cursor, cursor+len(p)), reads
// those compressed blocks from the input, decompresses them and copies the
// requested part into p, advancing the cursor by the number of bytes
// returned. io.EOF is returned together with the final bytes when the read
// reaches past the last block, and on its own when the cursor is outside the
// file. A zero-length p inside the file yields (0, nil). On any error before
// data is copied into p, the byte count returned is 0.
func (d Decompressor) Read(p []byte) (int, error) {
	if DEBUG {
		log.Printf("Cursor pos before: %d\n", *d.cursorPos)
	}
	// Check if we're at the end of the file or before the beginning of the file
	if *d.cursorPos >= d.decompressedSize || *d.cursorPos < 0 {
		if DEBUG {
			log.Println("Out of bounds EOF")
		}
		return 0, io.EOF
	}
	// A zero-length read inside the file is not an end-of-file condition
	if len(p) == 0 {
		return 0, nil
	}

	// Get block range to read
	blockNumber := *d.cursorPos / int64(d.c.BlockSize)
	blockStart := d.blockStarts[blockNumber]                                 // Start position of blocks to read
	dataOffset := *d.cursorPos % int64(d.c.BlockSize)                        // Offset of data to read in blocks to read
	bytesToRead := len(p)                                                    // Number of bytes to read
	blocksToRead := (int64(bytesToRead)+dataOffset)/int64(d.c.BlockSize) + 1 // Number of blocks to read
	returnEOF := false
	if blockNumber+blocksToRead > int64(d.numBlocks) { // Overflowed the last block
		blocksToRead = int64(d.numBlocks) - blockNumber
		returnEOF = true
	}
	// Start of the block after the last block we want to get is the end of the last block we want to get
	blockEnd := d.blockStarts[blockNumber+blocksToRead]
	blockLen := blockEnd - blockStart

	// Read compressed block range into buffer
	var compressedBlocks bytes.Buffer
	_, err := d.in.Seek(blockStart, io.SeekStart)
	if err != nil {
		return 0, err
	}
	n1, err := io.CopyN(&compressedBlocks, d.in, blockLen)
	if DEBUG {
		log.Printf("block # = %d @ %d <- %d, len %d, copied %d bytes", blockNumber, blockStart, *d.cursorPos, blockLen, n1)
	}
	if err != nil {
		if DEBUG {
			log.Println("Copy Error")
		}
		return 0, err
	}

	// Decompress block range
	var b bytes.Buffer
	n, err := d.decompressBlockRangeMultithreaded(&compressedBlocks, &b, uint32(blockNumber))
	if err != nil {
		if DEBUG {
			log.Println("Decompression error")
		}
		// Nothing has been copied into p yet, so report zero bytes read
		return 0, err
	}

	// Calculate bytes read
	readOverflow := *d.cursorPos + int64(bytesToRead) - d.decompressedSize
	if readOverflow < 0 {
		readOverflow = 0
	}
	bytesRead := int64(bytesToRead) - readOverflow
	if DEBUG {
		log.Printf("Read offset = %d, overflow = %d", dataOffset, readOverflow)
		log.Printf("Decompressed %d bytes; read %d out of %d bytes\n", n, bytesRead, bytesToRead)
	}

	// If we read 0 bytes, we reached the end of the file
	if bytesRead == 0 {
		if DEBUG {
			log.Println("EOF")
		}
		return 0, io.EOF
	}

	// Copy from buffer+offset to p
	_, err = io.CopyN(ioutil.Discard, &b, dataOffset)
	if err != nil {
		return 0, err
	}
	n, err = b.Read(p) // Note: everything after bytesToRead bytes will be discarded; we are returning bytesRead instead of n
	if err != nil {
		return n, err
	}

	// Increment cursor position and return
	*d.cursorPos += bytesRead
	if returnEOF {
		if DEBUG {
			log.Println("EOF")
		}
		return int(bytesRead), io.EOF
	}
	return int(bytesRead), nil
}
// Seek seeks to a location in the decompressed stream.
//
// offset is interpreted according to whence (io.SeekStart, io.SeekCurrent or
// io.SeekEnd, the latter relative to the decompressed size). It returns the
// new position relative to the start of the decompressed data. An unknown
// whence or a resulting negative position is an error and leaves the cursor
// where it was.
func (d Decompressor) Seek(offset int64, whence int) (int64, error) {
	var newPos int64
	switch whence {
	case io.SeekStart:
		newPos = offset
	case io.SeekCurrent:
		newPos = *d.cursorPos + offset
	case io.SeekEnd:
		newPos = d.decompressedSize + offset
	default:
		return 0, errors.New("invalid whence")
	}
	if newPos < 0 {
		return 0, errors.New("negative position")
	}
	*d.cursorPos = newPos
	return newPos, nil
}
// DecompressFileExtData decompresses a file using external block data, i.e.
// the []uint32 returned by CompressFileReturningBlockData. It returns a
// ReadSeeker over the decompressed contents together with the decompressed
// size. The size argument is only passed through to the decompressor's
// initialisation.
func (c *Compression) DecompressFileExtData(in io.ReadSeeker, size int64, blockData []uint32) (FileHandle io.ReadSeeker, decompressedSize int64, err error) {
	var d Decompressor
	err = d.initWithBlockData(c, in, size, blockData)
	FileHandle, decompressedSize = d, d.decompressedSize
	return FileHandle, decompressedSize, err
}

View File

@ -0,0 +1,136 @@
package press
import (
"bufio"
"bytes"
"crypto/md5"
"encoding/base64"
"io"
"io/ioutil"
"math/rand"
"os"
"strings"
"testing"
)
const TestStringSmall = "The quick brown fox jumps over the lazy dog."
const TestSizeLarge = 2097152 // 2 megabytes
// testCompressDecompress tests compression and decompression for a preset.
//
// testString is compressed into a temporary file, decompressed again through
// DecompressFileExtData, and the MD5 of the recovered data is compared with
// the MD5 of the original.
func testCompressDecompress(t *testing.T, preset string, testString string) {
	// Create compression instance
	comp, err := NewCompressionPreset(preset)
	if err != nil {
		t.Fatal(err)
	}

	// Open files and hashers
	testFile := strings.NewReader(testString)
	testFileHasher := md5.New()
	compressedFile, err := ioutil.TempFile(os.TempDir(), "rclone_compression_test")
	if err != nil {
		t.Fatal(err)
	}
	// Clean up the temporary file however the test ends
	defer func() {
		if err := compressedFile.Close(); err != nil {
			t.Log("Warning: cannot close compressed test file")
		}
		if err := os.Remove(compressedFile.Name()); err != nil {
			t.Log("Warning: cannot remove compressed test file")
		}
	}()
	outHasher := md5.New()

	// Compress file and hash it (size doesn't matter here)
	testFileReader, testFileWriter := io.Pipe()
	go func() {
		_, err := io.Copy(io.MultiWriter(testFileHasher, testFileWriter), testFile)
		// t.Fatal must not be called from this goroutine. Hand any copy error
		// to the reading side instead, where it fails the compression call
		// below; a nil error closes the pipe normally.
		_ = testFileWriter.CloseWithError(err)
	}()
	var blockData []uint32
	blockData, err = comp.CompressFileReturningBlockData(testFileReader, compressedFile)
	if err != nil {
		t.Fatalf("Compression failed with error: %v", err)
	}
	testFileHash := testFileHasher.Sum(nil)

	// Get the size, seek to the beginning of the compressed file
	size, err := compressedFile.Seek(0, io.SeekEnd)
	if err != nil {
		t.Fatal(err)
	}
	_, err = compressedFile.Seek(0, io.SeekStart)
	if err != nil {
		t.Fatal(err)
	}
	t.Logf("Compressed size: %d\n", size)

	// Decompress file into a hasher
	var FileHandle io.ReadSeeker
	var decompressedSize int64
	FileHandle, decompressedSize, err = comp.DecompressFileExtData(compressedFile, size, blockData)
	if err != nil {
		t.Fatal(err)
	}
	t.Logf("Decompressed size: %d\n", decompressedSize)
	bufr := bufio.NewReaderSize(FileHandle, 12345678)
	_, err = io.Copy(outHasher, bufr)
	if err != nil && err != io.EOF {
		t.Fatal(err)
	}
	decompressedFileHash := outHasher.Sum(nil)

	// Compare hashes
	if !bytes.Equal(testFileHash, decompressedFileHash) {
		t.Logf("Hash of original file: %x\n", testFileHash)
		t.Logf("Hash of recovered file: %x\n", decompressedFileHash)
		t.Fatal("Hashes do not match!")
	}
}
// Tests both small and large strings for a preset
func testSmallLarge(t *testing.T, preset string) {
testStringLarge := getCompressibleString(TestSizeLarge)
t.Run("TestSmall", func(t *testing.T) {
testCompressDecompress(t, preset, TestStringSmall)
})
t.Run("TestLarge", func(t *testing.T) {
testCompressDecompress(t, preset, testStringLarge)
})
}
// getCompressibleString returns a deterministic string of exactly size
// characters: pseudorandom bytes from a generator seeded with 0, base64
// encoded with a custom alphabet and cut to length.
func getCompressibleString(size int) string {
	const alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/"

	// Slightly more raw bytes than strictly needed so the encoding is always long enough
	raw := make([]byte, size*3/4+16)
	rng := rand.New(rand.NewSource(0))
	_, _ = rng.Read(raw) // Read on a *rand.Rand always fills the slice and returns a nil error

	encoded := base64.NewEncoding(alphabet).EncodeToString(raw)
	return encoded[:size]
}
// TestCompression runs the small/large round-trip tests for the lz4, snappy
// and gzip-min presets, plus xz-min when an xz binary is available.
func TestCompression(t *testing.T) {
	presets := []string{"lz4", "snappy", "gzip-min"}
	if !checkXZ() {
		t.Log("XZ binary not found on current system. Not testing xz.")
	} else {
		presets = append(presets, "xz-min")
	}
	for i := range presets {
		preset := presets[i]
		t.Run(preset, func(t *testing.T) {
			testSmallLarge(t, preset)
		})
	}
}

1352
backend/press/press.go Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,88 @@
// Test Press filesystem interface
package press
import (
"os"
"path/filepath"
"testing"
_ "github.com/ncw/rclone/backend/local"
"github.com/ncw/rclone/fstest"
"github.com/ncw/rclone/fstest/fstests"
)
// TestIntegration runs integration tests against the remote named by the
// -remote flag; it is skipped when that flag is not set.
func TestIntegration(t *testing.T) {
	remote := *fstest.RemoteName
	if remote == "" {
		t.Skip("Skipping as -remote not set")
	}
	opt := fstests.Opt{
		RemoteName:                   remote,
		NilObject:                    (*Object)(nil),
		UnimplementableFsMethods:     []string{"OpenWriterAt"},
		UnimplementableObjectMethods: []string{},
	}
	fstests.Run(t, &opt)
}
// TestRemoteLz4 tests LZ4 compression by running the fs test suite against a
// press remote, configured on the fly, that wraps a temporary directory.
// It is skipped when -remote is set.
func TestRemoteLz4(t *testing.T) {
	if *fstest.RemoteName != "" {
		t.Skip("Skipping as -remote set")
	}
	const name = "TestPressLz4"
	tempdir := filepath.Join(os.TempDir(), "rclone-press-test-lz4")
	opt := fstests.Opt{
		RemoteName:                   name + ":",
		NilObject:                    (*Object)(nil),
		UnimplementableFsMethods:     []string{"OpenWriterAt"},
		UnimplementableObjectMethods: []string{},
		ExtraConfig: []fstests.ExtraConfigItem{
			{Name: name, Key: "type", Value: "press"},
			{Name: name, Key: "remote", Value: tempdir},
			{Name: name, Key: "compression_mode", Value: "lz4"},
		},
	}
	fstests.Run(t, &opt)
}
// TestRemoteGzip tests GZIP compression by running the fs test suite against
// a press remote, configured on the fly with the gzip-min mode, that wraps a
// temporary directory. It is skipped when -remote is set.
func TestRemoteGzip(t *testing.T) {
	if *fstest.RemoteName != "" {
		t.Skip("Skipping as -remote set")
	}
	const name = "TestPressGzip"
	tempdir := filepath.Join(os.TempDir(), "rclone-press-test-gzip")
	opt := fstests.Opt{
		RemoteName:                   name + ":",
		NilObject:                    (*Object)(nil),
		UnimplementableFsMethods:     []string{"OpenWriterAt"},
		UnimplementableObjectMethods: []string{},
		ExtraConfig: []fstests.ExtraConfigItem{
			{Name: name, Key: "type", Value: "press"},
			{Name: name, Key: "remote", Value: tempdir},
			{Name: name, Key: "compression_mode", Value: "gzip-min"},
		},
	}
	fstests.Run(t, &opt)
}
// TestRemoteXZ tests XZ compression by running the fs test suite against a
// press remote, configured on the fly with the xz-min mode, that wraps a
// temporary directory. It is skipped when no xz binary is found or when
// -remote is set.
func TestRemoteXZ(t *testing.T) {
	if !checkXZ() {
		t.Skip("XZ binary not found on current system")
	}
	if *fstest.RemoteName != "" {
		t.Skip("Skipping as -remote set")
	}
	const name = "TestPressXZ"
	tempdir := filepath.Join(os.TempDir(), "rclone-press-test-xz")
	opt := fstests.Opt{
		RemoteName:                   name + ":",
		NilObject:                    (*Object)(nil),
		UnimplementableFsMethods:     []string{"OpenWriterAt"},
		UnimplementableObjectMethods: []string{},
		ExtraConfig: []fstests.ExtraConfigItem{
			{Name: name, Key: "type", Value: "press"},
			{Name: name, Key: "remote", Value: tempdir},
			{Name: name, Key: "compression_mode", Value: "xz-min"},
		},
	}
	fstests.Run(t, &opt)
}