press: Added experimental compression remote - implements #2098, #1356, #675

2025-06-24 14:01:31 +02:00 · 2019-06-14 14:00:46 -07:00 · 2019-06-14 14:00:46 -07:00 · e41a88fb23
commit e41a88fb23
parent 366e0e18cd
9 changed files with 2480 additions and 0 deletions
--- a/backend/press/.gitignore
+++ b/backend/press/.gitignore
@ -0,0 +1 @@
+test
--- a/backend/press/alg_exec.go
+++ b/backend/press/alg_exec.go
@ -0,0 +1,98 @@
+package press
+
+// This file implements shell exec algorithms that require binaries.
+import (
+	"bytes"
+	"io"
+	"os/exec"
+)
+
+// xzcommand is the name of the xz binary that is looked up on the PATH.
+const xzcommand = "xz" // Name of xz binary (if available)
+
+// ExecHeader - Header we add to an exec file. It is empty: no header is
+// written for the exec (xz) modes.
+var ExecHeader = []byte{}
+
+// checkXZ reports whether the xz binary (xzcommand) can be found on the PATH.
+// It uses the same lookup as getBinPaths so the two can never disagree.
+func checkXZ() bool {
+	_, err := exec.LookPath(xzcommand)
+	return err == nil
+}
+
+// getBinPaths resolves the external binary required by mode and stores its
+// path in c.BinPath. Only the XZ modes need a binary; for every other mode c
+// is left untouched and nil is returned.
+func getBinPaths(c *Compression, mode int) (err error) {
+	switch mode {
+	case XZMin, XZDefault:
+		c.BinPath, err = exec.LookPath(xzcommand)
+	}
+	return err
+}
+
+// compressBlockExec compresses a block by piping it through an external
+// command, without wrapping the result in gzip. binaryPath is run with args,
+// in is fed to its stdin and everything it prints on stdout is written to out.
+//
+// It returns the number of compressed bytes written to out, the length of in,
+// and the first error encountered.
+func (c *Compression) compressBlockExec(in []byte, out io.Writer, binaryPath string, args []string) (compressedSize uint32, uncompressedSize int64, err error) {
+	// Initialize compression subprocess
+	subprocess := exec.Command(binaryPath, args...)
+	stdin, err := subprocess.StdinPipe()
+	if err != nil {
+		return 0, 0, err
+	}
+
+	// Feed the block to the subprocess in the background. The channel is
+	// buffered so the goroutine can always deliver its result and exit, even
+	// when we return early below without receiving from it.
+	stdinError := make(chan error, 1)
+	go func() {
+		_, err := stdin.Write(in)
+		_ = stdin.Close() // Best effort: the write error is the one we report
+		stdinError <- err
+	}()
+
+	// Run the subprocess and collect its stdout
+	output, err := subprocess.Output()
+	if err != nil {
+		return 0, 0, err
+	}
+
+	// Copy over
+	n, err := io.Copy(out, bytes.NewReader(output))
+	if err != nil {
+		return uint32(n), int64(len(in)), err
+	}
+
+	// Check if feeding stdin failed and return
+	err = <-stdinError
+
+	return uint32(n), int64(len(in)), err
+}
+
+// decompressBlockRangeExec decompresses a block range by piping it through an
+// external command (for data that was not wrapped in gzip). binaryPath is run
+// with args, in is streamed to its stdin and its stdout is written to out.
+//
+// It returns the number of bytes written to out and the first error
+// encountered.
+func decompressBlockRangeExec(in io.Reader, out io.Writer, binaryPath string, args []string) (n int, err error) {
+	// Initialize decompression subprocess
+	subprocess := exec.Command(binaryPath, args...)
+	stdin, err := subprocess.StdinPipe()
+	if err != nil {
+		return 0, err
+	}
+
+	// Stream the compressed data to the subprocess in the background. The
+	// channel is buffered so the goroutine can always deliver its result and
+	// exit, even when we return early below without receiving from it.
+	stdinError := make(chan error, 1)
+	go func() {
+		_, err := io.Copy(stdin, in)
+		_ = stdin.Close() // Best effort: the copy error is the one we report
+		stdinError <- err
+	}()
+
+	// Get output, copy, and return
+	output, err := subprocess.Output()
+	if err != nil {
+		return 0, err
+	}
+	n64, err := io.Copy(out, bytes.NewReader(output))
+	if err != nil {
+		return int(n64), err
+	}
+	err = <-stdinError
+	return int(n64), err
+}
--- a/backend/press/alg_gzip.go
+++ b/backend/press/alg_gzip.go
@ -0,0 +1,49 @@
+package press
+
+// This file implements the gzip algorithm.
+import (
+	"bufio"
+	"compress/gzip"
+	"io"
+)
+
+// GzipHeader - Header we add to a gzip file. We're concatenating GZIP files here, so we don't need this.
+var GzipHeader = []byte{}
+
+// compressBlockGz gzips one block at the given compression level and writes
+// the resulting gzip stream to out. It returns the compressed size (taken from
+// what is held in the staging buffer), the length of in, and any error.
+func (c *Compression) compressBlockGz(in []byte, out io.Writer, compressionLevel int) (compressedSize uint32, uncompressedSize int64, err error) {
+	// Stage the output in a buffer sized for the largest block we expect
+	staged := bufio.NewWriterSize(out, int(c.maxCompressedBlockSize()))
+
+	gz, err := gzip.NewWriterLevel(staged, compressionLevel)
+	if err != nil {
+		return 0, 0, err
+	}
+
+	// Write the block and finish the gzip stream
+	if _, err = gz.Write(in); err != nil {
+		return 0, 0, err
+	}
+	if err = gz.Close(); err != nil {
+		return 0, 0, err
+	}
+
+	// Whatever is still staged is the compressed size; then push it to out
+	compressedSize = uint32(staged.Buffered())
+	err = staged.Flush()
+	return compressedSize, int64(len(in)), err
+}
+
+// decompressBlockRangeGz decompresses a range of gzip blocks read from in and
+// writes the result to out. The gzip reader decodes concatenated gzip streams
+// back to back, so a whole block range can be handled in a single call.
+// It returns the number of decompressed bytes written to out.
+func decompressBlockRangeGz(in io.Reader, out io.Writer) (n int, err error) {
+	gzipReader, err := gzip.NewReader(in)
+	if err != nil {
+		return 0, err
+	}
+	written, err := io.Copy(out, gzipReader)
+	// Always release the reader; report its error only if the copy succeeded
+	if closeErr := gzipReader.Close(); err == nil {
+		err = closeErr
+	}
+	return int(written), err
+}
--- a/backend/press/alg_lz4.go
+++ b/backend/press/alg_lz4.go
@ -0,0 +1,95 @@
+package press
+
+// This file implements the LZ4 algorithm.
+import (
+	"bytes"
+	"encoding/binary"
+	"errors"
+	"io"
+
+	"github.com/OneOfOne/xxhash"
+	lz4 "github.com/id01/go-lz4"
+)
+
+/*
+Structure of LZ4 header:
+Flags:
+	Version = 01
+	Independent = 1
+	Block Checksum = 1
+	Content Size = 0
+	Content Checksum = 0
+	Reserved = 0
+	Dictionary ID = 0
+
+BD byte:
+	Reserved = 0
+	Block Max Size = 101 (or 5; 256kb)
+	Reserved = 0000
+
+Header checksum byte (xxhash(flags and bd byte) >> 8) & 0xff
+*/
+
+// LZ4Header - Header of our LZ4 file: 4 magic bytes followed by the flags,
+// BD and header checksum bytes described above.
+var LZ4Header = []byte{0x04, 0x22, 0x4d, 0x18, 0x70, 0x50, 0x84}
+
+// LZ4Footer - Footer of our LZ4 file
+var LZ4Footer = []byte{0x00, 0x00, 0x00, 0x00} // This is just an empty block
+
+// compressBlockLz4 compresses one block with lz4 and writes it to out,
+// followed by a 4-byte little-endian xxhash32 checksum of the encoded data
+// (excluding its leading 4-byte size). It returns the total number of bytes
+// written to out, the length of in, and any error.
+func (c *Compression) compressBlockLz4(in []byte, out io.Writer) (compressedSize uint32, uncompressedSize int64, err error) {
+	// Encode the block and emit it
+	encoded, err := lz4.Encode(nil, in)
+	if err != nil {
+		return 0, 0, err
+	}
+	payloadLen, err := out.Write(encoded)
+	if err != nil {
+		return 0, 0, err
+	}
+
+	// The checksum covers everything after the size
+	hasher := xxhash.New32()
+	if _, err = hasher.Write(encoded[4:]); err != nil {
+		return 0, 0, err
+	}
+	var sum [4]byte
+	binary.LittleEndian.PutUint32(sum[:], hasher.Sum32())
+	sumLen, err := out.Write(sum[:])
+	if err != nil {
+		return 0, 0, err
+	}
+
+	return uint32(payloadLen + sumLen), int64(len(in)), err
+}
+
+// decompressBlockLz4 decompresses a single LZ4 block read from in and writes
+// the result to out. The block is expected in the layout written by
+// compressBlockLz4: the encoded data (starting with a 4-byte size) followed
+// by a 4-byte little-endian xxhash32 checksum of the data after that size.
+//
+// BlockSize is used to size the decode buffer (twice the block size). It
+// returns the number of decompressed bytes and an error if the input is too
+// short, the checksum does not match, or decoding fails.
+func decompressBlockLz4(in io.Reader, out io.Writer, BlockSize int64) (n int, err error) {
+	// Get our compressed data
+	var b bytes.Buffer
+	_, err = io.Copy(&b, in)
+	if err != nil {
+		return 0, err
+	}
+	// A valid block holds at least the 4-byte size plus the 4-byte checksum;
+	// anything shorter would make the slicing below panic.
+	compressedBytesWithHash := b.Bytes()
+	if len(compressedBytesWithHash) < 8 {
+		return 0, errors.New("LZ4 block too short")
+	}
+	// Split the trailing checksum off the compressed data
+	compressedBytes := compressedBytesWithHash[:len(compressedBytesWithHash)-4]
+	hash := compressedBytesWithHash[len(compressedBytesWithHash)-4:]
+	// Verify, decode, write, and return
+	h := xxhash.New32()
+	_, err = h.Write(compressedBytes[4:]) // The checksum doesn't include the size
+	if err != nil {
+		return 0, err
+	}
+	if binary.LittleEndian.Uint32(hash) != h.Sum32() {
+		return 0, errors.New("XXHash checksum invalid")
+	}
+	dst := make([]byte, BlockSize*2)
+	decompressed, err := lz4.Decode(dst, compressedBytes)
+	if err != nil {
+		return 0, err
+	}
+	_, err = out.Write(decompressed)
+	return len(decompressed), err
+}
--- a/backend/press/alg_snappy.go
+++ b/backend/press/alg_snappy.go
@ -0,0 +1,35 @@
+package press
+
+// This file implements compression/decompression using snappy.
+import (
+	"bytes"
+	"io"
+
+	"github.com/golang/snappy"
+)
+
+// SnappyHeader - Header we add to a snappy file. It is empty: no header is
+// written for snappy.
+var SnappyHeader = []byte{}
+
+// compressBlockSnappy compresses one block with snappy and writes it to out.
+// It returns the encoded length, the length of in, and any write error.
+func (c *Compression) compressBlockSnappy(in []byte, out io.Writer) (compressedSize uint32, uncompressedSize int64, err error) {
+	encoded := snappy.Encode(nil, in)
+	compressedSize = uint32(len(encoded))
+	uncompressedSize = int64(len(in))
+	_, err = out.Write(encoded)
+	return compressedSize, uncompressedSize, err
+}
+
+// decompressBlockSnappy reads one whole snappy block from in, decodes it and
+// writes the result to out. It returns the number of decoded bytes.
+func decompressBlockSnappy(in io.Reader, out io.Writer) (n int, err error) {
+	// Snappy decodes from a byte slice, so slurp the block first
+	var encoded bytes.Buffer
+	if _, err = io.Copy(&encoded, in); err != nil {
+		return 0, err
+	}
+
+	decoded, err := snappy.Decode(nil, encoded.Bytes())
+	if err != nil {
+		return 0, err
+	}
+
+	_, err = out.Write(decoded)
+	return len(decoded), err
+}
--- a/backend/press/compression.go
+++ b/backend/press/compression.go
@ -0,0 +1,626 @@
+// Package press provides wrappers for Fs and Object which implement compression.
+// This file is the backend implementation for seekable compression.
+package press
+
+/*
+NOTES:
+Structure of the metadata we store is:
+gzipExtraify(gzip([4-byte header size][4-byte block size] ... [4-byte block size][4-byte raw size of last block]))
+This is appended to any compressed file, and is ignored as trailing garbage in our LZ4 and SNAPPY implementations, and seen as empty archives in our GZIP and XZ_IN_GZ implementations.
+
+There are two possible compression/decompression function pairs to be used:
+The two functions that store data internally are:
+- Compression.CompressFileAppendingBlockData. Appends block data in extra data fields of empty gzip files at the end.
+- DecompressFile. Reads block data from extra fields of these empty gzip files.
+The two functions that require externally stored data are:
+- Compression.CompressFileReturningBlockData. Returns a []uint32 containing raw (uncompressed and unencoded) block data, which must be externally stored.
+- DecompressFileExtData. Takes in the []uint32 that was returned by Compression.CompressFileReturningBlockData
+WARNING: These function pairs are incompatible with each other. Don't use CompressFileAppendingBlockData with DecompressFileExtData, or the other way around. It won't work.
+*/
+
+import (
+	"bufio"
+	"bytes"
+	"errors"
+	"io"
+	"io/ioutil"
+	"log"
+)
+
+// Compression modes. These select the algorithm (and, for gzip and xz, the
+// level) that compressBlock and decompressBlock dispatch on.
+const (
+	Uncompressed = -1
+	GzipStore    = 0
+	GzipMin      = 1
+	GzipDefault  = 2
+	GzipMax      = 3
+	LZ4          = 4
+	Snappy       = 5
+	XZMin        = 6
+	XZDefault    = 7
+)
+
+// Errors
+var (
+	// ErrMetadataCorrupted indicates that stored metadata may have been corrupted
+	ErrMetadataCorrupted = errors.New("metadata may have been corrupted")
+)
+
+// DEBUG - flag for debug mode. When true, extra diagnostics are written via the log package.
+const DEBUG = false
+
+// Compression is a struct containing configurable variables (what used to be constants)
+type Compression struct {
+	CompressionMode int    // Compression mode (one of the mode constants above)
+	BlockSize       uint32 // Size of blocks. Higher block size means better compression but more download bandwidth needed for small downloads
+	// ~1MB is recommended for xz, while ~128KB is recommended for gzip and lz4
+	HeuristicBytes      int64   // Number of leading bytes compressed (with the current mode) to judge whether a file is worth compressing
+	NumThreads          int     // Number of blocks compressed/decompressed concurrently per batch
+	MaxCompressionRatio float64 // Maximum compression ratio for a file to be considered compressible
+	BinPath             string  // Path to compression binary. Only looked up and used for the XZ modes.
+}
+
+// NewCompressionPreset creates a Compression object from a named preset.
+// Each preset fixes both the compression mode and the block size; an unknown
+// name yields an error.
+func NewCompressionPreset(preset string) (*Compression, error) {
+	var (
+		mode      int
+		blockSize uint32
+	)
+	switch preset {
+	case "gzip-store": // GZIP-store (dummy) compression
+		mode, blockSize = GzipStore, 131070
+	case "gzip-min": // GZIP-min compression (fast)
+		mode, blockSize = GzipMin, 131070
+	case "gzip-default": // GZIP-default compression (medium)
+		mode, blockSize = GzipDefault, 131070
+	case "lz4": // LZ4 compression (very fast)
+		mode, blockSize = LZ4, 262140
+	case "snappy": // Snappy compression (like LZ4, but slower and worse)
+		mode, blockSize = Snappy, 262140
+	case "xz-min": // XZ-min compression (slow)
+		mode, blockSize = XZMin, 524288
+	case "xz-default": // XZ-default compression (very slow)
+		mode, blockSize = XZDefault, 1048576
+	default:
+		return nil, errors.New("Compression mode doesn't exist")
+	}
+	return NewCompression(mode, blockSize)
+}
+
+// NewCompressionPresetNumber creates a Compression object from a preset
+// identified by its mode number. The block size is chosen per mode; a mode
+// without a preset yields an error.
+func NewCompressionPresetNumber(preset int) (*Compression, error) {
+	var blockSize uint32
+	switch preset {
+	case GzipStore, GzipMin, GzipDefault: // GZIP store (dummy) / min (fast) / default (medium)
+		blockSize = 131070
+	case LZ4, Snappy: // LZ4 (very fast) / Snappy (like LZ4, but slower and worse)
+		blockSize = 262140
+	case XZMin: // XZ-min compression (slow)
+		blockSize = 524288
+	case XZDefault: // XZ-default compression (very slow)
+		blockSize = 1048576
+	default:
+		return nil, errors.New("Compression mode doesn't exist")
+	}
+	return NewCompression(preset, blockSize)
+}
+
+// NewCompression creates a Compression object for the given mode and block
+// size, filling in default values for the remaining settings.
+func NewCompression(mode int, bs uint32) (*Compression, error) {
+	const (
+		defaultHeuristicBytes      = 1048576
+		defaultThreads             = 12
+		defaultMaxCompressionRatio = 0.9
+	)
+	return NewCompressionAdvanced(mode, bs, defaultHeuristicBytes, defaultThreads, defaultMaxCompressionRatio)
+}
+
+// NewCompressionAdvanced creates a Compression object with every setting
+// given explicitly: mode, block size (bs), heuristic bytes (hb), thread count
+// and maximum compression ratio (mcr). The object is returned even when the
+// binary lookup for the mode fails; the lookup error is returned alongside it.
+func NewCompressionAdvanced(mode int, bs uint32, hb int64, threads int, mcr float64) (c *Compression, err error) {
+	c = &Compression{
+		CompressionMode:     mode,
+		BlockSize:           bs,
+		HeuristicBytes:      hb,
+		NumThreads:          threads,
+		MaxCompressionRatio: mcr,
+	}
+	// Get binary path if needed
+	err = getBinPaths(c, mode)
+	return c, err
+}
+
+/*** UTILITY FUNCTIONS ***/
+// maxCompressedBlockSize returns an overestimate of the largest compressed
+// block: the block size plus a quarter of it plus 256 bytes.
+func (c *Compression) maxCompressedBlockSize() uint32 {
+	blockSize := c.BlockSize
+	return blockSize + blockSize/4 + 256
+}
+
+// GetFileExtension returns the file extension for the current compression
+// mode. It panics if the mode is not one of the known compressed modes.
+func (c *Compression) GetFileExtension() string {
+	switch mode := c.CompressionMode; mode {
+	case LZ4:
+		return ".lz4"
+	case Snappy:
+		return ".snap"
+	case XZMin, XZDefault:
+		return ".xzgz"
+	case GzipStore, GzipMin, GzipDefault, GzipMax:
+		return ".gz"
+	default:
+		panic("Compression mode doesn't exist")
+	}
+}
+
+// GetFileCompressionInfo decides whether a file looks compressible and which
+// extension it should get. It compresses up to c.HeuristicBytes from reader
+// with the current mode and compares the achieved ratio against
+// c.MaxCompressionRatio: above it the file is reported as not compressible
+// with extension ".bin", otherwise as compressible with the mode's extension.
+// It is currently not being used but may be usable in the future.
+func (c *Compression) GetFileCompressionInfo(reader io.Reader) (compressable bool, extension string, err error) {
+	// Take a sample from the start of the file; hitting EOF early is fine
+	var sample bytes.Buffer
+	if _, err = io.CopyN(&sample, reader, c.HeuristicBytes); err != nil && err != io.EOF {
+		return false, "", err
+	}
+
+	// Compress the sample as if it were a block
+	var compressedSample bytes.Buffer
+	compressedSize, uncompressedSize, err := c.compressBlock(sample.Bytes(), &compressedSample)
+	if err != nil {
+		return false, "", err
+	}
+
+	// Data that barely shrinks is not worth compressing
+	if ratio := float64(compressedSize) / float64(uncompressedSize); ratio > c.MaxCompressionRatio {
+		return false, ".bin", nil
+	}
+	return true, c.GetFileExtension(), nil
+}
+
+// getHeader returns the file header written for the current compression
+// mode. Only LZ4 has a non-empty header. Panics on an unknown mode.
+func (c *Compression) getHeader() []byte {
+	switch mode := c.CompressionMode; mode {
+	case LZ4:
+		return LZ4Header
+	case Snappy:
+		return SnappyHeader
+	case XZMin, XZDefault:
+		return ExecHeader
+	case GzipStore, GzipMin, GzipDefault, GzipMax:
+		return GzipHeader
+	default:
+		panic("Compression mode doesn't exist")
+	}
+}
+
+// getFooter returns the file footer written for the current compression
+// mode. Only LZ4 has a non-empty footer. Panics on an unknown mode.
+func (c *Compression) getFooter() []byte {
+	switch mode := c.CompressionMode; mode {
+	case LZ4:
+		return LZ4Footer
+	case GzipStore, GzipMin, GzipDefault, GzipMax, XZMin, XZDefault, Snappy:
+		return []byte{}
+	default:
+		panic("Compression mode doesn't exist")
+	}
+}
+
+/*** BLOCK COMPRESSION FUNCTIONS ***/
+// compressBlock compresses a single block with the algorithm selected by
+// c.CompressionMode, writing the result to out. It returns the compressed
+// size, the uncompressed size and any error from the chosen algorithm, and
+// panics on an unknown mode.
+func (c *Compression) compressBlock(in []byte, out io.Writer) (compressedSize uint32, uncompressedSize int64, err error) {
+	switch mode := c.CompressionMode; mode {
+	case GzipStore, GzipMin, GzipDefault, GzipMax:
+		// Gzip level for each gzip mode, indexed by mode constant
+		gzipLevels := [...]int{GzipStore: 0, GzipMin: 1, GzipDefault: 6, GzipMax: 9}
+		return c.compressBlockGz(in, out, gzipLevels[mode])
+	case XZMin:
+		return c.compressBlockExec(in, out, c.BinPath, []string{"-c1"})
+	case XZDefault:
+		return c.compressBlockExec(in, out, c.BinPath, []string{"-c"})
+	case LZ4:
+		return c.compressBlockLz4(in, out)
+	case Snappy:
+		return c.compressBlockSnappy(in, out)
+	default:
+		panic("Compression mode doesn't exist")
+	}
+}
+
+/*** MAIN COMPRESSION INTERFACE ***/
+// compressionResult represents the result of compression for a single block (gotten by a single thread)
+type compressionResult struct {
+	buffer *bytes.Buffer // Compressed block; nil when compression failed
+	n      int64         // Uncompressed size of the block
+	err    error         // Error reported by compressBlock, if any
+}
+
+// CompressFileReturningBlockData compresses a file returning the block data for that file.
+//
+// in is read in chunks of c.BlockSize. Up to c.NumThreads chunks are
+// compressed concurrently per batch and the compressed blocks are written to
+// out in input order, preceded by the mode's header and followed by its
+// footer.
+//
+// The returned blockData holds, in order: the header length, the compressed
+// size of every block, and finally the uncompressed size of the last block.
+// On error the returned blockData is nil.
+func (c *Compression) CompressFileReturningBlockData(in io.Reader, out io.Writer) (blockData []uint32, err error) {
+	// Initialize buffered writer
+	bufw := bufio.NewWriterSize(out, int(c.maxCompressedBlockSize()*uint32(c.NumThreads)))
+
+	// Get blockData, copy over header, add length of header to blockData
+	blockData = make([]uint32, 0)
+	header := c.getHeader()
+	_, err = bufw.Write(header)
+	if err != nil {
+		return nil, err
+	}
+	blockData = append(blockData, uint32(len(header)))
+
+	// Compress blocks
+	for {
+		// Loop through threads, spawning a go procedure for each thread. If we get eof on one thread, set eofAt to that thread and break
+		compressionResults := make([]chan compressionResult, c.NumThreads)
+		eofAt := -1
+		for i := 0; i < c.NumThreads; i++ {
+			// Create thread channel and allocate buffer to pass to thread.
+			// The channel is buffered so a worker can always deliver its
+			// result and exit, even if we return early on an error and never
+			// receive from it.
+			compressionResults[i] = make(chan compressionResult, 1)
+			var inputBuffer bytes.Buffer
+			_, err = io.CopyN(&inputBuffer, in, int64(c.BlockSize))
+			if err == io.EOF {
+				// Short (possibly empty) final block
+				eofAt = i
+			} else if err != nil {
+				return nil, err
+			}
+			// Run thread
+			go func(i int, in []byte) {
+				// Initialize thread writer and result struct
+				var res compressionResult
+				var buffer bytes.Buffer
+
+				// Compress block
+				_, n, err := c.compressBlock(in, &buffer)
+				if err != nil && err != io.EOF { // This errored out.
+					res.buffer = nil
+					res.n = 0
+					res.err = err
+					compressionResults[i] <- res
+					return
+				}
+				// Pass our data back to the main thread as a compression result
+				res.buffer = &buffer
+				res.n = n
+				res.err = err
+				compressionResults[i] <- res
+			}(i, inputBuffer.Bytes())
+			// If we have reached eof, we don't need more threads
+			if eofAt != -1 {
+				break
+			}
+		}
+
+		// Process writers in order
+		for i := 0; i < c.NumThreads; i++ {
+			if compressionResults[i] != nil {
+				// Get current compression result, get buffer, and copy buffer over to output
+				res := <-compressionResults[i]
+				close(compressionResults[i])
+				if res.buffer == nil {
+					return nil, res.err
+				}
+				blockSize := uint32(res.buffer.Len())
+				_, err = io.Copy(bufw, res.buffer)
+				if err != nil {
+					return nil, err
+				}
+				if DEBUG {
+					log.Printf("%d %d\n", res.n, blockSize)
+				}
+
+				// Append block size to block data
+				blockData = append(blockData, blockSize)
+
+				// If this is the last block, add the raw size of the last block to the end of blockData and break
+				if eofAt == i {
+					if DEBUG {
+						log.Printf("%d %d %d\n", res.n, byte(res.n%256), byte(res.n/256))
+					}
+					blockData = append(blockData, uint32(res.n))
+					break
+				}
+			}
+		}
+
+		// Flush this batch of compressed blocks to the output
+		err = bufw.Flush()
+		if err != nil {
+			return nil, err
+		}
+
+		// If eof happened, break
+		if eofAt != -1 {
+			if DEBUG {
+				log.Printf("%d", eofAt)
+				log.Printf("%v", blockData)
+			}
+			break
+		}
+	}
+
+	// Write footer and flush
+	_, err = bufw.Write(c.getFooter())
+	if err != nil {
+		return nil, err
+	}
+	err = bufw.Flush()
+
+	// Return
+	return blockData, err
+}
+
+/*** BLOCK DECOMPRESSION FUNCTIONS ***/
+// decompressBlock decompresses block data from in to out using the algorithm
+// selected by the decompressor's compression mode. It returns the result of
+// the chosen algorithm and panics on an unknown mode.
+func (d *Decompressor) decompressBlock(in io.Reader, out io.Writer) (n int, err error) {
+	switch mode := d.c.CompressionMode; mode {
+	case LZ4:
+		return decompressBlockLz4(in, out, int64(d.c.BlockSize))
+	case Snappy:
+		return decompressBlockSnappy(in, out)
+	case XZDefault:
+		return decompressBlockRangeExec(in, out, d.c.BinPath, []string{"-dc"})
+	case XZMin:
+		return decompressBlockRangeExec(in, out, d.c.BinPath, []string{"-dc1"})
+	case GzipStore, GzipMin, GzipDefault, GzipMax:
+		return decompressBlockRangeGz(in, out)
+	default:
+		panic("Compression mode doesn't exist") // If none of the above returned
+	}
+}
+
+// decompressionResult represents the result of decompressing a block
+// (produced by a single decompression goroutine).
+type decompressionResult struct {
+	err    error
+	buffer *bytes.Buffer
+}
+
+// decompressBlockRangeMultithreaded is a wrapper around decompressBlock that
+// implements multithreading. in must deliver consecutive compressed blocks
+// beginning with block number startingBlock. Blocks are cut out of in using
+// d.blockStarts, decompressed in batches of up to d.c.NumThreads goroutines,
+// and written to out in order. It stops at the last block or when in runs
+// out of data, and returns the number of decompressed bytes written.
+func (d *Decompressor) decompressBlockRangeMultithreaded(in io.Reader, out io.Writer, startingBlock uint32) (n int, err error) {
+	// First, use bufio.Reader to reduce the number of reads and bufio.Writer to reduce the number of writes
+	bufin := bufio.NewReader(in)
+	bufout := bufio.NewWriter(out)
+
+	// Decompress each block individually.
+	currBatch := startingBlock // Block # of start of current batch of blocks
+	totalBytesCopied := 0
+	for {
+		// Loop through threads
+		eofAt := -1
+		decompressionResults := make([]chan decompressionResult, d.c.NumThreads)
+
+		for i := 0; i < d.c.NumThreads; i++ {
+			// Get currBlock
+			currBlock := currBatch + uint32(i)
+
+			// Create channel. It is buffered so a worker can always deliver
+			// its result and exit, even if we return early because an
+			// earlier block failed and never receive from it.
+			decompressionResults[i] = make(chan decompressionResult, 1)
+
+			// Check if we've reached EOF
+			if currBlock >= d.numBlocks {
+				eofAt = i
+				break
+			}
+
+			// Get block to decompress
+			var compressedBlock bytes.Buffer
+			var err error
+			n, err := io.CopyN(&compressedBlock, bufin, d.blockStarts[currBlock+1]-d.blockStarts[currBlock])
+			if err != nil || n == 0 { // End of stream
+				eofAt = i
+				break
+			}
+
+			// Spawn thread to decompress block
+			if DEBUG {
+				log.Printf("Spawning %d", i)
+			}
+			go func(i int, currBlock uint32, in io.Reader) {
+				var block bytes.Buffer
+				var res decompressionResult
+
+				// Decompress block
+				_, res.err = d.decompressBlock(in, &block)
+				res.buffer = &block
+				decompressionResults[i] <- res
+			}(i, currBlock, &compressedBlock)
+		}
+		if DEBUG {
+			log.Printf("Eof at %d", eofAt)
+		}
+
+		// Process results
+		for i := 0; i < d.c.NumThreads; i++ {
+			// If we got EOF, return
+			if eofAt == i {
+				return totalBytesCopied, bufout.Flush() // Flushing bufout is needed to prevent us from getting all nulls
+			}
+
+			// Get result and close
+			res := <-decompressionResults[i]
+			close(decompressionResults[i])
+			if res.err != nil {
+				return totalBytesCopied, res.err
+			}
+
+			// Copy to output and add to total bytes copied
+			n, err := io.Copy(bufout, res.buffer)
+			totalBytesCopied += int(n)
+			if err != nil {
+				return totalBytesCopied, err
+			}
+		}
+
+		// Add NumThreads to currBatch
+		currBatch += uint32(d.c.NumThreads)
+	}
+}
+
+/*** MAIN DECOMPRESSION INTERFACE ***/
+
+// Decompressor is the ReadSeeker implementation for decompression.
+// Its Read and Seek methods use value receivers, so the cursor is kept behind
+// a pointer to stay shared between copies of the struct.
+type Decompressor struct {
+	cursorPos        *int64        // The current location we have seeked to (offset into the decompressed data)
+	blockStarts      []int64       // The start of each block. These will be recovered from the block sizes
+	numBlocks        uint32        // Number of blocks
+	decompressedSize int64         // Decompressed size of the file.
+	in               io.ReadSeeker // Input
+	c                *Compression  // Compression options
+}
+
+// parseBlockData parses block data. It returns the number of blocks, the
+// block start locations for each block, and the decompressed size of the
+// entire file.
+//
+// blockData is expected in the layout produced by
+// CompressFileReturningBlockData: the header length, the compressed size of
+// each block, then the uncompressed size of the last block. Block data with
+// fewer than three entries cannot describe even one block; it is treated as
+// an empty file (0, nil, 0) instead of underflowing the unsigned block count.
+func parseBlockData(blockData []uint32, BlockSize uint32) (numBlocks uint32, blockStarts []int64, decompressedSize int64) {
+	// Parse the block data
+	blockDataLen := len(blockData)
+	if blockDataLen < 3 {
+		return 0, nil, 0
+	}
+	numBlocks = uint32(blockDataLen - 1)
+	if DEBUG {
+		log.Printf("%v\n", blockData)
+		log.Printf("metadata len, numblocks = %d, %d", blockDataLen, numBlocks)
+	}
+	blockStarts = make([]int64, numBlocks+1) // Starts with start of first block (and end of header), ends with end of last block
+	currentBlockPosition := int64(0)
+	for i := uint32(0); i < numBlocks; i++ { // Loop through block data, getting starts of blocks.
+		currentBlockSize := blockData[i]
+		currentBlockPosition += int64(currentBlockSize)
+		blockStarts[i] = currentBlockPosition
+	}
+	blockStarts[numBlocks] = currentBlockPosition // End of last block
+
+	numBlocks-- // Subtract 1 from number of blocks because our header technically isn't a block
+
+	// Get uncompressed size of last block and derive uncompressed size of file
+	lastBlockRawSize := blockData[blockDataLen-1]
+	decompressedSize = int64(numBlocks-1)*int64(BlockSize) + int64(lastBlockRawSize)
+	if DEBUG {
+		log.Printf("Decompressed size = %d", decompressedSize)
+	}
+
+	return numBlocks, blockStarts, decompressedSize
+}
+
+// initWithBlockData prepares the decompressor for reading: it stores the
+// compression options and the input, parses blockData into the block layout,
+// puts the cursor at position 0 and rewinds in to its start. The error from
+// rewinding in is returned. The size argument is not used.
+func (d *Decompressor) initWithBlockData(c *Compression, in io.ReadSeeker, size int64, blockData []uint32) (err error) {
+	d.c = c
+
+	// A freshly allocated cursor already points at position 0
+	d.cursorPos = new(int64)
+
+	// Derive block count, block offsets and total size from the block data
+	d.numBlocks, d.blockStarts, d.decompressedSize = parseBlockData(blockData, c.BlockSize)
+
+	// Rewind the input and keep it
+	_, err = in.Seek(0, io.SeekStart)
+	d.in = in
+	return err
+}
+
+// Read reads data using a decompressor.
+//
+// It works out which compressed blocks cover the requested range starting at
+// the cursor, reads and decompresses those blocks, copies the requested part
+// into p and advances the cursor. It returns io.EOF once the cursor is
+// outside the decompressed data, and together with the final bytes when the
+// request reaches past the last block.
+func (d Decompressor) Read(p []byte) (int, error) {
+	if DEBUG {
+		log.Printf("Cursor pos before: %d\n", *d.cursorPos)
+	}
+	// Check if we're at the end of the file or before the beginning of the file
+	if *d.cursorPos >= d.decompressedSize || *d.cursorPos < 0 {
+		if DEBUG {
+			log.Println("Out of bounds EOF")
+		}
+		return 0, io.EOF
+	}
+
+	// An empty read inside the file is not EOF; avoid decompressing for nothing
+	if len(p) == 0 {
+		return 0, nil
+	}
+
+	// Get block range to read
+	blockNumber := *d.cursorPos / int64(d.c.BlockSize)
+	blockStart := d.blockStarts[blockNumber]                                 // Start position of blocks to read
+	dataOffset := *d.cursorPos % int64(d.c.BlockSize)                        // Offset of data to read in blocks to read
+	bytesToRead := len(p)                                                    // Number of bytes to read
+	blocksToRead := (int64(bytesToRead)+dataOffset)/int64(d.c.BlockSize) + 1 // Number of blocks to read
+	returnEOF := false
+	if blockNumber+blocksToRead > int64(d.numBlocks) { // Overflowed the last block
+		blocksToRead = int64(d.numBlocks) - blockNumber
+		returnEOF = true
+	}
+	var blockEnd int64                                 // End position of blocks to read
+	blockEnd = d.blockStarts[blockNumber+blocksToRead] // Start of the block after the last block we want to get is the end of the last block we want to get
+	blockLen := blockEnd - blockStart
+
+	// Read compressed block range into buffer
+	var compressedBlocks bytes.Buffer
+	_, err := d.in.Seek(blockStart, io.SeekStart)
+	if err != nil {
+		return 0, err
+	}
+	n1, err := io.CopyN(&compressedBlocks, d.in, blockLen)
+	if DEBUG {
+		log.Printf("block # = %d @ %d <- %d, len %d, copied %d bytes", blockNumber, blockStart, *d.cursorPos, blockLen, n1)
+	}
+	if err != nil {
+		if DEBUG {
+			log.Println("Copy Error")
+		}
+		return 0, err
+	}
+
+	// Decompress block range
+	var b bytes.Buffer
+	n, err := d.decompressBlockRangeMultithreaded(&compressedBlocks, &b, uint32(blockNumber))
+	if err != nil {
+		if DEBUG {
+			log.Println("Decompression error")
+		}
+		// Nothing has been copied into p yet, so report zero bytes read
+		return 0, err
+	}
+
+	// Calculate bytes read
+	readOverflow := *d.cursorPos + int64(bytesToRead) - d.decompressedSize
+	if readOverflow < 0 {
+		readOverflow = 0
+	}
+	bytesRead := int64(bytesToRead) - readOverflow
+	if DEBUG {
+		log.Printf("Read offset = %d, overflow = %d", dataOffset, readOverflow)
+		log.Printf("Decompressed %d bytes; read %d out of %d bytes\n", n, bytesRead, bytesToRead)
+	}
+
+	// If we read 0 bytes, we reached the end of the file
+	if bytesRead == 0 {
+		if DEBUG {
+			log.Println("EOF")
+		}
+		return 0, io.EOF
+	}
+
+	// Copy from buffer+offset to p
+	_, err = io.CopyN(ioutil.Discard, &b, dataOffset)
+	if err != nil {
+		return 0, err
+	}
+	n, err = b.Read(p) // Note: everything after bytesToRead bytes will be discarded; we are returning bytesToRead instead of n
+	if err != nil {
+		return n, err
+	}
+
+	// Increment cursor position and return
+	*d.cursorPos += bytesRead
+	if returnEOF {
+		if DEBUG {
+			log.Println("EOF")
+		}
+		return int(bytesRead), io.EOF
+	}
+	return int(bytesRead), nil
+}
+
+// Seek seeks to a location in the decompressed stream.
+//
+// whence is one of io.SeekStart, io.SeekCurrent or io.SeekEnd. As required by
+// io.Seeker it returns the new position relative to the start of the
+// decompressed data. An unknown whence or a resulting negative position is an
+// error and leaves the cursor unchanged.
+func (d Decompressor) Seek(offset int64, whence int) (int64, error) {
+	// Work out the new absolute position
+	var newPos int64
+	switch whence {
+	case io.SeekStart:
+		newPos = offset
+	case io.SeekCurrent:
+		newPos = *d.cursorPos + offset
+	case io.SeekEnd:
+		newPos = d.decompressedSize + offset
+	default:
+		return 0, errors.New("invalid whence")
+	}
+	if newPos < 0 {
+		return 0, errors.New("negative position")
+	}
+
+	// Commit and return
+	*d.cursorPos = newPos
+	return newPos, nil
+}
+
+// DecompressFileExtData decompresses a file using external block data (as
+// returned by CompressFileReturningBlockData). It returns a ReadSeeker over
+// the decompressed contents of in, the decompressed size, and any error from
+// initializing the decompressor. size is passed through to the initializer.
+func (c *Compression) DecompressFileExtData(in io.ReadSeeker, size int64, blockData []uint32) (FileHandle io.ReadSeeker, decompressedSize int64, err error) {
+	var d Decompressor
+	err = d.initWithBlockData(c, in, size, blockData)
+	FileHandle, decompressedSize = d, d.decompressedSize
+	return FileHandle, decompressedSize, err
+}
--- a/backend/press/compression_test.go
+++ b/backend/press/compression_test.go
@ -0,0 +1,136 @@
+package press
+
+import (
+	"bufio"
+	"bytes"
+	"crypto/md5"
+	"encoding/base64"
+	"io"
+	"io/ioutil"
+	"math/rand"
+	"os"
+	"strings"
+	"testing"
+)
+
+// TestStringSmall is a short test input.
+const TestStringSmall = "The quick brown fox jumps over the lazy dog."
+
+// TestSizeLarge is the length in bytes of the large generated test input.
+const TestSizeLarge = 2097152 // 2 megabytes
+
+// testCompressDecompress tests compression and decompression for a preset.
+// It compresses testString into a temporary file, decompresses it again and
+// checks that the MD5 of the recovered data matches the MD5 of the input.
+func testCompressDecompress(t *testing.T, preset string, testString string) {
+	// Create compression instance
+	comp, err := NewCompressionPreset(preset)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Open files and hashers
+	testFile := strings.NewReader(testString)
+	testFileHasher := md5.New()
+	compressedFile, err := ioutil.TempFile(os.TempDir(), "rclone_compression_test")
+	if err != nil {
+		t.Fatal(err)
+	}
+	// Clean up the temporary file however the test ends
+	defer func() {
+		if err := compressedFile.Close(); err != nil {
+			t.Log("Warning: cannot close compressed test file")
+		}
+		if err := os.Remove(compressedFile.Name()); err != nil {
+			t.Log("Warning: cannot remove compressed test file")
+		}
+	}()
+	outHasher := md5.New()
+
+	// Compress file and hash it (size doesn't matter here)
+	testFileReader, testFileWriter := io.Pipe()
+	go func() {
+		_, err := io.Copy(io.MultiWriter(testFileHasher, testFileWriter), testFile)
+		if err != nil {
+			// t.Fatal must only be called from the test goroutine, so record
+			// the failure and hand the error to the reader instead of leaving
+			// it blocked on a pipe that never closes.
+			t.Error("Failed to write compressed file")
+			_ = testFileWriter.CloseWithError(err)
+			return
+		}
+		err = testFileWriter.Close()
+		if err != nil {
+			t.Log("Failed to close compressed file")
+		}
+	}()
+	var blockData []uint32
+	blockData, err = comp.CompressFileReturningBlockData(testFileReader, compressedFile)
+	if err != nil {
+		t.Fatalf("Compression failed with error: %v", err)
+	}
+	testFileHash := testFileHasher.Sum(nil)
+
+	// Get the size, seek to the beginning of the compressed file
+	size, err := compressedFile.Seek(0, io.SeekEnd)
+	if err != nil {
+		t.Fatal(err)
+	}
+	_, err = compressedFile.Seek(0, io.SeekStart)
+	if err != nil {
+		t.Fatal(err)
+	}
+	t.Logf("Compressed size: %d\n", size)
+
+	// Decompress file into a hasher
+	var FileHandle io.ReadSeeker
+	var decompressedSize int64
+	FileHandle, decompressedSize, err = comp.DecompressFileExtData(compressedFile, size, blockData)
+	if err != nil {
+		t.Fatal(err)
+	}
+	t.Logf("Decompressed size: %d\n", decompressedSize)
+	bufr := bufio.NewReaderSize(FileHandle, 12345678)
+	_, err = io.Copy(outHasher, bufr)
+	if err != nil && err != io.EOF {
+		t.Fatal(err)
+	}
+	decompressedFileHash := outHasher.Sum(nil)
+
+	// Compare hashes
+	if !bytes.Equal(testFileHash, decompressedFileHash) {
+		t.Logf("Hash of original file: %x\n", testFileHash)
+		t.Logf("Hash of recovered file: %x\n", decompressedFileHash)
+		t.Fatal("Hashes do not match!")
+	}
+}
+
+// testSmallLarge runs the compress/decompress round trip for a preset twice:
+// once on the small fixed string and once on a large generated string.
+func testSmallLarge(t *testing.T, preset string) {
+	largeInput := getCompressibleString(TestSizeLarge)
+
+	roundTrip := func(input string) func(*testing.T) {
+		return func(t *testing.T) {
+			testCompressDecompress(t, preset, input)
+		}
+	}
+	t.Run("TestSmall", roundTrip(TestStringSmall))
+	t.Run("TestLarge", roundTrip(largeInput))
+}
+
+// getCompressibleString returns a deterministic string of the given length.
+// It base64-encodes pseudorandom bytes from a fixed seed, so the text only
+// uses a 64-character alphabet, which leaves it compressible.
+func getCompressibleString(size int) string {
+	const alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/"
+
+	// Slightly more raw bytes than needed, so the encoding is long enough
+	raw := make([]byte, size*3/4+16)
+	rand.New(rand.NewSource(0)).Read(raw)
+
+	encoded := base64.NewEncoding(alphabet).EncodeToString(raw)
+	return encoded[:size]
+}
+
+// TestCompression runs the small and large round-trip tests for every
+// preset under test. The xz preset is only included when the xz binary is
+// available.
+func TestCompression(t *testing.T) {
+	presets := []string{"lz4", "snappy", "gzip-min"}
+	if !checkXZ() {
+		t.Log("XZ binary not found on current system. Not testing xz.")
+	} else {
+		presets = append(presets, "xz-min")
+	}
+
+	for i := range presets {
+		preset := presets[i]
+		t.Run(preset, func(t *testing.T) {
+			testSmallLarge(t, preset)
+		})
+	}
+}
--- a/backend/press/press.go
+++ b/backend/press/press.go
--- a/backend/press/press_test.go
+++ b/backend/press/press_test.go
@ -0,0 +1,88 @@
+// Test Press filesystem interface
+package press
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+
+	_ "github.com/ncw/rclone/backend/local"
+	"github.com/ncw/rclone/fstest"
+	"github.com/ncw/rclone/fstest/fstests"
+)
+
+// TestIntegration runs the shared fstests suite against the remote named in
+// fstest.RemoteName. It is skipped when no remote name has been supplied.
+func TestIntegration(t *testing.T) {
+	remote := *fstest.RemoteName
+	if remote == "" {
+		t.Skip("Skipping as -remote not set")
+	}
+
+	opt := fstests.Opt{
+		RemoteName:                   remote,
+		NilObject:                    (*Object)(nil),
+		UnimplementableFsMethods:     []string{"OpenWriterAt"},
+		UnimplementableObjectMethods: []string{},
+	}
+	fstests.Run(t, &opt)
+}
+
+// TestRemoteLz4 tests LZ4 compression
+func TestRemoteLz4(t *testing.T) {
+	if *fstest.RemoteName != "" {
+		t.Skip("Skipping as -remote set")
+	}
+	tempdir := filepath.Join(os.TempDir(), "rclone-press-test-lz4")
+	name := "TestPressLz4"
+	fstests.Run(t, &fstests.Opt{
+		RemoteName:                   name + ":",
+		NilObject:                    (*Object)(nil),
+		UnimplementableFsMethods:     []string{"OpenWriterAt"},
+		UnimplementableObjectMethods: []string{},
+		ExtraConfig: []fstests.ExtraConfigItem{
+			{Name: name, Key: "type", Value: "press"},
+			{Name: name, Key: "remote", Value: tempdir},
+			{Name: name, Key: "compression_mode", Value: "lz4"},
+		},
+	})
+}
+
+// TestRemoteGzip tests GZIP compression
+func TestRemoteGzip(t *testing.T) {
+	if *fstest.RemoteName != "" {
+		t.Skip("Skipping as -remote set")
+	}
+	tempdir := filepath.Join(os.TempDir(), "rclone-press-test-gzip")
+	name := "TestPressGzip"
+	fstests.Run(t, &fstests.Opt{
+		RemoteName:                   name + ":",
+		NilObject:                    (*Object)(nil),
+		UnimplementableFsMethods:     []string{"OpenWriterAt"},
+		UnimplementableObjectMethods: []string{},
+		ExtraConfig: []fstests.ExtraConfigItem{
+			{Name: name, Key: "type", Value: "press"},
+			{Name: name, Key: "remote", Value: tempdir},
+			{Name: name, Key: "compression_mode", Value: "gzip-min"},
+		},
+	})
+}
+
+// TestRemoteXZ tests XZ compression
+func TestRemoteXZ(t *testing.T) {
+	if !checkXZ() {
+		t.Skip("XZ binary not found on current system")
+	}
+	if *fstest.RemoteName != "" {
+		t.Skip("Skipping as -remote set")
+	}
+	tempdir := filepath.Join(os.TempDir(), "rclone-press-test-xz")
+	name := "TestPressXZ"
+	fstests.Run(t, &fstests.Opt{
+		RemoteName:                   name + ":",
+		NilObject:                    (*Object)(nil),
+		UnimplementableFsMethods:     []string{"OpenWriterAt"},
+		UnimplementableObjectMethods: []string{},
+		ExtraConfig: []fstests.ExtraConfigItem{
+			{Name: name, Key: "type", Value: "press"},
+			{Name: name, Key: "remote", Value: tempdir},
+			{Name: name, Key: "compression_mode", Value: "xz-min"},
+		},
+	})
+}