zip: backend to read or create zip files FIXME WIP

Use as `:zip:remote:path/to/file.zip` for reading or writing.

- reading zip files works - can mount zip files
- writing works
- unknowns in writing, e.g. how to tell when the zip is finished
- lots of bodges
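
As a rough sketch of how the backend is meant to be driven from Go (not part of this commit; the remote name `myremote:` and the archive path are invented, and this assumes the usual rclone library entry points fs.NewFs and Fs.List):

    package main

    import (
        "context"
        "fmt"
        "log"

        _ "github.com/rclone/rclone/backend/all" // registers all backends, including zip
        "github.com/rclone/rclone/fs"
    )

    func main() {
        ctx := context.Background()
        // the leading ":zip:" selects the zip backend on the fly; the rest
        // is the wrapped remote path of the archive
        f, err := fs.NewFs(ctx, ":zip:myremote:path/to/file.zip")
        if err != nil {
            log.Fatal(err)
        }
        // list the entries in the root of the zip
        entries, err := f.List(ctx, "")
        if err != nil {
            log.Fatal(err)
        }
        for _, entry := range entries {
            fmt.Println(entry.Remote())
        }
    }
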
Nick Craig-Wood 2020-07-12 16:52:33 +01:00
parent 0cccda61db
commit bc279ec4b0
2 changed files with 573 additions and 0 deletions

backend/all/all.go

@@ -54,5 +54,6 @@ import (
_ "github.com/rclone/rclone/backend/uptobox"
_ "github.com/rclone/rclone/backend/webdav"
_ "github.com/rclone/rclone/backend/yandex"
_ "github.com/rclone/rclone/backend/zip"
_ "github.com/rclone/rclone/backend/zoho"
)

backend/zip/zip.go Normal file

@@ -0,0 +1,572 @@
// Package zip provides wrappers for Fs and Object which read and
// write a zip file.
//
// Use like :zip:remote:path/to/file.zip
package zip
/*
FIXME could maybe make an append to zip file with specially named zip
files having more info in?
So have base zip file file.zip then file.extra000.zip etc.
We read the objects sequentially but can transfer them in parallel.

FIXME what happens when we want to copy a file out of the zip?
So rclone copy remote:file.zip/file/inside.zip?
Maybe rclone copy remote:file.zip#file/inside.zip?
Or just look for first .zip file?

FIXME what happens when writing - need to know the end... Don't have a
backend Shutdown call.
Could have a read only Feature flag so we turn that on when we have a
zip. Then if placed in dest position could error. Might be useful for
http also.

FIXME this will perform poorly for unpacking as the VFS Reader is bad
at multiple streams.

FIXME not writing directories
cf zipinfo
drwxr-xr-x 3.0 unx 0 bx stor 19-Oct-05 12:14 rclone-v1.49.5-linux-amd64/

FIXME can probably check directories better than trailing /

FIXME enormous atexit bodge

FIXME crc32 skip on write bodge

Bodge bodge bodge
*/
import (
"archive/zip"
"context"
"errors"
"fmt"
"io"
"io/ioutil"
"os"
"path"
"strings"
"sync"
"time"
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/cache"
"github.com/rclone/rclone/fs/config/configmap"
"github.com/rclone/rclone/fs/config/configstruct"
"github.com/rclone/rclone/fs/dirtree"
"github.com/rclone/rclone/fs/fspath"
"github.com/rclone/rclone/fs/hash"
"github.com/rclone/rclone/fs/log"
"github.com/rclone/rclone/lib/atexit"
"github.com/rclone/rclone/lib/readers"
"github.com/rclone/rclone/vfs"
)
// Globals
// Register with Fs
func init() {
fs.Register(&fs.RegInfo{
Name: "zip",
Description: "Read or write a zip file",
NewFs: NewFs,
Options: []fs.Option{},
})
}
// NewFs constructs an Fs from the path, container:path
func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (fs.Fs, error) {
// Parse config into Options struct
opt := new(Options)
err := configstruct.Set(m, opt)
if err != nil {
return nil, err
}
// Parse the root which should be remote:path/to/file.zip
parent, leaf, err := fspath.Split(root)
if err != nil {
return nil, fmt.Errorf("failed to parse root %q: %w", root, err)
}
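// e.g. root "remote:path/to/file.zip" splits into parent "remote:path/to/" and leaf "file.zip"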
if leaf == "" {
return nil, fmt.Errorf("failed to parse root %q: not pointing to a file", root)
}
root = strings.TrimRight(parent, "/")
// Create the remote from the cache
wrappedFs, err := cache.Get(ctx, root)
if err != nil {
return nil, fmt.Errorf("failed to make %q to wrap: %w", root, err)
}
// FIXME vfs cache?
// FIXME could factor out ReadFileHandle and just use that rather than the full VFS
VFS := vfs.New(wrappedFs, nil)
node, err := VFS.Stat(leaf)
if err != nil && !os.IsNotExist(err) {
return nil, fmt.Errorf("failed to find %q object: %w", leaf, err)
}
if node != nil && node.IsDir() {
return nil, fmt.Errorf("can't unzip a directory %q", leaf)
}
f := &Fs{
f: wrappedFs,
name: name,
root: root,
opt: *opt,
vfs: VFS,
node: node,
leaf: leaf,
}
// FIXME Maybe delay this until the first read since if we are only writing we don't need it?
if node != nil {
err = f.readZip()
if err != nil {
return nil, fmt.Errorf("failed to open zip file: %w", err)
}
}
// FIXME
// the features here are ones we could support, and they are
// ANDed with the ones from wrappedFs
//
// FIXME some of these need to be forced on - CanHaveEmptyDirectories
f.features = (&fs.Features{
CaseInsensitive: false,
DuplicateFiles: false,
ReadMimeType: false, // MimeTypes not supported with zip
WriteMimeType: false,
BucketBased: false,
CanHaveEmptyDirectories: true,
}).Fill(ctx, f).Mask(ctx, wrappedFs).WrapsFs(f, wrappedFs)
return f, nil
}
// Options defines the configuration for this backend
type Options struct {
}
// Fs represents a wrapped fs.Fs
type Fs struct {
f fs.Fs
wrapper fs.Fs
name string
root string
opt Options
features *fs.Features // optional features
vfs *vfs.VFS
node vfs.Node // zip file object - set if reading
leaf string // leaf name of the zip file object
dt dirtree.DirTree // read from zipfile
wrmu sync.Mutex // writing mutex protects the below
wh vfs.Handle // write handle
zw *zip.Writer
}
// Name of the remote (as passed into NewFs)
func (f *Fs) Name() string {
return f.name
}
// Root of the remote (as passed into NewFs)
func (f *Fs) Root() string {
return f.root
}
// Features returns the optional features of this Fs
func (f *Fs) Features() *fs.Features {
return f.features
}
// String returns a description of the FS
func (f *Fs) String() string {
return fmt.Sprintf("Zip '%s:%s'", f.name, f.root)
}
// readZip reads the zip file into f
func (f *Fs) readZip() (err error) {
if f.node == nil {
return fs.ErrorDirNotFound
}
size := f.node.Size()
if size < 0 {
return errors.New("can't read from zip file with unknown size")
}
r, err := f.node.Open(os.O_RDONLY)
if err != nil {
return fmt.Errorf("failed to open zip file: %w", err)
}
zr, err := zip.NewReader(r, size)
if err != nil {
return fmt.Errorf("failed to read zip file: %w", err)
}
dt := dirtree.New()
for _, file := range zr.File {
remote := strings.Trim(path.Clean(file.Name), "/")
if remote == "." {
remote = ""
}
if strings.HasSuffix(file.Name, "/") {
dir := fs.NewDir(remote, file.Modified)
dt.AddDir(dir)
} else {
o := &Object{
f: f,
remote: remote,
fh: &file.FileHeader,
file: file,
}
dt.Add(o)
}
}
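// add any parent directories missing from the zip and sort the listings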
dt.CheckParents("")
dt.Sort()
f.dt = dt
fs.Debugf(nil, "dt = %#v", dt)
return nil
}
// List the objects and directories in dir into entries. The
// entries can be returned in any order but should be for a
// complete directory.
//
// dir should be "" to list the root, and should not have
// trailing slashes.
//
// This should return ErrDirNotFound if the directory isn't
// found.
func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err error) {
defer log.Trace(f, "dir=%q", dir)("entries=%v, err=%v", &entries, &err)
entries, ok := f.dt[dir]
if !ok {
return nil, fs.ErrorDirNotFound
}
fs.Debugf(f, "dir=%q, entries=%v", dir, entries)
return entries, nil
}
// ListR lists the objects and directories of the Fs starting
// from dir recursively into out.
//
// dir should be "" to start from the root, and should not
// have trailing slashes.
//
// This should return ErrDirNotFound if the directory isn't
// found.
//
// It should call callback for each tranche of entries read.
// These need not be returned in any particular order. If
// callback returns an error then the listing will stop
// immediately.
//
// Don't implement this unless you have a more efficient way
// of listing recursively than doing a directory traversal.
func (f *Fs) ListR(ctx context.Context, dir string, callback fs.ListRCallback) (err error) {
for _, entries := range f.dt {
err = callback(entries)
if err != nil {
return err
}
}
return nil
}
// NewObject finds the Object at remote.
func (f *Fs) NewObject(ctx context.Context, remote string) (o fs.Object, err error) {
defer log.Trace(f, "remote=%q", remote)("obj=%v, err=%v", &o, &err)
if f.dt == nil {
return nil, fs.ErrorObjectNotFound
}
_, entry := f.dt.Find(remote)
if entry == nil {
return nil, fs.ErrorObjectNotFound
}
o, ok := entry.(*Object)
if !ok {
return nil, fs.ErrorNotAFile
}
return o, nil
}
// Precision of the ModTimes in this Fs
func (f *Fs) Precision() time.Duration {
return time.Second
}
// Mkdir makes the directory (container, bucket)
//
// Shouldn't return an error if it already exists
func (f *Fs) Mkdir(ctx context.Context, dir string) error {
// FIXME should this create a "directory" entry if not already created?
return nil
}
// Rmdir removes the directory (container, bucket) if empty
//
// Return an error if it doesn't exist or isn't empty
func (f *Fs) Rmdir(ctx context.Context, dir string) error {
return errors.New("can't remove directories from zip file")
}
// Wrapper for src to make it into os.FileInfo
type fileInfo struct {
src fs.ObjectInfo
}
// Name - base name of the file (actually full name for zip)
func (fi fileInfo) Name() string {
return fi.src.Remote()
}
// Size - length in bytes for regular files; system-dependent for others
func (fi fileInfo) Size() int64 {
return fi.src.Size()
}
// Mode - file mode bits
func (fi fileInfo) Mode() os.FileMode {
return 0777
}
// ModTime - modification time
func (fi fileInfo) ModTime() time.Time {
return fi.src.ModTime(context.Background())
}
// IsDir - abbreviation for Mode().IsDir()
func (fi fileInfo) IsDir() bool {
return false
}
// Sys - underlying data source (can return nil)
func (fi fileInfo) Sys() interface{} {
return nil
}
// check type
var _ os.FileInfo = fileInfo{}
// Put in to the remote path with the modTime given of the given size
//
// May create the object even if it returns an error - if so
// will return the object and the error, otherwise will return
// nil and the error
func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (o fs.Object, err error) {
defer log.Trace(f, "src=%v", src)("obj=%v, err=%v", &o, &err)
f.wrmu.Lock()
defer f.wrmu.Unlock()
size := src.Size()
if size < 0 {
return nil, errors.New("can't zip unknown sized objects")
}
if f.zw == nil {
wh, err := f.vfs.OpenFile(f.leaf, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0777)
if err != nil {
return nil, fmt.Errorf("zip put: failed to open output file: %w", err)
}
f.wh = wh
f.zw = zip.NewWriter(f.wh)
// FIXME enormous bodge to work around no lifecycle for backends
atexit.Register(func() {
err = f.zw.Close()
if err != nil {
fs.Errorf(f, "Error closing zip writer: %v", err)
}
err = f.wh.Close()
if err != nil {
fs.Errorf(f, "Error closing zip file: %v", err)
}
})
}
// FIXME not putting in directories?
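// make a zip header from the source object's name, size and modtime via the fileInfo adapter above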
fh, err := zip.FileInfoHeader(fileInfo{src})
if err != nil {
return nil, fmt.Errorf("zip put: failed to create file header: %w", err)
}
// FIXME just a guess!
// should probably try a better heuristic?
// NB the method must be set before CreateHeader as that is when the compressor is chosen
if size < 10 {
fh.Method = zip.Store
} else {
fh.Method = zip.Deflate
}
w, err := f.zw.CreateHeader(fh)
if err != nil {
return nil, fmt.Errorf("zip put: failed to open file: %w", err)
}
_, err = io.CopyN(w, in, size)
if err != nil {
return nil, fmt.Errorf("zip put: failed to copy file: %w", err)
}
// err = w.Close()
// if err != nil {
// return nil,fmt.Errorf("zip put: failed to close file: %w", err)
// }
o = &Object{
f: f,
fh: fh,
remote: src.Remote(),
}
return o, nil
}
// Hashes returns the supported hash sets.
func (f *Fs) Hashes() hash.Set {
return hash.Set(hash.CRC32)
}
// UnWrap returns the Fs that this Fs is wrapping
func (f *Fs) UnWrap() fs.Fs {
return f.f
}
// WrapFs returns the Fs that is wrapping this Fs
func (f *Fs) WrapFs() fs.Fs {
return f.wrapper
}
// SetWrapper sets the Fs that is wrapping this Fs
func (f *Fs) SetWrapper(wrapper fs.Fs) {
f.wrapper = wrapper
}
// Object describes an object to be read from the raw zip file
type Object struct {
f *Fs
remote string
fh *zip.FileHeader
file *zip.File
}
// Fs returns read only access to the Fs that this object is part of
func (o *Object) Fs() fs.Info {
return o.f
}
// Return a string version
func (o *Object) String() string {
if o == nil {
return "<nil>"
}
return o.Remote()
}
// Remote returns the remote path
func (o *Object) Remote() string {
return o.remote
}
// Size returns the size of the file
func (o *Object) Size() int64 {
return int64(o.fh.UncompressedSize64)
}
// ModTime returns the modification time of the object
//
// This is read from the zip entry's header
func (o *Object) ModTime(ctx context.Context) time.Time {
return o.fh.Modified
}
// SetModTime sets the modification time of the local fs object
func (o *Object) SetModTime(ctx context.Context, modTime time.Time) error {
fs.Debugf(o, "Can't set mod time - ignoring")
return nil
}
// Storable returns a boolean indicating if this object is storable
func (o *Object) Storable() bool {
return true
}
// Hash returns the selected checksum of the file
// If no checksum is available it returns ""
func (o *Object) Hash(ctx context.Context, ht hash.Type) (string, error) {
if ht == hash.CRC32 {
// FIXME return empty CRC if writing
if o.f.dt == nil {
return "", nil
}
return fmt.Sprintf("%08x", o.fh.CRC32), nil
}
return "", hash.ErrUnsupported
}
// Open opens the file for read. Call Close() on the returned io.ReadCloser
func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (rc io.ReadCloser, err error) {
var offset, limit int64 = 0, -1
for _, option := range options {
switch x := option.(type) {
case *fs.SeekOption:
offset = x.Offset
case *fs.RangeOption:
offset, limit = x.Decode(o.Size())
default:
if option.Mandatory() {
fs.Logf(o, "Unsupported mandatory option: %v", option)
}
}
}
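// zip entries can't be seeked into directly, so emulate SeekOption/RangeOption by reading and discarding data up to the offset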
rc, err = o.file.Open()
if err != nil {
return nil, err
}
// discard data from start as necessary
if offset > 0 {
_, err = io.CopyN(ioutil.Discard, rc, offset)
if err != nil {
return nil, err
}
}
// If limited then don't return everything
if limit >= 0 {
return readers.NewLimitedReadCloser(rc, limit), nil
}
return rc, nil
}
// Update in to the object with the modTime given of the given size
func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error {
return errors.New("can't Update zip files")
}
// Remove an object
func (o *Object) Remove(ctx context.Context) error {
return errors.New("can't Remove zip file objects")
}
// Check the interfaces are satisfied
var (
_ fs.Fs = (*Fs)(nil)
_ fs.UnWrapper = (*Fs)(nil)
_ fs.ListRer = (*Fs)(nil)
// _ fs.Abouter = (*Fs)(nil) - FIXME can implement
_ fs.Wrapper = (*Fs)(nil)
_ fs.Object = (*Object)(nil)
)