mirror of
https://github.com/rclone/rclone.git
synced 2024-12-22 07:01:46 +01:00
zip: backend to read or create zip files FIXME WIP
Use as `:zip:remote:path/to/file.zip` for reading or writing. - reading zip files works - can mount zip files - writing works - unknowns in writing like end? - lots of bodges
This commit is contained in:
parent
0cccda61db
commit
bc279ec4b0
@ -54,5 +54,6 @@ import (
|
||||
_ "github.com/rclone/rclone/backend/uptobox"
|
||||
_ "github.com/rclone/rclone/backend/webdav"
|
||||
_ "github.com/rclone/rclone/backend/yandex"
|
||||
_ "github.com/rclone/rclone/backend/zip"
|
||||
_ "github.com/rclone/rclone/backend/zoho"
|
||||
)
|
||||
|
572
backend/zip/zip.go
Normal file
572
backend/zip/zip.go
Normal file
@ -0,0 +1,572 @@
|
||||
// Package zip provides wrappers for Fs and Object which read and
|
||||
// write a zip file.
|
||||
//
|
||||
// Use like :zip:remote:path/to/file.zip
|
||||
package zip
|
||||
|
||||
/*
|
||||
FIXME could maybe make an append to zip file with specially named zip
|
||||
files having more info in?
|
||||
|
||||
So have base zip file file.zip then file.extra000.zip etc.
|
||||
|
||||
We read the objects sequentially but can transfer them in parallel.
|
||||
|
||||
FIXME what happens when we want to copy a file out of the zip?
|
||||
|
||||
So rclone copy remote:file.zip/file/inside.zip?
|
||||
Maybe rclone copy remote:file.zip#file/inside.zip?
|
||||
Or just look for first .zip file?
|
||||
|
||||
FIXME what happens when writing - need to know the end... Don't have a
|
||||
backend Shutdown call.
|
||||
|
||||
Could have a read only Feature flag so we turn that on when we have a
|
||||
zip. Then if placed in dest position could error. Might be useful for
|
||||
http also.
|
||||
|
||||
FIXME this will perform poorly for unpacking as the VFS Reader is bad
|
||||
at multiple streams.
|
||||
|
||||
FIXME not writing directories
|
||||
|
||||
cf zipinfo
|
||||
drwxr-xr-x 3.0 unx 0 bx stor 19-Oct-05 12:14 rclone-v1.49.5-linux-amd64/
|
||||
|
||||
FIXME can probably check directories better than trailing /
|
||||
|
||||
FIXME enormous atexit bodge
|
||||
|
||||
FIXME crc32 skip on write bodge
|
||||
|
||||
Bodge bodge bodge
|
||||
|
||||
*/
|
||||
|
||||
import (
|
||||
"archive/zip"
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/rclone/rclone/fs"
|
||||
"github.com/rclone/rclone/fs/cache"
|
||||
"github.com/rclone/rclone/fs/config/configmap"
|
||||
"github.com/rclone/rclone/fs/config/configstruct"
|
||||
"github.com/rclone/rclone/fs/dirtree"
|
||||
"github.com/rclone/rclone/fs/fspath"
|
||||
"github.com/rclone/rclone/fs/hash"
|
||||
"github.com/rclone/rclone/fs/log"
|
||||
"github.com/rclone/rclone/lib/atexit"
|
||||
"github.com/rclone/rclone/lib/readers"
|
||||
"github.com/rclone/rclone/vfs"
|
||||
)
|
||||
|
||||
// Globals
|
||||
// Register with Fs
|
||||
func init() {
|
||||
fs.Register(&fs.RegInfo{
|
||||
Name: "zip",
|
||||
Description: "Read or write a zip file",
|
||||
NewFs: NewFs,
|
||||
Options: []fs.Option{},
|
||||
})
|
||||
}
|
||||
|
||||
// NewFs constructs an Fs from the path, container:path
|
||||
func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (fs.Fs, error) {
|
||||
// Parse config into Options struct
|
||||
opt := new(Options)
|
||||
err := configstruct.Set(m, opt)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Parse the root which should be remote:path/to/file.zip
|
||||
parent, leaf, err := fspath.Split(root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse root %q: %w", root, err)
|
||||
}
|
||||
if leaf == "" {
|
||||
return nil, fmt.Errorf("failed to parse root %q: not pointing to a file", root)
|
||||
}
|
||||
root = strings.TrimRight(parent, "/")
|
||||
|
||||
// Create the remote from the cache
|
||||
wrappedFs, err := cache.Get(ctx, root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to make %q to wrap: %w", root, err)
|
||||
}
|
||||
// FIXME vfs cache?
|
||||
// FIXME could factor out ReadFileHandle and just use that rather than the full VFS
|
||||
VFS := vfs.New(wrappedFs, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create VFS for %q: %w", root, err)
|
||||
}
|
||||
node, err := VFS.Stat(leaf)
|
||||
if err != nil && !os.IsNotExist(err) {
|
||||
return nil, fmt.Errorf("failed to find %q object: %w", leaf, err)
|
||||
}
|
||||
if node != nil && node.IsDir() {
|
||||
return nil, fmt.Errorf("can't unzip a directory %q", leaf)
|
||||
}
|
||||
f := &Fs{
|
||||
f: wrappedFs,
|
||||
name: name,
|
||||
root: root,
|
||||
opt: *opt,
|
||||
vfs: VFS,
|
||||
node: node,
|
||||
leaf: leaf,
|
||||
}
|
||||
// FIXME Maybe delay this until first read size if writing we don't need it?
|
||||
if node != nil {
|
||||
err = f.readZip()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to open zip file: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME
|
||||
// the features here are ones we could support, and they are
|
||||
// ANDed with the ones from wrappedFs
|
||||
//
|
||||
// FIXME some of these need to be forced on - CanHaveEmptyDirectories
|
||||
f.features = (&fs.Features{
|
||||
CaseInsensitive: false,
|
||||
DuplicateFiles: false,
|
||||
ReadMimeType: false, // MimeTypes not supported with gzip
|
||||
WriteMimeType: false,
|
||||
BucketBased: false,
|
||||
CanHaveEmptyDirectories: true,
|
||||
}).Fill(ctx, f).Mask(ctx, wrappedFs).WrapsFs(f, wrappedFs)
|
||||
|
||||
return f, nil
|
||||
}
|
||||
|
||||
// Options defines the configuration for this backend
|
||||
type Options struct {
|
||||
}
|
||||
|
||||
// Fs represents a wrapped fs.Fs
|
||||
type Fs struct {
|
||||
f fs.Fs
|
||||
wrapper fs.Fs
|
||||
name string
|
||||
root string
|
||||
opt Options
|
||||
features *fs.Features // optional features
|
||||
vfs *vfs.VFS
|
||||
node vfs.Node // zip file object - set if reading
|
||||
leaf string // leaf name of the zip file object
|
||||
dt dirtree.DirTree // read from zipfile
|
||||
|
||||
wrmu sync.Mutex // writing mutex protects the below
|
||||
wh vfs.Handle // write handle
|
||||
zw *zip.Writer
|
||||
}
|
||||
|
||||
// Name of the remote (as passed into NewFs)
|
||||
func (f *Fs) Name() string {
|
||||
return f.name
|
||||
}
|
||||
|
||||
// Root of the remote (as passed into NewFs)
|
||||
func (f *Fs) Root() string {
|
||||
return f.root
|
||||
}
|
||||
|
||||
// Features returns the optional features of this Fs
|
||||
func (f *Fs) Features() *fs.Features {
|
||||
return f.features
|
||||
}
|
||||
|
||||
// String returns a description of the FS
|
||||
func (f *Fs) String() string {
|
||||
return fmt.Sprintf("Zip '%s:%s'", f.name, f.root)
|
||||
}
|
||||
|
||||
// readZip the zip file into f
|
||||
func (f *Fs) readZip() (err error) {
|
||||
if f.node == nil {
|
||||
return fs.ErrorDirNotFound
|
||||
}
|
||||
size := f.node.Size()
|
||||
if size < 0 {
|
||||
return errors.New("can't read from zip file with unknown size")
|
||||
}
|
||||
r, err := f.node.Open(os.O_RDONLY)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open zip file: %w", err)
|
||||
}
|
||||
zr, err := zip.NewReader(r, size)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read zip file: %w", err)
|
||||
}
|
||||
dt := dirtree.New()
|
||||
for _, file := range zr.File {
|
||||
remote := strings.Trim(path.Clean(file.Name), "/")
|
||||
if remote == "." {
|
||||
remote = ""
|
||||
}
|
||||
if strings.HasSuffix(file.Name, "/") {
|
||||
dir := fs.NewDir(remote, file.Modified)
|
||||
dt.AddDir(dir)
|
||||
} else {
|
||||
o := &Object{
|
||||
f: f,
|
||||
remote: remote,
|
||||
fh: &file.FileHeader,
|
||||
file: file,
|
||||
}
|
||||
dt.Add(o)
|
||||
}
|
||||
}
|
||||
dt.CheckParents("")
|
||||
dt.Sort()
|
||||
f.dt = dt
|
||||
fs.Debugf(nil, "dt = %#v", dt)
|
||||
return nil
|
||||
}
|
||||
|
||||
// List the objects and directories in dir into entries. The
|
||||
// entries can be returned in any order but should be for a
|
||||
// complete directory.
|
||||
//
|
||||
// dir should be "" to list the root, and should not have
|
||||
// trailing slashes.
|
||||
//
|
||||
// This should return ErrDirNotFound if the directory isn't
|
||||
// found.
|
||||
func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err error) {
|
||||
defer log.Trace(f, "dir=%q", dir)("entries=%v, err=%v", &entries, &err)
|
||||
entries, ok := f.dt[dir]
|
||||
if !ok {
|
||||
return nil, fs.ErrorDirNotFound
|
||||
}
|
||||
fs.Debugf(f, "dir=%q, entries=%v", dir, entries)
|
||||
return entries, nil
|
||||
}
|
||||
|
||||
// ListR lists the objects and directories of the Fs starting
|
||||
// from dir recursively into out.
|
||||
//
|
||||
// dir should be "" to start from the root, and should not
|
||||
// have trailing slashes.
|
||||
//
|
||||
// This should return ErrDirNotFound if the directory isn't
|
||||
// found.
|
||||
//
|
||||
// It should call callback for each tranche of entries read.
|
||||
// These need not be returned in any particular order. If
|
||||
// callback returns an error then the listing will stop
|
||||
// immediately.
|
||||
//
|
||||
// Don't implement this unless you have a more efficient way
|
||||
// of listing recursively that doing a directory traversal.
|
||||
func (f *Fs) ListR(ctx context.Context, dir string, callback fs.ListRCallback) (err error) {
|
||||
for _, entries := range f.dt {
|
||||
err = callback(entries)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// NewObject finds the Object at remote.
|
||||
func (f *Fs) NewObject(ctx context.Context, remote string) (o fs.Object, err error) {
|
||||
defer log.Trace(f, "remote=%q", remote)("obj=%v, err=%v", &o, &err)
|
||||
if f.dt == nil {
|
||||
return nil, fs.ErrorObjectNotFound
|
||||
}
|
||||
_, entry := f.dt.Find(remote)
|
||||
if entry == nil {
|
||||
return nil, fs.ErrorObjectNotFound
|
||||
}
|
||||
o, ok := entry.(*Object)
|
||||
if !ok {
|
||||
return nil, fs.ErrorNotAFile
|
||||
}
|
||||
return o, nil
|
||||
}
|
||||
|
||||
// Precision of the ModTimes in this Fs
|
||||
func (f *Fs) Precision() time.Duration {
|
||||
return time.Second
|
||||
}
|
||||
|
||||
// Mkdir makes the directory (container, bucket)
|
||||
//
|
||||
// Shouldn't return an error if it already exists
|
||||
func (f *Fs) Mkdir(ctx context.Context, dir string) error {
|
||||
// FIXME should this create a "directory" entry if not already created?
|
||||
return nil
|
||||
}
|
||||
|
||||
// Rmdir removes the directory (container, bucket) if empty
|
||||
//
|
||||
// Return an error if it doesn't exist or isn't empty
|
||||
func (f *Fs) Rmdir(ctx context.Context, dir string) error {
|
||||
return errors.New("can't remove directories from zip file")
|
||||
}
|
||||
|
||||
// Wrapper for src to make it into os.FileInfo
|
||||
type fileInfo struct {
|
||||
src fs.ObjectInfo
|
||||
}
|
||||
|
||||
// Name - base name of the file (actually full name for zip)
|
||||
func (fi fileInfo) Name() string {
|
||||
return fi.src.Remote()
|
||||
}
|
||||
|
||||
// Size - length in bytes for regular files; system-dependent for others
|
||||
func (fi fileInfo) Size() int64 {
|
||||
return fi.src.Size()
|
||||
}
|
||||
|
||||
// Mode - file mode bits
|
||||
func (fi fileInfo) Mode() os.FileMode {
|
||||
return 0777
|
||||
}
|
||||
|
||||
// ModTime - modification time
|
||||
func (fi fileInfo) ModTime() time.Time {
|
||||
return fi.src.ModTime(context.Background())
|
||||
}
|
||||
|
||||
// IsDir - abbreviation for Mode().IsDir()
|
||||
func (fi fileInfo) IsDir() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// Sys - underlying data source (can return nil)
|
||||
func (fi fileInfo) Sys() interface{} {
|
||||
return nil
|
||||
}
|
||||
|
||||
// check type
|
||||
var _ os.FileInfo = fileInfo{}
|
||||
|
||||
// Put in to the remote path with the modTime given of the given size
|
||||
//
|
||||
// May create the object even if it returns an error - if so
|
||||
// will return the object and the error, otherwise will return
|
||||
// nil and the error
|
||||
func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (o fs.Object, err error) {
|
||||
defer log.Trace(f, "src=%v", src)("obj=%v, err=%v", &o, &err)
|
||||
f.wrmu.Lock()
|
||||
defer f.wrmu.Unlock()
|
||||
|
||||
size := src.Size()
|
||||
if size < 0 {
|
||||
return nil, errors.New("can't zip unknown sized objects")
|
||||
}
|
||||
|
||||
if f.zw == nil {
|
||||
wh, err := f.vfs.OpenFile(f.leaf, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0777)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("zip put: failed to open output file: %w", err)
|
||||
}
|
||||
f.wh = wh
|
||||
f.zw = zip.NewWriter(f.wh)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("zip put: failed to create zip writer: %w", err)
|
||||
}
|
||||
|
||||
// FIXME enormous bodge to work around no lifecycle for backends
|
||||
atexit.Register(func() {
|
||||
err = f.zw.Close()
|
||||
if err != nil {
|
||||
fs.Errorf(f, "Error closing zip writer: %v", err)
|
||||
}
|
||||
err = f.wh.Close()
|
||||
if err != nil {
|
||||
fs.Errorf(f, "Error closing zip file: %v", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// FIXME not putting in directories?
|
||||
|
||||
fh, err := zip.FileInfoHeader(fileInfo{src})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("zip put: failed to create file header: %w", err)
|
||||
}
|
||||
w, err := f.zw.CreateHeader(fh)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("zip put: failed to open file: %w", err)
|
||||
}
|
||||
// FIXME just a guess!
|
||||
// should probably try a better heuristic?
|
||||
if size < 10 {
|
||||
fh.Method = zip.Store
|
||||
} else {
|
||||
fh.Method = zip.Deflate
|
||||
}
|
||||
_, err = io.CopyN(w, in, size)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("zip put: failed to copy file: %w", err)
|
||||
}
|
||||
|
||||
// err = w.Close()
|
||||
// if err != nil {
|
||||
// return nil,fmt.Errorf("zip put: failed to close file: %w", err)
|
||||
// }
|
||||
|
||||
o = &Object{
|
||||
f: f,
|
||||
fh: fh,
|
||||
remote: src.Remote(),
|
||||
}
|
||||
|
||||
return o, nil
|
||||
|
||||
}
|
||||
|
||||
// Hashes returns the supported hash sets.
|
||||
func (f *Fs) Hashes() hash.Set {
|
||||
return hash.Set(hash.CRC32)
|
||||
}
|
||||
|
||||
// UnWrap returns the Fs that this Fs is wrapping
|
||||
func (f *Fs) UnWrap() fs.Fs {
|
||||
return f.f
|
||||
}
|
||||
|
||||
// WrapFs returns the Fs that is wrapping this Fs
|
||||
func (f *Fs) WrapFs() fs.Fs {
|
||||
return f.wrapper
|
||||
}
|
||||
|
||||
// SetWrapper sets the Fs that is wrapping this Fs
|
||||
func (f *Fs) SetWrapper(wrapper fs.Fs) {
|
||||
f.wrapper = wrapper
|
||||
}
|
||||
|
||||
// Object describes an object to be read from the raw zip file
|
||||
type Object struct {
|
||||
f *Fs
|
||||
remote string
|
||||
fh *zip.FileHeader
|
||||
file *zip.File
|
||||
}
|
||||
|
||||
// Fs returns read only access to the Fs that this object is part of
|
||||
func (o *Object) Fs() fs.Info {
|
||||
return o.f
|
||||
}
|
||||
|
||||
// Return a string version
|
||||
func (o *Object) String() string {
|
||||
if o == nil {
|
||||
return "<nil>"
|
||||
}
|
||||
return o.Remote()
|
||||
}
|
||||
|
||||
// Remote returns the remote path
|
||||
func (o *Object) Remote() string {
|
||||
return o.remote
|
||||
}
|
||||
|
||||
// Size returns the size of the file
|
||||
func (o *Object) Size() int64 {
|
||||
return int64(o.fh.UncompressedSize64)
|
||||
}
|
||||
|
||||
// ModTime returns the modification time of the object
|
||||
//
|
||||
// It attempts to read the objects mtime and if that isn't present the
|
||||
// LastModified returned in the http headers
|
||||
func (o *Object) ModTime(ctx context.Context) time.Time {
|
||||
return o.fh.Modified
|
||||
}
|
||||
|
||||
// SetModTime sets the modification time of the local fs object
|
||||
func (o *Object) SetModTime(ctx context.Context, modTime time.Time) error {
|
||||
fs.Debugf(o, "Can't set mod time - ignoring")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Storable raturns a boolean indicating if this object is storable
|
||||
func (o *Object) Storable() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// Hash returns the selected checksum of the file
|
||||
// If no checksum is available it returns ""
|
||||
func (o *Object) Hash(ctx context.Context, ht hash.Type) (string, error) {
|
||||
if ht == hash.CRC32 {
|
||||
// FIXME return empty CRC if writing
|
||||
if o.f.dt == nil {
|
||||
return "", nil
|
||||
}
|
||||
return fmt.Sprintf("%08x", o.fh.CRC32), nil
|
||||
}
|
||||
return "", hash.ErrUnsupported
|
||||
}
|
||||
|
||||
// Open opens the file for read. Call Close() on the returned io.ReadCloser
|
||||
func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (rc io.ReadCloser, err error) {
|
||||
var offset, limit int64 = 0, -1
|
||||
for _, option := range options {
|
||||
switch x := option.(type) {
|
||||
case *fs.SeekOption:
|
||||
offset = x.Offset
|
||||
case *fs.RangeOption:
|
||||
offset, limit = x.Decode(o.Size())
|
||||
default:
|
||||
if option.Mandatory() {
|
||||
fs.Logf(o, "Unsupported mandatory option: %v", option)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rc, err = o.file.Open()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// discard data from start as necessary
|
||||
if offset > 0 {
|
||||
_, err = io.CopyN(ioutil.Discard, rc, offset)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
// If limited then don't return everything
|
||||
if limit >= 0 {
|
||||
return readers.NewLimitedReadCloser(rc, limit-offset), nil
|
||||
}
|
||||
|
||||
return rc, nil
|
||||
}
|
||||
|
||||
// Update in to the object with the modTime given of the given size
|
||||
func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error {
|
||||
return errors.New("can't Update zip files")
|
||||
}
|
||||
|
||||
// Remove an object
|
||||
func (o *Object) Remove(ctx context.Context) error {
|
||||
return errors.New("can't Remove zip file objects")
|
||||
}
|
||||
|
||||
// Check the interfaces are satisfied
|
||||
var (
|
||||
_ fs.Fs = (*Fs)(nil)
|
||||
_ fs.UnWrapper = (*Fs)(nil)
|
||||
_ fs.ListRer = (*Fs)(nil)
|
||||
// _ fs.Abouter = (*Fs)(nil) - FIXME can implemnet
|
||||
_ fs.Wrapper = (*Fs)(nil)
|
||||
_ fs.Object = (*Object)(nil)
|
||||
)
|
Loading…
Reference in New Issue
Block a user