From 7dec72a1bf9179394b4bd1a61d9ba64a850df1a1 Mon Sep 17 00:00:00 2001
From: Nick Craig-Wood
Date: Fri, 28 Feb 2025 17:01:29 +0000
Subject: [PATCH] vfs: add --vfs-metadata-extension to expose metadata sidecar files

This adds --vfs-metadata-extension which can be used to expose sidecar
files containing the file metadata. These files don't exist in the
listings until they are accessed.
---
 vfs/dir.go               | 77 +++++++++++++++++++++++++++++++++++++++++-
 vfs/dir_test.go          | 70 ++++++++++++++++++++++++++++++++++++++++
 vfs/vfs.go               | 12 +++++++
 vfs/vfs.md               | 40 +++++++++++++++++++++++
 vfs/vfs_test.go          | 14 ++++++++
 vfs/vfscommon/options.go |  6 ++++
 6 files changed, 218 insertions(+), 1 deletion(-)

diff --git a/vfs/dir.go b/vfs/dir.go
index d54e01e92..12d9591a0 100644
--- a/vfs/dir.go
+++ b/vfs/dir.go
@@ -2,6 +2,7 @@ package vfs
 
 import (
 	"context"
+	"encoding/json"
 	"fmt"
 	"os"
 	"path"
@@ -15,6 +16,7 @@ import (
 	"github.com/rclone/rclone/fs/dirtree"
 	"github.com/rclone/rclone/fs/list"
 	"github.com/rclone/rclone/fs/log"
+	"github.com/rclone/rclone/fs/object"
 	"github.com/rclone/rclone/fs/operations"
 	"github.com/rclone/rclone/fs/walk"
 	"github.com/rclone/rclone/vfs/vfscommon"
@@ -817,6 +819,62 @@ func (d *Dir) readDir() error {
 	return d._readDir()
 }
 
+// jsonErrorf formats the string according to a format specifier and
+// returns the resulting string as a JSON blob with key "error"
+func jsonErrorf(format string, a ...any) []byte {
+	errMsg := fmt.Sprintf(format, a...)
+	// Marshalling a map[string]string cannot fail so the error is ignored
+	jsonBlob, _ := json.MarshalIndent(map[string]string{"error": errMsg}, "", "\t")
+	return jsonBlob
+}
+
+// statMetadata stats a single metadata item in the directory
+//
+// leaf is the name of the metadata file and baseLeaf is the name of
+// the item it describes.
+//
+// It returns a memory based file node holding the metadata of
+// baseLeaf as a JSON blob, or the error from stat-ing baseLeaf.
+func (d *Dir) statMetadata(leaf, baseLeaf string) (metaNode Node, err error) {
+	// Find original file - note that this is recursing into stat()
+	node, err := d.stat(baseLeaf)
+	if err != nil {
+		return nil, err
+	}
+	// Read the metadata from the original entry into a JSON dump
+	entry := node.DirEntry()
+	var metadataDump []byte
+	if entry != nil {
+		metadata, err := fs.GetMetadata(context.TODO(), entry)
+		if err != nil {
+			metadataDump = jsonErrorf("failed to read metadata: %v", err)
+		} else if metadata == nil {
+			metadataDump = []byte("{}") // no metadata to read
+		} else {
+			metadataDump, err = json.MarshalIndent(metadata, "", "\t")
+			if err != nil {
+				metadataDump = jsonErrorf("failed to write metadata: %v", err)
+			}
+		}
+	} else {
+		metadataDump = []byte("{}") // no metadata yet when an object is being written
+	}
+	// The entry is nil while the object is being written so use the
+	// modification time of the node then to avoid a nil dereference
+	modTime := node.ModTime()
+	if entry != nil {
+		modTime = entry.ModTime(context.TODO())
+	}
+	// Make a memory based file with metadataDump in
+	remote := path.Join(d.path, leaf)
+	o := object.NewMemoryObject(remote, modTime, metadataDump)
+	f := newFile(d, d.path, o, leaf)
+	// Base the metadata inode number off the real file inode number
+	// to keep it constant
+	f.inode = node.Inode() ^ (1 << 63)
+	return f, nil
+}
+
 // stat a single item in the directory
 //
 // returns ENOENT if not found.
@@ -824,22 +882,38 @@ func (d *Dir) readDir() error {
 // contains files with names that differ only by case.
 func (d *Dir) stat(leaf string) (Node, error) {
 	d.mu.Lock()
-	defer d.mu.Unlock()
 	err := d._readDir()
 	if err != nil {
+		d.mu.Unlock()
 		return nil, err
 	}
 	item, ok := d.items[leaf]
+	d.mu.Unlock()
+
+	// Look for a metadata file
+	if !ok {
+		if baseLeaf, found := d.vfs.isMetadataFile(leaf); found {
+			node, err := d.statMetadata(leaf, baseLeaf)
+			if err != nil {
+				return nil, err
+			}
+			// Add metadata file to directory as virtual object
+			d.addObject(node)
+			return node, nil
+		}
+	}
 
 	ci := fs.GetConfig(context.TODO())
 	normUnicode := !ci.NoUnicodeNormalization
 	normCase := ci.IgnoreCaseSync || d.vfs.Opt.CaseInsensitive
 	if !ok && (normUnicode || normCase) {
 		leafNormalized := operations.ToNormal(leaf, normUnicode, normCase) // this handles both case and unicode normalization
+		d.mu.Lock()
 		for name, node := range d.items {
 			if operations.ToNormal(name, normUnicode, normCase) == leafNormalized {
 				if ok {
 					// duplicate normalized match is an error
+					d.mu.Unlock()
 					return nil, fmt.Errorf("duplicate filename %q detected with case/unicode normalization settings", leaf)
 				}
 				// found a normalized match
@@ -847,6 +921,7 @@ func (d *Dir) stat(leaf string) (Node, error) {
 				item = node
 			}
 		}
+		d.mu.Unlock()
 	}
 
 	if !ok {
diff --git a/vfs/dir_test.go b/vfs/dir_test.go
index 7bcbba0f6..8205b33da 100644
--- a/vfs/dir_test.go
+++ b/vfs/dir_test.go
@@ -2,6 +2,7 @@ package vfs
 
 import (
 	"context"
+	"encoding/json"
 	"fmt"
 	"os"
 	"runtime"
@@ -692,3 +693,72 @@ func TestDirEntryModTimeInvalidation(t *testing.T) {
 		t.Error("ModTime not invalidated")
 	}
 }
+
+func TestDirMetadataExtension(t *testing.T) {
+	r, vfs, dir, _ := dirCreate(t)
+	root, err := vfs.Root()
+	require.NoError(t, err)
+	features := r.Fremote.Features()
+
+	checkListing(t, dir, []string{"file1,14,false"})
+	checkListing(t, root, []string{"dir,0,true"})
+
+	node, err := vfs.Stat("dir/file1")
+	require.NoError(t, err)
+	require.True(t, node.IsFile())
+
+	node, err = vfs.Stat("dir")
+	require.NoError(t, err)
+	require.True(t, node.IsDir())
+
+	// Check metadata files do not exist
+	_, err = vfs.Stat("dir/file1.metadata")
+	require.ErrorIs(t, err, ENOENT)
+	_, err = vfs.Stat("dir.metadata")
+	require.ErrorIs(t, err, ENOENT)
+
+	// Configure metadata extension
+	vfs.Opt.MetadataExtension = ".metadata"
+
+	// Check metadata for file does exist
+	node, err = vfs.Stat("dir/file1.metadata")
+	require.NoError(t, err)
+	require.True(t, node.IsFile())
+	size := node.Size()
+	assert.Greater(t, size, int64(1))
+	modTime := node.ModTime()
+
+	// ...and is now in the listing
+	checkListing(t, dir, []string{"file1,14,false", fmt.Sprintf("file1.metadata,%d,false", size)})
+
+	// ...and is a JSON blob with correct "mtime" key
+	blob, err := vfs.ReadFile("dir/file1.metadata")
+	require.NoError(t, err)
+	var metadata map[string]string
+	err = json.Unmarshal(blob, &metadata)
+	require.NoError(t, err)
+	if features.ReadMetadata {
+		assert.Equal(t, modTime.Format(time.RFC3339Nano), metadata["mtime"])
+	}
+
+	// Check metadata for dir does exist
+	node, err = vfs.Stat("dir.metadata")
+	require.NoError(t, err)
+	require.True(t, node.IsFile())
+	size = node.Size()
+	assert.Greater(t, size, int64(1))
+	modTime = node.ModTime()
+
+	// ...and is now in the listing
+	checkListing(t, root, []string{"dir,0,true", fmt.Sprintf("dir.metadata,%d,false", size)})
+
+	// ...and is a JSON blob with correct "mtime" key
+	blob, err = vfs.ReadFile("dir.metadata")
+	require.NoError(t, err)
+	clear(metadata)
+	err = json.Unmarshal(blob, &metadata)
+	require.NoError(t, err)
+	if features.ReadDirMetadata {
+		assert.Equal(t, modTime.Format(time.RFC3339Nano), metadata["mtime"])
+	}
+}
diff --git a/vfs/vfs.go b/vfs/vfs.go
index 01124ecc7..ecde90b19 100644
--- a/vfs/vfs.go
+++ b/vfs/vfs.go
@@ -900,3 +900,15 @@ func (vfs *VFS) Symlink(oldname, newname string) error {
 	_, err := vfs.CreateSymlink(oldname, newname)
 	return err
 }
+
+// isMetadataFile reports whether name has the metadata extension
+//
+// It returns name with the extension removed, or name unchanged if not found
+func (vfs *VFS) isMetadataFile(name string) (rawName string, found bool) {
+	ext := vfs.Opt.MetadataExtension
+	if ext == "" {
+		return name, false
+	}
+	rawName, found = strings.CutSuffix(name, ext)
+	return rawName, found
+}
diff --git a/vfs/vfs.md b/vfs/vfs.md
index 3d0d5622c..0d742277d 100644
--- a/vfs/vfs.md
+++ b/vfs/vfs.md
@@ -423,3 +423,43 @@ and compute the total used space itself.
 _WARNING._ Contrary to `rclone size`, this flag ignores filters so that the
 result is accurate. However, this is very inefficient and may cost lots of API
 calls resulting in extra charges. Use it as a last resort and only with caching.
+
+### VFS Metadata
+
+If you use the `--vfs-metadata-extension` flag you can get the VFS to
+expose files which contain the [metadata](/docs/#metadata) as a JSON
+blob. These files will not appear in the directory listing, but can be
+`stat`-ed and opened, and once they have been they **will** appear in
+directory listings until the directory cache expires.
+
+Note that some backends won't create metadata unless you pass in the
+`--metadata` flag.
+
+For example, using `rclone mount` with `--metadata --vfs-metadata-extension .metadata`
+we get
+
+```
+$ ls -l /mnt/
+total 1048577
+-rw-rw-r-- 1 user user 1073741824 Mar  3 16:03 1G
+
+$ cat /mnt/1G.metadata
+{
+	"atime": "2025-03-04T17:34:22.317069787Z",
+	"btime": "2025-03-03T16:03:37.708253808Z",
+	"gid": "1000",
+	"mode": "100664",
+	"mtime": "2025-03-03T16:03:39.640238323Z",
+	"uid": "1000"
+}
+
+$ ls -l /mnt/
+total 1048578
+-rw-rw-r-- 1 user user 1073741824 Mar  3 16:03 1G
+-rw-rw-r-- 1 user user        185 Mar  3 16:03 1G.metadata
+```
+
+If the file has no metadata it will be returned as `{}` and if there
+is an error reading the metadata the error will be returned as
+`{"error":"error string"}`.
+
diff --git a/vfs/vfs_test.go b/vfs/vfs_test.go
index 2bd35ee1d..f175e0efc 100644
--- a/vfs/vfs_test.go
+++ b/vfs/vfs_test.go
@@ -487,3 +487,17 @@ func TestFillInMissingSizes(t *testing.T) {
 		})
 	}
 }
+
+func TestVFSIsMetadataFile(t *testing.T) {
+	_, vfs := newTestVFS(t)
+
+	rawName, found := vfs.isMetadataFile("leaf.metadata")
+	assert.Equal(t, "leaf.metadata", rawName)
+	assert.Equal(t, false, found)
+
+	vfs.Opt.MetadataExtension = ".metadata"
+
+	rawName, found = vfs.isMetadataFile("leaf.metadata")
+	assert.Equal(t, "leaf", rawName)
+	assert.Equal(t, true, found)
+}
diff --git a/vfs/vfscommon/options.go b/vfs/vfscommon/options.go
index 650aedd97..7f222edac 100644
--- a/vfs/vfscommon/options.go
+++ b/vfs/vfscommon/options.go
@@ -165,6 +165,11 @@ var OptionsInfo = fs.Options{{
 	Default: getGID(),
 	Help:    "Override the gid field set by the filesystem (not supported on Windows)",
 	Groups:  "VFS",
+}, {
+	Name:    "vfs_metadata_extension",
+	Default: "",
+	Help:    "Set the extension to read metadata from.",
+	Groups:  "VFS",
 }}
 
 func init() {
@@ -204,6 +209,7 @@ type Options struct {
 	UsedIsSize         bool          `config:"vfs_used_is_size"`          // if true, use the `rclone size` algorithm for Used size
 	FastFingerprint    bool          `config:"vfs_fast_fingerprint"`      // if set use fast fingerprints
 	DiskSpaceTotalSize fs.SizeSuffix `config:"vfs_disk_space_total_size"`
+	MetadataExtension  string        `config:"vfs_metadata_extension"`    // if set respond to files with this extension with metadata
 }
 
 // Opt is the default options modified by the environment variables and command line flags