From b8591b230dbabc24e8089776ec953fcc14f464d3 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Tue, 19 Sep 2023 12:09:51 +0100 Subject: [PATCH] onedrive: implement ListR method which gives --fast-list support This implements ListR for onedrive. The API only allows doing this at the root so using it anywhere other than the root is inefficient. Fixes #7317 --- backend/onedrive/api/types.go | 10 ++ backend/onedrive/onedrive.go | 235 ++++++++++++++++++++++++++-------- docs/content/onedrive.md | 23 ++++ docs/content/overview.md | 2 +- 4 files changed, 218 insertions(+), 52 deletions(-) diff --git a/backend/onedrive/api/types.go b/backend/onedrive/api/types.go index 5649c9ff7..c8e7a5fd1 100644 --- a/backend/onedrive/api/types.go +++ b/backend/onedrive/api/types.go @@ -99,6 +99,16 @@ type ItemReference struct { DriveType string `json:"driveType"` // Type of the drive, Read-Only } +// GetID returns a normalized ID of the item +// If DriveID is known it will be prefixed to the ID with # separator +// Can be parsed using onedrive.parseNormalizedID(normalizedID) +func (i *ItemReference) GetID() string { + if !strings.Contains(i.ID, "#") { + return i.DriveID + "#" + i.ID + } + return i.ID +} + // RemoteItemFacet groups data needed to reference a OneDrive remote item type RemoteItemFacet struct { ID string `json:"id"` // The unique identifier of the item within the remote Drive. Read-only. 
diff --git a/backend/onedrive/onedrive.go b/backend/onedrive/onedrive.go index 101295371..7a90e28ad 100644 --- a/backend/onedrive/onedrive.go +++ b/backend/onedrive/onedrive.go @@ -1095,32 +1095,29 @@ func (f *Fs) CreateDir(ctx context.Context, dirID, leaf string) (newID string, e // If directories is set it only sends directories // User function to process a File item from listAll // -// Should return true to finish processing -type listAllFn func(*api.Item) bool +// If an error is returned then processing stops +type listAllFn func(*api.Item) error // Lists the directory required calling the user function on each item found // // If the user fn ever returns true then it early exits with found = true -func (f *Fs) listAll(ctx context.Context, dirID string, directoriesOnly bool, filesOnly bool, fn listAllFn) (found bool, err error) { - // Top parameter asks for bigger pages of data - // https://dev.onedrive.com/odata/optional-query-parameters.htm - opts := f.newOptsCall(dirID, "GET", fmt.Sprintf("/children?$top=%d", f.opt.ListChunk)) -OUTER: +// +// This listing function works on both normal listings and delta listings +func (f *Fs) _listAll(ctx context.Context, dirID string, directoriesOnly bool, filesOnly bool, fn listAllFn, opts *rest.Opts, result any, pValue *[]api.Item, pNextLink *string) (err error) { for { - var result api.ListChildrenResponse var resp *http.Response err = f.pacer.Call(func() (bool, error) { - resp, err = f.srv.CallJSON(ctx, &opts, nil, &result) + resp, err = f.srv.CallJSON(ctx, opts, nil, result) return shouldRetry(ctx, resp, err) }) if err != nil { - return found, fmt.Errorf("couldn't list files: %w", err) + return fmt.Errorf("couldn't list files: %w", err) } - if len(result.Value) == 0 { + if len(*pValue) == 0 { break } - for i := range result.Value { - item := &result.Value[i] + for i := range *pValue { + item := &(*pValue)[i] isFolder := item.GetFolder() != nil if isFolder { if filesOnly { @@ -1135,18 +1132,60 @@ OUTER: continue } 
item.Name = f.opt.Enc.ToStandardName(item.GetName()) - if fn(item) { - found = true - break OUTER + err = fn(item) + if err != nil { + return err } } - if result.NextLink == "" { + if *pNextLink == "" { break } opts.Path = "" - opts.RootURL = result.NextLink + opts.Parameters = nil + opts.RootURL = *pNextLink + // reset results + *pNextLink = "" + *pValue = nil } - return + return nil +} + +// Lists the directory required calling the user function on each item found +// +// If the user fn ever returns true then it early exits with found = true +func (f *Fs) listAll(ctx context.Context, dirID string, directoriesOnly bool, filesOnly bool, fn listAllFn) (err error) { + // Top parameter asks for bigger pages of data + // https://dev.onedrive.com/odata/optional-query-parameters.htm + opts := f.newOptsCall(dirID, "GET", fmt.Sprintf("/children?$top=%d", f.opt.ListChunk)) + var result api.ListChildrenResponse + return f._listAll(ctx, dirID, directoriesOnly, filesOnly, fn, &opts, &result, &result.Value, &result.NextLink) +} + +// Convert a list item into a DirEntry +// +// Can return nil for an item which should be skipped +func (f *Fs) itemToDirEntry(ctx context.Context, dir string, info *api.Item) (entry fs.DirEntry, err error) { + if !f.opt.ExposeOneNoteFiles && info.GetPackageType() == api.PackageTypeOneNote { + fs.Debugf(info.Name, "OneNote file not shown in directory listing") + return nil, nil + } + remote := path.Join(dir, info.GetName()) + folder := info.GetFolder() + if folder != nil { + // cache the directory ID for later lookups + id := info.GetID() + f.dirCache.Put(remote, id) + d := fs.NewDir(remote, time.Time(info.GetLastModifiedDateTime())).SetID(id) + d.SetItems(folder.ChildCount) + entry = d + } else { + o, err := f.newObjectWithInfo(ctx, remote, info) + if err != nil { + return nil, err + } + entry = o + } + return entry, nil } // List the objects and directories in dir into entries. 
The @@ -1163,41 +1202,137 @@ func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err e if err != nil { return nil, err } - var iErr error - _, err = f.listAll(ctx, directoryID, false, false, func(info *api.Item) bool { - if !f.opt.ExposeOneNoteFiles && info.GetPackageType() == api.PackageTypeOneNote { - fs.Debugf(info.Name, "OneNote file not shown in directory listing") - return false + err = f.listAll(ctx, directoryID, false, false, func(info *api.Item) error { + entry, err := f.itemToDirEntry(ctx, dir, info) + if err == nil { + entries = append(entries, entry) } - - remote := path.Join(dir, info.GetName()) - folder := info.GetFolder() - if folder != nil { - // cache the directory ID for later lookups - id := info.GetID() - f.dirCache.Put(remote, id) - d := fs.NewDir(remote, time.Time(info.GetLastModifiedDateTime())).SetID(id) - d.SetItems(folder.ChildCount) - entries = append(entries, d) - } else { - o, err := f.newObjectWithInfo(ctx, remote, info) - if err != nil { - iErr = err - return true - } - entries = append(entries, o) - } - return false + return err }) if err != nil { return nil, err } - if iErr != nil { - return nil, iErr - } return entries, nil } +// ListR lists the objects and directories of the Fs starting +// from dir recursively into out. +// +// dir should be "" to start from the root, and should not +// have trailing slashes. +// +// This should return ErrDirNotFound if the directory isn't +// found. +// +// It should call callback for each tranche of entries read. +// These need not be returned in any particular order. If +// callback returns an error then the listing will stop +// immediately. +// +// Don't implement this unless you have a more efficient way +// of listing recursively than doing a directory traversal. 
+func (f *Fs) ListR(ctx context.Context, dir string, callback fs.ListRCallback) (err error) { + // Make sure this ID is in the directory cache + directoryID, err := f.dirCache.FindDir(ctx, dir, false) + if err != nil { + return err + } + + // ListR only works at the root of a onedrive, not on a folder + // So we have to filter things outside of the root which is + // inefficient. + + list := walk.NewListRHelper(callback) + + // list a folder conventionally - used for shared folders + var listFolder func(dir string) error + listFolder = func(dir string) error { + entries, err := f.List(ctx, dir) + if err != nil { + return err + } + for _, entry := range entries { + err = list.Add(entry) + if err != nil { + return err + } + if _, isDir := entry.(fs.Directory); isDir { + err = listFolder(entry.Remote()) + if err != nil { + return err + } + } + } + return nil + } + + // This code relies on the fact that directories are sent before their children. This isn't + // mentioned in the docs though, so maybe it shouldn't be relied on. + seen := map[string]struct{}{} + fn := func(info *api.Item) error { + var parentPath string + var ok bool + id := info.GetID() + // The API can produce duplicates, so skip them + if _, found := seen[id]; found { + return nil + } + seen[id] = struct{}{} + // Skip the root directory + if id == directoryID { + return nil + } + // Skip deleted items + if info.Deleted != nil { + return nil + } + dirID := info.GetParentReference().GetID() + // Skip files that don't have their parent directory + // cached as they are outside the root. 
+ parentPath, ok = f.dirCache.GetInv(dirID) + if !ok { + return nil + } + // Skip files not under the root directory + remote := path.Join(parentPath, info.GetName()) + if dir != "" && !strings.HasPrefix(remote, dir+"/") { + return nil + } + entry, err := f.itemToDirEntry(ctx, parentPath, info) + if err != nil { + return err + } + err = list.Add(entry) + if err != nil { + return err + } + // If this is a shared folder, we'll need list it too + if info.RemoteItem != nil && info.RemoteItem.Folder != nil { + fs.Debugf(remote, "Listing shared directory") + return listFolder(remote) + } + return nil + } + + opts := rest.Opts{ + Method: "GET", + Path: "/root/delta", + Parameters: map[string][]string{ + // "token": {token}, + "$top": {fmt.Sprintf("%d", f.opt.ListChunk)}, + }, + } + + var result api.DeltaResponse + err = f._listAll(ctx, "", false, false, fn, &opts, &result, &result.Value, &result.NextLink) + if err != nil { + return err + } + + return list.Flush() + +} + // Creates from the parameters passed in a half finished Object which // must have setMetaData called on it // @@ -1266,15 +1401,12 @@ func (f *Fs) purgeCheck(ctx context.Context, dir string, check bool) error { } if check { // check to see if there are any items - found, err := f.listAll(ctx, rootID, false, false, func(item *api.Item) bool { - return true + err := f.listAll(ctx, rootID, false, false, func(item *api.Item) error { + return fs.ErrorDirectoryNotEmpty }) if err != nil { return err } - if found { - return fs.ErrorDirectoryNotEmpty - } } err = f.deleteObject(ctx, rootID) if err != nil { @@ -2578,6 +2710,7 @@ var ( _ fs.Abouter = (*Fs)(nil) _ fs.PublicLinker = (*Fs)(nil) _ fs.CleanUpper = (*Fs)(nil) + _ fs.ListRer = (*Fs)(nil) _ fs.Object = (*Object)(nil) _ fs.MimeTyper = &Object{} _ fs.IDer = &Object{} diff --git a/docs/content/onedrive.md b/docs/content/onedrive.md index aff4cd426..cf7ca5175 100644 --- a/docs/content/onedrive.md +++ b/docs/content/onedrive.md @@ -183,6 +183,29 @@ your workflow. 
For all types of OneDrive you can use the `--checksum` flag. +### --fast-list + +This remote supports `--fast-list` which allows you to use fewer +transactions in exchange for more memory. See the [rclone +docs](/docs/#fast-list) for more details. + +It does this by using the delta listing facilities of OneDrive which +return all the files in the remote very efficiently. This is much +more efficient than listing directories recursively and is Microsoft's +recommended way of reading all the file information from a drive. + +This can be useful with `rclone mount` and [rclone rc vfs/refresh +recursive=true](/rc/#vfs-refresh) to very quickly fill the mount with +information about all the files. + +The API used for the recursive listing (`ListR`) only supports listing +from the root of the drive. This will become increasingly inefficient +the further away you get from the root as rclone will have to discard +files outside of the directory you are using. + +Some commands (like `rclone lsf -R`) will use `ListR` by default - you +can turn this off with `--disable ListR` if you need to. + ### Restricted filename characters In addition to the [default restricted characters set](/overview/#restricted-characters) diff --git a/docs/content/overview.md b/docs/content/overview.md index 37e10c589..a17c74d3b 100644 --- a/docs/content/overview.md +++ b/docs/content/overview.md @@ -492,7 +492,7 @@ upon backend-specific capabilities. 
| Mega | Yes | No | Yes | Yes | Yes | No | No | No | Yes | Yes | Yes | | Memory | No | Yes | No | No | No | Yes | Yes | No | No | No | No | | Microsoft Azure Blob Storage | Yes | Yes | No | No | No | Yes | Yes | Yes | No | No | No | -| Microsoft OneDrive | Yes | Yes | Yes | Yes | Yes | No | No | No | Yes | Yes | Yes | +| Microsoft OneDrive | Yes | Yes | Yes | Yes | Yes | Yes | No | No | Yes | Yes | Yes | | OpenDrive | Yes | Yes | Yes | Yes | No | No | No | No | No | No | Yes | | OpenStack Swift | Yes † | Yes | No | No | No | Yes | Yes | No | No | Yes | No | | Oracle Object Storage | No | Yes | No | No | Yes | Yes | Yes | Yes | No | No | No |