rclone/cmd/bisync/march.go
nielash 9cf783677e bisync: support files with unknown length, including Google Docs - fixes #5696
Before this change, bisync intentionally ignored Google Docs (albeit in a
buggy way that caused problems during --resync.) After this change, Google Docs
(including Google Sheets, Slides, etc.) are now supported in bisync, subject to
the same options, defaults, and limitations as in `rclone sync`. When bisyncing
drive with non-drive backends, the drive -> non-drive direction is controlled
by `--drive-export-formats` (default `"docx,xlsx,pptx,svg"`) and the non-drive
-> drive direction is controlled by `--drive-import-formats` (default none.)

For example, with the default export/import formats, a Google Sheet on the
drive side will be synced to an `.xlsx` file on the non-drive side. In the
reverse direction, `.xlsx` files with filenames that match an existing Google
Sheet will be synced to that Google Sheet, while `.xlsx` files that do NOT
match an existing Google Sheet will be copied to drive as normal `.xlsx` files
(without conversion to Sheets, although the Google Drive web browser UI may
still give you the option to open it as one.)

If `--drive-import-formats` is set (it's not, by default), then all of the
specified formats will be converted to Google Docs, if there is no existing
Google Doc with a matching name. Caution: such conversion can be quite lossy,
and in most cases it's probably not what you want!

To bisync Google Docs as URL shortcut links (in a manner similar to "Drive for
Desktop"), use: `--drive-export-formats url` (or alternatives.)

Note that these link files cannot be edited on the non-drive side -- you will
get errors if you try to sync an edited link file back to drive. They CAN be
deleted (it will result in deleting the corresponding Google Doc.) If you
create a `.url` file on the non-drive side that does not match an existing
Google Doc, bisyncing it will just result in copying the literal `.url` file
over to drive (no Google Doc will be created.) So, as a general rule of thumb,
think of them as read-only placeholders on the non-drive side, and make all
your changes on the drive side.

Likewise, even with other export-formats, it is best to only move/rename Google
Docs on the drive side. This is because otherwise, bisync will interpret this
as a file deleted and another created, and accordingly, it will delete the
Google Doc and create a new file at the new path. (Whether or not that new file
is a Google Doc depends on `--drive-import-formats`.)

Lastly, take note that all Google Docs on the drive side have a size of `-1`
and no checksum. Therefore, they cannot be reliably synced with the
`--checksum` or `--size-only` flags. (To be exact: they will still get
created/deleted, and bisync's delta engine will notice changes and queue them
for syncing, but the underlying sync function will consider them identical and
skip them.) To work around this, use the default (modtime and size) instead of
`--checksum` or `--size-only`.

To ignore Google Docs entirely, use `--drive-skip-gdocs`.

Nearly all of the Google Docs logic is outsourced to the Drive backend, so
future changes should also be supported by bisync.
2024-01-20 14:50:08 -05:00

225 lines
5.2 KiB
Go

package bisync
import (
"context"
"sync"
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/accounting"
"github.com/rclone/rclone/fs/filter"
"github.com/rclone/rclone/fs/hash"
"github.com/rclone/rclone/fs/march"
)
var ls1 = newFileList()
var ls2 = newFileList()
var err error
var firstErr error
var marchAliasLock sync.Mutex
var marchLsLock sync.Mutex
var marchErrLock sync.Mutex
var marchCtx context.Context
func (b *bisyncRun) makeMarchListing(ctx context.Context) (*fileList, *fileList, error) {
ci := fs.GetConfig(ctx)
marchCtx = ctx
b.setupListing()
fs.Debugf(b, "starting to march!")
// set up a march over fdst (Path2) and fsrc (Path1)
m := &march.March{
Ctx: ctx,
Fdst: b.fs2,
Fsrc: b.fs1,
Dir: "",
NoTraverse: false,
Callback: b,
DstIncludeAll: false,
NoCheckDest: false,
NoUnicodeNormalization: ci.NoUnicodeNormalization,
}
err = m.Run(ctx)
fs.Debugf(b, "march completed. err: %v", err)
if err == nil {
err = firstErr
}
if err != nil {
b.abort = true
}
// save files
err = ls1.save(ctx, b.newListing1)
if err != nil {
b.abort = true
}
err = ls2.save(ctx, b.newListing2)
if err != nil {
b.abort = true
}
return ls1, ls2, err
}
// SrcOnly have an object which is on path1 only
func (b *bisyncRun) SrcOnly(o fs.DirEntry) (recurse bool) {
fs.Debugf(o, "path1 only")
b.parse(o, true)
return isDir(o)
}
// DstOnly have an object which is on path2 only
func (b *bisyncRun) DstOnly(o fs.DirEntry) (recurse bool) {
fs.Debugf(o, "path2 only")
b.parse(o, false)
return isDir(o)
}
// Match is called when object exists on both path1 and path2 (whether equal or not)
func (b *bisyncRun) Match(ctx context.Context, o2, o1 fs.DirEntry) (recurse bool) {
fs.Debugf(o1, "both path1 and path2")
marchAliasLock.Lock()
b.aliases.Add(o1.Remote(), o2.Remote())
marchAliasLock.Unlock()
b.parse(o1, true)
b.parse(o2, false)
return isDir(o1)
}
func isDir(e fs.DirEntry) bool {
switch x := e.(type) {
case fs.Object:
fs.Debugf(x, "is Object")
return false
case fs.Directory:
fs.Debugf(x, "is Dir")
return true
default:
fs.Debugf(e, "is unknown")
}
return false
}
func (b *bisyncRun) parse(e fs.DirEntry, isPath1 bool) {
switch x := e.(type) {
case fs.Object:
b.ForObject(x, isPath1)
case fs.Directory:
if b.opt.CreateEmptySrcDirs {
b.ForDir(x, isPath1)
}
default:
fs.Debugf(e, "is unknown")
}
}
func (b *bisyncRun) setupListing() {
ls1 = newFileList()
ls2 = newFileList()
hashType1 := hash.None
hashType2 := hash.None
if !b.opt.IgnoreListingChecksum {
// Currently bisync just honors --ignore-listing-checksum
// (note that this is different from --ignore-checksum)
// TODO add full support for checksums and related flags
hashType1 = b.fs1.Hashes().GetOne()
hashType2 = b.fs2.Hashes().GetOne()
}
ls1.hash = hashType1
ls2.hash = hashType2
}
func (b *bisyncRun) ForObject(o fs.Object, isPath1 bool) {
tr := accounting.Stats(marchCtx).NewCheckingTransfer(o, "listing file - "+whichPath(isPath1))
defer func() {
tr.Done(marchCtx, nil)
}()
var (
hashVal string
hashErr error
)
ls := whichLs(isPath1)
hashType := ls.hash
if hashType != hash.None {
hashVal, hashErr = o.Hash(marchCtx, hashType)
marchErrLock.Lock()
if firstErr == nil {
firstErr = hashErr
}
marchErrLock.Unlock()
}
time := o.ModTime(marchCtx).In(TZ)
id := "" // TODO
flags := "-" // "-" for a file and "d" for a directory
marchLsLock.Lock()
ls.put(o.Remote(), o.Size(), time, hashVal, id, flags)
marchLsLock.Unlock()
}
func (b *bisyncRun) ForDir(o fs.Directory, isPath1 bool) {
tr := accounting.Stats(marchCtx).NewCheckingTransfer(o, "listing dir - "+whichPath(isPath1))
defer func() {
tr.Done(marchCtx, nil)
}()
ls := whichLs(isPath1)
time := o.ModTime(marchCtx).In(TZ)
id := "" // TODO
flags := "d" // "-" for a file and "d" for a directory
marchLsLock.Lock()
ls.put(o.Remote(), -1, time, "", id, flags)
marchLsLock.Unlock()
}
func whichLs(isPath1 bool) *fileList {
ls := ls1
if !isPath1 {
ls = ls2
}
return ls
}
func whichPath(isPath1 bool) string {
s := "Path1"
if !isPath1 {
s = "Path2"
}
return s
}
func (b *bisyncRun) findCheckFiles(ctx context.Context) (*fileList, *fileList, error) {
ctxCheckFile, filterCheckFile := filter.AddConfig(ctx)
b.handleErr(b.opt.CheckFilename, "error adding CheckFilename to filter", filterCheckFile.Add(true, b.opt.CheckFilename), true, true)
b.handleErr(b.opt.CheckFilename, "error adding ** exclusion to filter", filterCheckFile.Add(false, "**"), true, true)
ci := fs.GetConfig(ctxCheckFile)
marchCtx = ctxCheckFile
b.setupListing()
fs.Debugf(b, "starting to march!")
// set up a march over fdst (Path2) and fsrc (Path1)
m := &march.March{
Ctx: ctxCheckFile,
Fdst: b.fs2,
Fsrc: b.fs1,
Dir: "",
NoTraverse: false,
Callback: b,
DstIncludeAll: false,
NoCheckDest: false,
NoUnicodeNormalization: ci.NoUnicodeNormalization,
}
err = m.Run(ctxCheckFile)
fs.Debugf(b, "march completed. err: %v", err)
if err == nil {
err = firstErr
}
if err != nil {
b.abort = true
}
return ls1, ls2, err
}