From 5173ca0454556cd75c9ccc26a37c6be9d7949999 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Tue, 3 Jun 2025 09:47:03 +0100 Subject: [PATCH] march: fix syncing with a duplicate file and directory As part of the out-of-memory syncing code, in commit 0148bd46688c1d1a ("march: Implement callback based syncing") we changed the syncing method to use a sorted stream of directory entries. Unfortunately, as part of this change the sort order of files and directories became undefined. This meant that if there existed both a file `foo` and a directory `foo` in the same directory (as is common on object storage systems) then these could be matched up incorrectly. They could be matched up correctly like this: - `foo` (directory) - `foo` (directory) - `foo` (file) - `foo` (file) Or incorrectly like this (one of many possibilities): - no match - `foo` (file) - `foo` (directory) - `foo` (directory) - `foo` (file) - no match Which outcome occurred depended only on how the input listings were ordered. This in turn made container-based syncing with a duplicated file and directory name erratic, deleting files when it shouldn't. This patch ensures that directories always sync before files by adding a suffix to the sort key depending on whether the entry was a file or directory. --- fs/march/march.go | 7 +++++++ fs/march/march_test.go | 11 +++++++++++ 2 files changed, 18 insertions(+) diff --git a/fs/march/march.go b/fs/march/march.go index a879871af..578aea512 100644 --- a/fs/march/march.go +++ b/fs/march/march.go @@ -89,6 +89,13 @@ func (m *March) key(entry fs.DirEntry) string { for _, transform := range m.transforms { name = transform(name) } + // Suffix entries to make identically named files and + // directories sort consistently with directories first. 
+ if _, isDirectory := entry.(fs.Directory); isDirectory { + name += "D" + } else { + name += "F" + } return name } diff --git a/fs/march/march_test.go b/fs/march/march_test.go index 2d1d03e74..ad71e6dd3 100644 --- a/fs/march/march_test.go +++ b/fs/march/march_test.go @@ -477,6 +477,17 @@ func TestMatchListings(t *testing.T) { {dirA, dirA}, }, }, + { + what: "Sync with duplicate files and dirs", + input: fs.DirEntries{ + dirA, A, + A, dirA, + }, + matches: []matchPair{ + {dirA, dirA}, + {A, A}, + }, + }, } { t.Run(fmt.Sprintf("TestMatchListings-%s", test.what), func(t *testing.T) { ctx := context.Background()