rclone/vfs/vfs_case_test.go
nielash fd8faeb0e6 vfs: fix unicode normalization on macOS - fixes #7072
Before this change, the VFS layer did not properly handle unicode normalization,
which caused problems particularly for users of macOS. While attempts were made
to handle it with various `-o modules=iconv` combinations, this was an imperfect
solution, as no one combination allowed both NFC and NFD content to
simultaneously be both visible and editable via Finder.

After this change, the VFS supports `--no-unicode-normalization` (default `false`)
via the existing `--vfs-case-insensitive` logic, which is extended to apply to both
case insensitivity and unicode normalization form.

This change also adds a new flag, `--vfs-block-norm-dupes`, to address a
rare but possible scenario where a directory contains
multiple duplicate filenames after applying case and unicode normalization
settings. In such a scenario, this flag (disabled by default) hides the
duplicates. This comes with a performance tradeoff, as rclone will have to scan
the entire directory for duplicates when listing a directory. For this reason,
it is recommended to leave this disabled if not needed. However, macOS users may
wish to consider using it, as otherwise, if a remote directory contains both NFC
and NFD versions of the same filename, an odd situation will occur: both
versions of the file will be visible in the mount, and both will appear to be
editable; however, editing either version will actually result in only the NFD
version getting edited under the hood. `--vfs-block-norm-dupes` prevents this
confusion by detecting this scenario, hiding the duplicates, and logging an
error, similar to how this is handled in `rclone sync`.
2024-03-06 16:12:13 +00:00

198 lines
5.5 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package vfs
import (
"context"
"os"
"testing"
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fstest"
"github.com/rclone/rclone/vfs/vfscommon"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"golang.org/x/text/unicode/norm"
)
// TestCaseSensitivity compares a case-sensitive and a case-insensitive VFS
// built over the same remote. It is skipped when the remote declares itself
// case-insensitive, or when names differing only by case cannot all be read
// back with their own contents through the case-sensitive VFS.
func TestCaseSensitivity(t *testing.T) {
	r := fstest.NewRun(t)
	if r.Fremote.Features().CaseInsensitive {
		t.Skip("Can't test case sensitivity - this remote is officially not case-sensitive")
	}

	ctx := context.Background()

	// Two files with unrelated names.
	file1 := r.WriteObject(ctx, "FiLeA", "data1", t1)
	file2 := r.WriteObject(ctx, "FiLeB", "data2", t2)
	r.CheckRemoteItems(t, file1, file2)

	// A third file whose name differs from file2 only by case.
	// On a case-Sensitive remote this will be a separate file.
	// On a case-INsensitive remote this file will either not exist
	// or overwrite file2 depending on how file system diverges.
	// On a box.com remote this step will even fail.
	file3 := r.WriteObject(ctx, "FilEb", "data3", t3)

	// One VFS per mode, each working on its own copy of the default options.
	csOpt := vfscommon.DefaultOpt
	csOpt.CaseInsensitive = false
	vfsCS := New(r.Fremote, &csOpt)
	defer cleanupVFS(t, vfsCS)

	ciOpt := vfscommon.DefaultOpt
	ciOpt.CaseInsensitive = true
	vfsCI := New(r.Fremote, &ciOpt)
	defer cleanupVFS(t, vfsCI)

	// An exact-name lookup must pass on a VFS of any type.
	assertFileDataVFS(t, vfsCI, "FiLeA", "data1")
	assertFileDataVFS(t, vfsCS, "FiLeA", "data1")

	// Detect case sensitivity of the underlying remote. Every probe is run;
	// a single failure is enough to mark the remote as non-compliant.
	stored := []struct{ name, data string }{
		{"FiLeA", "data1"},
		{"FiLeB", "data2"},
		{"FilEb", "data3"},
	}
	remoteIsOK := true
	for _, f := range stored {
		if !checkFileDataVFS(t, vfsCS, f.name, f.data) {
			remoteIsOK = false
		}
	}

	// The remaining test is only meaningful on a case-Sensitive file system.
	if !remoteIsOK {
		t.Skip("Can't test case sensitivity - this remote doesn't comply as case-sensitive")
	}

	// From here on the underlying remote is known to be fully case-Sensitive.
	r.CheckRemoteItems(t, file1, file2, file3)

	// Case-INsensitive VFS: every spelling of FiLeA reaches the same data.
	for _, alias := range []string{"FiLeA", "fileA", "filea", "FILEA"} {
		assertFileDataVFS(t, vfsCI, alias, "data1")
	}
	// Exact names still select their own file.
	assertFileDataVFS(t, vfsCI, "FiLeB", "data2")
	assertFileDataVFS(t, vfsCI, "FilEb", "data3")
	// These names differ from both FiLeB and FilEb only by case; the open
	// must fail, and with an error other than ENOENT.
	for _, ambiguous := range []string{"fileb", "FILEB"} {
		fd, err := vfsCI.OpenFile(ambiguous, os.O_RDONLY, 0777)
		assert.Nil(t, fd)
		assert.Error(t, err)
		assert.NotEqual(t, err, ENOENT)
	}

	// Case-Sensitive VFS, for comparison: only exact names are found.
	assertFileDataVFS(t, vfsCS, "FiLeA", "data1")
	for _, alias := range []string{"fileA", "filea", "FILEA"} {
		assertFileAbsentVFS(t, vfsCS, alias)
	}
	assertFileDataVFS(t, vfsCS, "FiLeB", "data2")
	assertFileDataVFS(t, vfsCS, "FilEb", "data3")
	for _, alias := range []string{"fileb", "FILEB"} {
		assertFileAbsentVFS(t, vfsCS, alias)
	}
}
// checkFileDataVFS reports whether name can be opened read-only on vfs as a
// *ReadFileHandle and a single Read of len(expect) bytes returns exactly
// expect. It never fails the test; any problem simply yields false.
func checkFileDataVFS(t *testing.T, vfs *VFS, name string, expect string) bool {
	handle, openErr := vfs.OpenFile(name, os.O_RDONLY, 0777)
	if openErr != nil || handle == nil {
		return false
	}
	// File must be closed - otherwise Run.cleanUp() will fail on Windows.
	defer func() { _ = handle.Close() }()

	reader, isReader := handle.(*ReadFileHandle)
	if !isReader {
		return false
	}

	want := len(expect)
	data := make([]byte, want)
	got, readErr := reader.Read(data)
	return readErr == nil && got == want && string(data) == expect
}
// assertFileDataVFS asserts that name can be opened read-only on vfs and that
// a single Read of len(expect) bytes returns exactly expect.
//
// Open and read problems are recorded with assert (non-fatal). A handle that
// is not a *ReadFileHandle, including a nil handle, stops the test via require.
func assertFileDataVFS(t *testing.T, vfs *VFS, name string, expect string) {
	// Attribute failures to the calling test line rather than to this helper.
	t.Helper()

	fd, errOpen := vfs.OpenFile(name, os.O_RDONLY, 0777)
	assert.NotNil(t, fd)
	assert.NoError(t, errOpen)
	defer func() {
		// File must be closed - otherwise Run.cleanUp() will fail on Windows.
		if errOpen == nil && fd != nil {
			_ = fd.Close()
		}
	}()

	fh, ok := fd.(*ReadFileHandle)
	require.True(t, ok)

	size := len(expect)
	buf := make([]byte, size)
	numRead, errRead := fh.Read(buf)
	assert.NoError(t, errRead)
	// testify expects (expected, actual); this order keeps failure output
	// labelled correctly.
	assert.Equal(t, size, numRead)
	assert.Equal(t, expect, string(buf))
}
// assertFileAbsentVFS asserts that opening name read-only on vfs yields no
// handle and fails with ENOENT.
func assertFileAbsentVFS(t *testing.T, vfs *VFS, name string) {
	// Attribute failures to the calling test line rather than to this helper.
	t.Helper()

	fd, err := vfs.OpenFile(name, os.O_RDONLY, 0777)
	defer func() {
		// File must be closed - otherwise Run.cleanUp() will fail on Windows.
		// Only reached if the open unexpectedly succeeded.
		if err == nil && fd != nil {
			_ = fd.Close()
		}
	}()

	assert.Nil(t, fd)
	assert.Error(t, err)
	// testify expects (expected, actual); this order keeps failure output
	// labelled correctly.
	assert.Equal(t, ENOENT, err)
}
// TestUnicodeNormalization writes one plain-named file and one NFC-named file
// to the remote, then looks both up through the VFS by their NFD names:
// first with the config's NoUnicodeNormalization left as found, then with it
// forced to true, where the NFC file must no longer be found by its NFD name.
func TestUnicodeNormalization(t *testing.T) {
	r := fstest.NewRun(t)

	// Both forms are derived from one literal, so the test does not depend on
	// which normalization form the source file itself happens to store.
	nfc := norm.NFC.String(norm.NFD.String("測試_Русский___ě_áñ"))
	nfd := norm.NFD.String(nfc)
	const both = "normal name with no special characters.txt"

	// Create test files
	ctx := context.Background()
	plainFile := r.WriteObject(ctx, both, "data1", t1)
	nfcFile := r.WriteObject(ctx, nfc, "data2", t2)
	r.CheckRemoteItems(t, plainFile, nfcFile)

	// Create VFS
	opt := vfscommon.DefaultOpt
	vfs := New(r.Fremote, &opt)
	defer cleanupVFS(t, vfs)

	// Both files must be found under their NFD-normalized names.
	assertFileDataVFS(t, vfs, norm.NFD.String(both), "data1")
	assertFileDataVFS(t, vfs, nfd, "data2")

	// Switch NoUnicodeNormalization on. The global config has to be changed
	// here because the *Dir methods don't take a ctx param.
	ci := fs.GetConfig(ctx)
	oldVal := ci.NoUnicodeNormalization
	restore := func() { fs.GetConfig(ctx).NoUnicodeNormalization = oldVal }
	defer restore() // put the prior value back once the test is done
	ci.NoUnicodeNormalization = true

	// Now only the plain-named file is found; the NFD name no longer matches
	// the NFC file.
	assertFileDataVFS(t, vfs, norm.NFD.String(both), "data1")
	assertFileAbsentVFS(t, vfs, nfd)
}