diff --git a/lib/transform/gen_help.go b/lib/transform/gen_help.go index 292c51d4e..f4ef769af 100644 --- a/lib/transform/gen_help.go +++ b/lib/transform/gen_help.go @@ -37,6 +37,9 @@ var commandList = []commands{ {command: "--name-transform replace=old:new", description: "Replaces occurrences of old with new in the file name."}, {command: "--name-transform date={YYYYMMDD}", description: "Appends or prefixes the specified date format."}, {command: "--name-transform truncate=N", description: "Truncates the file name to a maximum of N characters."}, + {command: "--name-transform truncate_keep_extension=N", description: "Truncates the file name to a maximum of N characters while preserving the original file extension."}, + {command: "--name-transform truncate_bytes=N", description: "Truncates the file name to a maximum of N bytes (not characters)."}, + {command: "--name-transform truncate_bytes_keep_extension=N", description: "Truncates the file name to a maximum of N bytes (not characters) while preserving the original file extension."}, {command: "--name-transform base64encode", description: "Encodes the file name in Base64."}, {command: "--name-transform base64decode", description: "Decodes a Base64-encoded file name."}, {command: "--name-transform encoder=ENCODING", description: "Converts the file name to the specified encoding (e.g., ISO-8859-1, Windows-1252, Macintosh)."}, diff --git a/lib/transform/options.go b/lib/transform/options.go index 3c02b7318..47024c740 100644 --- a/lib/transform/options.go +++ b/lib/transform/options.go @@ -159,6 +159,12 @@ func (t *transform) requiresValue() bool { return true case ConvTruncate: return true + case ConvTruncateKeepExtension: + return true + case ConvTruncateBytes: + return true + case ConvTruncateBytesKeepExtension: + return true case ConvEncoder: return true case ConvDecoder: @@ -190,6 +196,9 @@ const ( ConvIndex ConvDate ConvTruncate + ConvTruncateKeepExtension + ConvTruncateBytes + ConvTruncateBytesKeepExtension 
ConvBase64Encode ConvBase64Decode ConvEncoder @@ -211,35 +220,38 @@ type transformChoices struct{} func (transformChoices) Choices() []string { return []string{ - ConvNone: "none", - ConvToNFC: "nfc", - ConvToNFD: "nfd", - ConvToNFKC: "nfkc", - ConvToNFKD: "nfkd", - ConvFindReplace: "replace", - ConvPrefix: "prefix", - ConvSuffix: "suffix", - ConvSuffixKeepExtension: "suffix_keep_extension", - ConvTrimPrefix: "trimprefix", - ConvTrimSuffix: "trimsuffix", - ConvIndex: "index", - ConvDate: "date", - ConvTruncate: "truncate", - ConvBase64Encode: "base64encode", - ConvBase64Decode: "base64decode", - ConvEncoder: "encoder", - ConvDecoder: "decoder", - ConvISO8859_1: "ISO-8859-1", - ConvWindows1252: "Windows-1252", - ConvMacintosh: "Macintosh", - ConvCharmap: "charmap", - ConvLowercase: "lowercase", - ConvUppercase: "uppercase", - ConvTitlecase: "titlecase", - ConvASCII: "ascii", - ConvURL: "url", - ConvRegex: "regex", - ConvCommand: "command", + ConvNone: "none", + ConvToNFC: "nfc", + ConvToNFD: "nfd", + ConvToNFKC: "nfkc", + ConvToNFKD: "nfkd", + ConvFindReplace: "replace", + ConvPrefix: "prefix", + ConvSuffix: "suffix", + ConvSuffixKeepExtension: "suffix_keep_extension", + ConvTrimPrefix: "trimprefix", + ConvTrimSuffix: "trimsuffix", + ConvIndex: "index", + ConvDate: "date", + ConvTruncate: "truncate", + ConvTruncateKeepExtension: "truncate_keep_extension", + ConvTruncateBytes: "truncate_bytes", + ConvTruncateBytesKeepExtension: "truncate_bytes_keep_extension", + ConvBase64Encode: "base64encode", + ConvBase64Decode: "base64decode", + ConvEncoder: "encoder", + ConvDecoder: "decoder", + ConvISO8859_1: "ISO-8859-1", + ConvWindows1252: "Windows-1252", + ConvMacintosh: "Macintosh", + ConvCharmap: "charmap", + ConvLowercase: "lowercase", + ConvUppercase: "uppercase", + ConvTitlecase: "titlecase", + ConvASCII: "ascii", + ConvURL: "url", + ConvRegex: "regex", + ConvCommand: "command", } } diff --git a/lib/transform/transform.go b/lib/transform/transform.go index 
6c7a826a2..0ae81cea5 100644 --- a/lib/transform/transform.go +++ b/lib/transform/transform.go @@ -165,14 +165,25 @@ func transformPathSegment(s string, t transform) (string, error) { if err != nil { return s, err } - if max <= 0 { - return s, nil + return truncateChars(s, max, false), nil + case ConvTruncateKeepExtension: + max, err := strconv.Atoi(t.value) + if err != nil { + return s, err } - if utf8.RuneCountInString(s) <= max { - return s, nil + return truncateChars(s, max, true), nil + case ConvTruncateBytes: + max, err := strconv.Atoi(t.value) + if err != nil { + return s, err } - runes := []rune(s) - return string(runes[:max]), nil + return truncateBytes(s, max, false) + case ConvTruncateBytesKeepExtension: + max, err := strconv.Atoi(t.value) + if err != nil { + return s, err + } + return truncateBytes(s, max, true) case ConvEncoder: var enc encoder.MultiEncoder err := enc.Set(t.value) @@ -231,9 +242,13 @@ func transformPathSegment(s string, t transform) (string, error) { // // i.e. 
file.txt becomes file_somesuffix.txt not file.txt_somesuffix
 func SuffixKeepExtension(remote string, suffix string) string {
+	base, exts := splitExtension(remote)
+	return base + suffix + exts
+}
+
+func splitExtension(remote string) (base, exts string) {
+	base = remote
 	var (
-		base  = remote
-		exts  = ""
 		first = true
 		ext   = path.Ext(remote)
 	)
@@ -248,7 +263,62 @@ func SuffixKeepExtension(remote string, suffix string) string {
 		first = false
 		ext = path.Ext(base)
 	}
-	return base + suffix + exts
+	return base, exts
+}
+
+// truncateChars shortens s to at most max UTF-8 characters (runes).
+//
+// A max of zero or less disables truncation, and a name that already fits is
+// returned unchanged. When keepExtension is set, the extension(s) found by
+// splitExtension are preserved and only the base name is shortened. If the
+// extension alone is longer than max, the whole name is truncated instead so
+// that the result never exceeds max.
+func truncateChars(s string, max int, keepExtension bool) string {
+	if max <= 0 {
+		return s
+	}
+	if utf8.RuneCountInString(s) <= max {
+		return s
+	}
+	if keepExtension {
+		base, exts := splitExtension(s)
+		// keep is how many runes of the base name still fit in front of
+		// the extension. It is negative when the extension alone exceeds
+		// max; slicing with it would panic, so fall through to plain
+		// truncation of the whole name instead.
+		if keep := max - utf8.RuneCountInString(exts); keep >= 0 {
+			return string([]rune(base)[:keep]) + exts
+		}
+	}
+	return string([]rune(s)[:max])
+}
+
+// truncateBytes is like truncateChars but counts the number of bytes, not UTF-8 characters
+//
+// It backs off one byte at a time until the result is valid UTF-8, so a
+// multi-byte character is never split. An error is returned if no non-empty
+// prefix of the name yields valid UTF-8 within max bytes, which includes the
+// case where the kept extension leaves no room for the base name.
+func truncateBytes(s string, max int, keepExtension bool) (string, error) {
+	if max <= 0 {
+		return s, nil
+	}
+	if len(s) <= max {
+		return s, nil
+	}
+	exts := ""
+	if keepExtension {
+		s, exts = splitExtension(s)
+	}
+
+	// ensure we don't split a multi-byte UTF-8 character
+	for i := max - len(exts); i > 0; i-- {
+		b := append([]byte(s)[:i], exts...)
+		if len(b) <= max && utf8.Valid(b) {
+			return string(b), nil
+		}
+	}
+	return "", errors.New("could not truncate to valid UTF-8")
 }
 
 // forbid transformations that add/remove path separators
diff --git a/lib/transform/transform_test.go b/lib/transform/transform_test.go
index bd5e0c9a7..c10eaf1b9 100644
--- a/lib/transform/transform_test.go
+++ b/lib/transform/transform_test.go
@@ -128,6 +128,12 @@ func TestVarious(t *testing.T) {
 		{"stories/The Quick Brown 🦊 Fox Went to the Café!.txt", "stories/The Quick Brown _ Fox Went to the Caf_!.txt", []string{"all,charmap=ISO-8859-7"}},
 		{"stories/The Quick Brown Fox: A Memoir [draft].txt", "stories/The Quick Brown Fox: A Memoir [draft].txt", []string{"all,encoder=Colon,SquareBracket"}},
 		{"stories/The Quick Brown 🦊 Fox Went to the Café!.txt", "stories/The Quick Brown 🦊 Fox", []string{"all,truncate=21"}},
+		{"stories/Вот русское предложение, в котором байтов больше, чем символов.txt", "stories/Вот русское предложение, в котором байтов больше, чем символов.txt", []string{"truncate=70"}},
+		{"stories/Вот русское предложение, в котором байтов больше, чем символов.txt", "stories/Вот русское предложение, в котором байтов больше, чем символ", []string{"truncate=60"}},
+		{"stories/Вот русское предложение, в котором байтов больше, чем символов.txt", "stories/Вот русское предложение, в котором байтов больше, чем символов.txt", []string{"truncate_bytes=300"}},
+		{"stories/Вот русское предложение, в котором байтов больше, чем символов.txt", "stories/Вот русское предложение, в котором бай", []string{"truncate_bytes=70"}},
+		{"stories/Вот русское предложение, в котором байтов больше, чем символов.txt", "stories/Вот русское предложение, в котором байтов больше, чем си.txt", []string{"truncate_keep_extension=60"}},
+		{"stories/Вот русское предложение, в котором байтов больше, чем символов.txt", "stories/Вот русское предложение, в котором б.txt", []string{"truncate_bytes_keep_extension=70"}},
 		{"stories/The Quick Brown Fox!.txt", "stories/The 
Quick Brown Fox!.txt", []string{"all,command=echo"}}, {"stories/The Quick Brown Fox!.txt", "stories/The Quick Brown Fox!.txt-" + time.Now().Local().Format("20060102"), []string{"date=-{YYYYMMDD}"}}, {"stories/The Quick Brown Fox!.txt", "stories/The Quick Brown Fox!.txt-" + time.Now().Local().Format("2006-01-02 0304PM"), []string{"date=-{macfriendlytime}"}},