mirror of
https://github.com/sharkdp/bat.git
synced 2025-06-19 17:18:49 +02:00
Fix UTF-8 BOM file type detection for first-line syntax patterns - Fixes #3314
This commit is contained in:
parent
0da4084064
commit
17e6952ab8
@ -7,6 +7,7 @@
|
||||
|
||||
## Bugfixes
|
||||
|
||||
- Fix UTF-8 BOM not being stripped for syntax detection, see #3314 (@krikera)
|
||||
- Fix `BAT_THEME_DARK` and `BAT_THEME_LIGHT` being ignored, see issue #3171 and PR #3168 (@bash)
|
||||
- Prevent `--list-themes` from outputting default theme info to stdout when it is piped, see #3189 (@einfachIrgendwer0815)
|
||||
- Rename some submodules to fix Dependabot submodule updates, see issue #3198 and PR #3201 (@victor-gp)
|
||||
|
@ -298,7 +298,11 @@ impl HighlightingAssets {
|
||||
let syntax_set = self.get_syntax_set()?;
|
||||
Ok(String::from_utf8(reader.first_line.clone())
|
||||
.ok()
|
||||
.and_then(|l| syntax_set.find_syntax_by_first_line(&l))
|
||||
.and_then(|l| {
|
||||
// Strip UTF-8 BOM if present
|
||||
let line = l.strip_prefix('\u{feff}').unwrap_or(&l);
|
||||
syntax_set.find_syntax_by_first_line(line)
|
||||
})
|
||||
.map(|syntax| SyntaxReferenceInSet { syntax, syntax_set }))
|
||||
}
|
||||
}
|
||||
@ -533,6 +537,41 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
/// Checks syntax detection for content whose first line starts with a UTF-8
/// byte order mark (U+FEFF).
///
/// Each case passes a file name and BOM-prefixed content to
/// `syntax_for_file_with_content` and compares the returned syntax name with
/// the expected one.
#[test]
fn syntax_detection_first_line_with_utf8_bom() {
    let test = SyntaxDetectionTest::new();

    // (file name, content starting with a BOM, expected syntax name)
    let cases = [
        // XML declaration behind a BOM, in a file whose name has no extension.
        (
            "unknown_file",
            "\u{feff}<?xml version=\"1.0\" encoding=\"utf-8\"?>",
            "XML",
        ),
        // The .csproj scenario from the referenced issue.
        // NOTE(review): if `.csproj` already has an extension mapping, this
        // case may be decided by the mapping rather than by the first
        // line — confirm.
        (
            "test.csproj",
            "\u{feff}<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<Project ToolsVersion=\"15.0\" xmlns=\"http://schemas.microsoft.com/developer/msbuild/2003\">",
            "XML",
        ),
        // Shebang line behind a BOM.
        (
            "unknown_script",
            "\u{feff}#!/bin/bash",
            "Bourne Again Shell (bash)",
        ),
        // PHP opening tag behind a BOM.
        ("unknown_php", "\u{feff}<?php", "PHP"),
    ];

    for (file_name, content, expected) in cases {
        assert_eq!(
            test.syntax_for_file_with_content(file_name, content),
            expected
        );
    }
}
|
||||
|
||||
#[test]
|
||||
fn syntax_detection_with_custom_mapping() {
|
||||
let mut test = SyntaxDetectionTest::new();
|
||||
|
Loading…
x
Reference in New Issue
Block a user