Apply same fix and tests for UTF16BE

This commit is contained in:
Keith Hall
2025-08-16 15:33:53 +03:00
parent 40c4c8e542
commit 96ce80d0e2
4 changed files with 25 additions and 9 deletions

View File

@@ -17,7 +17,7 @@
- Add missing mappings for various bash/zsh files, see PR #3262 (@AdamGaskins)
- Send all bat errors to stderr by default, see #3336 (@JerryImMouse)
- Make --map-syntax target case insensitive to match --language, see #3206 (@keith-hall)
- Correctly determine the end of the line in UTF16LE input #3369 (@keith-hall)
- Correctly determine the end of the line in UTF16LE/BE input #3369 (@keith-hall)
## Other

View File

@@ -267,7 +267,9 @@ impl<'a> InputReader<'a> {
};
if content_type == Some(ContentType::UTF_16LE) {
read_utf16le_line(&mut reader, &mut first_line).ok();
read_utf16_line(&mut reader, &mut first_line, 0x00, 0x0A).ok();
} else if content_type == Some(ContentType::UTF_16BE) {
read_utf16_line(&mut reader, &mut first_line, 0x0A, 0x00).ok();
}
InputReader {
@@ -283,26 +285,28 @@ impl<'a> InputReader<'a> {
return Ok(true);
}
let res = self.inner.read_until(b'\n', buf).map(|size| size > 0)?;
if self.content_type == Some(ContentType::UTF_16LE) {
return read_utf16le_line(&mut self.inner, buf);
return read_utf16_line(&mut self.inner, buf, 0x00, 0x0A);
}
if self.content_type == Some(ContentType::UTF_16BE) {
return read_utf16_line(&mut self.inner, buf, 0x0A, 0x00);
}
let res = self.inner.read_until(b'\n', buf).map(|size| size > 0)?;
Ok(res)
}
}
fn read_utf16le_line<R: BufRead>(reader: &mut R, buf: &mut Vec<u8>) -> io::Result<bool> {
fn read_utf16_line<R: BufRead>(reader: &mut R, buf: &mut Vec<u8>, read_until_char: u8, preceded_by_char: u8) -> io::Result<bool> {
loop {
let mut temp = Vec::new();
let n = reader.read_until(0x00, &mut temp)?;
let n = reader.read_until(read_until_char, &mut temp)?;
if n == 0 {
// EOF reached
break;
}
buf.extend_from_slice(&temp);
if buf.len() >= 2 && buf[buf.len() - 2] == 0x0A && buf[buf.len() - 1] == 0x00 {
if buf.len() >= 2 && buf[buf.len() - 2] == preceded_by_char && buf[buf.len() - 1] == read_until_char {
// end of line found
break;
}
@@ -403,7 +407,7 @@ fn utf16le_issue3367() {
assert_eq!(b"\x68\x00\x65\x00\x6C\x00\x6C\x00\x6F\x00\x20\x00\x77\x00\x6F\x00\x72\x00\x6C\x00\x64\x00", &buffer[..]);
buffer.clear();
let res = reader.read_line(&mut buffer);
assert!(res.is_ok());
assert!(!res.unwrap());

Binary file not shown.

View File

@@ -1343,6 +1343,18 @@ fn utf16le() {
.stdout(" 1 上一伊刀\n 2 foo bar\n 3 hello world\n");
}
#[test]
fn utf16be() {
bat()
.arg("--decorations=always")
.arg("--style=numbers")
.arg("--color=never")
.arg("test_UTF-16BE-complicated.txt")
.assert()
.success()
.stdout(" 1 上一伊刀\n 2 foo bar\n 3 hello world\n");
}
// Regression test for https://github.com/sharkdp/bat/issues/1922
#[test]
fn bom_not_stripped_in_loop_through_mode() {