mirror of
https://github.com/sharkdp/bat.git
synced 2025-08-24 22:25:37 +02:00
Merge pull request #3369 from forkeith/utf16le
Fix the read_line method for utf16le/be input
This commit is contained in:
@@ -17,6 +17,7 @@
|
|||||||
- Add missing mappings for various bash/zsh files, see PR #3262 (@AdamGaskins)
|
- Add missing mappings for various bash/zsh files, see PR #3262 (@AdamGaskins)
|
||||||
- Send all bat errors to stderr by default, see #3336 (@JerryImMouse)
|
- Send all bat errors to stderr by default, see #3336 (@JerryImMouse)
|
||||||
- Make --map-syntax target case insensitive to match --language, see #3206 (@keith-hall)
|
- Make --map-syntax target case insensitive to match --language, see #3206 (@keith-hall)
|
||||||
|
- Correctly determine the end of the line in UTF16LE/BE input, see #3369 (@keith-hall)
|
||||||
|
|
||||||
## Other
|
## Other
|
||||||
|
|
||||||
|
88
src/input.rs
88
src/input.rs
@@ -267,7 +267,9 @@ impl<'a> InputReader<'a> {
|
|||||||
};
|
};
|
||||||
|
|
||||||
if content_type == Some(ContentType::UTF_16LE) {
|
if content_type == Some(ContentType::UTF_16LE) {
|
||||||
reader.read_until(0x00, &mut first_line).ok();
|
read_utf16_line(&mut reader, &mut first_line, 0x00, 0x0A).ok();
|
||||||
|
} else if content_type == Some(ContentType::UTF_16BE) {
|
||||||
|
read_utf16_line(&mut reader, &mut first_line, 0x0A, 0x00).ok();
|
||||||
}
|
}
|
||||||
|
|
||||||
InputReader {
|
InputReader {
|
||||||
@@ -283,16 +285,44 @@ impl<'a> InputReader<'a> {
|
|||||||
return Ok(true);
|
return Ok(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
let res = self.inner.read_until(b'\n', buf).map(|size| size > 0)?;
|
|
||||||
|
|
||||||
if self.content_type == Some(ContentType::UTF_16LE) {
|
if self.content_type == Some(ContentType::UTF_16LE) {
|
||||||
let _ = self.inner.read_until(0x00, buf);
|
return read_utf16_line(&mut self.inner, buf, 0x00, 0x0A);
|
||||||
|
}
|
||||||
|
if self.content_type == Some(ContentType::UTF_16BE) {
|
||||||
|
return read_utf16_line(&mut self.inner, buf, 0x0A, 0x00);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let res = self.inner.read_until(b'\n', buf).map(|size| size > 0)?;
|
||||||
Ok(res)
|
Ok(res)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reads one UTF-16 encoded line from `reader`, appending its raw bytes to `buf`.
///
/// A line ends with the two-byte sequence `preceded_by_char`, `read_until_char`
/// (callers pass `0x00, 0x0A` as `read_until_char, preceded_by_char` for
/// UTF-16LE and `0x0A, 0x00` for UTF-16BE). The terminator bytes are kept in
/// `buf`.
///
/// The terminator is only accepted when `buf` holds an even number of bytes,
/// i.e. when the pair forms a whole 16-bit code unit. Without that check a
/// pair straddling two code units (for example the UTF-16BE bytes
/// `01 00 0A 05`, which encode U+0100 followed by U+0A05) would be mistaken
/// for a line break. `buf` is therefore expected to start on a code-unit
/// boundary; it may already contain the beginning of the current line.
///
/// # Returns
///
/// `Ok(true)` if `buf` is non-empty when the function returns, `Ok(false)`
/// otherwise (nothing could be read and `buf` was empty).
///
/// # Errors
///
/// Propagates any I/O error reported by `reader`.
fn read_utf16_line<R: BufRead>(
    reader: &mut R,
    buf: &mut Vec<u8>,
    read_until_char: u8,
    preceded_by_char: u8,
) -> io::Result<bool> {
    let terminator = [preceded_by_char, read_until_char];
    loop {
        // `read_until` appends to `buf` directly, so no scratch buffer is needed.
        if reader.read_until(read_until_char, buf)? == 0 {
            // EOF reached
            break;
        }
        // Only an aligned terminator (whole code unit) marks the end of the line.
        if buf.len() % 2 == 0 && buf.ends_with(&terminator) {
            break;
        }
        // end of line not found, keep going
    }
    Ok(!buf.is_empty())
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn basic() {
|
fn basic() {
|
||||||
let content = b"#!/bin/bash\necho hello";
|
let content = b"#!/bin/bash\necho hello";
|
||||||
@@ -350,3 +380,53 @@ fn utf16le() {
|
|||||||
assert!(!res.unwrap());
|
assert!(!res.unwrap());
|
||||||
assert!(buffer.is_empty());
|
assert!(buffer.is_empty());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Test named after issue 3367: UTF-16LE input whose first line contains the
// bytes 0x0A and 0x00 in positions where they are NOT a line terminator.
// After the BOM (FF FE) the first line holds the code units 0x4E0A, 0x4E00,
// 0x4F0A and 0x5200, followed by the real terminator 0A 00.
#[test]
|
||||||
|
fn utf16le_issue3367() {
|
||||||
|
// Three lines: the tricky first line, "o bar\n", and "hello world" with no
// trailing newline.
let content = b"\xFF\xFE\x0A\x4E\x00\x4E\x0A\x4F\x00\x52\x0A\x00\
|
||||||
|
\x6F\x00\x20\x00\x62\x00\x61\x00\x72\x00\x0A\x00\
|
||||||
|
\x68\x00\x65\x00\x6C\x00\x6C\x00\x6F\x00\x20\x00\x77\x00\x6F\x00\x72\x00\x6C\x00\x64\x00";
|
||||||
|
let mut reader = InputReader::new(&content[..]);
|
||||||
|
|
||||||
|
// Right after construction, `first_line` must already hold the complete first
// line, up to and including the real 0A 00 terminator.
assert_eq!(
|
||||||
|
b"\xFF\xFE\x0A\x4E\x00\x4E\x0A\x4F\x00\x52\x0A\x00",
|
||||||
|
&reader.first_line[..]
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut buffer = vec![];
|
||||||
|
|
||||||
|
// 1st read_line: yields the same bytes as `first_line`.
let res = reader.read_line(&mut buffer);
|
||||||
|
assert!(res.is_ok());
|
||||||
|
assert!(res.unwrap());
|
||||||
|
assert_eq!(
|
||||||
|
b"\xFF\xFE\x0A\x4E\x00\x4E\x0A\x4F\x00\x52\x0A\x00",
|
||||||
|
&buffer[..]
|
||||||
|
);
|
||||||
|
|
||||||
|
buffer.clear();
|
||||||
|
|
||||||
|
// 2nd read_line: "o bar" plus the 0A 00 terminator.
let res = reader.read_line(&mut buffer);
|
||||||
|
assert!(res.is_ok());
|
||||||
|
assert!(res.unwrap());
|
||||||
|
assert_eq!(
|
||||||
|
b"\x6F\x00\x20\x00\x62\x00\x61\x00\x72\x00\x0A\x00",
|
||||||
|
&buffer[..]
|
||||||
|
);
|
||||||
|
|
||||||
|
buffer.clear();
|
||||||
|
|
||||||
|
// 3rd read_line: "hello world", the final line without a terminator.
let res = reader.read_line(&mut buffer);
|
||||||
|
assert!(res.is_ok());
|
||||||
|
assert!(res.unwrap());
|
||||||
|
assert_eq!(
|
||||||
|
b"\x68\x00\x65\x00\x6C\x00\x6C\x00\x6F\x00\x20\x00\x77\x00\x6F\x00\x72\x00\x6C\x00\x64\x00",
|
||||||
|
&buffer[..]
|
||||||
|
);
|
||||||
|
|
||||||
|
buffer.clear();
|
||||||
|
|
||||||
|
// 4th read_line: input is exhausted, so it reports `false` and leaves the
// buffer empty.
let res = reader.read_line(&mut buffer);
|
||||||
|
assert!(res.is_ok());
|
||||||
|
assert!(!res.unwrap());
|
||||||
|
assert!(buffer.is_empty());
|
||||||
|
}
|
||||||
|
BIN
tests/examples/test_UTF-16BE-complicated.txt
vendored
Normal file
BIN
tests/examples/test_UTF-16BE-complicated.txt
vendored
Normal file
Binary file not shown.
BIN
tests/examples/test_UTF-16BE.txt
vendored
Normal file
BIN
tests/examples/test_UTF-16BE.txt
vendored
Normal file
Binary file not shown.
BIN
tests/examples/test_UTF-16LE-complicated.txt
vendored
Normal file
BIN
tests/examples/test_UTF-16LE-complicated.txt
vendored
Normal file
Binary file not shown.
@@ -1321,6 +1321,38 @@ fn utf16() {
|
|||||||
.assert()
|
.assert()
|
||||||
.success()
|
.success()
|
||||||
.stdout("hello world\n");
|
.stdout("hello world\n");
|
||||||
|
|
||||||
|
bat()
|
||||||
|
.arg("--plain")
|
||||||
|
.arg("--decorations=always")
|
||||||
|
.arg("test_UTF-16BE.txt")
|
||||||
|
.assert()
|
||||||
|
.success()
|
||||||
|
.stdout("hello world\nthis is a test\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Integration test: prints the fixture `test_UTF-16LE-complicated.txt` with
// line numbers and without colour, and expects exactly three numbered lines.
// NOTE(review): `bat()` is defined outside this view; presumably it builds the
// command for the binary under test -- confirm.
#[test]
|
||||||
|
fn utf16le() {
|
||||||
|
bat()
|
||||||
|
.arg("--decorations=always")
|
||||||
|
.arg("--style=numbers")
|
||||||
|
.arg("--color=never")
|
||||||
|
.arg("test_UTF-16LE-complicated.txt")
|
||||||
|
.assert()
|
||||||
|
.success()
|
||||||
|
// Each input line must appear on its own numbered output line.
.stdout(" 1 上一伊刀\n 2 foo bar\n 3 hello world\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Integration test: big-endian counterpart of `utf16le`. Prints the fixture
// `test_UTF-16BE-complicated.txt` with line numbers and without colour, and
// expects the same three numbered lines.
// NOTE(review): `bat()` is defined outside this view; presumably it builds the
// command for the binary under test -- confirm.
#[test]
|
||||||
|
fn utf16be() {
|
||||||
|
bat()
|
||||||
|
.arg("--decorations=always")
|
||||||
|
.arg("--style=numbers")
|
||||||
|
.arg("--color=never")
|
||||||
|
.arg("test_UTF-16BE-complicated.txt")
|
||||||
|
.assert()
|
||||||
|
.success()
|
||||||
|
// Each input line must appear on its own numbered output line.
.stdout(" 1 上一伊刀\n 2 foo bar\n 3 hello world\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Regression test for https://github.com/sharkdp/bat/issues/1922
|
// Regression test for https://github.com/sharkdp/bat/issues/1922
|
||||||
|
Reference in New Issue
Block a user