mirror of
https://github.com/sharkdp/bat.git
synced 2024-11-22 15:53:29 +01:00
Handle non-unicode characters in the preprocessor
This commit is contained in:
parent
b48b9fcf3b
commit
82ca8804a2
@ -25,3 +25,7 @@ contexts:
|
|||||||
scope: entity.other.attribute-name.show-nonprintable.escape
|
scope: entity.other.attribute-name.show-nonprintable.escape
|
||||||
- match: "␈"
|
- match: "␈"
|
||||||
scope: entity.other.attribute-name.show-nonprintable.backspace
|
scope: entity.other.attribute-name.show-nonprintable.backspace
|
||||||
|
- match: "\\\\x[A-Z0-9][A-Z0-9]"
|
||||||
|
scope: comment.block.show-nonprintable.backspace
|
||||||
|
- match: "\\\\u\\{[a-z0-9]+\\}"
|
||||||
|
scope: comment.block.show-nonprintable.backspace
|
||||||
|
@ -172,7 +172,8 @@ pub fn build_app(interactive_output: bool) -> ClapApp<'static, 'static> {
|
|||||||
.help("Show non-printable characters (space, tab, newline, ..).")
|
.help("Show non-printable characters (space, tab, newline, ..).")
|
||||||
.long_help(
|
.long_help(
|
||||||
"Show non-printable characters like space, tab or newline. \
|
"Show non-printable characters like space, tab or newline. \
|
||||||
Use '--tabs' to control the width of the tab-placeholders.",
|
Use '--tabs' to control the width of the tab-placeholders. \
|
||||||
|
This option can also be used to print binary files.",
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
.arg(
|
.arg(
|
||||||
|
@ -33,41 +33,115 @@ pub fn expand_tabs(line: &str, width: usize, cursor: &mut usize) -> String {
|
|||||||
buffer
|
buffer
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn replace_nonprintable(input: &str, tab_width: usize) -> String {
|
/// Tries to decode a single UTF-8 encoded character from the start of `input`.
///
/// Returns the decoded character together with the number of bytes it
/// occupies (1 to 4). Returns `None` if `input` is empty or does not begin
/// with a complete, valid UTF-8 sequence. Bytes after the first character are
/// ignored, whether or not they are valid.
fn try_parse_utf8_char(input: &[u8]) -> Option<(char, usize)> {
    // An encoded character is one to four bytes long. Probe the prefixes from
    // shortest to longest and stop at the first one that decodes, so no work
    // is done on longer prefixes once a character has been found.
    (1..=4).find_map(|len| {
        // `get` yields `None` when fewer than `len` bytes are available.
        let prefix = input.get(..len)?;
        let decoded = std::str::from_utf8(prefix).ok()?;
        decoded.chars().next().map(|chr| (chr, len))
    })
}
|
||||||
|
|
||||||
|
pub fn replace_nonprintable(input: &[u8], tab_width: usize) -> String {
|
||||||
let mut output = String::new();
|
let mut output = String::new();
|
||||||
|
|
||||||
let tab_width = if tab_width == 0 { 4 } else { tab_width };
|
let tab_width = if tab_width == 0 { 4 } else { tab_width };
|
||||||
|
|
||||||
for chr in input.chars() {
|
let mut idx = 0;
|
||||||
match chr {
|
let len = input.len();
|
||||||
// space
|
while idx < len {
|
||||||
' ' => output.push('•'),
|
if let Some((chr, skip_ahead)) = try_parse_utf8_char(&input[idx..]) {
|
||||||
// tab
|
idx += skip_ahead;
|
||||||
'\t' => {
|
|
||||||
if tab_width == 1 {
|
match chr {
|
||||||
output.push('↹');
|
// space
|
||||||
} else {
|
' ' => output.push('•'),
|
||||||
output.push('├');
|
// tab
|
||||||
output.push_str(&"─".repeat(tab_width - 2));
|
'\t' => {
|
||||||
output.push('┤');
|
if tab_width == 1 {
|
||||||
|
output.push('↹');
|
||||||
|
} else {
|
||||||
|
output.push('├');
|
||||||
|
output.push_str(&"─".repeat(tab_width - 2));
|
||||||
|
output.push('┤');
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
// line feed
|
||||||
|
'\x0A' => output.push('␊'),
|
||||||
|
// carriage return
|
||||||
|
'\x0D' => output.push('␍'),
|
||||||
|
// null
|
||||||
|
'\x00' => output.push('␀'),
|
||||||
|
// bell
|
||||||
|
'\x07' => output.push('␇'),
|
||||||
|
// backspace
|
||||||
|
'\x08' => output.push('␈'),
|
||||||
|
// escape
|
||||||
|
'\x1B' => output.push('␛'),
|
||||||
|
// printable ASCII
|
||||||
|
c if c.is_ascii_alphanumeric()
|
||||||
|
|| c.is_ascii_punctuation()
|
||||||
|
|| c.is_ascii_graphic() =>
|
||||||
|
{
|
||||||
|
output.push(c)
|
||||||
|
}
|
||||||
|
// everything else
|
||||||
|
c => output.push_str(&c.escape_unicode().collect::<String>()),
|
||||||
}
|
}
|
||||||
// line feed
|
} else {
|
||||||
'\x0A' => output.push('␊'),
|
output.push_str(&format!("\\x{:02X}", input[idx]));
|
||||||
// carriage return
|
idx += 1;
|
||||||
'\x0D' => output.push('␍'),
|
|
||||||
// null
|
|
||||||
'\x00' => output.push('␀'),
|
|
||||||
// bell
|
|
||||||
'\x07' => output.push('␇'),
|
|
||||||
// backspace
|
|
||||||
'\x08' => output.push('␈'),
|
|
||||||
// escape
|
|
||||||
'\x1B' => output.push('␛'),
|
|
||||||
// anything else
|
|
||||||
_ => output.push(chr),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
output
|
output
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Checks that `try_parse_utf8_char` decodes exactly one leading character,
/// reports its encoded length, and rejects input that does not start with a
/// complete, valid UTF-8 sequence.
#[test]
fn test_try_parse_utf8_char() {
    // One-byte characters; whatever follows (valid or not) is ignored.
    assert_eq!(try_parse_utf8_char(&[0x20]), Some((' ', 1)));
    assert_eq!(try_parse_utf8_char(&[0x20, 0x20]), Some((' ', 1)));
    assert_eq!(try_parse_utf8_char(&[0x20, 0xef]), Some((' ', 1)));

    // Control characters are still valid one-byte characters.
    assert_eq!(try_parse_utf8_char(&[0x00]), Some(('\x00', 1)));
    assert_eq!(try_parse_utf8_char(&[0x1b]), Some(('\x1b', 1)));

    // Two-byte sequences.
    assert_eq!(try_parse_utf8_char(&[0xc3, 0xa4]), Some(('ä', 2)));
    assert_eq!(try_parse_utf8_char(&[0xc3, 0xa4, 0xef]), Some(('ä', 2)));
    assert_eq!(try_parse_utf8_char(&[0xc3, 0xa4, 0x20]), Some(('ä', 2)));

    // Three-byte sequences.
    assert_eq!(try_parse_utf8_char(&[0xe2, 0x82, 0xac]), Some(('€', 3)));
    assert_eq!(
        try_parse_utf8_char(&[0xe2, 0x82, 0xac, 0xef]),
        Some(('€', 3))
    );
    assert_eq!(
        try_parse_utf8_char(&[0xe2, 0x82, 0xac, 0x20]),
        Some(('€', 3))
    );

    assert_eq!(try_parse_utf8_char(&[0xe2, 0x88, 0xb0]), Some(('∰', 3)));

    // Four-byte sequences.
    assert_eq!(
        try_parse_utf8_char(&[0xf0, 0x9f, 0x8c, 0x82]),
        Some(('🌂', 4))
    );
    assert_eq!(
        try_parse_utf8_char(&[0xf0, 0x9f, 0x8c, 0x82, 0xef]),
        Some(('🌂', 4))
    );
    assert_eq!(
        try_parse_utf8_char(&[0xf0, 0x9f, 0x8c, 0x82, 0x20]),
        Some(('🌂', 4))
    );

    // Empty input, truncated sequences and invalid lead bytes.
    assert_eq!(try_parse_utf8_char(&[]), None);
    assert_eq!(try_parse_utf8_char(&[0xef]), None);
    assert_eq!(try_parse_utf8_char(&[0xef, 0x20]), None);
    assert_eq!(try_parse_utf8_char(&[0xf0, 0xf0]), None);
}
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
use std::ascii;
|
|
||||||
use std::io::Write;
|
use std::io::Write;
|
||||||
use std::vec::Vec;
|
use std::vec::Vec;
|
||||||
|
|
||||||
@ -232,7 +231,8 @@ impl<'a> Printer for InteractivePrinter<'a> {
|
|||||||
writeln!(
|
writeln!(
|
||||||
handle,
|
handle,
|
||||||
"{}: Binary content from {} will not be printed to the terminal \
|
"{}: Binary content from {} will not be printed to the terminal \
|
||||||
(but will be present if the output of 'bat' is piped).",
|
(but will be present if the output of 'bat' is piped). You can use 'bat -A' \
|
||||||
|
to show the binary file contents.",
|
||||||
Yellow.paint("[bat warning]"),
|
Yellow.paint("[bat warning]"),
|
||||||
input
|
input
|
||||||
)?;
|
)?;
|
||||||
@ -281,7 +281,7 @@ impl<'a> Printer for InteractivePrinter<'a> {
|
|||||||
)?;
|
)?;
|
||||||
|
|
||||||
if self.config.output_components.grid() {
|
if self.config.output_components.grid() {
|
||||||
if self.content_type.map_or(false, |c| c.is_text()) {
|
if self.content_type.map_or(false, |c| c.is_text()) || self.config.show_nonprintable {
|
||||||
self.print_horizontal_line(handle, '┼')?;
|
self.print_horizontal_line(handle, '┼')?;
|
||||||
} else {
|
} else {
|
||||||
self.print_horizontal_line(handle, '┴')?;
|
self.print_horizontal_line(handle, '┴')?;
|
||||||
@ -292,7 +292,8 @@ impl<'a> Printer for InteractivePrinter<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn print_footer(&mut self, handle: &mut dyn Write) -> Result<()> {
|
fn print_footer(&mut self, handle: &mut dyn Write) -> Result<()> {
|
||||||
if self.config.output_components.grid() && self.content_type.map_or(false, |c| c.is_text())
|
if self.config.output_components.grid()
|
||||||
|
&& (self.content_type.map_or(false, |c| c.is_text()) || self.config.show_nonprintable)
|
||||||
{
|
{
|
||||||
self.print_horizontal_line(handle, '┴')
|
self.print_horizontal_line(handle, '┴')
|
||||||
} else {
|
} else {
|
||||||
@ -331,32 +332,23 @@ impl<'a> Printer for InteractivePrinter<'a> {
|
|||||||
line_number: usize,
|
line_number: usize,
|
||||||
line_buffer: &[u8],
|
line_buffer: &[u8],
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let mut line = match self.content_type {
|
let line = if self.config.show_nonprintable {
|
||||||
None => {
|
replace_nonprintable(&line_buffer, self.config.tab_width)
|
||||||
return Ok(());
|
} else {
|
||||||
|
match self.content_type {
|
||||||
|
Some(ContentType::BINARY) | None => {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
Some(ContentType::UTF_16LE) => UTF_16LE
|
||||||
|
.decode(&line_buffer, DecoderTrap::Replace)
|
||||||
|
.map_err(|_| "Invalid UTF-16LE")?,
|
||||||
|
Some(ContentType::UTF_16BE) => UTF_16BE
|
||||||
|
.decode(&line_buffer, DecoderTrap::Replace)
|
||||||
|
.map_err(|_| "Invalid UTF-16BE")?,
|
||||||
|
_ => String::from_utf8_lossy(&line_buffer).to_string(),
|
||||||
}
|
}
|
||||||
Some(ContentType::BINARY) => String::from_utf8(
|
|
||||||
line_buffer
|
|
||||||
.as_ref()
|
|
||||||
.iter()
|
|
||||||
.map(|b| ascii::escape_default(*b))
|
|
||||||
.flatten()
|
|
||||||
.collect(),
|
|
||||||
)
|
|
||||||
.unwrap(),
|
|
||||||
Some(ContentType::UTF_16LE) => UTF_16LE
|
|
||||||
.decode(&line_buffer, DecoderTrap::Replace)
|
|
||||||
.map_err(|_| "Invalid UTF-16LE")?,
|
|
||||||
Some(ContentType::UTF_16BE) => UTF_16BE
|
|
||||||
.decode(&line_buffer, DecoderTrap::Replace)
|
|
||||||
.map_err(|_| "Invalid UTF-16BE")?,
|
|
||||||
_ => String::from_utf8_lossy(&line_buffer).to_string(),
|
|
||||||
};
|
};
|
||||||
|
|
||||||
if self.config.show_nonprintable {
|
|
||||||
line = replace_nonprintable(&line, self.config.tab_width);
|
|
||||||
}
|
|
||||||
|
|
||||||
let regions = {
|
let regions = {
|
||||||
let highlighter = match self.highlighter {
|
let highlighter = match self.highlighter {
|
||||||
Some(ref mut highlighter) => highlighter,
|
Some(ref mut highlighter) => highlighter,
|
||||||
|
Loading…
Reference in New Issue
Block a user