Refactor ansi stripping into nu-utils functions (#6966)

Allows use of slightly optimized variants that check if they have to use
the heavier vte parser. Tries to avoid unnecessary allocations. Initial
performance characteristics proven out in #4378.

Also reduces boilerplate with rightward drift.
This commit is contained in:
Stefan Holderbach
2022-11-04 19:49:45 +01:00
committed by GitHub
parent b9195c2668
commit 2c4048eb43
24 changed files with 126 additions and 134 deletions

View File

@ -0,0 +1,91 @@
use std::borrow::Cow;
/// Strips ANSI escape sequences and some ASCII control characters from `string`.
///
/// Intended for input that only rarely contains ANSI control characters:
/// the bytes are scanned first, and when nothing strippable is found the
/// input is handed back borrowed, without running the parser or allocating.
///
/// `\n` is kept; other control characters such as `\r` and `\t` are removed.
///
/// If stripping fails, or the stripped bytes are not valid UTF-8, the input
/// is returned unchanged and no error is reported.
pub fn strip_ansi_unlikely(string: &str) -> Cow<str> {
    // Any ASCII control byte (below 0x20) other than LF (0x0A) would be
    // stripped. This range also covers ESC (0x1B), the primary start of ANSI
    // sequences.
    let has_strippable_byte = string.bytes().any(|byte| byte < 0x20 && byte != b'\n');
    if !has_strippable_byte {
        return Cow::Borrowed(string);
    }

    // Both failure modes (strip error, invalid UTF-8) collapse to `None`, in
    // which case the untouched input is returned.
    strip_ansi_escapes::strip(string)
        .ok()
        .and_then(|bytes| String::from_utf8(bytes).ok())
        .map_or(Cow::Borrowed(string), Cow::Owned)
}
/// Strips ANSI escape sequences and some ASCII control characters from `string`.
///
/// Intended for input that is likely to contain ANSI control characters:
/// the input is passed straight to the stripping routine without a
/// pre-scan, so a successful call always yields an owned string.
///
/// `\n` is kept; other control characters such as `\r` and `\t` are removed.
///
/// If stripping fails, or the stripped bytes are not valid UTF-8, the input
/// is returned unchanged (borrowed) and no error is reported.
pub fn strip_ansi_likely(string: &str) -> Cow<str> {
    let cleaned = strip_ansi_escapes::strip(string)
        .ok()
        .and_then(|bytes| String::from_utf8(bytes).ok());

    match cleaned {
        Some(text) => Cow::Owned(text),
        // Covers both a strip error and invalid UTF-8 in the output.
        None => Cow::Borrowed(string),
    }
}
/// Strips ANSI escape sequences and some ASCII control characters from an
/// owned `String`.
///
/// Intended for input that only rarely contains ANSI control characters:
/// the bytes are scanned first, and when nothing strippable is found the
/// original `String` is returned as-is, without running the parser.
///
/// `\n` is kept; other control characters such as `\r` and `\t` are removed.
///
/// If stripping fails, or the stripped bytes are not valid UTF-8, the input
/// is returned unchanged and no error is reported.
pub fn strip_ansi_string_unlikely(string: String) -> String {
    // Any ASCII control byte (below 0x20) other than LF (0x0A) would be
    // stripped. This range also covers ESC (0x1B), the primary start of ANSI
    // sequences.
    let has_strippable_byte = string.bytes().any(|byte| byte < 0x20 && byte != b'\n');
    if !has_strippable_byte {
        return string;
    }

    let cleaned = strip_ansi_escapes::strip(&string)
        .ok()
        .and_then(|bytes| String::from_utf8(bytes).ok());

    // Fall back to the untouched input on a strip error or invalid UTF-8.
    cleaned.unwrap_or(string)
}
/// Strips ANSI escape sequences and some ASCII control characters from an
/// owned `String`.
///
/// Intended for input that is likely to contain ANSI control characters:
/// the input is passed straight to the stripping routine without a
/// pre-scan.
///
/// `\n` is kept; other control characters such as `\r` and `\t` are removed.
///
/// If stripping fails, or the stripped bytes are not valid UTF-8, the input
/// is returned unchanged and no error is reported.
pub fn strip_ansi_string_likely(string: String) -> String {
    let cleaned = strip_ansi_escapes::strip(&string)
        .ok()
        .and_then(|bytes| String::from_utf8(bytes).ok());

    // Fall back to the untouched input on a strip error or invalid UTF-8.
    cleaned.unwrap_or(string)
}

View File

@ -1,3 +1,4 @@
mod deansi;
pub mod locale;
pub mod utils;
@ -6,3 +7,7 @@ pub use utils::{
enable_vt_processing, get_default_config, get_default_env, get_ls_colors,
stderr_write_all_and_flush, stdout_write_all_and_flush,
};
pub use deansi::{
strip_ansi_likely, strip_ansi_string_likely, strip_ansi_string_unlikely, strip_ansi_unlikely,
};