From 9c7feb2b1915e1855410c0e0b39796005f6bb69d Mon Sep 17 00:00:00 2001 From: Stefan Holderbach Date: Wed, 9 Feb 2022 00:43:32 +0100 Subject: [PATCH] Reduce table allocs: only strip ANSI if necessary (#4378) For the width calculations for table layout the `strip_ansi` function has to be called frequently. By checking for the ASCII control chars (0x00 to 0x1f) except `\n` that are stripped by `strip_ansi_escapes` the number of necessary allocations can be reduced significantly for the simple case of text not containing ANSI escapes. **Benchmark:** ``` nu -c "for i in 0..1000 { ls } | flatten | table" ``` **Allocation reduction** Running on the nushell repo root as the directory, this change reduces the allocation volume by approximately 400 MB (Measured run via KDE heaptrack) **Speed improvement to output** Measured via `/usr/bin/time -v` *before* ``` Command being timed: "./eager_nu -c for i in 0..1000 {ls} | flatten | table" User time (seconds): 0.87 System time (seconds): 0.14 Percent of CPU this job got: 87% Elapsed (wall clock) time (h:mm:ss or m:ss): 0:01.16 Average shared text size (kbytes): 0 Average unshared data size (kbytes): 0 Average stack size (kbytes): 0 Average total size (kbytes): 0 Maximum resident set size (kbytes): 18888 Average resident set size (kbytes): 0 Major (requiring I/O) page faults: 0 Minor (reclaiming a frame) page faults: 4809 Voluntary context switches: 38 Involuntary context switches: 14 Swaps: 0 File system inputs: 0 File system outputs: 0 Socket messages sent: 0 Socket messages received: 0 Signals delivered: 0 Page size (bytes): 4096 Exit status: 0 ``` *after* ``` Command being timed: "./lazy_nu -c for i in 0..1000 {ls} | flatten | table" User time (seconds): 0.63 System time (seconds): 0.14 Percent of CPU this job got: 80% Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.97 Average shared text size (kbytes): 0 Average unshared data size (kbytes): 0 Average stack size (kbytes): 0 Average total size (kbytes): 0 Maximum resident set 
size (kbytes): 18660 Average resident set size (kbytes): 0 Major (requiring I/O) page faults: 0 Minor (reclaiming a frame) page faults: 5149 Voluntary context switches: 24 Involuntary context switches: 5 Swaps: 0 File system inputs: 0 File system outputs: 0 Socket messages sent: 0 Socket messages received: 0 Signals delivered: 0 Page size (bytes): 4096 Exit status: 0 ``` --- crates/nu-table/src/wrap.rs | 40 +++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/crates/nu-table/src/wrap.rs b/crates/nu-table/src/wrap.rs index bd0e95275a..3180882b68 100644 --- a/crates/nu-table/src/wrap.rs +++ b/crates/nu-table/src/wrap.rs @@ -1,6 +1,7 @@ use crate::table::TextStyle; use ansi_cut::AnsiCut; use nu_ansi_term::Style; +use std::borrow::Cow; use std::collections::HashMap; use std::{fmt::Display, iter::Iterator}; use unicode_width::{UnicodeWidthChar, UnicodeWidthStr}; @@ -53,24 +54,24 @@ impl Display for Line { } } -fn strip_ansi(astring: &str) -> String { - if let Ok(bytes) = strip_ansi_escapes::strip(astring) { - String::from_utf8_lossy(&bytes).to_string() - } else { - astring.to_string() - } -} - -fn unicode_width_strip_ansi(astring: &str) -> usize { - let stripped_string: String = { - if let Ok(bytes) = strip_ansi_escapes::strip(astring) { - String::from_utf8_lossy(&bytes).to_string() - } else { - astring.to_string() +/// Removes ANSI escape codes and some ASCII control characters +/// +/// Keeps `\n` removes `\r`, `\t` etc. +/// +/// If parsing fails silently returns the input string +fn strip_ansi(string: &str) -> Cow<str> { + // Check if any ascii control character except LF(0x0A = 10) is present, + // which will be stripped. 
Includes the primary start of ANSI sequences ESC + // (0x1B = decimal 27) + if string.bytes().any(|x| matches!(x, 0..=9 | 11..=31)) { + if let Ok(stripped) = strip_ansi_escapes::strip(string) { + if let Ok(new_string) = String::from_utf8(stripped) { + return Cow::Owned(new_string); + } } - }; - - UnicodeWidthStr::width(&stripped_string[..]) + } + // Else case includes failures to parse! + Cow::Borrowed(string) } // fn special_width(astring: &str) -> usize { @@ -108,9 +109,10 @@ pub fn split_sublines(input: &str) -> Vec> { // let c = strip_ansi(x).chars().count(); // let u = special_width(x); // std::cmp::max(c, u) + let stripped = strip_ansi(x); - let c = strip_ansi(x).chars().count(); - let u = unicode_width_strip_ansi(x); + let c = stripped.chars().count(); + let u = stripped.width(); std::cmp::max(c, u) }, })