Reduce table allocs: only strip ANSI if necessary (#4378)

The width calculations for table layout call the `strip_ansi` function
frequently. By first checking whether the text contains any of the ASCII
control chars (0x00 to 0x1f, except `\n`) that `strip_ansi_escapes` would
strip, the allocation can be skipped when none are present. This
significantly reduces the number of allocations in the simple case of text
not containing ANSI escapes.

**Benchmark:**

```
nu -c "for i in 0..1000 { ls } | flatten | table"
```

**Allocation reduction**

With the nushell repo root as the working directory, this change reduces the
total allocation volume by approximately 400 MB.

(Measured via KDE heaptrack)

**Speed improvement to output**

Measured via `/usr/bin/time -v`

*before*

```
Command being timed: "./eager_nu -c for i in 0..1000 {ls} | flatten | table"
	User time (seconds): 0.87
	System time (seconds): 0.14
	Percent of CPU this job got: 87%
	Elapsed (wall clock) time (h:mm:ss or m:ss): 0:01.16
	Average shared text size (kbytes): 0
	Average unshared data size (kbytes): 0
	Average stack size (kbytes): 0
	Average total size (kbytes): 0
	Maximum resident set size (kbytes): 18888
	Average resident set size (kbytes): 0
	Major (requiring I/O) page faults: 0
	Minor (reclaiming a frame) page faults: 4809
	Voluntary context switches: 38
	Involuntary context switches: 14
	Swaps: 0
	File system inputs: 0
	File system outputs: 0
	Socket messages sent: 0
	Socket messages received: 0
	Signals delivered: 0
	Page size (bytes): 4096
	Exit status: 0
```

*after*

```
Command being timed: "./lazy_nu -c for i in 0..1000 {ls} | flatten | table"
	User time (seconds): 0.63
	System time (seconds): 0.14
	Percent of CPU this job got: 80%
	Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.97
	Average shared text size (kbytes): 0
	Average unshared data size (kbytes): 0
	Average stack size (kbytes): 0
	Average total size (kbytes): 0
	Maximum resident set size (kbytes): 18660
	Average resident set size (kbytes): 0
	Major (requiring I/O) page faults: 0
	Minor (reclaiming a frame) page faults: 5149
	Voluntary context switches: 24
	Involuntary context switches: 5
	Swaps: 0
	File system inputs: 0
	File system outputs: 0
	Socket messages sent: 0
	Socket messages received: 0
	Signals delivered: 0
	Page size (bytes): 4096
	Exit status: 0
```
This commit is contained in:
Stefan Holderbach 2022-02-09 00:43:32 +01:00 committed by GitHub
parent cf20eed7bc
commit 9c7feb2b19
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1,6 +1,7 @@
use crate::table::TextStyle;
use ansi_cut::AnsiCut;
use nu_ansi_term::Style;
use std::borrow::Cow;
use std::collections::HashMap;
use std::{fmt::Display, iter::Iterator};
use unicode_width::{UnicodeWidthChar, UnicodeWidthStr};
@ -53,24 +54,24 @@ impl Display for Line {
}
}
fn strip_ansi(astring: &str) -> String {
if let Ok(bytes) = strip_ansi_escapes::strip(astring) {
String::from_utf8_lossy(&bytes).to_string()
} else {
astring.to_string()
}
}
fn unicode_width_strip_ansi(astring: &str) -> usize {
let stripped_string: String = {
if let Ok(bytes) = strip_ansi_escapes::strip(astring) {
String::from_utf8_lossy(&bytes).to_string()
} else {
astring.to_string()
/// Removes ANSI escape codes and some ASCII control characters
///
/// Keeps `\n` removes `\r`, `\t` etc.
///
/// If parsing fails silently returns the input string
fn strip_ansi(string: &str) -> Cow<str> {
// Check if any ascii control character except LF(0x0A = 10) is present,
// which will be stripped. Includes the primary start of ANSI sequences ESC
// (0x1B = decimal 27)
if string.bytes().any(|x| matches!(x, 0..=9 | 11..=31)) {
if let Ok(stripped) = strip_ansi_escapes::strip(string) {
if let Ok(new_string) = String::from_utf8(stripped) {
return Cow::Owned(new_string);
}
}
};
UnicodeWidthStr::width(&stripped_string[..])
}
// Else case includes failures to parse!
Cow::Borrowed(string)
}
// fn special_width(astring: &str) -> usize {
@ -108,9 +109,10 @@ pub fn split_sublines(input: &str) -> Vec<Vec<Subline>> {
// let c = strip_ansi(x).chars().count();
// let u = special_width(x);
// std::cmp::max(c, u)
let stripped = strip_ansi(x);
let c = strip_ansi(x).chars().count();
let u = unicode_width_strip_ansi(x);
let c = stripped.chars().count();
let u = stripped.width();
std::cmp::max(c, u)
},
})