refactor: move SplitRead and MultiLife into nu-utils (#16482)

This commit is contained in:
Bahex
2025-08-21 22:01:32 +03:00
committed by GitHub
parent f7aee7c4fe
commit a08e1f6217
7 changed files with 190 additions and 185 deletions

View File

@@ -8,6 +8,7 @@ use crate::{
IntRange, PipelineData, ShellError, Signals, Span, Type, Value,
shell_error::{bridge::ShellErrorBridge, io::IoError},
};
use nu_utils::SplitRead as SplitReadInner;
use serde::{Deserialize, Serialize};
use std::ops::Bound;
#[cfg(unix)]
@@ -864,166 +865,8 @@ impl Iterator for Lines {
}
}
mod split_read {
    use std::io::{BufRead, ErrorKind};

    use memchr::memmem::Finder;

    /// Iterator that splits the bytes of a [`BufRead`] on a delimiter.
    ///
    /// Every item is the run of bytes between two occurrences of the delimiter;
    /// the delimiter itself is never part of an item. Input that ends with the
    /// delimiter produces a trailing empty item, and empty input produces a
    /// single empty item.
    pub struct SplitRead<R> {
        /// Source of bytes; `None` once the reader has been dropped (after end
        /// of input or a non-retryable error).
        reader: Option<R>,
        /// Bytes read from `reader` that have not been yielded yet; `None` once
        /// the final item has been returned.
        buf: Option<Vec<u8>>,
        /// Substring searcher for the delimiter.
        finder: Finder<'static>,
    }

    impl<R: BufRead> SplitRead<R> {
        /// Creates an iterator over the pieces of `reader` separated by `delim`.
        ///
        /// # Panics
        ///
        /// Panics in builds with debug assertions enabled if `delim` is empty.
        pub fn new(reader: R, delim: impl AsRef<[u8]>) -> Self {
            // empty delimiter results in an infinite stream of empty items
            debug_assert!(!delim.as_ref().is_empty(), "delimiter can't be empty");
            Self {
                reader: Some(reader),
                buf: Some(Vec::new()),
                finder: Finder::new(delim.as_ref()).into_owned(),
            }
        }
    }

    impl<R: BufRead> Iterator for SplitRead<R> {
        type Item = Result<Vec<u8>, std::io::Error>;

        /// Returns the next delimiter-separated piece of the input.
        ///
        /// Reads that fail with [`ErrorKind::Interrupted`] are retried. Any
        /// other read error is returned as an item and the reader is dropped;
        /// the following call then yields whatever was already buffered as the
        /// final item.
        fn next(&mut self) -> Option<Self::Item> {
            // `buf` is `None` once the final item has been handed out.
            let buf = self.buf.as_mut()?;
            let needle_len = self.finder.needle().len();
            let mut search_start = 0usize;

            loop {
                if let Some(i) = self.finder.find(&buf[search_start..]) {
                    let needle_idx = search_start + i;
                    // Keep everything after the delimiter for later calls and
                    // hand out everything before it.
                    let right = buf.split_off(needle_idx + needle_len);
                    buf.truncate(needle_idx);
                    let left = std::mem::replace(buf, right);
                    return Some(Ok(left));
                }

                if let Some(mut r) = self.reader.take() {
                    // The bytes searched so far contain no delimiter, so once
                    // more data arrives only the tail of the current buffer
                    // (where a delimiter could straddle old and new bytes)
                    // has to be scanned again.
                    search_start = buf.len().saturating_sub(needle_len + 1);

                    // Copy the pending bytes out right away so the borrow of
                    // `r` ends before deciding what happens to the reader.
                    let filled = r.fill_buf().map(|available| {
                        buf.extend_from_slice(available);
                        available.len()
                    });

                    match filled {
                        Ok(used) => {
                            r.consume(used);
                            // Zero bytes means end of input: leave the reader
                            // dropped so the next pass flushes the buffer.
                            if used != 0 {
                                self.reader = Some(r);
                            }
                        }
                        // An interrupted read is retryable. The reader must be
                        // put back, otherwise the stream would be cut short.
                        Err(e) if e.kind() == ErrorKind::Interrupted => {
                            self.reader = Some(r);
                        }
                        Err(e) => return Some(Err(e)),
                    }
                } else {
                    // Nothing left to read: the remaining bytes are the last item.
                    return self.buf.take().map(Ok);
                }
            }
        }
    }

    #[cfg(test)]
    mod tests {
        use super::*;
        use std::io::{self, BufRead, Cursor, ErrorKind, Read};

        /// Splits `reader` on `delim` and checks that exactly `expected` is
        /// produced, in order, followed by the end of the iterator.
        fn assert_fields(reader: impl BufRead, delim: &str, expected: &[&str]) {
            let mut split =
                SplitRead::new(reader, delim).map(|r| String::from_utf8(r.unwrap()).unwrap());
            for &field in expected {
                assert_eq!(split.next().as_deref(), Some(field));
            }
            assert_eq!(split.next(), None);
        }

        /// Reader whose first `fill_buf` call fails with `ErrorKind::Interrupted`.
        struct InterruptedOnce<R> {
            inner: R,
            interrupted: bool,
        }

        impl<R: Read> Read for InterruptedOnce<R> {
            fn read(&mut self, out: &mut [u8]) -> io::Result<usize> {
                self.inner.read(out)
            }
        }

        impl<R: BufRead> BufRead for InterruptedOnce<R> {
            fn fill_buf(&mut self) -> io::Result<&[u8]> {
                if !self.interrupted {
                    self.interrupted = true;
                    return Err(io::Error::from(ErrorKind::Interrupted));
                }
                self.inner.fill_buf()
            }

            fn consume(&mut self, amt: usize) {
                self.inner.consume(amt)
            }
        }

        #[test]
        fn simple() {
            let cursor = Cursor::new(String::from("foo-bar-baz"));
            assert_fields(cursor, "-", &["foo", "bar", "baz"]);
        }

        #[test]
        fn with_empty_fields() {
            let cursor = Cursor::new(String::from("\0\0foo\0\0bar\0\0\0\0baz\0\0"));
            assert_fields(cursor, "\0\0", &["", "foo", "bar", "", "baz", ""]);
        }

        #[test]
        fn complex_delimiter() {
            let cursor = Cursor::new(String::from("<|>foo<|>bar<|><|>baz<|>"));
            assert_fields(cursor, "<|>", &["", "foo", "bar", "", "baz", ""]);
        }

        #[test]
        fn all_empty() {
            let cursor = Cursor::new(String::from("<><>"));
            assert_fields(cursor, "<>", &["", "", ""]);
        }

        #[should_panic = "delimiter can't be empty"]
        #[test]
        fn empty_delimiter() {
            let s = "abc";
            let cursor = Cursor::new(String::from(s));
            let _split = SplitRead::new(cursor, "").map(|e| e.unwrap());
        }

        #[test]
        fn delimiter_spread_across_reads() {
            let reader = Cursor::new("<|>foo<|")
                .chain(Cursor::new(">bar<|><"))
                .chain(Cursor::new("|>baz<|>"));
            assert_fields(reader, "<|>", &["", "foo", "bar", "", "baz", ""]);
        }

        #[test]
        fn interrupted_read_is_retried() {
            let reader = InterruptedOnce {
                inner: Cursor::new("foo-bar"),
                interrupted: false,
            };
            assert_fields(reader, "-", &["foo", "bar"]);
        }
    }
}
/// Pairs a delimiter-splitting iterator over a buffered `SourceReader` with a
/// `Span` and `Signals`.
// NOTE(review): how `span` and `signals` are consumed is not visible in this
// hunk; presumably error reporting and interrupt checks. Confirm against the
// `Iterator` impl of this type.
pub struct SplitRead {
internal: split_read::SplitRead<BufReader<SourceReader>>,
internal: SplitReadInner<BufReader<SourceReader>>,
span: Span,
signals: Signals,
}
@@ -1036,7 +879,7 @@ impl SplitRead {
signals: Signals,
) -> Self {
Self {
internal: split_read::SplitRead::new(BufReader::new(reader), delimiter),
internal: SplitReadInner::new(BufReader::new(reader), delimiter),
span,
signals,
}

View File

@@ -15,6 +15,7 @@ pub use filesize::*;
pub use from_value::FromValue;
pub use glob::*;
pub use into_value::{IntoValue, TryIntoValue};
pub use nu_utils::MultiLife;
pub use range::{FloatRange, IntRange, Range};
pub use record::Record;
@@ -36,7 +37,7 @@ use std::{
borrow::Cow,
cmp::Ordering,
fmt::{Debug, Display, Write},
ops::{Bound, ControlFlow, Deref},
ops::{Bound, ControlFlow},
path::PathBuf,
};
@@ -1083,30 +1084,6 @@ impl Value {
&'out self,
cell_path: &[PathMember],
) -> Result<Cow<'out, Value>, ShellError> {
/// A shared reference that carries one of two lifetimes.
///
/// `Out` holds a reference valid for the longer `'out` lifetime and `Local`
/// one valid only for the shorter `'local` lifetime (`'out: 'local`). Both
/// dereference to the same `T`.
enum MultiLife<'out, 'local, T>
where
    'out: 'local,
    T: ?Sized,
{
    /// Reference that lives for `'out`.
    Out(&'out T),
    /// Reference that lives for `'local`.
    Local(&'local T),
}

impl<'out, 'local, T> Deref for MultiLife<'out, 'local, T>
where
    'out: 'local,
    T: ?Sized,
{
    type Target = T;

    /// Gives access to the referenced value regardless of the variant.
    fn deref(&self) -> &Self::Target {
        match self {
            Self::Out(target) => *target,
            Self::Local(target) => *target,
        }
    }
}
// A dummy value is required, otherwise rust doesn't allow references, which we need for
// the `std::ptr::eq` comparison
let mut store: Value = Value::test_nothing();

View File

@@ -23,6 +23,7 @@ fancy-regex = { workspace = true }
lean_string.workspace = true
lscolors = { workspace = true, default-features = false, features = ["nu-ansi-term"] }
log = { workspace = true }
memchr = { workspace = true }
num-format = { workspace = true }
strip-ansi-escapes = { workspace = true }
serde = { workspace = true }

View File

@@ -6,8 +6,10 @@ pub mod filesystem;
pub mod flatten_json;
pub mod float;
pub mod locale;
mod multilife;
mod quoting;
mod shared_cow;
mod split_read;
pub mod strings;
pub mod utils;
@@ -25,8 +27,10 @@ pub use deansi::{
pub use emoji::contains_emoji;
pub use flatten_json::JsonFlattener;
pub use float::ObviousFloat;
pub use multilife::MultiLife;
pub use quoting::{escape_quote_string, needs_quoting};
pub use shared_cow::SharedCow;
pub use split_read::SplitRead;
#[cfg(unix)]
pub use filesystem::users;

View File

@@ -0,0 +1,25 @@
use std::ops::Deref;
/// A shared reference that carries one of two lifetimes.
///
/// `Out` holds a reference valid for the longer `'out` lifetime and `Local`
/// one valid only for the shorter `'local` lifetime (`'out: 'local`). Both
/// dereference to the same `T`.
pub enum MultiLife<'out, 'local, T>
where
    'out: 'local,
    T: ?Sized,
{
    /// Reference that lives for `'out`.
    Out(&'out T),
    /// Reference that lives for `'local`.
    Local(&'local T),
}

impl<'out, 'local, T> Deref for MultiLife<'out, 'local, T>
where
    'out: 'local,
    T: ?Sized,
{
    type Target = T;

    /// Gives access to the referenced value regardless of the variant.
    fn deref(&self) -> &Self::Target {
        match self {
            Self::Out(target) => *target,
            Self::Local(target) => *target,
        }
    }
}

View File

@@ -0,0 +1,154 @@
use std::io::{BufRead, ErrorKind};
use memchr::memmem::Finder;
/// Iterator that splits the bytes of a buffered reader on a delimiter.
///
/// Each item is the run of bytes found between two occurrences of the
/// delimiter; the delimiter itself is not included in any item.
pub struct SplitRead<R> {
/// Source of bytes; `None` once the reader has been dropped (for example
/// after it reported end of input).
reader: Option<R>,
/// Bytes read from `reader` that have not been yielded yet; `None` once the
/// final item has been returned.
buf: Option<Vec<u8>>,
/// Substring searcher for the delimiter.
finder: Finder<'static>,
}
impl<R: BufRead> SplitRead<R> {
    /// Creates an iterator over the pieces of `reader` separated by `delim`.
    ///
    /// # Panics
    ///
    /// Panics in builds with debug assertions enabled if `delim` is empty.
    pub fn new(reader: R, delim: impl AsRef<[u8]>) -> Self {
        let needle = delim.as_ref();
        // An empty needle matches at every position, so the iterator would
        // keep producing empty items forever.
        debug_assert!(!needle.is_empty(), "delimiter can't be empty");

        let finder = Finder::new(needle).into_owned();
        Self {
            reader: Some(reader),
            buf: Some(Vec::new()),
            finder,
        }
    }
}
impl<R: BufRead> Iterator for SplitRead<R> {
type Item = Result<Vec<u8>, std::io::Error>;
fn next(&mut self) -> Option<Self::Item> {
let buf = self.buf.as_mut()?;
let mut search_start = 0usize;
loop {
if let Some(i) = self.finder.find(&buf[search_start..]) {
let needle_idx = search_start + i;
let right = buf.split_off(needle_idx + self.finder.needle().len());
buf.truncate(needle_idx);
let left = std::mem::replace(buf, right);
return Some(Ok(left));
}
if let Some(mut r) = self.reader.take() {
search_start = buf.len().saturating_sub(self.finder.needle().len() + 1);
let available = match r.fill_buf() {
Ok(n) => n,
Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
Err(e) => return Some(Err(e)),
};
buf.extend_from_slice(available);
let used = available.len();
r.consume(used);
if used != 0 {
self.reader = Some(r);
}
continue;
} else {
return self.buf.take().map(Ok);
}
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::io::{self, Cursor, Read};
#[test]
fn simple() {
let s = "foo-bar-baz";
let cursor = Cursor::new(String::from(s));
let mut split = SplitRead::new(cursor, "-").map(|r| String::from_utf8(r.unwrap()).unwrap());
assert_eq!(split.next().as_deref(), Some("foo"));
assert_eq!(split.next().as_deref(), Some("bar"));
assert_eq!(split.next().as_deref(), Some("baz"));
assert_eq!(split.next(), None);
}
#[test]
fn with_empty_fields() -> Result<(), io::Error> {
let s = "\0\0foo\0\0bar\0\0\0\0baz\0\0";
let cursor = Cursor::new(String::from(s));
let mut split =
SplitRead::new(cursor, "\0\0").map(|r| String::from_utf8(r.unwrap()).unwrap());
assert_eq!(split.next().as_deref(), Some(""));
assert_eq!(split.next().as_deref(), Some("foo"));
assert_eq!(split.next().as_deref(), Some("bar"));
assert_eq!(split.next().as_deref(), Some(""));
assert_eq!(split.next().as_deref(), Some("baz"));
assert_eq!(split.next().as_deref(), Some(""));
assert_eq!(split.next().as_deref(), None);
Ok(())
}
#[test]
fn complex_delimiter() -> Result<(), io::Error> {
let s = "<|>foo<|>bar<|><|>baz<|>";
let cursor = Cursor::new(String::from(s));
let mut split =
SplitRead::new(cursor, "<|>").map(|r| String::from_utf8(r.unwrap()).unwrap());
assert_eq!(split.next().as_deref(), Some(""));
assert_eq!(split.next().as_deref(), Some("foo"));
assert_eq!(split.next().as_deref(), Some("bar"));
assert_eq!(split.next().as_deref(), Some(""));
assert_eq!(split.next().as_deref(), Some("baz"));
assert_eq!(split.next().as_deref(), Some(""));
assert_eq!(split.next().as_deref(), None);
Ok(())
}
#[test]
fn all_empty() -> Result<(), io::Error> {
let s = "<><>";
let cursor = Cursor::new(String::from(s));
let mut split =
SplitRead::new(cursor, "<>").map(|r| String::from_utf8(r.unwrap()).unwrap());
assert_eq!(split.next().as_deref(), Some(""));
assert_eq!(split.next().as_deref(), Some(""));
assert_eq!(split.next().as_deref(), Some(""));
assert_eq!(split.next(), None);
Ok(())
}
#[should_panic = "delimiter can't be empty"]
#[test]
fn empty_delimiter() {
let s = "abc";
let cursor = Cursor::new(String::from(s));
let _split = SplitRead::new(cursor, "").map(|e| e.unwrap());
}
#[test]
fn delimiter_spread_across_reads() {
let reader = Cursor::new("<|>foo<|")
.chain(Cursor::new(">bar<|><"))
.chain(Cursor::new("|>baz<|>"));
let mut split =
SplitRead::new(reader, "<|>").map(|r| String::from_utf8(r.unwrap()).unwrap());
assert_eq!(split.next().unwrap(), "");
assert_eq!(split.next().unwrap(), "foo");
assert_eq!(split.next().unwrap(), "bar");
assert_eq!(split.next().unwrap(), "");
assert_eq!(split.next().unwrap(), "baz");
assert_eq!(split.next().unwrap(), "");
assert_eq!(split.next(), None);
}
}