forked from extern/nushell
"maybe text codec" version 2 (#871)
* Add a RawStream that can be binary or string * Finish up updating the into's
This commit is contained in:
@ -1,8 +1,6 @@
|
||||
use std::sync::{atomic::AtomicBool, Arc};
|
||||
|
||||
use crate::{
|
||||
ast::PathMember, ByteStream, Config, ShellError, Span, StringStream, Value, ValueStream,
|
||||
};
|
||||
use crate::{ast::PathMember, Config, ListStream, RawStream, ShellError, Span, Value};
|
||||
|
||||
/// The foundational abstraction for input and output to commands
|
||||
///
|
||||
@ -36,9 +34,8 @@ use crate::{
|
||||
#[derive(Debug)]
|
||||
pub enum PipelineData {
|
||||
Value(Value, Option<PipelineMetadata>),
|
||||
ListStream(ValueStream, Option<PipelineMetadata>),
|
||||
StringStream(StringStream, Span, Option<PipelineMetadata>),
|
||||
ByteStream(ByteStream, Span, Option<PipelineMetadata>),
|
||||
ListStream(ListStream, Option<PipelineMetadata>),
|
||||
RawStream(RawStream, Span, Option<PipelineMetadata>),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
@ -63,8 +60,7 @@ impl PipelineData {
|
||||
pub fn metadata(&self) -> Option<PipelineMetadata> {
|
||||
match self {
|
||||
PipelineData::ListStream(_, x) => x.clone(),
|
||||
PipelineData::ByteStream(_, _, x) => x.clone(),
|
||||
PipelineData::StringStream(_, _, x) => x.clone(),
|
||||
PipelineData::RawStream(_, _, x) => x.clone(),
|
||||
PipelineData::Value(_, x) => x.clone(),
|
||||
}
|
||||
}
|
||||
@ -72,8 +68,7 @@ impl PipelineData {
|
||||
pub fn set_metadata(mut self, metadata: Option<PipelineMetadata>) -> Self {
|
||||
match &mut self {
|
||||
PipelineData::ListStream(_, x) => *x = metadata,
|
||||
PipelineData::ByteStream(_, _, x) => *x = metadata,
|
||||
PipelineData::StringStream(_, _, x) => *x = metadata,
|
||||
PipelineData::RawStream(_, _, x) => *x = metadata,
|
||||
PipelineData::Value(_, x) => *x = metadata,
|
||||
}
|
||||
|
||||
@ -88,33 +83,51 @@ impl PipelineData {
|
||||
vals: s.collect(),
|
||||
span, // FIXME?
|
||||
},
|
||||
PipelineData::StringStream(s, ..) => {
|
||||
let mut output = String::new();
|
||||
PipelineData::RawStream(mut s, ..) => {
|
||||
let mut items = vec![];
|
||||
|
||||
for item in s {
|
||||
match item {
|
||||
Ok(s) => output.push_str(&s),
|
||||
Err(err) => return Value::Error { error: err },
|
||||
}
|
||||
}
|
||||
Value::String {
|
||||
val: output,
|
||||
span, // FIXME?
|
||||
}
|
||||
}
|
||||
PipelineData::ByteStream(s, ..) => {
|
||||
let mut output = vec![];
|
||||
|
||||
for item in s {
|
||||
match item {
|
||||
Ok(s) => output.extend(&s),
|
||||
Err(err) => return Value::Error { error: err },
|
||||
for val in &mut s {
|
||||
match val {
|
||||
Ok(val) => {
|
||||
items.push(val);
|
||||
}
|
||||
Err(e) => {
|
||||
return Value::Error { error: e };
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Value::Binary {
|
||||
val: output,
|
||||
span, // FIXME?
|
||||
if s.is_binary {
|
||||
let mut output = vec![];
|
||||
for item in items {
|
||||
match item.as_binary() {
|
||||
Ok(item) => {
|
||||
output.extend(item);
|
||||
}
|
||||
Err(err) => {
|
||||
return Value::Error { error: err };
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Value::Binary {
|
||||
val: output,
|
||||
span, // FIXME?
|
||||
}
|
||||
} else {
|
||||
let mut output = String::new();
|
||||
for item in items {
|
||||
match item.as_string() {
|
||||
Ok(s) => output.push_str(&s),
|
||||
Err(err) => {
|
||||
return Value::Error { error: err };
|
||||
}
|
||||
}
|
||||
}
|
||||
Value::String {
|
||||
val: output,
|
||||
span, // FIXME?
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -134,9 +147,30 @@ impl PipelineData {
|
||||
match self {
|
||||
PipelineData::Value(v, ..) => Ok(v.into_string(separator, config)),
|
||||
PipelineData::ListStream(s, ..) => Ok(s.into_string(separator, config)),
|
||||
PipelineData::StringStream(s, ..) => s.into_string(separator),
|
||||
PipelineData::ByteStream(s, ..) => {
|
||||
Ok(String::from_utf8_lossy(&s.into_vec()?).to_string())
|
||||
PipelineData::RawStream(s, ..) => {
|
||||
let mut items = vec![];
|
||||
|
||||
for val in s {
|
||||
match val {
|
||||
Ok(val) => {
|
||||
items.push(val);
|
||||
}
|
||||
Err(e) => {
|
||||
return Err(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut output = String::new();
|
||||
for item in items {
|
||||
match item.as_string() {
|
||||
Ok(s) => output.push_str(&s),
|
||||
Err(err) => {
|
||||
return Err(err);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(output)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -191,9 +225,9 @@ impl PipelineData {
|
||||
Ok(vals.into_iter().map(f).into_pipeline_data(ctrlc))
|
||||
}
|
||||
PipelineData::ListStream(stream, ..) => Ok(stream.map(f).into_pipeline_data(ctrlc)),
|
||||
PipelineData::StringStream(stream, span, ..) => Ok(stream
|
||||
PipelineData::RawStream(stream, ..) => Ok(stream
|
||||
.map(move |x| match x {
|
||||
Ok(s) => f(Value::String { val: s, span }),
|
||||
Ok(v) => f(v),
|
||||
Err(err) => Value::Error { error: err },
|
||||
})
|
||||
.into_pipeline_data(ctrlc)),
|
||||
@ -205,11 +239,6 @@ impl PipelineData {
|
||||
Value::Error { error } => Err(error),
|
||||
v => Ok(v.into_pipeline_data()),
|
||||
},
|
||||
PipelineData::ByteStream(_, span, ..) => Err(ShellError::UnsupportedInput(
|
||||
"Binary output from this command may need to be decoded using the 'decode' command"
|
||||
.into(),
|
||||
span,
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
@ -232,9 +261,9 @@ impl PipelineData {
|
||||
PipelineData::ListStream(stream, ..) => {
|
||||
Ok(stream.map(f).flatten().into_pipeline_data(ctrlc))
|
||||
}
|
||||
PipelineData::StringStream(stream, span, ..) => Ok(stream
|
||||
PipelineData::RawStream(stream, ..) => Ok(stream
|
||||
.map(move |x| match x {
|
||||
Ok(s) => Value::String { val: s, span },
|
||||
Ok(v) => v,
|
||||
Err(err) => Value::Error { error: err },
|
||||
})
|
||||
.map(f)
|
||||
@ -245,11 +274,6 @@ impl PipelineData {
|
||||
Err(error) => Err(error),
|
||||
},
|
||||
PipelineData::Value(v, ..) => Ok(f(v).into_iter().into_pipeline_data(ctrlc)),
|
||||
PipelineData::ByteStream(_, span, ..) => Err(ShellError::UnsupportedInput(
|
||||
"Binary output from this command may need to be decoded using the 'decode' command"
|
||||
.into(),
|
||||
span,
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
@ -267,14 +291,13 @@ impl PipelineData {
|
||||
Ok(vals.into_iter().filter(f).into_pipeline_data(ctrlc))
|
||||
}
|
||||
PipelineData::ListStream(stream, ..) => Ok(stream.filter(f).into_pipeline_data(ctrlc)),
|
||||
PipelineData::StringStream(stream, span, ..) => Ok(stream
|
||||
PipelineData::RawStream(stream, ..) => Ok(stream
|
||||
.map(move |x| match x {
|
||||
Ok(s) => Value::String { val: s, span },
|
||||
Ok(v) => v,
|
||||
Err(err) => Value::Error { error: err },
|
||||
})
|
||||
.filter(f)
|
||||
.into_pipeline_data(ctrlc)),
|
||||
|
||||
PipelineData::Value(Value::Range { val, .. }, ..) => {
|
||||
Ok(val.into_range_iter()?.filter(f).into_pipeline_data(ctrlc))
|
||||
}
|
||||
@ -285,11 +308,6 @@ impl PipelineData {
|
||||
Ok(Value::Nothing { span: v.span()? }.into_pipeline_data())
|
||||
}
|
||||
}
|
||||
PipelineData::ByteStream(_, span, ..) => Err(ShellError::UnsupportedInput(
|
||||
"Binary output from this command may need to be decoded using the 'decode' command"
|
||||
.into(),
|
||||
span,
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -305,7 +323,7 @@ impl IntoIterator for PipelineData {
|
||||
match self {
|
||||
PipelineData::Value(Value::List { vals, .. }, metadata) => {
|
||||
PipelineIterator(PipelineData::ListStream(
|
||||
ValueStream {
|
||||
ListStream {
|
||||
stream: Box::new(vals.into_iter()),
|
||||
ctrlc: None,
|
||||
},
|
||||
@ -315,14 +333,14 @@ impl IntoIterator for PipelineData {
|
||||
PipelineData::Value(Value::Range { val, .. }, metadata) => {
|
||||
match val.into_range_iter() {
|
||||
Ok(iter) => PipelineIterator(PipelineData::ListStream(
|
||||
ValueStream {
|
||||
ListStream {
|
||||
stream: Box::new(iter),
|
||||
ctrlc: None,
|
||||
},
|
||||
metadata,
|
||||
)),
|
||||
Err(error) => PipelineIterator(PipelineData::ListStream(
|
||||
ValueStream {
|
||||
ListStream {
|
||||
stream: Box::new(std::iter::once(Value::Error { error })),
|
||||
ctrlc: None,
|
||||
},
|
||||
@ -343,18 +361,8 @@ impl Iterator for PipelineIterator {
|
||||
PipelineData::Value(Value::Nothing { .. }, ..) => None,
|
||||
PipelineData::Value(v, ..) => Some(std::mem::take(v)),
|
||||
PipelineData::ListStream(stream, ..) => stream.next(),
|
||||
PipelineData::StringStream(stream, span, ..) => stream.next().map(|x| match x {
|
||||
Ok(x) => Value::String {
|
||||
val: x,
|
||||
span: *span,
|
||||
},
|
||||
Err(err) => Value::Error { error: err },
|
||||
}),
|
||||
PipelineData::ByteStream(stream, span, ..) => stream.next().map(|x| match x {
|
||||
Ok(x) => Value::Binary {
|
||||
val: x,
|
||||
span: *span,
|
||||
},
|
||||
PipelineData::RawStream(stream, ..) => stream.next().map(|x| match x {
|
||||
Ok(x) => x,
|
||||
Err(err) => Value::Error { error: err },
|
||||
}),
|
||||
}
|
||||
@ -391,7 +399,7 @@ where
|
||||
{
|
||||
fn into_pipeline_data(self, ctrlc: Option<Arc<AtomicBool>>) -> PipelineData {
|
||||
PipelineData::ListStream(
|
||||
ValueStream {
|
||||
ListStream {
|
||||
stream: Box::new(self.into_iter().map(Into::into)),
|
||||
ctrlc,
|
||||
},
|
||||
@ -405,7 +413,7 @@ where
|
||||
ctrlc: Option<Arc<AtomicBool>>,
|
||||
) -> PipelineData {
|
||||
PipelineData::ListStream(
|
||||
ValueStream {
|
||||
ListStream {
|
||||
stream: Box::new(self.into_iter().map(Into::into)),
|
||||
ctrlc,
|
||||
},
|
||||
|
@ -239,6 +239,18 @@ impl Value {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_binary(&self) -> Result<&[u8], ShellError> {
|
||||
match self {
|
||||
Value::Binary { val, .. } => Ok(val),
|
||||
Value::String { val, .. } => Ok(val.as_bytes()),
|
||||
x => Err(ShellError::CantConvert(
|
||||
"binary".into(),
|
||||
x.get_type().to_string(),
|
||||
self.span()?,
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_record(&self) -> Result<(&[String], &[Value]), ShellError> {
|
||||
match self {
|
||||
Value::Record { cols, vals, .. } => Ok((cols, vals)),
|
||||
|
@ -7,95 +7,139 @@ use std::{
|
||||
},
|
||||
};
|
||||
|
||||
/// A single buffer of binary data streamed over multiple parts. Optionally contains ctrl-c that can be used
|
||||
/// to break the stream.
|
||||
pub struct ByteStream {
|
||||
pub struct RawStream {
|
||||
pub stream: Box<dyn Iterator<Item = Result<Vec<u8>, ShellError>> + Send + 'static>,
|
||||
pub leftover: Vec<u8>,
|
||||
pub ctrlc: Option<Arc<AtomicBool>>,
|
||||
pub is_binary: bool,
|
||||
pub span: Span,
|
||||
}
|
||||
impl ByteStream {
|
||||
pub fn into_vec(self) -> Result<Vec<u8>, ShellError> {
|
||||
|
||||
impl RawStream {
|
||||
pub fn new(
|
||||
stream: Box<dyn Iterator<Item = Result<Vec<u8>, ShellError>> + Send + 'static>,
|
||||
ctrlc: Option<Arc<AtomicBool>>,
|
||||
) -> Self {
|
||||
Self {
|
||||
stream,
|
||||
leftover: vec![],
|
||||
ctrlc,
|
||||
is_binary: false,
|
||||
span: Span::new(0, 0),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn into_bytes(self) -> Result<Vec<u8>, ShellError> {
|
||||
let mut output = vec![];
|
||||
|
||||
for item in self.stream {
|
||||
output.append(&mut item?);
|
||||
output.extend(item?);
|
||||
}
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
}
|
||||
impl Debug for ByteStream {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("ByteStream").finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for ByteStream {
|
||||
type Item = Result<Vec<u8>, ShellError>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if let Some(ctrlc) = &self.ctrlc {
|
||||
if ctrlc.load(Ordering::SeqCst) {
|
||||
None
|
||||
} else {
|
||||
self.stream.next()
|
||||
}
|
||||
} else {
|
||||
self.stream.next()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A single string streamed over multiple parts. Optionally contains ctrl-c that can be used
|
||||
/// to break the stream.
|
||||
pub struct StringStream {
|
||||
pub stream: Box<dyn Iterator<Item = Result<String, ShellError>> + Send + 'static>,
|
||||
pub ctrlc: Option<Arc<AtomicBool>>,
|
||||
}
|
||||
impl StringStream {
|
||||
pub fn into_string(self, separator: &str) -> Result<String, ShellError> {
|
||||
pub fn into_string(self) -> Result<String, ShellError> {
|
||||
let mut output = String::new();
|
||||
|
||||
let mut first = true;
|
||||
for s in self.stream {
|
||||
output.push_str(&s?);
|
||||
|
||||
if !first {
|
||||
output.push_str(separator);
|
||||
} else {
|
||||
first = false;
|
||||
}
|
||||
for item in self {
|
||||
output.push_str(&item?.as_string()?);
|
||||
}
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
pub fn from_stream(
|
||||
input: impl Iterator<Item = Result<String, ShellError>> + Send + 'static,
|
||||
ctrlc: Option<Arc<AtomicBool>>,
|
||||
) -> StringStream {
|
||||
StringStream {
|
||||
stream: Box::new(input),
|
||||
ctrlc,
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Debug for StringStream {
|
||||
impl Debug for RawStream {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("StringStream").finish()
|
||||
f.debug_struct("RawStream").finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for StringStream {
|
||||
type Item = Result<String, ShellError>;
|
||||
impl Iterator for RawStream {
|
||||
type Item = Result<Value, ShellError>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if let Some(ctrlc) = &self.ctrlc {
|
||||
if ctrlc.load(Ordering::SeqCst) {
|
||||
None
|
||||
} else {
|
||||
self.stream.next()
|
||||
// If we know we're already binary, just output that
|
||||
if self.is_binary {
|
||||
match self.stream.next() {
|
||||
Some(buffer) => match buffer {
|
||||
Ok(mut v) => {
|
||||
while let Some(b) = self.leftover.pop() {
|
||||
v.insert(0, b);
|
||||
}
|
||||
Some(Ok(Value::Binary {
|
||||
val: v,
|
||||
span: self.span,
|
||||
}))
|
||||
}
|
||||
Err(e) => Some(Err(e)),
|
||||
},
|
||||
None => None,
|
||||
}
|
||||
} else {
|
||||
self.stream.next()
|
||||
// We *may* be text. We're only going to try utf-8. Other decodings
|
||||
// needs to be taken as binary first, then passed through `decode`.
|
||||
match self.stream.next() {
|
||||
Some(buffer) => match buffer {
|
||||
Ok(mut v) => {
|
||||
while let Some(b) = self.leftover.pop() {
|
||||
v.insert(0, b);
|
||||
}
|
||||
|
||||
match String::from_utf8(v.clone()) {
|
||||
Ok(s) => {
|
||||
// Great, we have a complete string, let's output it
|
||||
Some(Ok(Value::String {
|
||||
val: s,
|
||||
span: self.span,
|
||||
}))
|
||||
}
|
||||
Err(err) => {
|
||||
// Okay, we *might* have a string but we've also got some errors
|
||||
if v.is_empty() {
|
||||
// We can just end here
|
||||
None
|
||||
} else if v.len() > 3
|
||||
&& (v.len() - err.utf8_error().valid_up_to() > 3)
|
||||
{
|
||||
// As UTF-8 characters are max 4 bytes, if we have more than that in error we know
|
||||
// that it's not just a character spanning two frames.
|
||||
// We now know we are definitely binary, so switch to binary and stay there.
|
||||
self.is_binary = true;
|
||||
Some(Ok(Value::Binary {
|
||||
val: v,
|
||||
span: self.span,
|
||||
}))
|
||||
} else {
|
||||
// Okay, we have a tiny bit of error at the end of the buffer. This could very well be
|
||||
// a character that spans two frames. Since this is the case, remove the error from
|
||||
// the current frame an dput it in the leftover buffer.
|
||||
self.leftover =
|
||||
v[(err.utf8_error().valid_up_to() + 1)..].to_vec();
|
||||
|
||||
let buf = v[0..err.utf8_error().valid_up_to()].to_vec();
|
||||
|
||||
match String::from_utf8(buf) {
|
||||
Ok(s) => Some(Ok(Value::String {
|
||||
val: s,
|
||||
span: self.span,
|
||||
})),
|
||||
Err(_) => {
|
||||
// Something is definitely wrong. Switch to binary, and stay there
|
||||
self.is_binary = true;
|
||||
Some(Ok(Value::Binary {
|
||||
val: v,
|
||||
span: self.span,
|
||||
}))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => Some(Err(e)),
|
||||
},
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -106,12 +150,12 @@ impl Iterator for StringStream {
|
||||
/// In practice, a "stream" here means anything which can be iterated and produce Values as it iterates.
|
||||
/// Like other iterators in Rust, observing values from this stream will drain the items as you view them
|
||||
/// and the stream cannot be replayed.
|
||||
pub struct ValueStream {
|
||||
pub struct ListStream {
|
||||
pub stream: Box<dyn Iterator<Item = Value> + Send + 'static>,
|
||||
pub ctrlc: Option<Arc<AtomicBool>>,
|
||||
}
|
||||
|
||||
impl ValueStream {
|
||||
impl ListStream {
|
||||
pub fn into_string(self, separator: &str, config: &Config) -> String {
|
||||
self.map(|x: Value| x.into_string(", ", config))
|
||||
.collect::<Vec<String>>()
|
||||
@ -121,21 +165,21 @@ impl ValueStream {
|
||||
pub fn from_stream(
|
||||
input: impl Iterator<Item = Value> + Send + 'static,
|
||||
ctrlc: Option<Arc<AtomicBool>>,
|
||||
) -> ValueStream {
|
||||
ValueStream {
|
||||
) -> ListStream {
|
||||
ListStream {
|
||||
stream: Box::new(input),
|
||||
ctrlc,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Debug for ValueStream {
|
||||
impl Debug for ListStream {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("ValueStream").finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for ValueStream {
|
||||
impl Iterator for ListStream {
|
||||
type Item = Value;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
|
Reference in New Issue
Block a user