mirror of
https://github.com/nushell/nushell.git
synced 2025-05-01 00:24:29 +02:00
# Description

This PR introduces a `ByteStream` type which is a `Read`-able stream of bytes. Internally, it has an enum over three different byte stream sources:

```rust
pub enum ByteStreamSource {
    Read(Box<dyn Read + Send + 'static>),
    File(File),
    Child(ChildProcess),
}
```

This is in comparison to the current `RawStream` type, which is an `Iterator<Item = Vec<u8>>` and has to allocate for each read chunk.

Currently, `PipelineData::ExternalStream` serves a weird dual role where it is either external command output or a wrapper around `RawStream`. `ByteStream` makes this distinction more clear (via `ByteStreamSource`) and replaces `PipelineData::ExternalStream` in this PR:

```rust
pub enum PipelineData {
    Empty,
    Value(Value, Option<PipelineMetadata>),
    ListStream(ListStream, Option<PipelineMetadata>),
    ByteStream(ByteStream, Option<PipelineMetadata>),
}
```

The PR is relatively large, but a decent amount of it is just repetitive changes.

This PR fixes #7017, fixes #10763, and fixes #12369.

This PR also improves performance when piping external commands. Nushell should, in most cases, have competitive pipeline throughput compared to, e.g., bash.

| Command | Before (MB/s) | After (MB/s) | Bash (MB/s) |
| -------------------------------------------------- | -------------:| ------------:| -----------:|
| `throughput \| rg 'x'` | 3059 | 3744 | 3739 |
| `throughput \| nu --testbin relay o> /dev/null` | 3508 | 8087 | 8136 |

# User-Facing Changes

- This is a breaking change for the plugin communication protocol, because the `ExternalStreamInfo` was replaced with `ByteStreamInfo`. Plugins now only have to deal with a single input stream, as opposed to the previous three streams: stdout, stderr, and exit code.
- The output of `describe` has been changed for external/byte streams.
- Temporary breaking change: `bytes starts-with` no longer works with byte streams. This is to keep the PR smaller, and `bytes ends-with` already does not work on byte streams.
- If a process core dumped, then instead of having a `Value::Error` in the `exit_code` column of the output returned from `complete`, it now is a `Value::Int` with the negation of the signal number.

# After Submitting

- Update docs and book as necessary
- Release notes (e.g., plugin protocol changes)
- Adapt/convert commands to work with byte streams (high priority is `str length`, `bytes starts-with`, and maybe `bytes ends-with`).
- Refactor the `tee` code, Devyn has already done some work on this.

---------

Co-authored-by: Devyn Cairns <devyn.cairns@gmail.com>
326 lines
11 KiB
Rust
326 lines
11 KiB
Rust
use nu_engine::{command_prelude::*, env::get_config, find_in_dirs_env, get_dirs_var_from_call};
|
|
use nu_parser::{parse, parse_module_block, parse_module_file_or_dir, unescape_unquote_string};
|
|
use nu_protocol::engine::{FileStack, StateWorkingSet};
|
|
use std::path::Path;
|
|
|
|
/// Marker type for the `nu-check` command; it carries no state of its own.
#[derive(Clone)]
pub struct NuCheck;
|
|
|
|
impl Command for NuCheck {
|
|
fn name(&self) -> &str {
|
|
"nu-check"
|
|
}
|
|
|
|
fn signature(&self) -> Signature {
|
|
Signature::build("nu-check")
|
|
.input_output_types(vec![
|
|
(Type::String, Type::Bool),
|
|
(Type::ListStream, Type::Bool),
|
|
(Type::List(Box::new(Type::Any)), Type::Bool),
|
|
])
|
|
// type is string to avoid automatically canonicalizing the path
|
|
.optional("path", SyntaxShape::String, "File path to parse.")
|
|
.switch("as-module", "Parse content as module", Some('m'))
|
|
.switch("debug", "Show error messages", Some('d'))
|
|
.category(Category::Strings)
|
|
}
|
|
|
|
fn usage(&self) -> &str {
|
|
"Validate and parse input content."
|
|
}
|
|
|
|
fn search_terms(&self) -> Vec<&str> {
|
|
vec!["syntax", "parse", "debug"]
|
|
}
|
|
|
|
fn run(
|
|
&self,
|
|
engine_state: &EngineState,
|
|
stack: &mut Stack,
|
|
call: &Call,
|
|
input: PipelineData,
|
|
) -> Result<PipelineData, ShellError> {
|
|
let path_arg: Option<Spanned<String>> = call.opt(engine_state, stack, 0)?;
|
|
let as_module = call.has_flag(engine_state, stack, "as-module")?;
|
|
let is_debug = call.has_flag(engine_state, stack, "debug")?;
|
|
|
|
// DO NOT ever try to merge the working_set in this command
|
|
let mut working_set = StateWorkingSet::new(engine_state);
|
|
|
|
let input_span = input.span().unwrap_or(call.head);
|
|
|
|
match input {
|
|
PipelineData::Value(Value::String { val, .. }, ..) => {
|
|
let contents = Vec::from(val);
|
|
if as_module {
|
|
parse_module(&mut working_set, None, &contents, is_debug, input_span)
|
|
} else {
|
|
parse_script(&mut working_set, None, &contents, is_debug, input_span)
|
|
}
|
|
}
|
|
PipelineData::ListStream(stream, ..) => {
|
|
let config = get_config(engine_state, stack);
|
|
let list_stream = stream.into_string("\n", &config);
|
|
let contents = Vec::from(list_stream);
|
|
|
|
if as_module {
|
|
parse_module(&mut working_set, None, &contents, is_debug, call.head)
|
|
} else {
|
|
parse_script(&mut working_set, None, &contents, is_debug, call.head)
|
|
}
|
|
}
|
|
PipelineData::ByteStream(stream, ..) => {
|
|
let contents = stream.into_bytes()?;
|
|
|
|
if as_module {
|
|
parse_module(&mut working_set, None, &contents, is_debug, call.head)
|
|
} else {
|
|
parse_script(&mut working_set, None, &contents, is_debug, call.head)
|
|
}
|
|
}
|
|
_ => {
|
|
if let Some(path_str) = path_arg {
|
|
let path_span = path_str.span;
|
|
|
|
// look up the path as relative to FILE_PWD or inside NU_LIB_DIRS (same process as source-env)
|
|
let path = match find_in_dirs_env(
|
|
&path_str.item,
|
|
engine_state,
|
|
stack,
|
|
get_dirs_var_from_call(call),
|
|
) {
|
|
Ok(path) => {
|
|
if let Some(path) = path {
|
|
path
|
|
} else {
|
|
return Err(ShellError::FileNotFound {
|
|
file: path_str.item,
|
|
span: path_span,
|
|
});
|
|
}
|
|
}
|
|
Err(error) => return Err(error),
|
|
};
|
|
|
|
let result = if as_module || path.is_dir() {
|
|
parse_file_or_dir_module(
|
|
path.to_string_lossy().as_bytes(),
|
|
&mut working_set,
|
|
is_debug,
|
|
path_span,
|
|
call.head,
|
|
)
|
|
} else {
|
|
// Unlike `parse_file_or_dir_module`, `parse_file_script` parses the content directly,
|
|
// without adding the file to the stack. Therefore we need to handle this manually.
|
|
working_set.files = FileStack::with_file(path.clone());
|
|
parse_file_script(&path, &mut working_set, is_debug, path_span, call.head)
|
|
// The working set is not merged, so no need to pop the file from the stack.
|
|
};
|
|
|
|
result
|
|
} else {
|
|
Err(ShellError::GenericError {
|
|
error: "Failed to execute command".into(),
|
|
msg: "Requires path argument if ran without pipeline input".into(),
|
|
span: Some(call.head),
|
|
help: Some("Please run 'nu-check --help' for more details".into()),
|
|
inner: vec![],
|
|
})
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
fn examples(&self) -> Vec<Example> {
|
|
vec![
|
|
Example {
|
|
description: "Parse a input file as script(Default)",
|
|
example: "nu-check script.nu",
|
|
result: None,
|
|
},
|
|
Example {
|
|
description: "Parse a input file as module",
|
|
example: "nu-check --as-module module.nu",
|
|
result: None,
|
|
},
|
|
Example {
|
|
description: "Parse a input file by showing error message",
|
|
example: "nu-check --debug script.nu",
|
|
result: None,
|
|
},
|
|
Example {
|
|
description: "Parse a byte stream as script by showing error message",
|
|
example: "open foo.nu | nu-check --debug script.nu",
|
|
result: None,
|
|
},
|
|
Example {
|
|
description: "Parse an internal stream as module by showing error message",
|
|
example: "open module.nu | lines | nu-check --debug --as-module module.nu",
|
|
result: None,
|
|
},
|
|
Example {
|
|
description: "Parse a string as script",
|
|
example: "$'two(char nl)lines' | nu-check ",
|
|
result: None,
|
|
},
|
|
Example {
|
|
description: "Heuristically parse which begins with script first, if it sees a failure, try module afterwards",
|
|
example: "nu-check -a script.nu",
|
|
result: None,
|
|
},
|
|
Example {
|
|
description: "Heuristically parse by showing error message",
|
|
example: "open foo.nu | lines | nu-check --all --debug",
|
|
result: None,
|
|
},
|
|
]
|
|
}
|
|
}
|
|
|
|
fn parse_module(
|
|
working_set: &mut StateWorkingSet,
|
|
filename: Option<String>,
|
|
contents: &[u8],
|
|
is_debug: bool,
|
|
call_head: Span,
|
|
) -> Result<PipelineData, ShellError> {
|
|
let filename = filename.unwrap_or_else(|| "empty".to_string());
|
|
|
|
let file_id = working_set.add_file(filename.clone(), contents);
|
|
let new_span = working_set.get_span_for_file(file_id);
|
|
|
|
let starting_error_count = working_set.parse_errors.len();
|
|
parse_module_block(working_set, new_span, filename.as_bytes());
|
|
|
|
check_parse(
|
|
starting_error_count,
|
|
working_set,
|
|
is_debug,
|
|
Some(
|
|
"If the content is intended to be a script, please try to remove `--as-module` flag "
|
|
.to_string(),
|
|
),
|
|
call_head,
|
|
)
|
|
}
|
|
|
|
fn parse_script(
|
|
working_set: &mut StateWorkingSet,
|
|
filename: Option<&str>,
|
|
contents: &[u8],
|
|
is_debug: bool,
|
|
call_head: Span,
|
|
) -> Result<PipelineData, ShellError> {
|
|
let starting_error_count = working_set.parse_errors.len();
|
|
parse(working_set, filename, contents, false);
|
|
check_parse(starting_error_count, working_set, is_debug, None, call_head)
|
|
}
|
|
|
|
fn check_parse(
|
|
starting_error_count: usize,
|
|
working_set: &StateWorkingSet,
|
|
is_debug: bool,
|
|
help: Option<String>,
|
|
call_head: Span,
|
|
) -> Result<PipelineData, ShellError> {
|
|
if starting_error_count != working_set.parse_errors.len() {
|
|
let msg = format!(
|
|
r#"Found : {}"#,
|
|
working_set
|
|
.parse_errors
|
|
.first()
|
|
.expect("Missing parser error")
|
|
);
|
|
|
|
if is_debug {
|
|
Err(ShellError::GenericError {
|
|
error: "Failed to parse content".into(),
|
|
msg,
|
|
span: Some(call_head),
|
|
help,
|
|
inner: vec![],
|
|
})
|
|
} else {
|
|
Ok(PipelineData::Value(Value::bool(false, call_head), None))
|
|
}
|
|
} else {
|
|
Ok(PipelineData::Value(Value::bool(true, call_head), None))
|
|
}
|
|
}
|
|
|
|
fn parse_file_script(
|
|
path: &Path,
|
|
working_set: &mut StateWorkingSet,
|
|
is_debug: bool,
|
|
path_span: Span,
|
|
call_head: Span,
|
|
) -> Result<PipelineData, ShellError> {
|
|
let filename = check_path(working_set, path_span, call_head)?;
|
|
|
|
if let Ok(contents) = std::fs::read(path) {
|
|
parse_script(working_set, Some(&filename), &contents, is_debug, call_head)
|
|
} else {
|
|
Err(ShellError::IOErrorSpanned {
|
|
msg: "Could not read path".to_string(),
|
|
span: path_span,
|
|
})
|
|
}
|
|
}
|
|
|
|
fn parse_file_or_dir_module(
|
|
path_bytes: &[u8],
|
|
working_set: &mut StateWorkingSet,
|
|
is_debug: bool,
|
|
path_span: Span,
|
|
call_head: Span,
|
|
) -> Result<PipelineData, ShellError> {
|
|
let _ = check_path(working_set, path_span, call_head)?;
|
|
|
|
let starting_error_count = working_set.parse_errors.len();
|
|
let _ = parse_module_file_or_dir(working_set, path_bytes, path_span, None);
|
|
|
|
if starting_error_count != working_set.parse_errors.len() {
|
|
if is_debug {
|
|
let msg = format!(
|
|
r#"Found : {}"#,
|
|
working_set
|
|
.parse_errors
|
|
.first()
|
|
.expect("Missing parser error")
|
|
);
|
|
Err(ShellError::GenericError {
|
|
error: "Failed to parse content".into(),
|
|
msg,
|
|
span: Some(path_span),
|
|
help: Some("If the content is intended to be a script, please try to remove `--as-module` flag ".into()),
|
|
inner: vec![],
|
|
})
|
|
} else {
|
|
Ok(PipelineData::Value(Value::bool(false, call_head), None))
|
|
}
|
|
} else {
|
|
Ok(PipelineData::Value(Value::bool(true, call_head), None))
|
|
}
|
|
}
|
|
|
|
fn check_path(
|
|
working_set: &mut StateWorkingSet,
|
|
path_span: Span,
|
|
call_head: Span,
|
|
) -> Result<String, ShellError> {
|
|
let bytes = working_set.get_span_contents(path_span);
|
|
let (filename, err) = unescape_unquote_string(bytes, path_span);
|
|
if let Some(e) = err {
|
|
Err(ShellError::GenericError {
|
|
error: "Could not escape filename".to_string(),
|
|
msg: "could not escape filename".to_string(),
|
|
span: Some(call_head),
|
|
help: Some(format!("Returned error: {e}")),
|
|
inner: vec![],
|
|
})
|
|
} else {
|
|
Ok(filename)
|
|
}
|
|
}
|