nushell/crates/nu-command/src/system/run_external.rs

377 lines
13 KiB
Rust
Raw Normal View History

2021-10-25 08:31:39 +02:00
use std::collections::HashMap;
2021-09-29 20:25:05 +02:00
use std::io::{BufRead, BufReader, Write};
2021-10-26 00:56:29 +02:00
use std::process::{Command as CommandSys, Stdio};
use std::sync::atomic::Ordering;
2021-09-29 20:25:05 +02:00
use std::sync::mpsc;
use nu_engine::env_to_strings;
2021-10-25 08:31:39 +02:00
use nu_protocol::engine::{EngineState, Stack};
2021-10-25 18:58:58 +02:00
use nu_protocol::{ast::Call, engine::Command, ShellError, Signature, SyntaxShape, Value};
use nu_protocol::{ByteStream, Category, Config, PipelineData, Spanned};
2021-10-25 18:58:58 +02:00
use itertools::Itertools;
2021-10-25 18:58:58 +02:00
use nu_engine::CallExt;
use regex::Regex;
2021-09-29 20:25:05 +02:00
/// Capacity, in bytes, of the buffered reader used to read an external command's stdout.
const OUTPUT_BUFFER_SIZE: usize = 1024;
2021-09-29 20:25:05 +02:00
2021-10-25 06:01:02 +02:00
/// The private `run_external` command: runs a program that is not a built-in.
#[derive(Clone)]
pub struct External;
impl Command for External {
fn name(&self) -> &str {
"run_external"
}
fn usage(&self) -> &str {
"Runs external command"
}
fn is_private(&self) -> bool {
true
}
2021-09-29 20:25:05 +02:00
fn signature(&self) -> nu_protocol::Signature {
Signature::build("run_external")
.switch("last_expression", "last_expression", None)
.rest("rest", SyntaxShape::Any, "external command to run")
.category(Category::System)
2021-09-29 20:25:05 +02:00
}
fn run(
&self,
2021-10-25 08:31:39 +02:00
engine_state: &EngineState,
stack: &mut Stack,
2021-09-29 20:25:05 +02:00
call: &Call,
2021-10-25 06:01:02 +02:00
input: PipelineData,
) -> Result<PipelineData, ShellError> {
2021-12-27 04:04:22 +01:00
let name: Spanned<String> = call.req(engine_state, stack, 0)?;
let args: Vec<Value> = call.rest(engine_state, stack, 1)?;
2021-10-25 08:31:39 +02:00
let last_expression = call.has_flag("last_expression");
// Translate environment variables from Values to Strings
let config = stack.get_config().unwrap_or_default();
let env_vars_str = env_to_strings(engine_state, stack, &config)?;
let mut args_strs = vec![];
for arg in args {
if let Ok(s) = arg.as_string() {
args_strs.push(s);
} else if let Value::List { vals, .. } = arg {
// Interpret a list as a series of arguments
for val in vals {
if let Ok(s) = val.as_string() {
args_strs.push(s);
} else {
return Err(ShellError::ExternalCommand(
"Cannot convert argument to a string".into(),
2022-01-22 15:12:34 +01:00
"All arguments to an external command need to be string-compatible"
.into(),
val.span()?,
));
}
}
} else {
return Err(ShellError::ExternalCommand(
"Cannot convert argument to a string".into(),
2022-01-22 15:12:34 +01:00
"All arguments to an external command need to be string-compatible".into(),
arg.span()?,
));
}
}
2021-10-25 08:31:39 +02:00
let command = ExternalCommand {
name,
args: args_strs,
2021-10-25 08:31:39 +02:00
last_expression,
env_vars: env_vars_str,
2021-12-04 13:38:21 +01:00
call,
2021-10-25 08:31:39 +02:00
};
command.run_with_input(engine_state, input, config)
2021-09-29 20:25:05 +02:00
}
}
2021-12-04 13:38:21 +01:00
pub struct ExternalCommand<'call> {
2021-10-25 08:31:39 +02:00
pub name: Spanned<String>,
pub args: Vec<String>,
2021-09-29 20:25:05 +02:00
pub last_expression: bool,
2021-10-25 08:31:39 +02:00
pub env_vars: HashMap<String, String>,
2021-12-04 13:38:21 +01:00
pub call: &'call Call,
2021-09-29 20:25:05 +02:00
}
2021-12-04 13:38:21 +01:00
impl<'call> ExternalCommand<'call> {
2021-10-28 06:13:10 +02:00
pub fn run_with_input(
&self,
engine_state: &EngineState,
input: PipelineData,
config: Config,
2021-10-28 06:13:10 +02:00
) -> Result<PipelineData, ShellError> {
let head = self.name.span;
2021-09-29 20:25:05 +02:00
2021-10-28 06:13:10 +02:00
let ctrlc = engine_state.ctrlc.clone();
let mut process = if let Some(d) = self.env_vars.get("PWD") {
let mut process = self.create_command(d);
Use only $nu.env.PWD for getting the current directory (#587) * Use only $nu.env.PWD for getting current directory Because setting and reading to/from std::env changes the global state shich is problematic if we call `cd` from multiple threads (e.g., in a `par-each` block). With this change, when engine-q starts, it will either inherit existing PWD env var, or create a new one from `std::env::current_dir()`. Otherwise, everything that needs the current directory will get it from `$nu.env.PWD`. Each spawned external command will get its current directory per-process which should be thread-safe. One thing left to do is to patch nu-path for this as well since it uses `std::env::current_dir()` in its expansions. * Rename nu-path functions *_with is not *_relative which should be more descriptive and frees "with" for use in a followup commit. * Clone stack every each iter; Fix some commands Cloning the stack each iteration of `each` makes sure we're not reusing PWD between iterations. Some fixes in commands to make them use the new PWD. * Post-rebase cleanup, fmt, clippy * Change back _relative to _with in nu-path funcs Didn't use the idea I had for the new "_with". * Remove leftover current_dir from rebase * Add cwd sync at merge_delta() This makes sure the parser and completer always have up-to-date cwd. * Always pass absolute path to glob in ls * Do not allow PWD a relative path; Allow recovery Makes it possible to recover PWD by proceeding with the REPL cycle. * Clone stack in each also for byte/string stream * (WIP) Start moving env variables to engine state * (WIP) Move env vars to engine state (ugly) Quick and dirty code. * (WIP) Remove unused mut and args; Fmt * (WIP) Fix dataframe tests * (WIP) Fix missing args after rebase * (WIP) Clone only env vars, not the whole stack * (WIP) Add env var clone to `for` loop as well * Minor edits * Refactor merge_delta() to include stack merging. Less error-prone than doing it manually. 
* Clone env for each `update` command iteration * Mark env var hidden only when found in eng. state * Fix clippt warnings * Add TODO about env var reading * Do not clone empty environment in loops * Remove extra cwd collection * Split current_dir() into str and path; Fix autocd * Make completions respect PWD env var
2022-01-04 23:30:34 +01:00
process.current_dir(d);
process
Use only $nu.env.PWD for getting the current directory (#587) * Use only $nu.env.PWD for getting current directory Because setting and reading to/from std::env changes the global state shich is problematic if we call `cd` from multiple threads (e.g., in a `par-each` block). With this change, when engine-q starts, it will either inherit existing PWD env var, or create a new one from `std::env::current_dir()`. Otherwise, everything that needs the current directory will get it from `$nu.env.PWD`. Each spawned external command will get its current directory per-process which should be thread-safe. One thing left to do is to patch nu-path for this as well since it uses `std::env::current_dir()` in its expansions. * Rename nu-path functions *_with is not *_relative which should be more descriptive and frees "with" for use in a followup commit. * Clone stack every each iter; Fix some commands Cloning the stack each iteration of `each` makes sure we're not reusing PWD between iterations. Some fixes in commands to make them use the new PWD. * Post-rebase cleanup, fmt, clippy * Change back _relative to _with in nu-path funcs Didn't use the idea I had for the new "_with". * Remove leftover current_dir from rebase * Add cwd sync at merge_delta() This makes sure the parser and completer always have up-to-date cwd. * Always pass absolute path to glob in ls * Do not allow PWD a relative path; Allow recovery Makes it possible to recover PWD by proceeding with the REPL cycle. * Clone stack in each also for byte/string stream * (WIP) Start moving env variables to engine state * (WIP) Move env vars to engine state (ugly) Quick and dirty code. * (WIP) Remove unused mut and args; Fmt * (WIP) Fix dataframe tests * (WIP) Fix missing args after rebase * (WIP) Clone only env vars, not the whole stack * (WIP) Add env var clone to `for` loop as well * Minor edits * Refactor merge_delta() to include stack merging. Less error-prone than doing it manually. 
* Clone env for each `update` command iteration * Mark env var hidden only when found in eng. state * Fix clippt warnings * Add TODO about env var reading * Do not clone empty environment in loops * Remove extra cwd collection * Split current_dir() into str and path; Fix autocd * Make completions respect PWD env var
2022-01-04 23:30:34 +01:00
} else {
return Err(ShellError::SpannedLabeledErrorHelp(
"Current directory not found".to_string(),
"did not find PWD environment variable".to_string(),
head,
concat!(
"The environment variable 'PWD' was not found. ",
"It is required to define the current directory when running an external command."
).to_string(),
));
};
2021-09-29 20:25:05 +02:00
2021-10-25 08:31:39 +02:00
process.envs(&self.env_vars);
2021-09-29 20:25:05 +02:00
// If the external is not the last command, its output will get piped
// either as a string or binary
if !self.last_expression {
process.stdout(Stdio::piped());
}
// If there is an input from the pipeline. The stdin from the process
// is piped so it can be used to send the input information
if !matches!(input, PipelineData::Value(Value::Nothing { .. }, ..)) {
2021-09-29 20:25:05 +02:00
process.stdin(Stdio::piped());
}
match process.spawn() {
Err(err) => Err(ShellError::ExternalCommand(
2022-01-22 15:12:34 +01:00
"can't run executable".to_string(),
err.to_string(),
2021-09-29 20:25:05 +02:00
self.name.span,
)),
Ok(mut child) => {
// if there is a string or a stream, that is sent to the pipe std
2021-10-25 06:01:02 +02:00
if let Some(mut stdin_write) = child.stdin.take() {
2021-10-26 00:56:29 +02:00
std::thread::spawn(move || {
for value in input.into_iter() {
match value {
Value::String { val, span: _ } => {
if stdin_write.write(val.as_bytes()).is_err() {
return Ok(());
}
}
Value::Binary { val, span: _ } => {
if stdin_write.write(&val).is_err() {
return Ok(());
}
}
x => {
if stdin_write
.write(x.into_string(", ", &config).as_bytes())
.is_err()
{
2021-10-26 00:56:29 +02:00
return Err(());
}
}
2021-09-29 20:25:05 +02:00
}
}
2021-10-26 00:56:29 +02:00
Ok(())
});
2021-09-29 20:25:05 +02:00
}
let last_expression = self.last_expression;
let span = self.name.span;
let output_ctrlc = ctrlc.clone();
let (tx, rx) = mpsc::channel();
std::thread::spawn(move || {
// If this external is not the last expression, then its output is piped to a channel
// and we create a ValueStream that can be consumed
if !last_expression {
2021-12-04 13:38:21 +01:00
let stdout = child.stdout.take().ok_or_else(|| {
ShellError::ExternalCommand(
"Error taking stdout from external".to_string(),
2022-01-22 15:12:34 +01:00
"Redirects need access to stdout of an external command"
.to_string(),
2021-12-04 13:38:21 +01:00
span,
)
})?;
2021-09-29 20:25:05 +02:00
// Stdout is read using the Buffer reader. It will do so until there is an
// error or there are no more bytes to read
let mut buf_read = BufReader::with_capacity(OUTPUT_BUFFER_SIZE, stdout);
while let Ok(bytes) = buf_read.fill_buf() {
if bytes.is_empty() {
break;
}
// The Cow generated from the function represents the conversion
// from bytes to String. If no replacements are required, then the
// borrowed value is a proper UTF-8 string. The Owned option represents
// a string where the values had to be replaced, thus marking it as bytes
let bytes = bytes.to_vec();
2021-09-29 20:25:05 +02:00
let length = bytes.len();
buf_read.consume(length);
if let Some(ctrlc) = &ctrlc {
if ctrlc.load(Ordering::SeqCst) {
break;
}
}
match tx.send(bytes) {
2021-09-29 20:25:05 +02:00
Ok(_) => continue,
Err(_) => break,
}
}
}
2021-09-29 20:25:05 +02:00
match child.wait() {
2022-01-22 15:12:34 +01:00
Err(err) => Err(ShellError::ExternalCommand(
"External command exited with error".into(),
err.to_string(),
span,
)),
Ok(_) => Ok(()),
}
});
let receiver = ChannelReceiver::new(rx);
Ok(PipelineData::ByteStream(
ByteStream {
stream: Box::new(receiver),
ctrlc: output_ctrlc,
},
head,
None,
))
2021-09-29 20:25:05 +02:00
}
}
}
fn create_command(&self, cwd: &str) -> CommandSys {
2021-09-29 20:25:05 +02:00
// in all the other cases shell out
if cfg!(windows) {
//TODO. This should be modifiable from the config file.
// We could give the option to call from powershell
// for minimal builds cwd is unused
if self.name.item.ends_with(".cmd") || self.name.item.ends_with(".bat") {
self.spawn_cmd_command()
} else {
self.spawn_simple_command(cwd)
2021-09-29 20:25:05 +02:00
}
} else if self.name.item.ends_with(".sh") {
self.spawn_sh_command()
2021-09-29 20:25:05 +02:00
} else {
self.spawn_simple_command(cwd)
}
}
/// Spawn a command without shelling out to an external shell
///
/// The command name and every argument get the same treatment: enclosing
/// quotes are trimmed, values starting with `~` or `..` are expanded
/// against `cwd`, and each backslash is doubled.
fn spawn_simple_command(&self, cwd: &str) -> std::process::Command {
    // Shared preparation for the program name and each argument.
    let prepare = |raw: &str| -> String {
        let trimmed = trim_enclosing_quotes(raw);
        let expanded = if trimmed.starts_with('~') || trimmed.starts_with("..") {
            nu_path::expand_path_with(trimmed, cwd)
                .to_string_lossy()
                .to_string()
        } else {
            trimmed
        };
        expanded.replace("\\", "\\\\")
    };

    let mut process = std::process::Command::new(prepare(self.name.item.as_str()));
    for arg in &self.args {
        process.arg(prepare(arg.as_str()));
    }
    process
}
/// Spawn a cmd command with `cmd /c args...`
///
/// The command name is passed through unchanged; every argument has its
/// pipe characters escaped with a caret.
fn spawn_cmd_command(&self) -> std::process::Command {
    let mut cmd = std::process::Command::new("cmd");
    cmd.arg("/c").arg(&self.name.item);

    // Clean the args before we use them:
    // https://stackoverflow.com/questions/1200235/how-to-pass-a-quoted-pipe-character-to-cmd-exe
    // cmd.exe needs to have a caret to escape a pipe
    cmd.args(self.args.iter().map(|raw| raw.replace("|", "^|")));

    cmd
}
/// Spawn a sh command with `sh -c args...`
///
/// The arguments are shell-escaped and joined with spaces; the command name
/// itself is placed in front of them verbatim.
fn spawn_sh_command(&self) -> std::process::Command {
    let escaped_args = self
        .args
        .iter()
        .map(|arg| shell_arg_escape(arg.as_str()))
        .join(" ");
    let script = format!("{} {}", self.name.item, escaped_args);

    let mut sh = std::process::Command::new("sh");
    sh.arg("-c");
    sh.arg(script);
    sh
}
}
/// Returns `true` when `arg` contains a character outside the set that can be
/// passed to a shell without quoting (word characters and `@%+=:,./-`).
fn has_unsafe_shell_characters(arg: &str) -> bool {
    // Compiling a regex is far more expensive than matching with it, so the
    // pattern is compiled once per thread instead of on every call.
    thread_local! {
        static UNSAFE_SHELL_CHARS: Regex =
            Regex::new(r"[^\w@%+=:,./-]").expect("regex to be valid");
    }

    UNSAFE_SHELL_CHARS.with(|re| re.is_match(arg))
}
fn shell_arg_escape(arg: &str) -> String {
match arg {
"" => String::from("''"),
s if !has_unsafe_shell_characters(s) => String::from(s),
_ => {
let single_quotes_escaped = arg.split('\'').join("'\"'\"'");
format!("'{}'", single_quotes_escaped)
2021-09-29 20:25:05 +02:00
}
}
}
2021-12-03 07:15:23 +01:00
/// Removes one pair of matching enclosing quotes (`"…"` or `'…'`) from `input`.
///
/// The input is returned unchanged when it is not wrapped in a matching pair,
/// including when it consists of a single quote character.
fn trim_enclosing_quotes(input: &str) -> String {
    let unquoted = ['"', '\''].iter().find_map(|&quote| {
        input
            .strip_prefix(quote)
            .and_then(|rest| rest.strip_suffix(quote))
    });

    unquoted.unwrap_or(input).to_string()
}
2021-09-29 20:25:05 +02:00
/// Receiving end of the channel that carries chunks of an external command's
/// stdout. It implements `Iterator` so it can be used as the source of a
/// `ByteStream`.
struct ChannelReceiver {
    rx: mpsc::Receiver<Vec<u8>>,
}
impl ChannelReceiver {
pub fn new(rx: mpsc::Receiver<Vec<u8>>) -> Self {
Self { rx }
2021-09-29 20:25:05 +02:00
}
}
impl Iterator for ChannelReceiver {
2021-12-24 20:24:55 +01:00
type Item = Result<Vec<u8>, ShellError>;
2021-09-29 20:25:05 +02:00
fn next(&mut self) -> Option<Self::Item> {
match self.rx.recv() {
2021-12-24 20:24:55 +01:00
Ok(v) => Some(Ok(v)),
2021-09-29 20:25:05 +02:00
Err(_) => None,
}
}
}