2020-07-03 21:53:20 +02:00
|
|
|
use crate::commands::classified::maybe_text_codec::{MaybeTextCodec, StringOrBinary};
|
2020-04-27 04:04:54 +02:00
|
|
|
use crate::commands::WholeStreamCommand;
|
2019-05-28 06:00:00 +02:00
|
|
|
use crate::prelude::*;
|
2020-07-03 21:53:20 +02:00
|
|
|
use futures_codec::FramedRead;
|
Extract core stuff into own crates
This commit extracts five new crates:
- nu-source, which contains the core source-code handling logic in Nu,
including Text, Span, and also the pretty.rs-based debug logic
- nu-parser, which is the parser and expander logic
- nu-protocol, which is the bulk of the types and basic conveniences
used by plugins
- nu-errors, which contains ShellError, ParseError and error handling
conveniences
- nu-textview, which is the textview plugin extracted into a crate
One of the major consequences of this refactor is that it's no longer
possible to `impl X for Spanned<Y>` outside of the `nu-source` crate, so
a lot of types became more concrete (Value became a concrete type
instead of Spanned<Value>, for example).
This also turned a number of inherent methods in the main nu crate into
plain functions (impl Value {} became a bunch of functions in the
`value` namespace in `crate::data::value`).
2019-11-26 03:30:48 +01:00
|
|
|
use nu_errors::ShellError;
|
2020-07-03 21:53:20 +02:00
|
|
|
use nu_protocol::{CommandAction, ReturnSuccess, Signature, SyntaxShape, UntaggedValue, Value};
|
2020-04-27 04:04:54 +02:00
|
|
|
use nu_source::{AnchorLocation, Span, Tagged};
|
2020-07-03 21:53:20 +02:00
|
|
|
use std::path::PathBuf;
|
2020-06-12 02:37:43 +02:00
|
|
|
extern crate encoding_rs;
|
2020-07-03 21:53:20 +02:00
|
|
|
use crate::commands::constants::BAT_LANGUAGES;
|
2020-06-12 02:37:43 +02:00
|
|
|
use encoding_rs::*;
|
2020-07-03 21:53:20 +02:00
|
|
|
use futures::prelude::*;
|
|
|
|
use log::debug;
|
2020-06-12 02:37:43 +02:00
|
|
|
use std::fs::File;
|
Extract core stuff into own crates
This commit extracts five new crates:
- nu-source, which contains the core source-code handling logic in Nu,
including Text, Span, and also the pretty.rs-based debug logic
- nu-parser, which is the parser and expander logic
- nu-protocol, which is the bulk of the types and basic conveniences
used by plugins
- nu-errors, which contains ShellError, ParseError and error handling
conveniences
- nu-textview, which is the textview plugin extracted into a crate
One of the major consequences of this refactor is that it's no longer
possible to `impl X for Spanned<Y>` outside of the `nu-source` crate, so
a lot of types became more concrete (Value became a concrete type
instead of Spanned<Value>, for example).
This also turned a number of inherent methods in the main nu crate into
plain functions (impl Value {} became a bunch of functions in the
`value` namespace in `crate::data::value`).
2019-11-26 03:30:48 +01:00
|
|
|
|
2019-07-24 00:22:11 +02:00
|
|
|
/// Unit type representing the `open` command; its behavior comes from the
/// `WholeStreamCommand` implementation for this type.
pub struct Open;
|
2019-06-22 05:43:37 +02:00
|
|
|
|
2020-04-27 04:04:54 +02:00
|
|
|
#[derive(Deserialize)]
|
|
|
|
pub struct OpenArgs {
|
|
|
|
path: Tagged<PathBuf>,
|
|
|
|
raw: Tagged<bool>,
|
2020-06-12 02:37:43 +02:00
|
|
|
encoding: Option<Tagged<String>>,
|
2020-04-27 04:04:54 +02:00
|
|
|
}
|
|
|
|
|
2020-05-29 10:22:52 +02:00
|
|
|
#[async_trait]
|
2020-04-27 04:04:54 +02:00
|
|
|
impl WholeStreamCommand for Open {
|
2019-08-02 21:15:07 +02:00
|
|
|
fn name(&self) -> &str {
|
|
|
|
"open"
|
|
|
|
}
|
|
|
|
|
|
|
|
fn signature(&self) -> Signature {
|
|
|
|
Signature::build(self.name())
|
2019-10-28 06:15:35 +01:00
|
|
|
.required(
|
|
|
|
"path",
|
|
|
|
SyntaxShape::Path,
|
|
|
|
"the file path to load values from",
|
|
|
|
)
|
2020-02-12 03:24:31 +01:00
|
|
|
.switch(
|
|
|
|
"raw",
|
|
|
|
"load content as a string instead of a table",
|
|
|
|
Some('r'),
|
|
|
|
)
|
2020-06-12 02:37:43 +02:00
|
|
|
.named(
|
|
|
|
"encoding",
|
|
|
|
SyntaxShape::String,
|
|
|
|
"encoding to use to open file",
|
|
|
|
Some('e'),
|
|
|
|
)
|
2019-08-02 21:15:07 +02:00
|
|
|
}
|
|
|
|
|
2019-08-30 00:52:32 +02:00
|
|
|
fn usage(&self) -> &str {
|
2020-06-12 02:37:43 +02:00
|
|
|
r#"Load a file into a cell, convert to table if possible (avoid by appending '--raw').
|
|
|
|
|
|
|
|
Multiple encodings are supported for reading text files by using
|
|
|
|
the '--encoding <encoding>' parameter. Here is an example of a few:
|
|
|
|
big5, euc-jp, euc-kr, gbk, iso-8859-1, utf-16, cp1252, latin5
|
|
|
|
|
|
|
|
For a more complete list of encodings please refer to the encoding_rs
|
|
|
|
documentation link at https://docs.rs/encoding_rs/0.8.23/encoding_rs/#statics"#
|
2019-08-30 00:52:32 +02:00
|
|
|
}
|
|
|
|
|
2020-05-29 10:22:52 +02:00
|
|
|
async fn run(
|
2019-07-24 00:22:11 +02:00
|
|
|
&self,
|
2020-04-27 04:04:54 +02:00
|
|
|
args: CommandArgs,
|
|
|
|
registry: &CommandRegistry,
|
2019-08-24 21:36:19 +02:00
|
|
|
) -> Result<OutputStream, ShellError> {
|
2020-06-08 06:48:10 +02:00
|
|
|
open(args, registry).await
|
2019-08-02 21:15:07 +02:00
|
|
|
}
|
2020-05-18 09:11:37 +02:00
|
|
|
|
2020-05-18 17:40:44 +02:00
|
|
|
fn examples(&self) -> Vec<Example> {
|
2020-06-12 02:37:43 +02:00
|
|
|
vec![
|
|
|
|
Example {
|
|
|
|
description: "Opens \"users.csv\" and creates a table from the data",
|
|
|
|
example: "open users.csv",
|
|
|
|
result: None,
|
|
|
|
},
|
|
|
|
Example {
|
|
|
|
description: "Opens file with iso-8859-1 encoding",
|
|
|
|
example: "open file.csv --encoding iso-8859-1 | from csv",
|
|
|
|
result: None,
|
|
|
|
},
|
|
|
|
]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-07-03 21:53:20 +02:00
|
|
|
pub fn get_encoding(opt: Option<Tagged<String>>) -> Result<&'static Encoding, ShellError> {
|
2020-06-12 02:37:43 +02:00
|
|
|
match opt {
|
2020-07-03 21:53:20 +02:00
|
|
|
None => Ok(UTF_8),
|
|
|
|
Some(label) => match Encoding::for_label((&label.item).as_bytes()) {
|
|
|
|
None => Err(ShellError::labeled_error(
|
|
|
|
format!(
|
|
|
|
r#"{} is not a valid encoding, refer to https://docs.rs/encoding_rs/0.8.23/encoding_rs/#statics for a valid list of encodings"#,
|
|
|
|
label.item
|
|
|
|
),
|
|
|
|
"invalid encoding",
|
|
|
|
label.span(),
|
|
|
|
)),
|
|
|
|
Some(encoding) => Ok(encoding),
|
2020-06-12 02:37:43 +02:00
|
|
|
},
|
2020-05-18 09:11:37 +02:00
|
|
|
}
|
2019-08-02 21:15:07 +02:00
|
|
|
}
|
2019-07-24 00:22:11 +02:00
|
|
|
|
2020-06-08 06:48:10 +02:00
|
|
|
async fn open(args: CommandArgs, registry: &CommandRegistry) -> Result<OutputStream, ShellError> {
|
2020-05-16 05:18:24 +02:00
|
|
|
let cwd = PathBuf::from(args.shell_manager.path());
|
|
|
|
let registry = registry.clone();
|
2019-06-22 05:43:37 +02:00
|
|
|
|
2020-06-12 02:37:43 +02:00
|
|
|
let (
|
|
|
|
OpenArgs {
|
|
|
|
path,
|
|
|
|
raw,
|
|
|
|
encoding,
|
|
|
|
},
|
|
|
|
_,
|
|
|
|
) = args.process(®istry).await?;
|
2019-08-25 15:57:47 +02:00
|
|
|
|
2020-07-03 21:53:20 +02:00
|
|
|
// TODO: Remove once Streams are supported everywhere!
|
|
|
|
// As a short term workaround for getting AutoConvert and Bat functionality (Those don't currently support Streams)
|
2019-08-25 15:57:47 +02:00
|
|
|
|
2020-07-03 21:53:20 +02:00
|
|
|
// Check if the extension has a "from *" command OR "bat" supports syntax highlighting
|
|
|
|
// AND the user doesn't want the raw output
|
|
|
|
// In these cases, we will collect the Stream
|
|
|
|
let ext = if raw.item {
|
2020-06-08 06:48:10 +02:00
|
|
|
None
|
|
|
|
} else {
|
2020-07-03 21:53:20 +02:00
|
|
|
path.extension()
|
|
|
|
.map(|name| name.to_string_lossy().to_string())
|
2019-08-02 21:15:07 +02:00
|
|
|
};
|
2019-07-24 00:22:11 +02:00
|
|
|
|
2020-07-03 21:53:20 +02:00
|
|
|
if let Some(ext) = ext {
|
|
|
|
// Check if we have a conversion command
|
|
|
|
if let Some(_command) = registry.get_command(&format!("from {}", ext)) {
|
|
|
|
let (_, tagged_contents) = crate::commands::open::fetch(
|
|
|
|
&cwd,
|
|
|
|
&PathBuf::from(&path.item),
|
|
|
|
path.tag.span,
|
|
|
|
encoding,
|
|
|
|
)
|
|
|
|
.await?;
|
|
|
|
return Ok(OutputStream::one(ReturnSuccess::action(
|
|
|
|
CommandAction::AutoConvert(tagged_contents, ext),
|
|
|
|
)));
|
|
|
|
}
|
|
|
|
// Check if bat does syntax highlighting
|
|
|
|
if BAT_LANGUAGES.contains(&ext.as_ref()) {
|
|
|
|
let (_, tagged_contents) = crate::commands::open::fetch(
|
|
|
|
&cwd,
|
|
|
|
&PathBuf::from(&path.item),
|
|
|
|
path.tag.span,
|
|
|
|
encoding,
|
|
|
|
)
|
|
|
|
.await?;
|
|
|
|
return Ok(OutputStream::one(ReturnSuccess::value(tagged_contents)));
|
|
|
|
}
|
|
|
|
}
|
2020-06-08 06:48:10 +02:00
|
|
|
|
2020-07-03 21:53:20 +02:00
|
|
|
// Normal Streaming operation
|
|
|
|
let with_encoding = if encoding.is_none() {
|
|
|
|
None
|
2020-06-08 06:48:10 +02:00
|
|
|
} else {
|
2020-07-03 21:53:20 +02:00
|
|
|
Some(get_encoding(encoding)?)
|
|
|
|
};
|
|
|
|
let f = File::open(&path).map_err(|e| {
|
|
|
|
ShellError::labeled_error(
|
|
|
|
format!("Error opening file: {:?}", e),
|
|
|
|
"Error opening file",
|
|
|
|
path.span(),
|
|
|
|
)
|
|
|
|
})?;
|
|
|
|
let async_reader = futures::io::AllowStdIo::new(f);
|
|
|
|
let sob_stream = FramedRead::new(async_reader, MaybeTextCodec::new(with_encoding))
|
|
|
|
.map_err(|e| ShellError::unexpected(format!("AsyncRead failed in open function: {:?}", e)))
|
|
|
|
.into_stream();
|
|
|
|
|
2020-07-04 21:40:04 +02:00
|
|
|
let final_stream = sob_stream.map(move |x| {
|
|
|
|
// The tag that will used when returning a Value
|
|
|
|
let file_tag = Tag {
|
|
|
|
span: path.tag.span,
|
|
|
|
anchor: Some(AnchorLocation::File(path.to_string_lossy().to_string())),
|
|
|
|
};
|
|
|
|
|
|
|
|
match x {
|
|
|
|
Ok(StringOrBinary::String(s)) => {
|
|
|
|
ReturnSuccess::value(UntaggedValue::string(s).into_value(file_tag))
|
|
|
|
}
|
|
|
|
Ok(StringOrBinary::Binary(b)) => ReturnSuccess::value(
|
|
|
|
UntaggedValue::binary(b.into_iter().collect()).into_value(file_tag),
|
|
|
|
),
|
|
|
|
Err(se) => Err(se),
|
2020-07-03 21:53:20 +02:00
|
|
|
}
|
|
|
|
});
|
|
|
|
|
|
|
|
Ok(OutputStream::new(final_stream))
|
2019-06-22 05:43:37 +02:00
|
|
|
}
|
|
|
|
|
2020-07-03 21:53:20 +02:00
|
|
|
// Note that we do not output a Stream in "fetch" since it is only used by "enter" command
|
|
|
|
// Which we expect to use a concrete Value a not a Stream
|
2019-08-24 21:36:19 +02:00
|
|
|
pub async fn fetch(
|
2019-07-02 09:56:20 +02:00
|
|
|
cwd: &PathBuf,
|
2020-04-27 04:04:54 +02:00
|
|
|
location: &PathBuf,
|
2019-09-18 08:37:04 +02:00
|
|
|
span: Span,
|
2020-07-03 21:53:20 +02:00
|
|
|
encoding_choice: Option<Tagged<String>>,
|
|
|
|
) -> Result<(Option<String>, Value), ShellError> {
|
|
|
|
// TODO: I don't understand the point of this? Maybe for better error reporting
|
2019-06-26 09:40:43 +02:00
|
|
|
let mut cwd = cwd.clone();
|
2020-07-03 21:53:20 +02:00
|
|
|
cwd.push(location);
|
|
|
|
let nice_location = dunce::canonicalize(&cwd).map_err(|e| {
|
|
|
|
ShellError::labeled_error(
|
|
|
|
format!("Cannot canonicalize file {:?} because {:?}", &cwd, e),
|
|
|
|
"Cannot canonicalize",
|
2020-06-12 02:37:43 +02:00
|
|
|
span,
|
2020-07-03 21:53:20 +02:00
|
|
|
)
|
|
|
|
})?;
|
2020-06-12 02:37:43 +02:00
|
|
|
|
2020-07-03 21:53:20 +02:00
|
|
|
// The extension may be used in AutoConvert later on
|
|
|
|
let ext = location
|
|
|
|
.extension()
|
|
|
|
.map(|name| name.to_string_lossy().to_string());
|
2020-06-12 02:37:43 +02:00
|
|
|
|
2020-07-03 21:53:20 +02:00
|
|
|
// The tag that will used when returning a Value
|
|
|
|
let file_tag = Tag {
|
|
|
|
span,
|
|
|
|
anchor: Some(AnchorLocation::File(
|
|
|
|
nice_location.to_string_lossy().to_string(),
|
|
|
|
)),
|
|
|
|
};
|
2020-06-12 02:37:43 +02:00
|
|
|
|
2020-07-03 21:53:20 +02:00
|
|
|
let res = std::fs::read(location)?;
|
2020-06-12 02:37:43 +02:00
|
|
|
|
2020-07-03 21:53:20 +02:00
|
|
|
// If no encoding is provided we try to guess the encoding to read the file with
|
|
|
|
let encoding = if encoding_choice.is_none() {
|
|
|
|
UTF_8
|
2019-08-12 06:11:42 +02:00
|
|
|
} else {
|
2020-07-03 21:53:20 +02:00
|
|
|
get_encoding(encoding_choice.clone())?
|
|
|
|
};
|
2019-08-12 06:11:42 +02:00
|
|
|
|
2020-07-03 21:53:20 +02:00
|
|
|
// If the user specified an encoding, then do not do BOM sniffing
|
|
|
|
let decoded_res = if encoding_choice.is_some() {
|
|
|
|
let (cow_res, _replacements) = encoding.decode_with_bom_removal(&res);
|
|
|
|
cow_res
|
2019-08-12 06:11:42 +02:00
|
|
|
} else {
|
2020-07-03 21:53:20 +02:00
|
|
|
// Otherwise, use the default UTF-8 encoder with BOM sniffing
|
|
|
|
let (cow_res, actual_encoding, replacements) = encoding.decode(&res);
|
|
|
|
// If we had to use replacement characters then fallback to binary
|
|
|
|
if replacements {
|
|
|
|
return Ok((ext, UntaggedValue::binary(res).into_value(file_tag)));
|
2019-08-12 06:11:42 +02:00
|
|
|
}
|
2020-07-03 21:53:20 +02:00
|
|
|
debug!("Decoded using {:?}", actual_encoding);
|
|
|
|
cow_res
|
|
|
|
};
|
|
|
|
let v = UntaggedValue::string(decoded_res.to_string()).into_value(file_tag);
|
|
|
|
Ok((ext, v))
|
2019-08-12 06:11:42 +02:00
|
|
|
}
|
2020-05-18 14:56:01 +02:00
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::Open;
    use crate::examples::test as test_examples;

    /// Passes the `Open` command to the crate's examples test helper.
    #[test]
    fn examples_work_as_expected() {
        test_examples(Open)
    }
}
|