2020-04-27 04:04:54 +02:00
|
|
|
use crate::commands::WholeStreamCommand;
|
2019-05-28 06:00:00 +02:00
|
|
|
use crate::prelude::*;
|
Extract core stuff into own crates
This commit extracts five new crates:
- nu-source, which contains the core source-code handling logic in Nu,
including Text, Span, and also the pretty.rs-based debug logic
- nu-parser, which is the parser and expander logic
- nu-protocol, which is the bulk of the types and basic conveniences
used by plugins
- nu-errors, which contains ShellError, ParseError and error handling
conveniences
- nu-textview, which is the textview plugin extracted into a crate
One of the major consequences of this refactor is that it's no longer
possible to `impl X for Spanned<Y>` outside of the `nu-source` crate, so
a lot of types became more concrete (Value became a concrete type
instead of Spanned<Value>, for example).
This also turned a number of inherent methods in the main nu crate into
plain functions (impl Value {} became a bunch of functions in the
`value` namespace in `crate::data::value`).
2019-11-26 03:30:48 +01:00
|
|
|
use nu_errors::ShellError;
|
2020-04-27 04:04:54 +02:00
|
|
|
use nu_protocol::{CommandAction, ReturnSuccess, Signature, SyntaxShape, UntaggedValue};
|
|
|
|
use nu_source::{AnchorLocation, Span, Tagged};
|
2019-06-03 09:41:28 +02:00
|
|
|
use std::path::{Path, PathBuf};
|
2020-06-12 02:37:43 +02:00
|
|
|
extern crate encoding_rs;
|
|
|
|
use encoding_rs::*;
|
|
|
|
use std::fs::File;
|
|
|
|
use std::io::BufWriter;
|
|
|
|
use std::io::Read;
|
|
|
|
use std::io::Write;
|
Extract core stuff into own crates
This commit extracts five new crates:
- nu-source, which contains the core source-code handling logic in Nu,
including Text, Span, and also the pretty.rs-based debug logic
- nu-parser, which is the parser and expander logic
- nu-protocol, which is the bulk of the types and basic conveniences
used by plugins
- nu-errors, which contains ShellError, ParseError and error handling
conveniences
- nu-textview, which is the textview plugin extracted into a crate
One of the major consequences of this refactor is that it's no longer
possible to `impl X for Spanned<Y>` outside of the `nu-source` crate, so
a lot of types became more concrete (Value became a concrete type
instead of Spanned<Value>, for example).
This also turned a number of inherent methods in the main nu crate into
plain functions (impl Value {} became a bunch of functions in the
`value` namespace in `crate::data::value`).
2019-11-26 03:30:48 +01:00
|
|
|
|
2019-07-24 00:22:11 +02:00
|
|
|
/// Unit struct that carries the `WholeStreamCommand` implementation of the `open` command.
pub struct Open;
|
2019-06-22 05:43:37 +02:00
|
|
|
|
2020-04-27 04:04:54 +02:00
|
|
|
#[derive(Deserialize)]
|
|
|
|
pub struct OpenArgs {
|
|
|
|
path: Tagged<PathBuf>,
|
|
|
|
raw: Tagged<bool>,
|
2020-06-12 02:37:43 +02:00
|
|
|
encoding: Option<Tagged<String>>,
|
2020-04-27 04:04:54 +02:00
|
|
|
}
|
|
|
|
|
2020-05-29 10:22:52 +02:00
|
|
|
#[async_trait]
|
2020-04-27 04:04:54 +02:00
|
|
|
impl WholeStreamCommand for Open {
|
2019-08-02 21:15:07 +02:00
|
|
|
fn name(&self) -> &str {
|
|
|
|
"open"
|
|
|
|
}
|
|
|
|
|
|
|
|
fn signature(&self) -> Signature {
|
|
|
|
Signature::build(self.name())
|
2019-10-28 06:15:35 +01:00
|
|
|
.required(
|
|
|
|
"path",
|
|
|
|
SyntaxShape::Path,
|
|
|
|
"the file path to load values from",
|
|
|
|
)
|
2020-02-12 03:24:31 +01:00
|
|
|
.switch(
|
|
|
|
"raw",
|
|
|
|
"load content as a string instead of a table",
|
|
|
|
Some('r'),
|
|
|
|
)
|
2020-06-12 02:37:43 +02:00
|
|
|
.named(
|
|
|
|
"encoding",
|
|
|
|
SyntaxShape::String,
|
|
|
|
"encoding to use to open file",
|
|
|
|
Some('e'),
|
|
|
|
)
|
2019-08-02 21:15:07 +02:00
|
|
|
}
|
|
|
|
|
2019-08-30 00:52:32 +02:00
|
|
|
fn usage(&self) -> &str {
|
2020-06-12 02:37:43 +02:00
|
|
|
r#"Load a file into a cell, convert to table if possible (avoid by appending '--raw').
|
|
|
|
|
|
|
|
Multiple encodings are supported for reading text files by using
|
|
|
|
the '--encoding <encoding>' parameter. Here is an example of a few:
|
|
|
|
big5, euc-jp, euc-kr, gbk, iso-8859-1, utf-16, cp1252, latin5
|
|
|
|
|
|
|
|
For a more complete list of encodings please refer to the encoding_rs
|
|
|
|
documentation link at https://docs.rs/encoding_rs/0.8.23/encoding_rs/#statics"#
|
2019-08-30 00:52:32 +02:00
|
|
|
}
|
|
|
|
|
2020-05-29 10:22:52 +02:00
|
|
|
async fn run(
|
2019-07-24 00:22:11 +02:00
|
|
|
&self,
|
2020-04-27 04:04:54 +02:00
|
|
|
args: CommandArgs,
|
|
|
|
registry: &CommandRegistry,
|
2019-08-24 21:36:19 +02:00
|
|
|
) -> Result<OutputStream, ShellError> {
|
2020-06-08 06:48:10 +02:00
|
|
|
open(args, registry).await
|
2019-08-02 21:15:07 +02:00
|
|
|
}
|
2020-05-18 09:11:37 +02:00
|
|
|
|
2020-05-18 17:40:44 +02:00
|
|
|
fn examples(&self) -> Vec<Example> {
|
2020-06-12 02:37:43 +02:00
|
|
|
vec![
|
|
|
|
Example {
|
|
|
|
description: "Opens \"users.csv\" and creates a table from the data",
|
|
|
|
example: "open users.csv",
|
|
|
|
result: None,
|
|
|
|
},
|
|
|
|
Example {
|
|
|
|
description: "Opens file with iso-8859-1 encoding",
|
|
|
|
example: "open file.csv --encoding iso-8859-1 | from csv",
|
|
|
|
result: None,
|
|
|
|
},
|
|
|
|
]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn get_encoding(opt: Option<String>) -> &'static Encoding {
|
|
|
|
match opt {
|
|
|
|
None => UTF_8,
|
|
|
|
Some(label) => match Encoding::for_label((&label).as_bytes()) {
|
|
|
|
None => {
|
|
|
|
//print!("{} is not a known encoding label. Trying UTF-8.", label);
|
|
|
|
//std::process::exit(-2);
|
|
|
|
get_encoding(Some("utf-8".to_string()))
|
|
|
|
}
|
|
|
|
Some(encoding) => encoding,
|
|
|
|
},
|
2020-05-18 09:11:37 +02:00
|
|
|
}
|
2019-08-02 21:15:07 +02:00
|
|
|
}
|
2019-07-24 00:22:11 +02:00
|
|
|
|
2020-06-08 06:48:10 +02:00
|
|
|
async fn open(args: CommandArgs, registry: &CommandRegistry) -> Result<OutputStream, ShellError> {
|
2020-05-16 05:18:24 +02:00
|
|
|
let cwd = PathBuf::from(args.shell_manager.path());
|
2019-12-06 16:28:26 +01:00
|
|
|
let full_path = cwd;
|
2020-05-16 05:18:24 +02:00
|
|
|
let registry = registry.clone();
|
2019-06-22 05:43:37 +02:00
|
|
|
|
2020-06-12 02:37:43 +02:00
|
|
|
let (
|
|
|
|
OpenArgs {
|
|
|
|
path,
|
|
|
|
raw,
|
|
|
|
encoding,
|
|
|
|
},
|
|
|
|
_,
|
|
|
|
) = args.process(®istry).await?;
|
|
|
|
let enc = match encoding {
|
|
|
|
Some(e) => e.to_string(),
|
|
|
|
_ => "".to_string(),
|
|
|
|
};
|
|
|
|
let result = fetch(&full_path, &path.item, path.tag.span, enc).await;
|
2019-08-25 15:57:47 +02:00
|
|
|
|
2020-06-08 06:48:10 +02:00
|
|
|
let (file_extension, contents, contents_tag) = result?;
|
2019-08-25 15:57:47 +02:00
|
|
|
|
2020-06-08 06:48:10 +02:00
|
|
|
let file_extension = if raw.item {
|
|
|
|
None
|
|
|
|
} else {
|
|
|
|
// If the extension could not be determined via mimetype, try to use the path
|
|
|
|
// extension. Some file types do not declare their mimetypes (such as bson files).
|
|
|
|
file_extension.or_else(|| path.extension().map(|x| x.to_string_lossy().to_string()))
|
2019-08-02 21:15:07 +02:00
|
|
|
};
|
2019-07-24 00:22:11 +02:00
|
|
|
|
2020-06-08 06:48:10 +02:00
|
|
|
let tagged_contents = contents.into_value(&contents_tag);
|
|
|
|
|
|
|
|
if let Some(extension) = file_extension {
|
|
|
|
Ok(OutputStream::one(ReturnSuccess::action(
|
|
|
|
CommandAction::AutoConvert(tagged_contents, extension),
|
|
|
|
)))
|
|
|
|
} else {
|
|
|
|
Ok(OutputStream::one(ReturnSuccess::value(tagged_contents)))
|
|
|
|
}
|
2019-06-22 05:43:37 +02:00
|
|
|
}
|
|
|
|
|
2019-08-24 21:36:19 +02:00
|
|
|
pub async fn fetch(
|
2019-07-02 09:56:20 +02:00
|
|
|
cwd: &PathBuf,
|
2020-04-27 04:04:54 +02:00
|
|
|
location: &PathBuf,
|
2019-09-18 08:37:04 +02:00
|
|
|
span: Span,
|
2020-06-12 02:37:43 +02:00
|
|
|
encoding: String,
|
2019-11-21 15:33:14 +01:00
|
|
|
) -> Result<(Option<String>, UntaggedValue, Tag), ShellError> {
|
2019-06-26 09:40:43 +02:00
|
|
|
let mut cwd = cwd.clone();
|
2020-06-12 02:37:43 +02:00
|
|
|
let output_encoding: &Encoding = get_encoding(Some("utf-8".to_string()));
|
|
|
|
let input_encoding: &Encoding = get_encoding(Some(encoding.clone()));
|
|
|
|
let mut decoder = input_encoding.new_decoder();
|
|
|
|
let mut encoder = output_encoding.new_encoder();
|
|
|
|
let mut _file: File;
|
|
|
|
let buf = Vec::new();
|
|
|
|
let mut bufwriter = BufWriter::new(buf);
|
|
|
|
|
|
|
|
cwd.push(Path::new(location));
|
|
|
|
if let Ok(cwd) = dunce::canonicalize(&cwd) {
|
|
|
|
if !encoding.is_empty() {
|
|
|
|
// use the encoding string
|
|
|
|
match File::open(&Path::new(&cwd)) {
|
|
|
|
Ok(mut _file) => {
|
|
|
|
convert_via_utf8(
|
|
|
|
&mut decoder,
|
|
|
|
&mut encoder,
|
|
|
|
&mut _file,
|
|
|
|
&mut bufwriter,
|
|
|
|
false,
|
|
|
|
);
|
|
|
|
//bufwriter.flush()?;
|
|
|
|
Ok((
|
|
|
|
cwd.extension()
|
|
|
|
.map(|name| name.to_string_lossy().to_string()),
|
|
|
|
UntaggedValue::string(String::from_utf8_lossy(&bufwriter.buffer())),
|
|
|
|
Tag {
|
|
|
|
span,
|
|
|
|
anchor: Some(AnchorLocation::File(cwd.to_string_lossy().to_string())),
|
|
|
|
},
|
|
|
|
))
|
|
|
|
}
|
|
|
|
Err(_) => Err(ShellError::labeled_error(
|
|
|
|
format!("Cannot open {:?} for reading.", &cwd),
|
|
|
|
"file not found",
|
|
|
|
span,
|
|
|
|
)),
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// Do the old stuff
|
|
|
|
match std::fs::read(&cwd) {
|
|
|
|
Ok(bytes) => match std::str::from_utf8(&bytes) {
|
|
|
|
Ok(s) => Ok((
|
|
|
|
cwd.extension()
|
|
|
|
.map(|name| name.to_string_lossy().to_string()),
|
|
|
|
UntaggedValue::string(s),
|
|
|
|
Tag {
|
|
|
|
span,
|
|
|
|
anchor: Some(AnchorLocation::File(cwd.to_string_lossy().to_string())),
|
|
|
|
},
|
|
|
|
)),
|
|
|
|
Err(_) => {
|
|
|
|
//Non utf8 data.
|
|
|
|
match (bytes.get(0), bytes.get(1)) {
|
|
|
|
(Some(x), Some(y)) if *x == 0xff && *y == 0xfe => {
|
|
|
|
// Possibly UTF-16 little endian
|
|
|
|
let utf16 = read_le_u16(&bytes[2..]);
|
|
|
|
|
|
|
|
if let Some(utf16) = utf16 {
|
|
|
|
match std::string::String::from_utf16(&utf16) {
|
|
|
|
Ok(s) => Ok((
|
|
|
|
cwd.extension()
|
|
|
|
.map(|name| name.to_string_lossy().to_string()),
|
|
|
|
UntaggedValue::string(s),
|
|
|
|
Tag {
|
|
|
|
span,
|
|
|
|
anchor: Some(AnchorLocation::File(
|
|
|
|
cwd.to_string_lossy().to_string(),
|
|
|
|
)),
|
|
|
|
},
|
|
|
|
)),
|
|
|
|
Err(_) => Ok((
|
|
|
|
None,
|
|
|
|
UntaggedValue::binary(bytes),
|
|
|
|
Tag {
|
|
|
|
span,
|
|
|
|
anchor: Some(AnchorLocation::File(
|
|
|
|
cwd.to_string_lossy().to_string(),
|
|
|
|
)),
|
|
|
|
},
|
|
|
|
)),
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
Ok((
|
|
|
|
None,
|
|
|
|
UntaggedValue::binary(bytes),
|
|
|
|
Tag {
|
|
|
|
span,
|
|
|
|
anchor: Some(AnchorLocation::File(
|
|
|
|
cwd.to_string_lossy().to_string(),
|
|
|
|
)),
|
|
|
|
},
|
|
|
|
))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
(Some(x), Some(y)) if *x == 0xfe && *y == 0xff => {
|
|
|
|
// Possibly UTF-16 big endian
|
|
|
|
let utf16 = read_be_u16(&bytes[2..]);
|
2019-06-26 09:40:43 +02:00
|
|
|
|
2020-06-12 02:37:43 +02:00
|
|
|
if let Some(utf16) = utf16 {
|
|
|
|
match std::string::String::from_utf16(&utf16) {
|
|
|
|
Ok(s) => Ok((
|
|
|
|
cwd.extension()
|
|
|
|
.map(|name| name.to_string_lossy().to_string()),
|
|
|
|
UntaggedValue::string(s),
|
|
|
|
Tag {
|
|
|
|
span,
|
|
|
|
anchor: Some(AnchorLocation::File(
|
|
|
|
cwd.to_string_lossy().to_string(),
|
|
|
|
)),
|
|
|
|
},
|
|
|
|
)),
|
|
|
|
Err(_) => Ok((
|
|
|
|
None,
|
|
|
|
UntaggedValue::binary(bytes),
|
|
|
|
Tag {
|
|
|
|
span,
|
|
|
|
anchor: Some(AnchorLocation::File(
|
|
|
|
cwd.to_string_lossy().to_string(),
|
|
|
|
)),
|
|
|
|
},
|
|
|
|
)),
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
Ok((
|
|
|
|
None,
|
|
|
|
UntaggedValue::binary(bytes),
|
|
|
|
Tag {
|
|
|
|
span,
|
|
|
|
anchor: Some(AnchorLocation::File(
|
|
|
|
cwd.to_string_lossy().to_string(),
|
|
|
|
)),
|
|
|
|
},
|
|
|
|
))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
_ => Ok((
|
|
|
|
None,
|
|
|
|
UntaggedValue::binary(bytes),
|
|
|
|
Tag {
|
|
|
|
span,
|
|
|
|
anchor: Some(AnchorLocation::File(
|
|
|
|
cwd.to_string_lossy().to_string(),
|
|
|
|
)),
|
|
|
|
},
|
|
|
|
)),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
Err(_) => Err(ShellError::labeled_error(
|
|
|
|
format!("Cannot open {:?} for reading.", &cwd),
|
|
|
|
"file not found",
|
|
|
|
span,
|
|
|
|
)),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
Err(ShellError::labeled_error(
|
|
|
|
format!("Cannot open {:?} for reading.", &cwd),
|
|
|
|
"file not found",
|
|
|
|
span,
|
|
|
|
))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn convert_via_utf8(
|
|
|
|
decoder: &mut Decoder,
|
|
|
|
encoder: &mut Encoder,
|
|
|
|
read: &mut dyn Read,
|
|
|
|
write: &mut dyn Write,
|
|
|
|
last: bool,
|
|
|
|
) {
|
|
|
|
let mut input_buffer = [0u8; 2048];
|
|
|
|
let mut intermediate_buffer_bytes = [0u8; 4096];
|
|
|
|
// Is there a safe way to create a stack-allocated &mut str?
|
|
|
|
let mut intermediate_buffer: &mut str =
|
|
|
|
//unsafe { std::mem::transmute(&mut intermediate_buffer_bytes[..]) };
|
|
|
|
std::str::from_utf8_mut(&mut intermediate_buffer_bytes[..]).expect("error with from_utf8_mut");
|
|
|
|
let mut output_buffer = [0u8; 4096];
|
|
|
|
let mut current_input_ended = false;
|
|
|
|
while !current_input_ended {
|
|
|
|
match read.read(&mut input_buffer) {
|
|
|
|
Err(_) => {
|
|
|
|
print!("Error reading input.");
|
|
|
|
//std::process::exit(-5);
|
|
|
|
}
|
|
|
|
Ok(decoder_input_end) => {
|
|
|
|
current_input_ended = decoder_input_end == 0;
|
|
|
|
let input_ended = last && current_input_ended;
|
|
|
|
let mut decoder_input_start = 0usize;
|
|
|
|
loop {
|
|
|
|
let (decoder_result, decoder_read, decoder_written, _) = decoder.decode_to_str(
|
|
|
|
&input_buffer[decoder_input_start..decoder_input_end],
|
|
|
|
&mut intermediate_buffer,
|
|
|
|
input_ended,
|
|
|
|
);
|
|
|
|
decoder_input_start += decoder_read;
|
|
|
|
|
|
|
|
let last_output = if input_ended {
|
|
|
|
match decoder_result {
|
|
|
|
CoderResult::InputEmpty => true,
|
|
|
|
CoderResult::OutputFull => false,
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
false
|
|
|
|
};
|
|
|
|
|
|
|
|
// Regardless of whether the intermediate buffer got full
|
|
|
|
// or the input buffer was exhausted, let's process what's
|
|
|
|
// in the intermediate buffer.
|
|
|
|
|
|
|
|
if encoder.encoding() == UTF_8 {
|
|
|
|
// If the target is UTF-8, optimize out the encoder.
|
|
|
|
if write
|
|
|
|
.write_all(&intermediate_buffer.as_bytes()[..decoder_written])
|
|
|
|
.is_err()
|
|
|
|
{
|
|
|
|
print!("Error writing output.");
|
|
|
|
//std::process::exit(-7);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
let mut encoder_input_start = 0usize;
|
|
|
|
loop {
|
|
|
|
let (encoder_result, encoder_read, encoder_written, _) = encoder
|
|
|
|
.encode_from_utf8(
|
|
|
|
&intermediate_buffer[encoder_input_start..decoder_written],
|
|
|
|
&mut output_buffer,
|
|
|
|
last_output,
|
|
|
|
);
|
|
|
|
encoder_input_start += encoder_read;
|
|
|
|
if write.write_all(&output_buffer[..encoder_written]).is_err() {
|
|
|
|
print!("Error writing output.");
|
|
|
|
//std::process::exit(-6);
|
|
|
|
}
|
|
|
|
match encoder_result {
|
|
|
|
CoderResult::InputEmpty => {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
CoderResult::OutputFull => {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Now let's see if we should read again or process the
|
|
|
|
// rest of the current input buffer.
|
|
|
|
match decoder_result {
|
|
|
|
CoderResult::InputEmpty => {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
CoderResult::OutputFull => {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2019-06-26 09:40:43 +02:00
|
|
|
}
|
|
|
|
|
2019-08-12 06:11:42 +02:00
|
|
|
/// Reinterprets `input` as a sequence of little-endian `u16` values.
///
/// Returns `None` when `input` is empty or has an odd number of bytes.
fn read_le_u16(input: &[u8]) -> Option<Vec<u16>> {
    if input.is_empty() || input.len() % 2 == 1 {
        return None;
    }

    let units = input
        .chunks_exact(2)
        .map(|pair| u16::from_le_bytes([pair[0], pair[1]]))
        .collect();

    Some(units)
}
|
|
|
|
|
|
|
|
/// Reinterprets `input` as a sequence of big-endian `u16` values.
///
/// Returns `None` when `input` is empty or has an odd number of bytes.
fn read_be_u16(input: &[u8]) -> Option<Vec<u16>> {
    if input.is_empty() || input.len() % 2 == 1 {
        return None;
    }

    let units = input
        .chunks_exact(2)
        .map(|pair| u16::from_be_bytes([pair[0], pair[1]]))
        .collect();

    Some(units)
}
|
2020-05-18 14:56:01 +02:00
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::Open;
    use crate::examples::test as test_examples;

    /// Hands the `Open` command to the shared `crate::examples::test` helper.
    #[test]
    fn examples_work_as_expected() {
        test_examples(Open {})
    }
}
|