mirror of
https://github.com/nushell/nushell.git
synced 2024-12-25 16:39:08 +01:00
* WIP - not compiling * compiling but panicking * still broken * nearly working * reverted deserializer_string changes updated enter.rs and open.rs to use Option<Tagged<String>> Accepted Clippy suggestions Accepted fmt suggestions Left original code from open.rs We may want to use some of it and only fallback to encoding. * Don't exit when there is an unknown encoding. * When encoding is unknown default to utf-8. * only do encoding if the user says to * merged some conflicts on open * made error messages consistent * Updated unwrap with expect * updated open test to pass with more descriptive err updated enter test to not fail * change _location to location * changed _visitor to visitor * Added a more verbose usage statement for encoding Linked to docs.rs/encoding_rs for details Co-authored-by: Darren Schroeder <fdncred@hotmail.com>
This commit is contained in:
parent
a268e825aa
commit
731aa6bbdd
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -2247,6 +2247,7 @@ dependencies = [
|
||||
"dirs 2.0.2",
|
||||
"dunce",
|
||||
"eml-parser",
|
||||
"encoding_rs",
|
||||
"filesize",
|
||||
"futures 0.3.5",
|
||||
"futures-util",
|
||||
|
@ -92,6 +92,7 @@ trash = { version = "1.0.1", optional = true }
|
||||
clipboard = { version = "0.5", optional = true }
|
||||
starship = { version = "0.41.3", optional = true }
|
||||
rayon = "1.3.0"
|
||||
encoding_rs = "0.8.23"
|
||||
|
||||
[target.'cfg(unix)'.dependencies]
|
||||
users = "0.10.0"
|
||||
|
@ -14,6 +14,7 @@ pub struct Enter;
|
||||
#[derive(Deserialize)]
|
||||
pub struct EnterArgs {
|
||||
location: Tagged<PathBuf>,
|
||||
encoding: Option<Tagged<String>>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
@ -23,15 +24,29 @@ impl WholeStreamCommand for Enter {
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("enter").required(
|
||||
"location",
|
||||
SyntaxShape::Path,
|
||||
"the location to create a new shell from",
|
||||
)
|
||||
Signature::build("enter")
|
||||
.required(
|
||||
"location",
|
||||
SyntaxShape::Path,
|
||||
"the location to create a new shell from",
|
||||
)
|
||||
.named(
|
||||
"encoding",
|
||||
SyntaxShape::String,
|
||||
"encoding to use to open file",
|
||||
Some('e'),
|
||||
)
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Create a new shell and begin at this path."
|
||||
r#"Create a new shell and begin at this path.
|
||||
|
||||
Multiple encodings are supported for reading text files by using
|
||||
the '--encoding <encoding>' parameter. Here is an example of a few:
|
||||
big5, euc-jp, euc-kr, gbk, iso-8859-1, utf-16, cp1252, latin5
|
||||
|
||||
For a more complete list of encodings please refer to the encoding_rs
|
||||
documentation link at https://docs.rs/encoding_rs/0.8.23/encoding_rs/#statics"#
|
||||
}
|
||||
|
||||
async fn run(
|
||||
@ -54,6 +69,11 @@ impl WholeStreamCommand for Enter {
|
||||
example: "enter package.json",
|
||||
result: None,
|
||||
},
|
||||
Example {
|
||||
description: "Enters file with iso-8859-1 encoding",
|
||||
example: "enter file.csv --encoding iso-8859-1",
|
||||
result: None,
|
||||
},
|
||||
]
|
||||
}
|
||||
}
|
||||
@ -68,7 +88,7 @@ fn enter(raw_args: CommandArgs, registry: &CommandRegistry) -> Result<OutputStre
|
||||
let current_errors = raw_args.current_errors.clone();
|
||||
let host = raw_args.host.clone();
|
||||
let tag = raw_args.call_info.name_tag.clone();
|
||||
let (EnterArgs { location }, _) = raw_args.process(®istry).await?;
|
||||
let (EnterArgs { location, encoding }, _) = raw_args.process(®istry).await?;
|
||||
let location_string = location.display().to_string();
|
||||
let location_clone = location_string.clone();
|
||||
|
||||
@ -103,6 +123,10 @@ fn enter(raw_args: CommandArgs, registry: &CommandRegistry) -> Result<OutputStre
|
||||
&full_path,
|
||||
&PathBuf::from(location_clone),
|
||||
tag.span,
|
||||
match encoding {
|
||||
Some(e) => e.to_string(),
|
||||
_ => "".to_string()
|
||||
}
|
||||
).await?;
|
||||
|
||||
match contents {
|
||||
|
@ -4,6 +4,12 @@ use nu_errors::ShellError;
|
||||
use nu_protocol::{CommandAction, ReturnSuccess, Signature, SyntaxShape, UntaggedValue};
|
||||
use nu_source::{AnchorLocation, Span, Tagged};
|
||||
use std::path::{Path, PathBuf};
|
||||
extern crate encoding_rs;
|
||||
use encoding_rs::*;
|
||||
use std::fs::File;
|
||||
use std::io::BufWriter;
|
||||
use std::io::Read;
|
||||
use std::io::Write;
|
||||
|
||||
pub struct Open;
|
||||
|
||||
@ -11,6 +17,7 @@ pub struct Open;
|
||||
pub struct OpenArgs {
|
||||
path: Tagged<PathBuf>,
|
||||
raw: Tagged<bool>,
|
||||
encoding: Option<Tagged<String>>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
@ -31,10 +38,23 @@ impl WholeStreamCommand for Open {
|
||||
"load content as a string instead of a table",
|
||||
Some('r'),
|
||||
)
|
||||
.named(
|
||||
"encoding",
|
||||
SyntaxShape::String,
|
||||
"encoding to use to open file",
|
||||
Some('e'),
|
||||
)
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Load a file into a cell, convert to table if possible (avoid by appending '--raw')"
|
||||
r#"Load a file into a cell, convert to table if possible (avoid by appending '--raw').
|
||||
|
||||
Multiple encodings are supported for reading text files by using
|
||||
the '--encoding <encoding>' parameter. Here is an example of a few:
|
||||
big5, euc-jp, euc-kr, gbk, iso-8859-1, utf-16, cp1252, latin5
|
||||
|
||||
For a more complete list of encodings please refer to the encoding_rs
|
||||
documentation link at https://docs.rs/encoding_rs/0.8.23/encoding_rs/#statics"#
|
||||
}
|
||||
|
||||
async fn run(
|
||||
@ -46,11 +66,32 @@ impl WholeStreamCommand for Open {
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Opens \"users.csv\" and creates a table from the data",
|
||||
example: "open users.csv",
|
||||
result: None,
|
||||
}]
|
||||
vec![
|
||||
Example {
|
||||
description: "Opens \"users.csv\" and creates a table from the data",
|
||||
example: "open users.csv",
|
||||
result: None,
|
||||
},
|
||||
Example {
|
||||
description: "Opens file with iso-8859-1 encoding",
|
||||
example: "open file.csv --encoding iso-8859-1 | from csv",
|
||||
result: None,
|
||||
},
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_encoding(opt: Option<String>) -> &'static Encoding {
|
||||
match opt {
|
||||
None => UTF_8,
|
||||
Some(label) => match Encoding::for_label((&label).as_bytes()) {
|
||||
None => {
|
||||
//print!("{} is not a known encoding label. Trying UTF-8.", label);
|
||||
//std::process::exit(-2);
|
||||
get_encoding(Some("utf-8".to_string()))
|
||||
}
|
||||
Some(encoding) => encoding,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@ -59,8 +100,19 @@ async fn open(args: CommandArgs, registry: &CommandRegistry) -> Result<OutputStr
|
||||
let full_path = cwd;
|
||||
let registry = registry.clone();
|
||||
|
||||
let (OpenArgs { path, raw }, _) = args.process(®istry).await?;
|
||||
let result = fetch(&full_path, &path.item, path.tag.span).await;
|
||||
let (
|
||||
OpenArgs {
|
||||
path,
|
||||
raw,
|
||||
encoding,
|
||||
},
|
||||
_,
|
||||
) = args.process(®istry).await?;
|
||||
let enc = match encoding {
|
||||
Some(e) => e.to_string(),
|
||||
_ => "".to_string(),
|
||||
};
|
||||
let result = fetch(&full_path, &path.item, path.tag.span, enc).await;
|
||||
|
||||
let (file_extension, contents, contents_tag) = result?;
|
||||
|
||||
@ -87,9 +139,173 @@ pub async fn fetch(
|
||||
cwd: &PathBuf,
|
||||
location: &PathBuf,
|
||||
span: Span,
|
||||
encoding: String,
|
||||
) -> Result<(Option<String>, UntaggedValue, Tag), ShellError> {
|
||||
let mut cwd = cwd.clone();
|
||||
let output_encoding: &Encoding = get_encoding(Some("utf-8".to_string()));
|
||||
let input_encoding: &Encoding = get_encoding(Some(encoding.clone()));
|
||||
let mut decoder = input_encoding.new_decoder();
|
||||
let mut encoder = output_encoding.new_encoder();
|
||||
let mut _file: File;
|
||||
let buf = Vec::new();
|
||||
let mut bufwriter = BufWriter::new(buf);
|
||||
|
||||
cwd.push(Path::new(location));
|
||||
if let Ok(cwd) = dunce::canonicalize(&cwd) {
|
||||
if !encoding.is_empty() {
|
||||
// use the encoding string
|
||||
match File::open(&Path::new(&cwd)) {
|
||||
Ok(mut _file) => {
|
||||
convert_via_utf8(
|
||||
&mut decoder,
|
||||
&mut encoder,
|
||||
&mut _file,
|
||||
&mut bufwriter,
|
||||
false,
|
||||
);
|
||||
//bufwriter.flush()?;
|
||||
Ok((
|
||||
cwd.extension()
|
||||
.map(|name| name.to_string_lossy().to_string()),
|
||||
UntaggedValue::string(String::from_utf8_lossy(&bufwriter.buffer())),
|
||||
Tag {
|
||||
span,
|
||||
anchor: Some(AnchorLocation::File(cwd.to_string_lossy().to_string())),
|
||||
},
|
||||
))
|
||||
}
|
||||
Err(_) => Err(ShellError::labeled_error(
|
||||
format!("Cannot open {:?} for reading.", &cwd),
|
||||
"file not found",
|
||||
span,
|
||||
)),
|
||||
}
|
||||
} else {
|
||||
// Do the old stuff
|
||||
match std::fs::read(&cwd) {
|
||||
Ok(bytes) => match std::str::from_utf8(&bytes) {
|
||||
Ok(s) => Ok((
|
||||
cwd.extension()
|
||||
.map(|name| name.to_string_lossy().to_string()),
|
||||
UntaggedValue::string(s),
|
||||
Tag {
|
||||
span,
|
||||
anchor: Some(AnchorLocation::File(cwd.to_string_lossy().to_string())),
|
||||
},
|
||||
)),
|
||||
Err(_) => {
|
||||
//Non utf8 data.
|
||||
match (bytes.get(0), bytes.get(1)) {
|
||||
(Some(x), Some(y)) if *x == 0xff && *y == 0xfe => {
|
||||
// Possibly UTF-16 little endian
|
||||
let utf16 = read_le_u16(&bytes[2..]);
|
||||
|
||||
if let Some(utf16) = utf16 {
|
||||
match std::string::String::from_utf16(&utf16) {
|
||||
Ok(s) => Ok((
|
||||
cwd.extension()
|
||||
.map(|name| name.to_string_lossy().to_string()),
|
||||
UntaggedValue::string(s),
|
||||
Tag {
|
||||
span,
|
||||
anchor: Some(AnchorLocation::File(
|
||||
cwd.to_string_lossy().to_string(),
|
||||
)),
|
||||
},
|
||||
)),
|
||||
Err(_) => Ok((
|
||||
None,
|
||||
UntaggedValue::binary(bytes),
|
||||
Tag {
|
||||
span,
|
||||
anchor: Some(AnchorLocation::File(
|
||||
cwd.to_string_lossy().to_string(),
|
||||
)),
|
||||
},
|
||||
)),
|
||||
}
|
||||
} else {
|
||||
Ok((
|
||||
None,
|
||||
UntaggedValue::binary(bytes),
|
||||
Tag {
|
||||
span,
|
||||
anchor: Some(AnchorLocation::File(
|
||||
cwd.to_string_lossy().to_string(),
|
||||
)),
|
||||
},
|
||||
))
|
||||
}
|
||||
}
|
||||
(Some(x), Some(y)) if *x == 0xfe && *y == 0xff => {
|
||||
// Possibly UTF-16 big endian
|
||||
let utf16 = read_be_u16(&bytes[2..]);
|
||||
|
||||
if let Some(utf16) = utf16 {
|
||||
match std::string::String::from_utf16(&utf16) {
|
||||
Ok(s) => Ok((
|
||||
cwd.extension()
|
||||
.map(|name| name.to_string_lossy().to_string()),
|
||||
UntaggedValue::string(s),
|
||||
Tag {
|
||||
span,
|
||||
anchor: Some(AnchorLocation::File(
|
||||
cwd.to_string_lossy().to_string(),
|
||||
)),
|
||||
},
|
||||
)),
|
||||
Err(_) => Ok((
|
||||
None,
|
||||
UntaggedValue::binary(bytes),
|
||||
Tag {
|
||||
span,
|
||||
anchor: Some(AnchorLocation::File(
|
||||
cwd.to_string_lossy().to_string(),
|
||||
)),
|
||||
},
|
||||
)),
|
||||
}
|
||||
} else {
|
||||
Ok((
|
||||
None,
|
||||
UntaggedValue::binary(bytes),
|
||||
Tag {
|
||||
span,
|
||||
anchor: Some(AnchorLocation::File(
|
||||
cwd.to_string_lossy().to_string(),
|
||||
)),
|
||||
},
|
||||
))
|
||||
}
|
||||
}
|
||||
_ => Ok((
|
||||
None,
|
||||
UntaggedValue::binary(bytes),
|
||||
Tag {
|
||||
span,
|
||||
anchor: Some(AnchorLocation::File(
|
||||
cwd.to_string_lossy().to_string(),
|
||||
)),
|
||||
},
|
||||
)),
|
||||
}
|
||||
}
|
||||
},
|
||||
Err(_) => Err(ShellError::labeled_error(
|
||||
format!("Cannot open {:?} for reading.", &cwd),
|
||||
"file not found",
|
||||
span,
|
||||
)),
|
||||
}
|
||||
}
|
||||
} else {
|
||||
Err(ShellError::labeled_error(
|
||||
format!("Cannot open {:?} for reading.", &cwd),
|
||||
"file not found",
|
||||
span,
|
||||
))
|
||||
}
|
||||
/*
|
||||
cwd.push(Path::new(location));
|
||||
if let Ok(cwd) = dunce::canonicalize(cwd) {
|
||||
match std::fs::read(&cwd) {
|
||||
@ -214,6 +430,103 @@ pub async fn fetch(
|
||||
span,
|
||||
))
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
fn convert_via_utf8(
|
||||
decoder: &mut Decoder,
|
||||
encoder: &mut Encoder,
|
||||
read: &mut dyn Read,
|
||||
write: &mut dyn Write,
|
||||
last: bool,
|
||||
) {
|
||||
let mut input_buffer = [0u8; 2048];
|
||||
let mut intermediate_buffer_bytes = [0u8; 4096];
|
||||
// Is there a safe way to create a stack-allocated &mut str?
|
||||
let mut intermediate_buffer: &mut str =
|
||||
//unsafe { std::mem::transmute(&mut intermediate_buffer_bytes[..]) };
|
||||
std::str::from_utf8_mut(&mut intermediate_buffer_bytes[..]).expect("error with from_utf8_mut");
|
||||
let mut output_buffer = [0u8; 4096];
|
||||
let mut current_input_ended = false;
|
||||
while !current_input_ended {
|
||||
match read.read(&mut input_buffer) {
|
||||
Err(_) => {
|
||||
print!("Error reading input.");
|
||||
//std::process::exit(-5);
|
||||
}
|
||||
Ok(decoder_input_end) => {
|
||||
current_input_ended = decoder_input_end == 0;
|
||||
let input_ended = last && current_input_ended;
|
||||
let mut decoder_input_start = 0usize;
|
||||
loop {
|
||||
let (decoder_result, decoder_read, decoder_written, _) = decoder.decode_to_str(
|
||||
&input_buffer[decoder_input_start..decoder_input_end],
|
||||
&mut intermediate_buffer,
|
||||
input_ended,
|
||||
);
|
||||
decoder_input_start += decoder_read;
|
||||
|
||||
let last_output = if input_ended {
|
||||
match decoder_result {
|
||||
CoderResult::InputEmpty => true,
|
||||
CoderResult::OutputFull => false,
|
||||
}
|
||||
} else {
|
||||
false
|
||||
};
|
||||
|
||||
// Regardless of whether the intermediate buffer got full
|
||||
// or the input buffer was exhausted, let's process what's
|
||||
// in the intermediate buffer.
|
||||
|
||||
if encoder.encoding() == UTF_8 {
|
||||
// If the target is UTF-8, optimize out the encoder.
|
||||
if write
|
||||
.write_all(&intermediate_buffer.as_bytes()[..decoder_written])
|
||||
.is_err()
|
||||
{
|
||||
print!("Error writing output.");
|
||||
//std::process::exit(-7);
|
||||
}
|
||||
} else {
|
||||
let mut encoder_input_start = 0usize;
|
||||
loop {
|
||||
let (encoder_result, encoder_read, encoder_written, _) = encoder
|
||||
.encode_from_utf8(
|
||||
&intermediate_buffer[encoder_input_start..decoder_written],
|
||||
&mut output_buffer,
|
||||
last_output,
|
||||
);
|
||||
encoder_input_start += encoder_read;
|
||||
if write.write_all(&output_buffer[..encoder_written]).is_err() {
|
||||
print!("Error writing output.");
|
||||
//std::process::exit(-6);
|
||||
}
|
||||
match encoder_result {
|
||||
CoderResult::InputEmpty => {
|
||||
break;
|
||||
}
|
||||
CoderResult::OutputFull => {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Now let's see if we should read again or process the
|
||||
// rest of the current input buffer.
|
||||
match decoder_result {
|
||||
CoderResult::InputEmpty => {
|
||||
break;
|
||||
}
|
||||
CoderResult::OutputFull => {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn read_le_u16(input: &[u8]) -> Option<Vec<u16>> {
|
||||
|
@ -80,7 +80,7 @@ fn errors_if_file_not_found() {
|
||||
"enter i_dont_exist.csv"
|
||||
);
|
||||
|
||||
assert!(actual.err.contains("File could not be opened"));
|
||||
//assert!(actual.err.contains("File could not be opened"));
|
||||
assert!(actual.err.contains("file not found"));
|
||||
})
|
||||
}
|
||||
|
@ -225,6 +225,6 @@ fn errors_if_file_not_found() {
|
||||
"open i_dont_exist.txt"
|
||||
);
|
||||
|
||||
assert!(actual.err.contains("File could not be opened"));
|
||||
assert!(actual.err.contains("file not found"));
|
||||
//assert!(actual.err.contains("File could not be opened"));
|
||||
assert!(actual.err.contains("Cannot open"));
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user