forked from extern/nushell
Convert open/fetch to stream (#2028)
* Types lined up for open with stream * Chunking stream * Maybe I didn't need most of the Stream stuff after all? * Some clean-up * Merge weird cargo.lock * Start moving some encoding logic to MaybeTextCodec Will we lose the nice table formatting if we Stream? How do we get it back? Collect the Stream at the end? * Clean-up and small refinements * Put in auto-convert workaround * Workaround to make sure bat functionality works * Handle some easy error cases * All tests pass * Remove guessing logic * Address clippy comments * Pull latest master and fix MaybeTextCodec usage * Add tag to enable autoview
This commit is contained in:
parent
8775991c2d
commit
e31e8d1550
@ -1,5 +1,5 @@
|
|||||||
use crate::commands::classified::block::run_block;
|
use crate::commands::classified::block::run_block;
|
||||||
use crate::commands::classified::external::{MaybeTextCodec, StringOrBinary};
|
use crate::commands::classified::maybe_text_codec::{MaybeTextCodec, StringOrBinary};
|
||||||
use crate::commands::plugin::JsonRpc;
|
use crate::commands::plugin::JsonRpc;
|
||||||
use crate::commands::plugin::{PluginCommand, PluginSink};
|
use crate::commands::plugin::{PluginCommand, PluginSink};
|
||||||
use crate::commands::whole_stream_command;
|
use crate::commands::whole_stream_command;
|
||||||
@ -953,7 +953,7 @@ pub async fn process_line(
|
|||||||
|
|
||||||
let input_stream = if redirect_stdin {
|
let input_stream = if redirect_stdin {
|
||||||
let file = futures::io::AllowStdIo::new(std::io::stdin());
|
let file = futures::io::AllowStdIo::new(std::io::stdin());
|
||||||
let stream = FramedRead::new(file, MaybeTextCodec).map(|line| {
|
let stream = FramedRead::new(file, MaybeTextCodec::default()).map(|line| {
|
||||||
if let Ok(line) = line {
|
if let Ok(line) = line {
|
||||||
match line {
|
match line {
|
||||||
StringOrBinary::String(s) => Ok(Value {
|
StringOrBinary::String(s) => Ok(Value {
|
||||||
|
@ -20,6 +20,7 @@ pub(crate) mod clip;
|
|||||||
pub(crate) mod command;
|
pub(crate) mod command;
|
||||||
pub(crate) mod compact;
|
pub(crate) mod compact;
|
||||||
pub(crate) mod config;
|
pub(crate) mod config;
|
||||||
|
pub(crate) mod constants;
|
||||||
pub(crate) mod count;
|
pub(crate) mod count;
|
||||||
pub(crate) mod cp;
|
pub(crate) mod cp;
|
||||||
pub(crate) mod date;
|
pub(crate) mod date;
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
use crate::commands::classified::maybe_text_codec::{MaybeTextCodec, StringOrBinary};
|
||||||
use crate::evaluate::evaluate_baseline_expr;
|
use crate::evaluate::evaluate_baseline_expr;
|
||||||
use crate::futures::ThreadedReceiver;
|
use crate::futures::ThreadedReceiver;
|
||||||
use crate::prelude::*;
|
use crate::prelude::*;
|
||||||
@ -7,9 +8,7 @@ use std::ops::Deref;
|
|||||||
use std::process::{Command, Stdio};
|
use std::process::{Command, Stdio};
|
||||||
use std::sync::mpsc;
|
use std::sync::mpsc;
|
||||||
|
|
||||||
use bytes::{BufMut, Bytes, BytesMut};
|
|
||||||
use futures::executor::block_on_stream;
|
use futures::executor::block_on_stream;
|
||||||
// use futures::stream::StreamExt;
|
|
||||||
use futures_codec::FramedRead;
|
use futures_codec::FramedRead;
|
||||||
use log::trace;
|
use log::trace;
|
||||||
|
|
||||||
@ -18,70 +17,6 @@ use nu_protocol::hir::ExternalCommand;
|
|||||||
use nu_protocol::{Primitive, Scope, ShellTypeName, UntaggedValue, Value};
|
use nu_protocol::{Primitive, Scope, ShellTypeName, UntaggedValue, Value};
|
||||||
use nu_source::Tag;
|
use nu_source::Tag;
|
||||||
|
|
||||||
pub enum StringOrBinary {
|
|
||||||
String(String),
|
|
||||||
Binary(Vec<u8>),
|
|
||||||
}
|
|
||||||
pub struct MaybeTextCodec;
|
|
||||||
|
|
||||||
impl futures_codec::Encoder for MaybeTextCodec {
|
|
||||||
type Item = StringOrBinary;
|
|
||||||
type Error = std::io::Error;
|
|
||||||
|
|
||||||
fn encode(&mut self, item: Self::Item, dst: &mut BytesMut) -> Result<(), Self::Error> {
|
|
||||||
match item {
|
|
||||||
StringOrBinary::String(s) => {
|
|
||||||
dst.reserve(s.len());
|
|
||||||
dst.put(s.as_bytes());
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
StringOrBinary::Binary(b) => {
|
|
||||||
dst.reserve(b.len());
|
|
||||||
dst.put(Bytes::from(b));
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl futures_codec::Decoder for MaybeTextCodec {
|
|
||||||
type Item = StringOrBinary;
|
|
||||||
type Error = std::io::Error;
|
|
||||||
|
|
||||||
fn decode(&mut self, src: &mut BytesMut) -> Result<Option<Self::Item>, Self::Error> {
|
|
||||||
let v: Vec<u8> = src.to_vec();
|
|
||||||
match String::from_utf8(v) {
|
|
||||||
Ok(s) => {
|
|
||||||
src.clear();
|
|
||||||
if s.is_empty() {
|
|
||||||
Ok(None)
|
|
||||||
} else {
|
|
||||||
Ok(Some(StringOrBinary::String(s)))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Err(err) => {
|
|
||||||
// Note: the longest UTF-8 character per Unicode spec is currently 6 bytes. If we fail somewhere earlier than the last 6 bytes,
|
|
||||||
// we know that we're failing to understand the string encoding and not just seeing a partial character. When this happens, let's
|
|
||||||
// fall back to assuming it's a binary buffer.
|
|
||||||
if src.is_empty() {
|
|
||||||
Ok(None)
|
|
||||||
} else if src.len() > 6 && (src.len() - err.utf8_error().valid_up_to() > 6) {
|
|
||||||
// Fall back to assuming binary
|
|
||||||
let buf = src.to_vec();
|
|
||||||
src.clear();
|
|
||||||
Ok(Some(StringOrBinary::Binary(buf)))
|
|
||||||
} else {
|
|
||||||
// Looks like a utf-8 string, so let's assume that
|
|
||||||
let buf = src.split_to(err.utf8_error().valid_up_to() + 1);
|
|
||||||
String::from_utf8(buf.to_vec())
|
|
||||||
.map(|x| Some(StringOrBinary::String(x)))
|
|
||||||
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) async fn run_external_command(
|
pub(crate) async fn run_external_command(
|
||||||
command: ExternalCommand,
|
command: ExternalCommand,
|
||||||
context: &mut Context,
|
context: &mut Context,
|
||||||
@ -319,7 +254,7 @@ fn spawn(
|
|||||||
};
|
};
|
||||||
|
|
||||||
let file = futures::io::AllowStdIo::new(stdout);
|
let file = futures::io::AllowStdIo::new(stdout);
|
||||||
let stream = FramedRead::new(file, MaybeTextCodec);
|
let stream = FramedRead::new(file, MaybeTextCodec::default());
|
||||||
|
|
||||||
for line in block_on_stream(stream) {
|
for line in block_on_stream(stream) {
|
||||||
match line {
|
match line {
|
||||||
@ -373,7 +308,7 @@ fn spawn(
|
|||||||
}
|
}
|
||||||
|
|
||||||
let file = futures::io::AllowStdIo::new(stderr);
|
let file = futures::io::AllowStdIo::new(stderr);
|
||||||
let err_stream = FramedRead::new(file, MaybeTextCodec);
|
let err_stream = FramedRead::new(file, MaybeTextCodec::default());
|
||||||
|
|
||||||
for err_line in block_on_stream(err_stream) {
|
for err_line in block_on_stream(err_stream) {
|
||||||
match err_line {
|
match err_line {
|
||||||
|
103
crates/nu-cli/src/commands/classified/maybe_text_codec.rs
Normal file
103
crates/nu-cli/src/commands/classified/maybe_text_codec.rs
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
use bytes::{BufMut, Bytes, BytesMut};
|
||||||
|
|
||||||
|
use nu_errors::ShellError;
|
||||||
|
|
||||||
|
extern crate encoding_rs;
|
||||||
|
use encoding_rs::{CoderResult, Decoder, Encoding, UTF_8};
|
||||||
|
|
||||||
|
const OUTPUT_BUFFER_SIZE: usize = 8192;
|
||||||
|
|
||||||
|
/// One decoded chunk of a byte stream.
///
/// `String` carries a chunk that was decoded as text; `Binary` carries the raw
/// bytes of a chunk that could not be decoded cleanly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum StringOrBinary {
    /// Successfully decoded text.
    String(String),
    /// Raw, undecoded bytes.
    Binary(Vec<u8>),
}
|
||||||
|
|
||||||
|
pub struct MaybeTextCodec {
|
||||||
|
decoder: Decoder,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MaybeTextCodec {
|
||||||
|
// The constructor takes an Option<&'static Encoding>, because an absence of an encoding indicates that we want BOM sniffing enabled
|
||||||
|
pub fn new(encoding: Option<&'static Encoding>) -> Self {
|
||||||
|
let decoder = match encoding {
|
||||||
|
Some(e) => e.new_decoder_with_bom_removal(),
|
||||||
|
None => UTF_8.new_decoder(),
|
||||||
|
};
|
||||||
|
MaybeTextCodec { decoder }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for MaybeTextCodec {
|
||||||
|
// The default MaybeTextCodec uses a UTF_8 decoder
|
||||||
|
fn default() -> Self {
|
||||||
|
MaybeTextCodec {
|
||||||
|
decoder: UTF_8.new_decoder(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl futures_codec::Encoder for MaybeTextCodec {
|
||||||
|
type Item = StringOrBinary;
|
||||||
|
type Error = std::io::Error;
|
||||||
|
|
||||||
|
fn encode(&mut self, item: Self::Item, dst: &mut BytesMut) -> Result<(), Self::Error> {
|
||||||
|
match item {
|
||||||
|
StringOrBinary::String(s) => {
|
||||||
|
dst.reserve(s.len());
|
||||||
|
dst.put(s.as_bytes());
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
StringOrBinary::Binary(b) => {
|
||||||
|
dst.reserve(b.len());
|
||||||
|
dst.put(Bytes::from(b));
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Write some tests
|
||||||
|
impl futures_codec::Decoder for MaybeTextCodec {
|
||||||
|
type Item = StringOrBinary;
|
||||||
|
type Error = ShellError;
|
||||||
|
|
||||||
|
fn decode(&mut self, src: &mut BytesMut) -> Result<Option<Self::Item>, Self::Error> {
|
||||||
|
if src.is_empty() {
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut s = String::with_capacity(OUTPUT_BUFFER_SIZE);
|
||||||
|
|
||||||
|
let (res, read, replacements) = self.decoder.decode_to_string(src, &mut s, false);
|
||||||
|
// If we had to make replacements when converting to utf8, fallback to binary
|
||||||
|
if replacements {
|
||||||
|
return Ok(Some(StringOrBinary::Binary(src.to_vec())));
|
||||||
|
}
|
||||||
|
|
||||||
|
match res {
|
||||||
|
CoderResult::InputEmpty => {
|
||||||
|
src.clear();
|
||||||
|
Ok(Some(StringOrBinary::String(s)))
|
||||||
|
}
|
||||||
|
CoderResult::OutputFull => {
|
||||||
|
// If the original buffer size is too small,
|
||||||
|
// We continue to allocate new Strings and append them to the result until the input buffer is smaller than the allocated String
|
||||||
|
let mut starting_index = read;
|
||||||
|
loop {
|
||||||
|
let mut more = String::with_capacity(OUTPUT_BUFFER_SIZE);
|
||||||
|
let (res, read, _replacements) =
|
||||||
|
self.decoder
|
||||||
|
.decode_to_string(&src[starting_index..], &mut more, false);
|
||||||
|
s.push_str(&more);
|
||||||
|
// Our input buffer is smaller than out allocated String, we can stop now
|
||||||
|
if let CoderResult::InputEmpty = res {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
starting_index += read;
|
||||||
|
}
|
||||||
|
src.clear();
|
||||||
|
Ok(Some(StringOrBinary::String(s)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -3,6 +3,7 @@ mod dynamic;
|
|||||||
pub(crate) mod expr;
|
pub(crate) mod expr;
|
||||||
pub(crate) mod external;
|
pub(crate) mod external;
|
||||||
pub(crate) mod internal;
|
pub(crate) mod internal;
|
||||||
|
pub(crate) mod maybe_text_codec;
|
||||||
|
|
||||||
#[allow(unused_imports)]
|
#[allow(unused_imports)]
|
||||||
pub(crate) use dynamic::Command as DynamicCommand;
|
pub(crate) use dynamic::Command as DynamicCommand;
|
||||||
|
358
crates/nu-cli/src/commands/constants.rs
Normal file
358
crates/nu-cli/src/commands/constants.rs
Normal file
@ -0,0 +1,358 @@
|
|||||||
|
/// File extensions and file names that are looked up to decide whether `bat`
/// can syntax-highlight a file.
///
/// The list is only used for membership tests, so each identifier appears
/// exactly once; comparison is case-sensitive, which is why several entries
/// exist in more than one casing.
// NOTE(review): presumably derived from bat's list of supported languages —
// confirm against bat when updating.
pub const BAT_LANGUAGES: &[&str] = &[
    "as", "csv", "tsv", "applescript", "script editor", "s", "S",
    "adoc", "asciidoc", "asc", "asa", "yasm", "nasm", "asm", "inc", "mac",
    "awk", "bat", "cmd", "bib", "sh", "bash", "zsh",
    ".bash_aliases", ".bash_completions", ".bash_functions", ".bash_login",
    ".bash_logout", ".bash_profile", ".bash_variables", ".bashrc", ".profile",
    ".textmate_init", ".zshrc", "PKGBUILD", ".ebuild", ".eclass",
    "c", "h", "cs", "csx", "cpp", "cc", "cp", "cxx", "c++", "C",
    "hh", "hpp", "hxx", "h++", "inl", "ipp",
    "cabal", "clj", "cljc", "cljs", "edn",
    "CMakeLists.txt", "cmake", "h.in", "hh.in", "hpp.in", "hxx.in", "h++.in",
    "CMakeCache.txt",
    "cr", "css", "css.erb", "css.liquid", "d", "di", "dart", "diff", "patch",
    "Dockerfile", "dockerfile", "ex", "exs", "elm", "erl", "hrl",
    "Emakefile", "emakefile",
    "fs", "fsi", "fsx", "fish",
    "attributes", "gitattributes", ".gitattributes",
    "COMMIT_EDITMSG", "MERGE_MSG", "TAG_EDITMSG",
    "gitconfig", ".gitconfig", ".gitmodules", "exclude", "gitignore", ".gitignore", ".git",
    "gitlog", "git-rebase-todo",
    "go", "dot", "DOT", "gv", "groovy", "gvy", "gradle", "Jenkinsfile",
    "hs", "hsc", "show-nonprintable",
    "html", "htm", "shtml", "xhtml", "asp", "html.eex", "yaws", "rails", "rhtml",
    "erb", "html.erb", "adp", "twig", "html.twig",
    "ini", "INI", "INF", "reg", "REG", "lng", "cfg", "CFG", "desktop", "url", "URL",
    ".editorconfig", ".hgrc", "hgrc",
    "java", "bsh", "properties", "jsp", "js", "htc", "jsx", "babel", "es6", "js.erb",
    "json", "sublime-settings", "sublime-menu", "sublime-keymap", "sublime-mousemap",
    "sublime-theme", "sublime-build", "sublime-project", "sublime-completions",
    "sublime-commands", "sublime-macro", "sublime-color-scheme", "ipynb", "Pipfile.lock",
    "jsonnet", "libsonnet", "libjsonnet", "jl", "kt", "kts", "tex", "ltx",
    "less", "css.less",
    "lisp", "cl", "clisp", "l", "mud", "el", "scm", "ss", "lsp", "fasl", "lhs", "lua",
    "make", "GNUmakefile", "makefile", "Makefile", "makefile.am", "Makefile.am",
    "makefile.in", "Makefile.in", "OCamlMakefile", "mak", "mk",
    "md", "mdown", "markdown", "markdn", "matlab", "build", "nix", "m", "mm", "M",
    "ml", "mli", "mll", "mly", "pas", "p", "dpr", "pl", "pm", "pod", "t", "PL",
    "php", "php3", "php4", "php5", "php7", "phps", "phpt", "phtml", "txt",
    "ps1", "psm1", "psd1",
    "proto", "protodevel", "pb.txt", "proto.text", "textpb", "pbtxt", "prototxt",
    "pp", "epp", "purs",
    "py", "py3", "pyw", "pyi", "pyx", "pyx.in", "pxd", "pxd.in", "pxi", "pxi.in",
    "rpy", "cpy",
    "SConstruct", "Sconstruct", "sconstruct", "SConscript", "gyp", "gypi",
    "Snakefile", "wscript",
    "R", "r", "Rprofile", "rd", "re", "rst", "rest", "robot",
    "rb", "Appfile", "Appraisals", "Berksfile", "Brewfile", "capfile", "cgi", "Cheffile",
    "config.ru", "Deliverfile", "Fastfile", "fcgi", "Gemfile", "gemspec", "Guardfile",
    "irbrc", "jbuilder", "Podfile", "podspec", "prawn", "rabl", "rake", "Rakefile",
    "Rantfile", "rbx", "rjs", "ruby.rail", "Scanfile", "simplecov", "Snapfile",
    "thor", "Thorfile", "Vagrantfile",
    "haml", "sass", "rxml", "builder", "rs", "scala", "sbt",
    "sql", "ddl", "dml", "erbsql", "sql.erb",
    "swift", "log", "tcl", "tf", "tfvars", "hcl", "sty", "cls", "textile",
    "toml", "tml", "Cargo.lock", "Gopkg.lock", "Pipfile",
    "ts", "tsx", "varlink", "vim", ".vimrc",
    "xml", "xsd", "xslt", "tld", "dtml", "rss", "opml", "svg",
    "yaml", "yml", "sublime-syntax",
];
|
@ -121,21 +121,16 @@ async fn enter(
|
|||||||
|
|
||||||
let full_path = std::path::PathBuf::from(cwd);
|
let full_path = std::path::PathBuf::from(cwd);
|
||||||
|
|
||||||
let (file_extension, contents, contents_tag) = crate::commands::open::fetch(
|
let (file_extension, tagged_contents) = crate::commands::open::fetch(
|
||||||
&full_path,
|
&full_path,
|
||||||
&PathBuf::from(location_clone),
|
&PathBuf::from(location_clone),
|
||||||
tag.span,
|
tag.span,
|
||||||
match encoding {
|
encoding,
|
||||||
Some(e) => e.to_string(),
|
|
||||||
_ => "".to_string(),
|
|
||||||
},
|
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
match contents {
|
match tagged_contents.value {
|
||||||
UntaggedValue::Primitive(Primitive::String(_)) => {
|
UntaggedValue::Primitive(Primitive::String(_)) => {
|
||||||
let tagged_contents = contents.into_value(&contents_tag);
|
|
||||||
|
|
||||||
if let Some(extension) = file_extension {
|
if let Some(extension) = file_extension {
|
||||||
let command_name = format!("from {}", extension);
|
let command_name = format!("from {}", extension);
|
||||||
if let Some(converter) = registry.get_command(&command_name) {
|
if let Some(converter) = registry.get_command(&command_name) {
|
||||||
@ -156,18 +151,18 @@ async fn enter(
|
|||||||
scope: scope.clone(),
|
scope: scope.clone(),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
let tag = tagged_contents.tag.clone();
|
||||||
let mut result = converter
|
let mut result = converter
|
||||||
.run(new_args.with_input(vec![tagged_contents]), ®istry)
|
.run(new_args.with_input(vec![tagged_contents]), ®istry)
|
||||||
.await?;
|
.await?;
|
||||||
let result_vec: Vec<Result<ReturnSuccess, ShellError>> =
|
let result_vec: Vec<Result<ReturnSuccess, ShellError>> =
|
||||||
result.drain_vec().await;
|
result.drain_vec().await;
|
||||||
|
|
||||||
Ok(futures::stream::iter(result_vec.into_iter().map(
|
Ok(futures::stream::iter(result_vec.into_iter().map(
|
||||||
move |res| match res {
|
move |res| match res {
|
||||||
Ok(ReturnSuccess::Value(Value { value, .. })) => Ok(
|
Ok(ReturnSuccess::Value(Value { value, .. })) => Ok(
|
||||||
ReturnSuccess::Action(CommandAction::EnterValueShell(Value {
|
ReturnSuccess::Action(CommandAction::EnterValueShell(Value {
|
||||||
value,
|
value,
|
||||||
tag: contents_tag.clone(),
|
tag: tag.clone(),
|
||||||
})),
|
})),
|
||||||
),
|
),
|
||||||
x => x,
|
x => x,
|
||||||
@ -185,13 +180,9 @@ async fn enter(
|
|||||||
)))
|
)))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_ => {
|
_ => Ok(OutputStream::one(ReturnSuccess::action(
|
||||||
let tagged_contents = contents.into_value(contents_tag);
|
CommandAction::EnterValueShell(tagged_contents),
|
||||||
|
))),
|
||||||
Ok(OutputStream::one(ReturnSuccess::action(
|
|
||||||
CommandAction::EnterValueShell(tagged_contents),
|
|
||||||
)))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,15 +1,17 @@
|
|||||||
|
use crate::commands::classified::maybe_text_codec::{MaybeTextCodec, StringOrBinary};
|
||||||
use crate::commands::WholeStreamCommand;
|
use crate::commands::WholeStreamCommand;
|
||||||
use crate::prelude::*;
|
use crate::prelude::*;
|
||||||
|
use futures_codec::FramedRead;
|
||||||
use nu_errors::ShellError;
|
use nu_errors::ShellError;
|
||||||
use nu_protocol::{CommandAction, ReturnSuccess, Signature, SyntaxShape, UntaggedValue};
|
use nu_protocol::{CommandAction, ReturnSuccess, Signature, SyntaxShape, UntaggedValue, Value};
|
||||||
use nu_source::{AnchorLocation, Span, Tagged};
|
use nu_source::{AnchorLocation, Span, Tagged};
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::PathBuf;
|
||||||
extern crate encoding_rs;
|
extern crate encoding_rs;
|
||||||
|
use crate::commands::constants::BAT_LANGUAGES;
|
||||||
use encoding_rs::*;
|
use encoding_rs::*;
|
||||||
|
use futures::prelude::*;
|
||||||
|
use log::debug;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::BufWriter;
|
|
||||||
use std::io::Read;
|
|
||||||
use std::io::Write;
|
|
||||||
|
|
||||||
pub struct Open;
|
pub struct Open;
|
||||||
|
|
||||||
@ -81,23 +83,25 @@ documentation link at https://docs.rs/encoding_rs/0.8.23/encoding_rs/#statics"#
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_encoding(opt: Option<String>) -> &'static Encoding {
|
pub fn get_encoding(opt: Option<Tagged<String>>) -> Result<&'static Encoding, ShellError> {
|
||||||
match opt {
|
match opt {
|
||||||
None => UTF_8,
|
None => Ok(UTF_8),
|
||||||
Some(label) => match Encoding::for_label((&label).as_bytes()) {
|
Some(label) => match Encoding::for_label((&label.item).as_bytes()) {
|
||||||
None => {
|
None => Err(ShellError::labeled_error(
|
||||||
//print!("{} is not a known encoding label. Trying UTF-8.", label);
|
format!(
|
||||||
//std::process::exit(-2);
|
r#"{} is not a valid encoding, refer to https://docs.rs/encoding_rs/0.8.23/encoding_rs/#statics for a valid list of encodings"#,
|
||||||
get_encoding(Some("utf-8".to_string()))
|
label.item
|
||||||
}
|
),
|
||||||
Some(encoding) => encoding,
|
"invalid encoding",
|
||||||
|
label.span(),
|
||||||
|
)),
|
||||||
|
Some(encoding) => Ok(encoding),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn open(args: CommandArgs, registry: &CommandRegistry) -> Result<OutputStream, ShellError> {
|
async fn open(args: CommandArgs, registry: &CommandRegistry) -> Result<OutputStream, ShellError> {
|
||||||
let cwd = PathBuf::from(args.shell_manager.path());
|
let cwd = PathBuf::from(args.shell_manager.path());
|
||||||
let full_path = cwd;
|
|
||||||
let registry = registry.clone();
|
let registry = registry.clone();
|
||||||
|
|
||||||
let (
|
let (
|
||||||
@ -108,329 +112,135 @@ async fn open(args: CommandArgs, registry: &CommandRegistry) -> Result<OutputStr
|
|||||||
},
|
},
|
||||||
_,
|
_,
|
||||||
) = args.process(®istry).await?;
|
) = args.process(®istry).await?;
|
||||||
let enc = match encoding {
|
|
||||||
Some(e) => e.to_string(),
|
|
||||||
_ => "".to_string(),
|
|
||||||
};
|
|
||||||
let result = fetch(&full_path, &path.item, path.tag.span, enc).await;
|
|
||||||
|
|
||||||
let (file_extension, contents, contents_tag) = result?;
|
// TODO: Remove once Streams are supported everywhere!
|
||||||
|
// As a short term workaround for getting AutoConvert and Bat functionality (Those don't currently support Streams)
|
||||||
|
|
||||||
let file_extension = if raw.item {
|
// Check if the extension has a "from *" command OR "bat" supports syntax highlighting
|
||||||
|
// AND the user doesn't want the raw output
|
||||||
|
// In these cases, we will collect the Stream
|
||||||
|
let ext = if raw.item {
|
||||||
None
|
None
|
||||||
} else {
|
} else {
|
||||||
// If the extension could not be determined via mimetype, try to use the path
|
path.extension()
|
||||||
// extension. Some file types do not declare their mimetypes (such as bson files).
|
.map(|name| name.to_string_lossy().to_string())
|
||||||
file_extension.or_else(|| path.extension().map(|x| x.to_string_lossy().to_string()))
|
|
||||||
};
|
};
|
||||||
|
|
||||||
let tagged_contents = contents.into_value(&contents_tag);
|
if let Some(ext) = ext {
|
||||||
|
// Check if we have a conversion command
|
||||||
if let Some(extension) = file_extension {
|
if let Some(_command) = registry.get_command(&format!("from {}", ext)) {
|
||||||
Ok(OutputStream::one(ReturnSuccess::action(
|
let (_, tagged_contents) = crate::commands::open::fetch(
|
||||||
CommandAction::AutoConvert(tagged_contents, extension),
|
&cwd,
|
||||||
)))
|
&PathBuf::from(&path.item),
|
||||||
} else {
|
path.tag.span,
|
||||||
Ok(OutputStream::one(ReturnSuccess::value(tagged_contents)))
|
encoding,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
return Ok(OutputStream::one(ReturnSuccess::action(
|
||||||
|
CommandAction::AutoConvert(tagged_contents, ext),
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
// Check if bat does syntax highlighting
|
||||||
|
if BAT_LANGUAGES.contains(&ext.as_ref()) {
|
||||||
|
let (_, tagged_contents) = crate::commands::open::fetch(
|
||||||
|
&cwd,
|
||||||
|
&PathBuf::from(&path.item),
|
||||||
|
path.tag.span,
|
||||||
|
encoding,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
return Ok(OutputStream::one(ReturnSuccess::value(tagged_contents)));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Normal Streaming operation
|
||||||
|
let with_encoding = if encoding.is_none() {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(get_encoding(encoding)?)
|
||||||
|
};
|
||||||
|
let f = File::open(&path).map_err(|e| {
|
||||||
|
ShellError::labeled_error(
|
||||||
|
format!("Error opening file: {:?}", e),
|
||||||
|
"Error opening file",
|
||||||
|
path.span(),
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
let async_reader = futures::io::AllowStdIo::new(f);
|
||||||
|
let sob_stream = FramedRead::new(async_reader, MaybeTextCodec::new(with_encoding))
|
||||||
|
.map_err(|e| ShellError::unexpected(format!("AsyncRead failed in open function: {:?}", e)))
|
||||||
|
.into_stream();
|
||||||
|
|
||||||
|
let final_stream = sob_stream.map(|x| match x {
|
||||||
|
Ok(StringOrBinary::String(s)) => {
|
||||||
|
ReturnSuccess::value(UntaggedValue::string(s).into_untagged_value())
|
||||||
|
}
|
||||||
|
Ok(StringOrBinary::Binary(b)) => ReturnSuccess::value(
|
||||||
|
UntaggedValue::binary(b.into_iter().collect()).into_untagged_value(),
|
||||||
|
),
|
||||||
|
Err(se) => Err(se),
|
||||||
|
});
|
||||||
|
|
||||||
|
Ok(OutputStream::new(final_stream))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Note that we do not output a Stream in "fetch" since it is only used by "enter" command
|
||||||
|
// Which we expect to use a concrete Value and not a Stream
|
||||||
pub async fn fetch(
|
pub async fn fetch(
|
||||||
cwd: &PathBuf,
|
cwd: &PathBuf,
|
||||||
location: &PathBuf,
|
location: &PathBuf,
|
||||||
span: Span,
|
span: Span,
|
||||||
encoding: String,
|
encoding_choice: Option<Tagged<String>>,
|
||||||
) -> Result<(Option<String>, UntaggedValue, Tag), ShellError> {
|
) -> Result<(Option<String>, Value), ShellError> {
|
||||||
|
// TODO: I don't understand the point of this? Maybe for better error reporting
|
||||||
let mut cwd = cwd.clone();
|
let mut cwd = cwd.clone();
|
||||||
let output_encoding: &Encoding = get_encoding(Some("utf-8".to_string()));
|
cwd.push(location);
|
||||||
let input_encoding: &Encoding = get_encoding(Some(encoding.clone()));
|
let nice_location = dunce::canonicalize(&cwd).map_err(|e| {
|
||||||
let mut decoder = input_encoding.new_decoder();
|
ShellError::labeled_error(
|
||||||
let mut encoder = output_encoding.new_encoder();
|
format!("Cannot canonicalize file {:?} because {:?}", &cwd, e),
|
||||||
let mut _file: File;
|
"Cannot canonicalize",
|
||||||
let buf = Vec::new();
|
|
||||||
let mut bufwriter = BufWriter::new(buf);
|
|
||||||
|
|
||||||
cwd.push(Path::new(location));
|
|
||||||
if let Ok(cwd) = dunce::canonicalize(&cwd) {
|
|
||||||
if !encoding.is_empty() {
|
|
||||||
// use the encoding string
|
|
||||||
match File::open(&Path::new(&cwd)) {
|
|
||||||
Ok(mut _file) => {
|
|
||||||
convert_via_utf8(
|
|
||||||
&mut decoder,
|
|
||||||
&mut encoder,
|
|
||||||
&mut _file,
|
|
||||||
&mut bufwriter,
|
|
||||||
false,
|
|
||||||
);
|
|
||||||
//bufwriter.flush()?;
|
|
||||||
Ok((
|
|
||||||
cwd.extension()
|
|
||||||
.map(|name| name.to_string_lossy().to_string()),
|
|
||||||
UntaggedValue::string(String::from_utf8_lossy(&bufwriter.buffer())),
|
|
||||||
Tag {
|
|
||||||
span,
|
|
||||||
anchor: Some(AnchorLocation::File(cwd.to_string_lossy().to_string())),
|
|
||||||
},
|
|
||||||
))
|
|
||||||
}
|
|
||||||
Err(_) => Err(ShellError::labeled_error(
|
|
||||||
format!("Cannot open {:?} for reading.", &cwd),
|
|
||||||
"file not found",
|
|
||||||
span,
|
|
||||||
)),
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Do the old stuff
|
|
||||||
match std::fs::read(&cwd) {
|
|
||||||
Ok(bytes) => match std::str::from_utf8(&bytes) {
|
|
||||||
Ok(s) => Ok((
|
|
||||||
cwd.extension()
|
|
||||||
.map(|name| name.to_string_lossy().to_string()),
|
|
||||||
UntaggedValue::string(s),
|
|
||||||
Tag {
|
|
||||||
span,
|
|
||||||
anchor: Some(AnchorLocation::File(cwd.to_string_lossy().to_string())),
|
|
||||||
},
|
|
||||||
)),
|
|
||||||
Err(_) => {
|
|
||||||
//Non utf8 data.
|
|
||||||
match (bytes.get(0), bytes.get(1)) {
|
|
||||||
(Some(x), Some(y)) if *x == 0xff && *y == 0xfe => {
|
|
||||||
// Possibly UTF-16 little endian
|
|
||||||
let utf16 = read_le_u16(&bytes[2..]);
|
|
||||||
|
|
||||||
if let Some(utf16) = utf16 {
|
|
||||||
match std::string::String::from_utf16(&utf16) {
|
|
||||||
Ok(s) => Ok((
|
|
||||||
cwd.extension()
|
|
||||||
.map(|name| name.to_string_lossy().to_string()),
|
|
||||||
UntaggedValue::string(s),
|
|
||||||
Tag {
|
|
||||||
span,
|
|
||||||
anchor: Some(AnchorLocation::File(
|
|
||||||
cwd.to_string_lossy().to_string(),
|
|
||||||
)),
|
|
||||||
},
|
|
||||||
)),
|
|
||||||
Err(_) => Ok((
|
|
||||||
None,
|
|
||||||
UntaggedValue::binary(bytes),
|
|
||||||
Tag {
|
|
||||||
span,
|
|
||||||
anchor: Some(AnchorLocation::File(
|
|
||||||
cwd.to_string_lossy().to_string(),
|
|
||||||
)),
|
|
||||||
},
|
|
||||||
)),
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
Ok((
|
|
||||||
None,
|
|
||||||
UntaggedValue::binary(bytes),
|
|
||||||
Tag {
|
|
||||||
span,
|
|
||||||
anchor: Some(AnchorLocation::File(
|
|
||||||
cwd.to_string_lossy().to_string(),
|
|
||||||
)),
|
|
||||||
},
|
|
||||||
))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
(Some(x), Some(y)) if *x == 0xfe && *y == 0xff => {
|
|
||||||
// Possibly UTF-16 big endian
|
|
||||||
let utf16 = read_be_u16(&bytes[2..]);
|
|
||||||
|
|
||||||
if let Some(utf16) = utf16 {
|
|
||||||
match std::string::String::from_utf16(&utf16) {
|
|
||||||
Ok(s) => Ok((
|
|
||||||
cwd.extension()
|
|
||||||
.map(|name| name.to_string_lossy().to_string()),
|
|
||||||
UntaggedValue::string(s),
|
|
||||||
Tag {
|
|
||||||
span,
|
|
||||||
anchor: Some(AnchorLocation::File(
|
|
||||||
cwd.to_string_lossy().to_string(),
|
|
||||||
)),
|
|
||||||
},
|
|
||||||
)),
|
|
||||||
Err(_) => Ok((
|
|
||||||
None,
|
|
||||||
UntaggedValue::binary(bytes),
|
|
||||||
Tag {
|
|
||||||
span,
|
|
||||||
anchor: Some(AnchorLocation::File(
|
|
||||||
cwd.to_string_lossy().to_string(),
|
|
||||||
)),
|
|
||||||
},
|
|
||||||
)),
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
Ok((
|
|
||||||
None,
|
|
||||||
UntaggedValue::binary(bytes),
|
|
||||||
Tag {
|
|
||||||
span,
|
|
||||||
anchor: Some(AnchorLocation::File(
|
|
||||||
cwd.to_string_lossy().to_string(),
|
|
||||||
)),
|
|
||||||
},
|
|
||||||
))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ => Ok((
|
|
||||||
None,
|
|
||||||
UntaggedValue::binary(bytes),
|
|
||||||
Tag {
|
|
||||||
span,
|
|
||||||
anchor: Some(AnchorLocation::File(
|
|
||||||
cwd.to_string_lossy().to_string(),
|
|
||||||
)),
|
|
||||||
},
|
|
||||||
)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
Err(_) => Err(ShellError::labeled_error(
|
|
||||||
format!("Cannot open {:?} for reading.", &cwd),
|
|
||||||
"file not found",
|
|
||||||
span,
|
|
||||||
)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
Err(ShellError::labeled_error(
|
|
||||||
format!("Cannot open {:?} for reading.", &cwd),
|
|
||||||
"file not found",
|
|
||||||
span,
|
span,
|
||||||
))
|
)
|
||||||
}
|
})?;
|
||||||
}
|
|
||||||
|
|
||||||
fn convert_via_utf8(
|
// The extension may be used in AutoConvert later on
|
||||||
decoder: &mut Decoder,
|
let ext = location
|
||||||
encoder: &mut Encoder,
|
.extension()
|
||||||
read: &mut dyn Read,
|
.map(|name| name.to_string_lossy().to_string());
|
||||||
write: &mut dyn Write,
|
|
||||||
last: bool,
|
|
||||||
) {
|
|
||||||
let mut input_buffer = [0u8; 2048];
|
|
||||||
let mut intermediate_buffer_bytes = [0u8; 4096];
|
|
||||||
// Is there a safe way to create a stack-allocated &mut str?
|
|
||||||
let mut intermediate_buffer: &mut str =
|
|
||||||
//unsafe { std::mem::transmute(&mut intermediate_buffer_bytes[..]) };
|
|
||||||
std::str::from_utf8_mut(&mut intermediate_buffer_bytes[..]).expect("error with from_utf8_mut");
|
|
||||||
let mut output_buffer = [0u8; 4096];
|
|
||||||
let mut current_input_ended = false;
|
|
||||||
while !current_input_ended {
|
|
||||||
match read.read(&mut input_buffer) {
|
|
||||||
Err(_) => {
|
|
||||||
print!("Error reading input.");
|
|
||||||
//std::process::exit(-5);
|
|
||||||
}
|
|
||||||
Ok(decoder_input_end) => {
|
|
||||||
current_input_ended = decoder_input_end == 0;
|
|
||||||
let input_ended = last && current_input_ended;
|
|
||||||
let mut decoder_input_start = 0usize;
|
|
||||||
loop {
|
|
||||||
let (decoder_result, decoder_read, decoder_written, _) = decoder.decode_to_str(
|
|
||||||
&input_buffer[decoder_input_start..decoder_input_end],
|
|
||||||
&mut intermediate_buffer,
|
|
||||||
input_ended,
|
|
||||||
);
|
|
||||||
decoder_input_start += decoder_read;
|
|
||||||
|
|
||||||
let last_output = if input_ended {
|
// The tag that will used when returning a Value
|
||||||
match decoder_result {
|
let file_tag = Tag {
|
||||||
CoderResult::InputEmpty => true,
|
span,
|
||||||
CoderResult::OutputFull => false,
|
anchor: Some(AnchorLocation::File(
|
||||||
}
|
nice_location.to_string_lossy().to_string(),
|
||||||
} else {
|
)),
|
||||||
false
|
};
|
||||||
};
|
|
||||||
|
|
||||||
// Regardless of whether the intermediate buffer got full
|
let res = std::fs::read(location)?;
|
||||||
// or the input buffer was exhausted, let's process what's
|
|
||||||
// in the intermediate buffer.
|
|
||||||
|
|
||||||
if encoder.encoding() == UTF_8 {
|
// If no encoding is provided we try to guess the encoding to read the file with
|
||||||
// If the target is UTF-8, optimize out the encoder.
|
let encoding = if encoding_choice.is_none() {
|
||||||
if write
|
UTF_8
|
||||||
.write_all(&intermediate_buffer.as_bytes()[..decoder_written])
|
|
||||||
.is_err()
|
|
||||||
{
|
|
||||||
print!("Error writing output.");
|
|
||||||
//std::process::exit(-7);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
let mut encoder_input_start = 0usize;
|
|
||||||
loop {
|
|
||||||
let (encoder_result, encoder_read, encoder_written, _) = encoder
|
|
||||||
.encode_from_utf8(
|
|
||||||
&intermediate_buffer[encoder_input_start..decoder_written],
|
|
||||||
&mut output_buffer,
|
|
||||||
last_output,
|
|
||||||
);
|
|
||||||
encoder_input_start += encoder_read;
|
|
||||||
if write.write_all(&output_buffer[..encoder_written]).is_err() {
|
|
||||||
print!("Error writing output.");
|
|
||||||
//std::process::exit(-6);
|
|
||||||
}
|
|
||||||
match encoder_result {
|
|
||||||
CoderResult::InputEmpty => {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
CoderResult::OutputFull => {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Now let's see if we should read again or process the
|
|
||||||
// rest of the current input buffer.
|
|
||||||
match decoder_result {
|
|
||||||
CoderResult::InputEmpty => {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
CoderResult::OutputFull => {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn read_le_u16(input: &[u8]) -> Option<Vec<u16>> {
|
|
||||||
if input.len() % 2 != 0 || input.len() < 2 {
|
|
||||||
None
|
|
||||||
} else {
|
} else {
|
||||||
let mut result = vec![];
|
get_encoding(encoding_choice.clone())?
|
||||||
let mut pos = 0;
|
};
|
||||||
while pos < input.len() {
|
|
||||||
result.push(u16::from_le_bytes([input[pos], input[pos + 1]]));
|
|
||||||
pos += 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
Some(result)
|
// If the user specified an encoding, then do not do BOM sniffing
|
||||||
}
|
let decoded_res = if encoding_choice.is_some() {
|
||||||
}
|
let (cow_res, _replacements) = encoding.decode_with_bom_removal(&res);
|
||||||
|
cow_res
|
||||||
fn read_be_u16(input: &[u8]) -> Option<Vec<u16>> {
|
|
||||||
if input.len() % 2 != 0 || input.len() < 2 {
|
|
||||||
None
|
|
||||||
} else {
|
} else {
|
||||||
let mut result = vec![];
|
// Otherwise, use the default UTF-8 encoder with BOM sniffing
|
||||||
let mut pos = 0;
|
let (cow_res, actual_encoding, replacements) = encoding.decode(&res);
|
||||||
while pos < input.len() {
|
// If we had to use replacement characters then fallback to binary
|
||||||
result.push(u16::from_be_bytes([input[pos], input[pos + 1]]));
|
if replacements {
|
||||||
pos += 2;
|
return Ok((ext, UntaggedValue::binary(res).into_value(file_tag)));
|
||||||
}
|
}
|
||||||
|
debug!("Decoded using {:?}", actual_encoding);
|
||||||
Some(result)
|
cow_res
|
||||||
}
|
};
|
||||||
|
let v = UntaggedValue::string(decoded_res.to_string()).into_value(file_tag);
|
||||||
|
Ok((ext, v))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
@ -80,7 +80,6 @@ fn errors_if_file_not_found() {
|
|||||||
"enter i_dont_exist.csv"
|
"enter i_dont_exist.csv"
|
||||||
);
|
);
|
||||||
|
|
||||||
//assert!(actual.err.contains("File could not be opened"));
|
assert!(actual.err.contains("Cannot canonicalize"));
|
||||||
assert!(actual.err.contains("file not found"));
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -224,7 +224,11 @@ fn errors_if_file_not_found() {
|
|||||||
cwd: "tests/fixtures/formats",
|
cwd: "tests/fixtures/formats",
|
||||||
"open i_dont_exist.txt"
|
"open i_dont_exist.txt"
|
||||||
);
|
);
|
||||||
|
let expected = "Cannot canonicalize";
|
||||||
//assert!(actual.err.contains("File could not be opened"));
|
assert!(
|
||||||
assert!(actual.err.contains("Cannot open"));
|
actual.err.contains(expected),
|
||||||
|
"Error:\n{}\ndoes not contain{}",
|
||||||
|
actual.err,
|
||||||
|
expected
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user