From 6cf3dc92fce95da8643d3ee23b0a867841d31eca Mon Sep 17 00:00:00 2001 From: Jonathan Turner Date: Mon, 12 Aug 2019 16:11:42 +1200 Subject: [PATCH] Add support for utf16 files --- Cargo.lock | 6 +- src/commands/cp.rs | 4 + src/commands/get.rs | 21 ++--- src/commands/open.rs | 130 ++++++++++++++++++++++++++++--- tests/commands_test.rs | 11 +++ tests/fixtures/formats/utf16.ini | Bin 0 -> 504 bytes 6 files changed, 149 insertions(+), 23 deletions(-) create mode 100644 tests/fixtures/formats/utf16.ini diff --git a/Cargo.lock b/Cargo.lock index cad63d184..48c884cde 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1979,7 +1979,7 @@ dependencies = [ "regex 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "reqwest 0.9.19 (registry+https://github.com/rust-lang/crates.io-index)", "roxmltree 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", - "rustyline 5.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "rustyline 5.0.1 (git+https://github.com/kkawakam/rustyline.git)", "semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.98 (registry+https://github.com/rust-lang/crates.io-index)", "serde-hjson 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -2790,7 +2790,7 @@ dependencies = [ [[package]] name = "rustyline" version = "5.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" +source = "git+https://github.com/kkawakam/rustyline.git#568c9d0512b065e9eef68a6e46407881d2376738" dependencies = [ "dirs 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.60 (registry+https://github.com/rust-lang/crates.io-index)", @@ -4036,7 +4036,7 @@ dependencies = [ "checksum rustc-demangle 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)" = "a7f4dccf6f4891ebcc0c39f9b6eb1a83b9bf5d747cb439ec6fba4f3b977038af" "checksum rustc-hash 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7540fc8b0c49f096ee9c961cda096467dce8084bec6bdca2fc83895fd9b28cb8" "checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" -"checksum rustyline 5.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b7d4ca3c9586d2c1f742284f032e328313ea55f3f60a3b0a17e2ca1a2bf9ae22" +"checksum rustyline 5.0.1 (git+https://github.com/kkawakam/rustyline.git)" = "" "checksum ryu 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c92464b447c0ee8c4fb3824ecc8383b81717b9f1e74ba2e72540aef7b9f82997" "checksum safemem 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e133ccc4f4d1cd4f89cc8a7ff618287d56dc7f638b8e38fc32c5fdcadc339dd5" "checksum same-file 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "585e8ddcedc187886a30fa705c47985c3fa88d06624095856b36ca0b82ff4421" diff --git a/src/commands/cp.rs b/src/commands/cp.rs index b8f03e584..c0cb36544 100644 --- a/src/commands/cp.rs +++ b/src/commands/cp.rs @@ -394,6 +394,10 @@ mod tests { Res { loc: fixtures().join("sgml_description.json"), at: 0 + }, + Res { + loc: fixtures().join("utf16.ini"), + at: 0 } ] ); diff --git a/src/commands/get.rs b/src/commands/get.rs index 873fd9f0d..d0645798c 100644 --- a/src/commands/get.rs +++ b/src/commands/get.rs @@ -33,11 +33,17 @@ fn get_member(path: &Tagged, obj: &Tagged) -> Result current = v, None => { - return Err(ShellError::labeled_error( - "Unknown field", - "object missing field", - path.span(), - )); + // Before we give up, see if they gave us a path that matches a field name by itself + match obj.get_data_by_key(&path.item) { + Some(v) => return Ok(v.clone()), + None => { + return Err(ShellError::labeled_error( + "Unknown field", + "object missing field", + path.span(), + )); + } + } } } } @@ -49,11 +55,6 @@ pub fn get( GetArgs { rest: fields }: GetArgs, RunnableContext { input, .. }: RunnableContext, ) -> Result { - // If it's a number, get the row instead of the column - // if let Some(amount) = amount { - // return Ok(input.values.skip(amount as u64).take(1).from_input_stream()); - // } - let stream = input .values .map(move |item| { diff --git a/src/commands/open.rs b/src/commands/open.rs index 263b909fd..1686f8306 100644 --- a/src/commands/open.rs +++ b/src/commands/open.rs @@ -9,7 +9,6 @@ use mime::Mime; use std::path::{Path, PathBuf}; use std::str::FromStr; use uuid::Uuid; - pub struct Open; #[derive(Deserialize)] @@ -239,15 +238,96 @@ pub fn fetch( }, SpanSource::File(cwd.to_string_lossy().to_string()), )), - Err(_) => Ok(( - None, - Value::Binary(bytes), - Tag { - span, - origin: Some(Uuid::new_v4()), - }, - SpanSource::File(cwd.to_string_lossy().to_string()), - )), + Err(_) => { + //Non utf8 data. + match (bytes.get(0), bytes.get(1)) { + (Some(x), Some(y)) if *x == 0xff && *y == 0xfe => { + // Possibly UTF-16 little endian + let utf16 = read_le_u16(&bytes[2..]); + + if let Some(utf16) = utf16 { + match std::string::String::from_utf16(&utf16) { + Ok(s) => Ok(( + cwd.extension() + .map(|name| name.to_string_lossy().to_string()), + Value::string(s), + Tag { + span, + origin: Some(Uuid::new_v4()), + }, + SpanSource::File(cwd.to_string_lossy().to_string()), + )), + Err(_) => Ok(( + None, + Value::Binary(bytes), + Tag { + span, + origin: Some(Uuid::new_v4()), + }, + SpanSource::File(cwd.to_string_lossy().to_string()), + )), + } + } else { + Ok(( + None, + Value::Binary(bytes), + Tag { + span, + origin: Some(Uuid::new_v4()), + }, + SpanSource::File(cwd.to_string_lossy().to_string()), + )) + } + } + (Some(x), Some(y)) if *x == 0xfe && *y == 0xff => { + // Possibly UTF-16 big endian + let utf16 = read_be_u16(&bytes[2..]); + + if let Some(utf16) = utf16 { + match std::string::String::from_utf16(&utf16) { + Ok(s) => Ok(( + cwd.extension() + .map(|name| name.to_string_lossy().to_string()), + Value::string(s), + Tag { + span, + origin: Some(Uuid::new_v4()), + }, + SpanSource::File(cwd.to_string_lossy().to_string()), + )), + Err(_) => Ok(( + None, + Value::Binary(bytes), + Tag { + span, + origin: Some(Uuid::new_v4()), + }, + SpanSource::File(cwd.to_string_lossy().to_string()), + )), + } + } else { + Ok(( + None, + Value::Binary(bytes), + Tag { + span, + origin: Some(Uuid::new_v4()), + }, + SpanSource::File(cwd.to_string_lossy().to_string()), + )) + } + } + _ => Ok(( + None, + Value::Binary(bytes), + Tag { + span, + origin: Some(Uuid::new_v4()), + }, + SpanSource::File(cwd.to_string_lossy().to_string()), + )), + } + } }, Err(_) => { return Err(ShellError::labeled_error( @@ -267,6 +347,36 @@ pub fn fetch( } } +fn read_le_u16(input: &[u8]) -> Option> { + if input.len() % 2 != 0 || input.len() < 2 { + None + } else { + let mut result = vec![]; + let mut pos = 0; + while pos < input.len() { + result.push(u16::from_le_bytes([input[pos], input[pos + 1]])); + pos += 2; + } + + Some(result) + } +} + +fn read_be_u16(input: &[u8]) -> Option> { + if input.len() % 2 != 0 || input.len() < 2 { + None + } else { + let mut result = vec![]; + let mut pos = 0; + while pos < input.len() { + result.push(u16::from_be_bytes([input[pos], input[pos + 1]])); + pos += 2; + } + + Some(result) + } +} + pub fn parse_as_value( extension: Option, contents: String, diff --git a/tests/commands_test.rs b/tests/commands_test.rs index a177cc4c3..54c178aa5 100644 --- a/tests/commands_test.rs +++ b/tests/commands_test.rs @@ -71,6 +71,17 @@ fn open_can_parse_ini() { assert_eq!(output, "1234") } +#[test] +fn open_can_parse_utf16_ini() { + nu!( + output, + cwd("tests/fixtures/formats"), + "open utf16.ini | get .ShellClassInfo | get IconIndex | echo $it" + ); + + assert_eq!(output, "-236") +} + #[test] fn open_error_if_file_not_found() { nu_error!( diff --git a/tests/fixtures/formats/utf16.ini b/tests/fixtures/formats/utf16.ini new file mode 100644 index 0000000000000000000000000000000000000000..c68b44ccdb4626e9d837b94cb006df967fb8a615 GIT binary patch literal 504 zcmb`D%L;;E5QWb==rP)rU_zIL5Cnk`Evj9$khBssLk+5jufAW9vRxFzf0;SM%zX2> zH`UNemKHiFRE&)|zH-ddQ9GsD$Yf3VZBb%Hx{(`b&YcrID?