Merge pull request #279 from jonathandturner/utf16

Add support for utf16 files
This commit is contained in:
Jonathan Turner 2019-08-12 16:33:45 +12:00 committed by GitHub
commit 7bc9b7a758
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 149 additions and 23 deletions

6
Cargo.lock generated
View File

@ -1979,7 +1979,7 @@ dependencies = [
"regex 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"reqwest 0.9.19 (registry+https://github.com/rust-lang/crates.io-index)", "reqwest 0.9.19 (registry+https://github.com/rust-lang/crates.io-index)",
"roxmltree 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", "roxmltree 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
"rustyline 5.0.1 (registry+https://github.com/rust-lang/crates.io-index)", "rustyline 5.0.1 (git+https://github.com/kkawakam/rustyline.git)",
"semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", "semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.98 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.98 (registry+https://github.com/rust-lang/crates.io-index)",
"serde-hjson 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", "serde-hjson 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
@ -2790,7 +2790,7 @@ dependencies = [
[[package]] [[package]]
name = "rustyline" name = "rustyline"
version = "5.0.1" version = "5.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "git+https://github.com/kkawakam/rustyline.git#568c9d0512b065e9eef68a6e46407881d2376738"
dependencies = [ dependencies = [
"dirs 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)", "dirs 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.60 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.60 (registry+https://github.com/rust-lang/crates.io-index)",
@ -4036,7 +4036,7 @@ dependencies = [
"checksum rustc-demangle 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)" = "a7f4dccf6f4891ebcc0c39f9b6eb1a83b9bf5d747cb439ec6fba4f3b977038af" "checksum rustc-demangle 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)" = "a7f4dccf6f4891ebcc0c39f9b6eb1a83b9bf5d747cb439ec6fba4f3b977038af"
"checksum rustc-hash 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7540fc8b0c49f096ee9c961cda096467dce8084bec6bdca2fc83895fd9b28cb8" "checksum rustc-hash 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7540fc8b0c49f096ee9c961cda096467dce8084bec6bdca2fc83895fd9b28cb8"
"checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" "checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a"
"checksum rustyline 5.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b7d4ca3c9586d2c1f742284f032e328313ea55f3f60a3b0a17e2ca1a2bf9ae22" "checksum rustyline 5.0.1 (git+https://github.com/kkawakam/rustyline.git)" = "<none>"
"checksum ryu 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c92464b447c0ee8c4fb3824ecc8383b81717b9f1e74ba2e72540aef7b9f82997" "checksum ryu 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c92464b447c0ee8c4fb3824ecc8383b81717b9f1e74ba2e72540aef7b9f82997"
"checksum safemem 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e133ccc4f4d1cd4f89cc8a7ff618287d56dc7f638b8e38fc32c5fdcadc339dd5" "checksum safemem 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e133ccc4f4d1cd4f89cc8a7ff618287d56dc7f638b8e38fc32c5fdcadc339dd5"
"checksum same-file 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "585e8ddcedc187886a30fa705c47985c3fa88d06624095856b36ca0b82ff4421" "checksum same-file 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "585e8ddcedc187886a30fa705c47985c3fa88d06624095856b36ca0b82ff4421"

View File

@ -394,6 +394,10 @@ mod tests {
Res { Res {
loc: fixtures().join("sgml_description.json"), loc: fixtures().join("sgml_description.json"),
at: 0 at: 0
},
Res {
loc: fixtures().join("utf16.ini"),
at: 0
} }
] ]
); );

View File

@ -32,6 +32,10 @@ fn get_member(path: &Tagged<String>, obj: &Tagged<Value>) -> Result<Tagged<Value
for p in path.split(".") { for p in path.split(".") {
match current.get_data_by_key(p) { match current.get_data_by_key(p) {
Some(v) => current = v, Some(v) => current = v,
None => {
// Before we give up, see if they gave us a path that matches a field name by itself
match obj.get_data_by_key(&path.item) {
Some(v) => return Ok(v.clone()),
None => { None => {
return Err(ShellError::labeled_error( return Err(ShellError::labeled_error(
"Unknown field", "Unknown field",
@ -41,6 +45,8 @@ fn get_member(path: &Tagged<String>, obj: &Tagged<Value>) -> Result<Tagged<Value
} }
} }
} }
}
}
Ok(current.clone()) Ok(current.clone())
} }
@ -49,11 +55,6 @@ pub fn get(
GetArgs { rest: fields }: GetArgs, GetArgs { rest: fields }: GetArgs,
RunnableContext { input, .. }: RunnableContext, RunnableContext { input, .. }: RunnableContext,
) -> Result<OutputStream, ShellError> { ) -> Result<OutputStream, ShellError> {
// If it's a number, get the row instead of the column
// if let Some(amount) = amount {
// return Ok(input.values.skip(amount as u64).take(1).from_input_stream());
// }
let stream = input let stream = input
.values .values
.map(move |item| { .map(move |item| {

View File

@ -9,7 +9,6 @@ use mime::Mime;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::str::FromStr; use std::str::FromStr;
use uuid::Uuid; use uuid::Uuid;
pub struct Open; pub struct Open;
#[derive(Deserialize)] #[derive(Deserialize)]
@ -229,6 +228,25 @@ pub fn fetch(
if let Ok(cwd) = dunce::canonicalize(cwd) { if let Ok(cwd) = dunce::canonicalize(cwd) {
match std::fs::read(&cwd) { match std::fs::read(&cwd) {
Ok(bytes) => match std::str::from_utf8(&bytes) { Ok(bytes) => match std::str::from_utf8(&bytes) {
Ok(s) => Ok((
cwd.extension()
.map(|name| name.to_string_lossy().to_string()),
Value::string(s),
Tag {
span,
origin: Some(Uuid::new_v4()),
},
SpanSource::File(cwd.to_string_lossy().to_string()),
)),
Err(_) => {
//Non utf8 data.
match (bytes.get(0), bytes.get(1)) {
(Some(x), Some(y)) if *x == 0xff && *y == 0xfe => {
// Possibly UTF-16 little endian
let utf16 = read_le_u16(&bytes[2..]);
if let Some(utf16) = utf16 {
match std::string::String::from_utf16(&utf16) {
Ok(s) => Ok(( Ok(s) => Ok((
cwd.extension() cwd.extension()
.map(|name| name.to_string_lossy().to_string()), .map(|name| name.to_string_lossy().to_string()),
@ -248,6 +266,68 @@ pub fn fetch(
}, },
SpanSource::File(cwd.to_string_lossy().to_string()), SpanSource::File(cwd.to_string_lossy().to_string()),
)), )),
}
} else {
Ok((
None,
Value::Binary(bytes),
Tag {
span,
origin: Some(Uuid::new_v4()),
},
SpanSource::File(cwd.to_string_lossy().to_string()),
))
}
}
(Some(x), Some(y)) if *x == 0xfe && *y == 0xff => {
// Possibly UTF-16 big endian
let utf16 = read_be_u16(&bytes[2..]);
if let Some(utf16) = utf16 {
match std::string::String::from_utf16(&utf16) {
Ok(s) => Ok((
cwd.extension()
.map(|name| name.to_string_lossy().to_string()),
Value::string(s),
Tag {
span,
origin: Some(Uuid::new_v4()),
},
SpanSource::File(cwd.to_string_lossy().to_string()),
)),
Err(_) => Ok((
None,
Value::Binary(bytes),
Tag {
span,
origin: Some(Uuid::new_v4()),
},
SpanSource::File(cwd.to_string_lossy().to_string()),
)),
}
} else {
Ok((
None,
Value::Binary(bytes),
Tag {
span,
origin: Some(Uuid::new_v4()),
},
SpanSource::File(cwd.to_string_lossy().to_string()),
))
}
}
_ => Ok((
None,
Value::Binary(bytes),
Tag {
span,
origin: Some(Uuid::new_v4()),
},
SpanSource::File(cwd.to_string_lossy().to_string()),
)),
}
}
}, },
Err(_) => { Err(_) => {
return Err(ShellError::labeled_error( return Err(ShellError::labeled_error(
@ -267,6 +347,36 @@ pub fn fetch(
} }
} }
/// Decodes raw bytes into 16-bit code units, reading every byte pair as little-endian.
///
/// Returns `None` when `input` is empty or holds an odd number of bytes;
/// otherwise returns one `u16` per two input bytes, in input order.
fn read_le_u16(input: &[u8]) -> Option<Vec<u16>> {
    // An odd length cannot be split into whole code units, and an empty
    // payload is rejected as well.
    if input.is_empty() || input.len() % 2 == 1 {
        return None;
    }

    let units: Vec<u16> = input
        .chunks_exact(2)
        .map(|pair| u16::from_le_bytes([pair[0], pair[1]]))
        .collect();
    Some(units)
}
/// Decodes raw bytes into 16-bit code units, reading every byte pair as big-endian.
///
/// Returns `None` when `input` is empty or holds an odd number of bytes;
/// otherwise returns one `u16` per two input bytes, in input order.
fn read_be_u16(input: &[u8]) -> Option<Vec<u16>> {
    // An odd length cannot be split into whole code units, and an empty
    // payload is rejected as well.
    if input.is_empty() || input.len() % 2 == 1 {
        return None;
    }

    let units: Vec<u16> = input
        .chunks_exact(2)
        .map(|pair| u16::from_be_bytes([pair[0], pair[1]]))
        .collect();
    Some(units)
}
pub fn parse_as_value( pub fn parse_as_value(
extension: Option<String>, extension: Option<String>,
contents: String, contents: String,

View File

@ -71,6 +71,17 @@ fn open_can_parse_ini() {
assert_eq!(output, "1234") assert_eq!(output, "1234")
} }
// Checks that `open` can load the `utf16.ini` fixture and that the resulting
// data can be queried: the `IconIndex` entry under `.ShellClassInfo` is
// expected to be "-236".
// NOTE(review): the fixture is presumably UTF-16 encoded with a byte-order
// mark (going by its name) — the file is binary and not visible here; confirm.
#[test]
fn open_can_parse_utf16_ini() {
// NOTE(review): `nu!` is a project macro defined elsewhere; it presumably runs
// the pipeline from the given working directory and binds its output to
// `output` — confirm against the macro definition.
nu!(
output,
cwd("tests/fixtures/formats"),
// The first `get` is given a field name that itself starts with a dot.
"open utf16.ini | get .ShellClassInfo | get IconIndex | echo $it"
);
assert_eq!(output, "-236")
}
#[test] #[test]
fn open_error_if_file_not_found() { fn open_error_if_file_not_found() {
nu_error!( nu_error!(

BIN
tests/fixtures/formats/utf16.ini vendored Normal file

Binary file not shown.