forked from extern/nushell
Merge pull request #279 from jonathandturner/utf16
Add support for utf16 files
This commit is contained in:
commit
7bc9b7a758
6
Cargo.lock
generated
6
Cargo.lock
generated
@ -1979,7 +1979,7 @@ dependencies = [
|
||||
"regex 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"reqwest 0.9.19 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"roxmltree 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rustyline 5.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rustyline 5.0.1 (git+https://github.com/kkawakam/rustyline.git)",
|
||||
"semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 1.0.98 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde-hjson 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@ -2790,7 +2790,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "rustyline"
|
||||
version = "5.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
source = "git+https://github.com/kkawakam/rustyline.git#568c9d0512b065e9eef68a6e46407881d2376738"
|
||||
dependencies = [
|
||||
"dirs 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.60 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@ -4036,7 +4036,7 @@ dependencies = [
|
||||
"checksum rustc-demangle 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)" = "a7f4dccf6f4891ebcc0c39f9b6eb1a83b9bf5d747cb439ec6fba4f3b977038af"
|
||||
"checksum rustc-hash 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7540fc8b0c49f096ee9c961cda096467dce8084bec6bdca2fc83895fd9b28cb8"
|
||||
"checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a"
|
||||
"checksum rustyline 5.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b7d4ca3c9586d2c1f742284f032e328313ea55f3f60a3b0a17e2ca1a2bf9ae22"
|
||||
"checksum rustyline 5.0.1 (git+https://github.com/kkawakam/rustyline.git)" = "<none>"
|
||||
"checksum ryu 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c92464b447c0ee8c4fb3824ecc8383b81717b9f1e74ba2e72540aef7b9f82997"
|
||||
"checksum safemem 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e133ccc4f4d1cd4f89cc8a7ff618287d56dc7f638b8e38fc32c5fdcadc339dd5"
|
||||
"checksum same-file 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "585e8ddcedc187886a30fa705c47985c3fa88d06624095856b36ca0b82ff4421"
|
||||
|
@ -394,6 +394,10 @@ mod tests {
|
||||
Res {
|
||||
loc: fixtures().join("sgml_description.json"),
|
||||
at: 0
|
||||
},
|
||||
Res {
|
||||
loc: fixtures().join("utf16.ini"),
|
||||
at: 0
|
||||
}
|
||||
]
|
||||
);
|
||||
|
@ -32,6 +32,10 @@ fn get_member(path: &Tagged<String>, obj: &Tagged<Value>) -> Result<Tagged<Value
|
||||
for p in path.split(".") {
|
||||
match current.get_data_by_key(p) {
|
||||
Some(v) => current = v,
|
||||
None => {
|
||||
// Before we give up, see if they gave us a path that matches a field name by itself
|
||||
match obj.get_data_by_key(&path.item) {
|
||||
Some(v) => return Ok(v.clone()),
|
||||
None => {
|
||||
return Err(ShellError::labeled_error(
|
||||
"Unknown field",
|
||||
@ -41,6 +45,8 @@ fn get_member(path: &Tagged<String>, obj: &Tagged<Value>) -> Result<Tagged<Value
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(current.clone())
|
||||
}
|
||||
@ -49,11 +55,6 @@ pub fn get(
|
||||
GetArgs { rest: fields }: GetArgs,
|
||||
RunnableContext { input, .. }: RunnableContext,
|
||||
) -> Result<OutputStream, ShellError> {
|
||||
// If it's a number, get the row instead of the column
|
||||
// if let Some(amount) = amount {
|
||||
// return Ok(input.values.skip(amount as u64).take(1).from_input_stream());
|
||||
// }
|
||||
|
||||
let stream = input
|
||||
.values
|
||||
.map(move |item| {
|
||||
|
@ -9,7 +9,6 @@ use mime::Mime;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::str::FromStr;
|
||||
use uuid::Uuid;
|
||||
|
||||
pub struct Open;
|
||||
|
||||
#[derive(Deserialize)]
|
||||
@ -229,6 +228,25 @@ pub fn fetch(
|
||||
if let Ok(cwd) = dunce::canonicalize(cwd) {
|
||||
match std::fs::read(&cwd) {
|
||||
Ok(bytes) => match std::str::from_utf8(&bytes) {
|
||||
Ok(s) => Ok((
|
||||
cwd.extension()
|
||||
.map(|name| name.to_string_lossy().to_string()),
|
||||
Value::string(s),
|
||||
Tag {
|
||||
span,
|
||||
origin: Some(Uuid::new_v4()),
|
||||
},
|
||||
SpanSource::File(cwd.to_string_lossy().to_string()),
|
||||
)),
|
||||
Err(_) => {
|
||||
//Non utf8 data.
|
||||
match (bytes.get(0), bytes.get(1)) {
|
||||
(Some(x), Some(y)) if *x == 0xff && *y == 0xfe => {
|
||||
// Possibly UTF-16 little endian
|
||||
let utf16 = read_le_u16(&bytes[2..]);
|
||||
|
||||
if let Some(utf16) = utf16 {
|
||||
match std::string::String::from_utf16(&utf16) {
|
||||
Ok(s) => Ok((
|
||||
cwd.extension()
|
||||
.map(|name| name.to_string_lossy().to_string()),
|
||||
@ -248,6 +266,68 @@ pub fn fetch(
|
||||
},
|
||||
SpanSource::File(cwd.to_string_lossy().to_string()),
|
||||
)),
|
||||
}
|
||||
} else {
|
||||
Ok((
|
||||
None,
|
||||
Value::Binary(bytes),
|
||||
Tag {
|
||||
span,
|
||||
origin: Some(Uuid::new_v4()),
|
||||
},
|
||||
SpanSource::File(cwd.to_string_lossy().to_string()),
|
||||
))
|
||||
}
|
||||
}
|
||||
(Some(x), Some(y)) if *x == 0xfe && *y == 0xff => {
|
||||
// Possibly UTF-16 big endian
|
||||
let utf16 = read_be_u16(&bytes[2..]);
|
||||
|
||||
if let Some(utf16) = utf16 {
|
||||
match std::string::String::from_utf16(&utf16) {
|
||||
Ok(s) => Ok((
|
||||
cwd.extension()
|
||||
.map(|name| name.to_string_lossy().to_string()),
|
||||
Value::string(s),
|
||||
Tag {
|
||||
span,
|
||||
origin: Some(Uuid::new_v4()),
|
||||
},
|
||||
SpanSource::File(cwd.to_string_lossy().to_string()),
|
||||
)),
|
||||
Err(_) => Ok((
|
||||
None,
|
||||
Value::Binary(bytes),
|
||||
Tag {
|
||||
span,
|
||||
origin: Some(Uuid::new_v4()),
|
||||
},
|
||||
SpanSource::File(cwd.to_string_lossy().to_string()),
|
||||
)),
|
||||
}
|
||||
} else {
|
||||
Ok((
|
||||
None,
|
||||
Value::Binary(bytes),
|
||||
Tag {
|
||||
span,
|
||||
origin: Some(Uuid::new_v4()),
|
||||
},
|
||||
SpanSource::File(cwd.to_string_lossy().to_string()),
|
||||
))
|
||||
}
|
||||
}
|
||||
_ => Ok((
|
||||
None,
|
||||
Value::Binary(bytes),
|
||||
Tag {
|
||||
span,
|
||||
origin: Some(Uuid::new_v4()),
|
||||
},
|
||||
SpanSource::File(cwd.to_string_lossy().to_string()),
|
||||
)),
|
||||
}
|
||||
}
|
||||
},
|
||||
Err(_) => {
|
||||
return Err(ShellError::labeled_error(
|
||||
@ -267,6 +347,36 @@ pub fn fetch(
|
||||
}
|
||||
}
|
||||
|
||||
/// Decodes `input` as a sequence of little-endian 16-bit values.
///
/// Each consecutive pair of bytes becomes one `u16`, with the first byte of
/// the pair as the low-order byte.
///
/// Returns `None` when `input` is empty or has an odd number of bytes, because
/// such a buffer cannot be split into whole `u16` values.
fn read_le_u16(input: &[u8]) -> Option<Vec<u16>> {
    if input.is_empty() || input.len() % 2 != 0 {
        return None;
    }

    // `chunks_exact(2)` only yields full two-byte chunks, so `pair[0]` and
    // `pair[1]` are always in bounds, and `collect` can size the Vec up front.
    Some(
        input
            .chunks_exact(2)
            .map(|pair| u16::from_le_bytes([pair[0], pair[1]]))
            .collect(),
    )
}
|
||||
|
||||
/// Decodes `input` as a sequence of big-endian 16-bit values.
///
/// Each consecutive pair of bytes becomes one `u16`, with the first byte of
/// the pair as the high-order byte.
///
/// Returns `None` when `input` is empty or has an odd number of bytes, because
/// such a buffer cannot be split into whole `u16` values.
fn read_be_u16(input: &[u8]) -> Option<Vec<u16>> {
    if input.is_empty() || input.len() % 2 != 0 {
        return None;
    }

    // `chunks_exact(2)` only yields full two-byte chunks, so `pair[0]` and
    // `pair[1]` are always in bounds, and `collect` can size the Vec up front.
    Some(
        input
            .chunks_exact(2)
            .map(|pair| u16::from_be_bytes([pair[0], pair[1]]))
            .collect(),
    )
}
|
||||
|
||||
pub fn parse_as_value(
|
||||
extension: Option<String>,
|
||||
contents: String,
|
||||
|
@ -71,6 +71,17 @@ fn open_can_parse_ini() {
|
||||
assert_eq!(output, "1234")
|
||||
}
|
||||
|
||||
// Runs the pipeline `open utf16.ini | get .ShellClassInfo | get IconIndex | echo $it`
// with `tests/fixtures/formats` as the working directory and asserts that the
// resulting `output` equals "-236".
// NOTE(review): `nu!` is defined elsewhere; it appears to bind the pipeline's
// output to `output` — confirm against the macro definition.
// NOTE(review): presumably the `utf16.ini` fixture is UTF-16 encoded, as its
// name suggests — confirm against the fixture file.
#[test]
|
||||
fn open_can_parse_utf16_ini() {
|
||||
nu!(
|
||||
output,
|
||||
cwd("tests/fixtures/formats"),
|
||||
"open utf16.ini | get .ShellClassInfo | get IconIndex | echo $it"
|
||||
);
|
||||
|
||||
assert_eq!(output, "-236")
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn open_error_if_file_not_found() {
|
||||
nu_error!(
|
||||
|
BIN
tests/fixtures/formats/utf16.ini
vendored
Normal file
BIN
tests/fixtures/formats/utf16.ini
vendored
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user