Add support for utf16 files

This commit is contained in:
Jonathan Turner
2019-08-12 16:11:42 +12:00
parent 7c4a4ec62e
commit 6cf3dc92fc
6 changed files with 149 additions and 23 deletions

View File

@@ -394,6 +394,10 @@ mod tests {
Res {
loc: fixtures().join("sgml_description.json"),
at: 0
},
Res {
loc: fixtures().join("utf16.ini"),
at: 0
}
]
);

View File

@@ -33,11 +33,17 @@ fn get_member(path: &Tagged<String>, obj: &Tagged<Value>) -> Result<Tagged<Value
match current.get_data_by_key(p) {
Some(v) => current = v,
None => {
return Err(ShellError::labeled_error(
"Unknown field",
"object missing field",
path.span(),
));
// Before we give up, see if they gave us a path that matches a field name by itself
match obj.get_data_by_key(&path.item) {
Some(v) => return Ok(v.clone()),
None => {
return Err(ShellError::labeled_error(
"Unknown field",
"object missing field",
path.span(),
));
}
}
}
}
}
@@ -49,11 +55,6 @@ pub fn get(
GetArgs { rest: fields }: GetArgs,
RunnableContext { input, .. }: RunnableContext,
) -> Result<OutputStream, ShellError> {
// If it's a number, get the row instead of the column
// if let Some(amount) = amount {
// return Ok(input.values.skip(amount as u64).take(1).from_input_stream());
// }
let stream = input
.values
.map(move |item| {

View File

@@ -9,7 +9,6 @@ use mime::Mime;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use uuid::Uuid;
pub struct Open;
#[derive(Deserialize)]
@@ -239,15 +238,96 @@ pub fn fetch(
},
SpanSource::File(cwd.to_string_lossy().to_string()),
)),
Err(_) => Ok((
None,
Value::Binary(bytes),
Tag {
span,
origin: Some(Uuid::new_v4()),
},
SpanSource::File(cwd.to_string_lossy().to_string()),
)),
Err(_) => {
//Non utf8 data.
match (bytes.get(0), bytes.get(1)) {
(Some(x), Some(y)) if *x == 0xff && *y == 0xfe => {
// Possibly UTF-16 little endian
let utf16 = read_le_u16(&bytes[2..]);
if let Some(utf16) = utf16 {
match std::string::String::from_utf16(&utf16) {
Ok(s) => Ok((
cwd.extension()
.map(|name| name.to_string_lossy().to_string()),
Value::string(s),
Tag {
span,
origin: Some(Uuid::new_v4()),
},
SpanSource::File(cwd.to_string_lossy().to_string()),
)),
Err(_) => Ok((
None,
Value::Binary(bytes),
Tag {
span,
origin: Some(Uuid::new_v4()),
},
SpanSource::File(cwd.to_string_lossy().to_string()),
)),
}
} else {
Ok((
None,
Value::Binary(bytes),
Tag {
span,
origin: Some(Uuid::new_v4()),
},
SpanSource::File(cwd.to_string_lossy().to_string()),
))
}
}
(Some(x), Some(y)) if *x == 0xfe && *y == 0xff => {
// Possibly UTF-16 big endian
let utf16 = read_be_u16(&bytes[2..]);
if let Some(utf16) = utf16 {
match std::string::String::from_utf16(&utf16) {
Ok(s) => Ok((
cwd.extension()
.map(|name| name.to_string_lossy().to_string()),
Value::string(s),
Tag {
span,
origin: Some(Uuid::new_v4()),
},
SpanSource::File(cwd.to_string_lossy().to_string()),
)),
Err(_) => Ok((
None,
Value::Binary(bytes),
Tag {
span,
origin: Some(Uuid::new_v4()),
},
SpanSource::File(cwd.to_string_lossy().to_string()),
)),
}
} else {
Ok((
None,
Value::Binary(bytes),
Tag {
span,
origin: Some(Uuid::new_v4()),
},
SpanSource::File(cwd.to_string_lossy().to_string()),
))
}
}
_ => Ok((
None,
Value::Binary(bytes),
Tag {
span,
origin: Some(Uuid::new_v4()),
},
SpanSource::File(cwd.to_string_lossy().to_string()),
)),
}
}
},
Err(_) => {
return Err(ShellError::labeled_error(
@@ -267,6 +347,36 @@ pub fn fetch(
}
}
fn read_le_u16(input: &[u8]) -> Option<Vec<u16>> {
if input.len() % 2 != 0 || input.len() < 2 {
None
} else {
let mut result = vec![];
let mut pos = 0;
while pos < input.len() {
result.push(u16::from_le_bytes([input[pos], input[pos + 1]]));
pos += 2;
}
Some(result)
}
}
fn read_be_u16(input: &[u8]) -> Option<Vec<u16>> {
if input.len() % 2 != 0 || input.len() < 2 {
None
} else {
let mut result = vec![];
let mut pos = 0;
while pos < input.len() {
result.push(u16::from_be_bytes([input[pos], input[pos + 1]]));
pos += 2;
}
Some(result)
}
}
pub fn parse_as_value(
extension: Option<String>,
contents: String,