mirror of
https://github.com/nushell/nushell.git
synced 2024-11-22 08:23:24 +01:00
Fix nushell#10591: encode returns error with utf-16le and utf-16be encodings (nushell#10591) (#12411)
# Description This closes (nushell#10591). The `encode` command's help text says that the utf-16le and utf-16be encodings are not supported; however, you could still use these encodings, and they didn't work properly: the bytes were returned UTF-8 encoded instead: ```bash "䆺ש" | encode utf-16 Length: 5 (0x5) bytes | printable whitespace ascii_other non_ascii 00000000: e4 86 ba d7 a9 ××××× ``` # User-Facing Changes The `encode` command's help text was updated, and trying to encode with utf-16le or utf-16be now returns an error: ![screenshot](https://github.com/nushell/nushell/assets/119532691/c346dc57-8b42-4dfc-93d5-638b0041d89f) # Tests + Formatting - 🟢 `toolkit fmt` - 🟢 `toolkit clippy` - 🟢 `toolkit test` - 🟢 `toolkit test stdlib`
This commit is contained in:
parent
7a7d43344e
commit
e211e96d33
@ -37,6 +37,7 @@ impl Command for Encode {
|
||||
big5, euc-jp, euc-kr, gbk, iso-8859-1, cp1252, latin5
|
||||
|
||||
Note that since the Encoding Standard doesn't specify encoders for utf-16le and utf-16be, these are not yet supported.
|
||||
More information can be found here: https://docs.rs/encoding_rs/latest/encoding_rs/#utf-16le-utf-16be-and-unicode-encoding-schemes
|
||||
|
||||
For a more complete list of encodings, please refer to the encoding_rs
|
||||
documentation link at https://docs.rs/encoding_rs/latest/encoding_rs/#statics"#
|
||||
|
@ -50,6 +50,19 @@ pub fn encode(
|
||||
} else {
|
||||
parse_encoding(encoding_name.span, &encoding_name.item)
|
||||
}?;
|
||||
|
||||
// Since the Encoding Standard doesn't specify encoders for "UTF-16BE" and "UTF-16LE"
|
||||
// Check if the encoding is one of them and return an error
|
||||
if ["UTF-16BE", "UTF-16LE"].contains(&encoding.name()) {
|
||||
return Err(ShellError::GenericError {
|
||||
error: format!(r#"{} encoding is not supported"#, &encoding_name.item),
|
||||
msg: "invalid encoding".into(),
|
||||
span: Some(encoding_name.span),
|
||||
help: Some("refer to https://docs.rs/encoding_rs/latest/encoding_rs/index.html#statics for a valid list of encodings".into()),
|
||||
inner: vec![],
|
||||
});
|
||||
}
|
||||
|
||||
let (result, _actual_encoding, replacements) = encoding.encode(s);
|
||||
// Because encoding_rs is a Web-facing crate, it defaults to replacing unknowns with HTML entities.
|
||||
// This behaviour can be enabled with -i. Otherwise, it becomes an error.
|
||||
@ -102,9 +115,7 @@ mod test {
|
||||
#[case::iso_8859_1("iso-8859-1", "Some ¼½¿ Data µ¶·¸¹º")]
|
||||
#[case::cp1252("cp1252", "Some ¼½¿ Data")]
|
||||
#[case::latin5("latin5", "Some ¼½¿ Data µ¶·¸¹º")]
|
||||
// Tests for specific renditions of UTF-16 and UTF-8 labels
|
||||
#[case::utf16("utf16", "")]
|
||||
#[case::utf_hyphen_16("utf-16", "")]
|
||||
// Tests for specific renditions of UTF-8 labels
|
||||
#[case::utf8("utf8", "")]
|
||||
#[case::utf_hyphen_8("utf-8", "")]
|
||||
fn smoke(#[case] encoding: String, #[case] expected: &str) {
|
||||
|
Loading…
Reference in New Issue
Block a user