Fix nushell#10591: encode returns error with utf-16le and utf-16be encodings (nushell#10591) (#12411)

# Description

This closes (nushell#10591)

The `encode` command's help text says that the utf-16le and utf-16be encodings
are not supported; however, you could still use these encodings, and they
didn't work properly, because they returned the bytes UTF-8 encoded:
```bash
"䆺ש" | encode utf-16
Length: 5 (0x5) bytes | printable whitespace ascii_other non_ascii
00000000: e4 86 ba d7 a9 ×××××
```
# User-Facing Changes

The `encode` command's help text was updated, and trying to encode with utf-16le or utf-16be now returns an error:
![screenshot](https://github.com/nushell/nushell/assets/119532691/c346dc57-8b42-4dfc-93d5-638b0041d89f)

# Tests + Formatting

- 🟢 `toolkit fmt`
- 🟢 `toolkit clippy`
- 🟢 `toolkit test`
- 🟢 `toolkit test stdlib`
This commit is contained in:
Rodrigo Friães 2024-04-06 15:07:55 +01:00 committed by GitHub
parent 7a7d43344e
commit e211e96d33
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 15 additions and 3 deletions

View File

@ -37,6 +37,7 @@ impl Command for Encode {
big5, euc-jp, euc-kr, gbk, iso-8859-1, cp1252, latin5 big5, euc-jp, euc-kr, gbk, iso-8859-1, cp1252, latin5
Note that since the Encoding Standard doesn't specify encoders for utf-16le and utf-16be, these are not yet supported. Note that since the Encoding Standard doesn't specify encoders for utf-16le and utf-16be, these are not yet supported.
More information can be found here: https://docs.rs/encoding_rs/latest/encoding_rs/#utf-16le-utf-16be-and-unicode-encoding-schemes
For a more complete list of encodings, please refer to the encoding_rs For a more complete list of encodings, please refer to the encoding_rs
documentation link at https://docs.rs/encoding_rs/latest/encoding_rs/#statics"# documentation link at https://docs.rs/encoding_rs/latest/encoding_rs/#statics"#

View File

@ -50,6 +50,19 @@ pub fn encode(
} else { } else {
parse_encoding(encoding_name.span, &encoding_name.item) parse_encoding(encoding_name.span, &encoding_name.item)
}?; }?;
// Since the Encoding Standard doesn't specify encoders for "UTF-16BE" and "UTF-16LE"
// Check if the encoding is one of them and return an error
if ["UTF-16BE", "UTF-16LE"].contains(&encoding.name()) {
return Err(ShellError::GenericError {
error: format!(r#"{} encoding is not supported"#, &encoding_name.item),
msg: "invalid encoding".into(),
span: Some(encoding_name.span),
help: Some("refer to https://docs.rs/encoding_rs/latest/encoding_rs/index.html#statics for a valid list of encodings".into()),
inner: vec![],
});
}
let (result, _actual_encoding, replacements) = encoding.encode(s); let (result, _actual_encoding, replacements) = encoding.encode(s);
// Because encoding_rs is a Web-facing crate, it defaults to replacing unknowns with HTML entities. // Because encoding_rs is a Web-facing crate, it defaults to replacing unknowns with HTML entities.
// This behaviour can be enabled with -i. Otherwise, it becomes an error. // This behaviour can be enabled with -i. Otherwise, it becomes an error.
@ -102,9 +115,7 @@ mod test {
#[case::iso_8859_1("iso-8859-1", "Some ¼½¿ Data µ¶·¸¹º")] #[case::iso_8859_1("iso-8859-1", "Some ¼½¿ Data µ¶·¸¹º")]
#[case::cp1252("cp1252", "Some ¼½¿ Data")] #[case::cp1252("cp1252", "Some ¼½¿ Data")]
#[case::latin5("latin5", "Some ¼½¿ Data µ¶·¸¹º")] #[case::latin5("latin5", "Some ¼½¿ Data µ¶·¸¹º")]
// Tests for specific renditions of UTF-16 and UTF-8 labels // Tests for specific renditions of UTF-8 labels
#[case::utf16("utf16", "")]
#[case::utf_hyphen_16("utf-16", "")]
#[case::utf8("utf8", "")] #[case::utf8("utf8", "")]
#[case::utf_hyphen_8("utf-8", "")] #[case::utf_hyphen_8("utf-8", "")]
fn smoke(#[case] encoding: String, #[case] expected: &str) { fn smoke(#[case] encoding: String, #[case] expected: &str) {