Deprecate hash base64, extend decode and add encode commands (#5863)

* feat: deprecate `hash base64` command

* feat: extend `decode` and `encode` command families

This commit
- Adds `encode` command family
- Backports `hash base64` features to `encode base64` and `decode base64` subcommands.
- Refactors code a bit and extends tests for encodings
- `decode base64` returns a binary `Value` (which may then be decoded into a string using the `decode` command)

* feat: add `--binary(-b)` flag to `decode base64`

Default output type is now string, but binary can be requested using this new flag.
This commit is contained in:
Benoît Cortier
2022-06-25 17:35:23 -04:00
committed by GitHub
parent f2989bf704
commit 173d60d59d
16 changed files with 466 additions and 192 deletions

View File

@ -0,0 +1,330 @@
use base64::{decode_config, encode_config};
use nu_engine::CallExt;
use nu_protocol::ast::{Call, CellPath};
use nu_protocol::engine::{EngineState, Stack};
use nu_protocol::{PipelineData, ShellError, Span, Spanned, Value};
/// Help text for the `--character-set` flag shared by the base64 subcommands.
///
/// A trailing `\` in a string literal swallows the newline *and* all leading whitespace of the
/// next line, so every separating space has to be written before the backslash.
pub const CHARACTER_SET_DESC: &str = "specify the character rules for encoding the input.\n\
    \tValid values are 'standard', 'standard-no-padding', 'url-safe', 'url-safe-no-padding', \
    'binhex', 'bcrypt', 'crypt'";
/// Settings for one base64 operation.
#[derive(Clone)]
pub struct Base64Config {
    /// Name of the base64 alphabet to use, together with the span it came from so that an
    /// unknown name can be reported at the right place.
    pub character_set: Spanned<String>,
    /// Whether the operation encodes or decodes.
    pub action_type: ActionType,
}
/// Direction of a base64 operation.
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum ActionType {
    /// Turn the input into its base64 text representation.
    Encode,
    /// Turn base64 text back into the data it represents.
    Decode,
}
pub fn operate(
action_type: ActionType,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let head = call.head;
let character_set: Option<Spanned<String>> =
call.get_flag(engine_state, stack, "character-set")?;
let binary = call.has_flag("binary");
let column_paths: Vec<CellPath> = call.rest(engine_state, stack, 0)?;
// Default the character set to standard if the argument is not specified.
let character_set = match character_set {
Some(inner_tag) => inner_tag,
None => Spanned {
item: "standard".to_string(),
span: head, // actually this span is always useless, because default character_set is always valid.
},
};
let encoding_config = Base64Config {
character_set,
action_type,
};
input.map(
move |v| {
if column_paths.is_empty() {
match action(&v, binary, &encoding_config, &head) {
Ok(v) => v,
Err(e) => Value::Error { error: e },
}
} else {
let mut ret = v;
for path in &column_paths {
let config = encoding_config.clone();
let r = ret.update_cell_path(
&path.members,
Box::new(move |old| match action(old, binary, &config, &head) {
Ok(v) => v,
Err(e) => Value::Error { error: e },
}),
);
if let Err(error) = r {
return Value::Error { error };
}
}
ret
}
},
engine_state.ctrlc.clone(),
)
}
/// Base64-encodes or base64-decodes a single value according to `base64_config`.
///
/// * Encoding accepts string and binary values and always yields a string.
/// * Decoding accepts string values only. `\r\n` and `\n` sequences are removed from the input
///   first. The result is a binary value when `output_binary` is set, otherwise the decoded
///   bytes must form valid UTF-8 and are returned as a string.
///
/// # Errors
///
/// * the configured character set is not one of the supported names;
/// * a binary value is passed for decoding;
/// * the input is not valid base64 for the chosen character set;
/// * the decoded bytes are not valid UTF-8 and `output_binary` is `false`;
/// * the input is neither a string nor a binary value.
fn action(
    input: &Value,
    // only used for `decode` action
    output_binary: bool,
    base64_config: &Base64Config,
    command_span: &Span,
) -> Result<Value, ShellError> {
    let config_character_set = &base64_config.character_set;
    let base64_config_enum: base64::Config = match config_character_set.item.as_str() {
        "standard" => base64::STANDARD,
        "standard-no-padding" => base64::STANDARD_NO_PAD,
        "url-safe" => base64::URL_SAFE,
        "url-safe-no-padding" => base64::URL_SAFE_NO_PAD,
        "binhex" => base64::BINHEX,
        "bcrypt" => base64::BCRYPT,
        "crypt" => base64::CRYPT,
        not_valid => {
            // `hash base64` is deprecated, so point the user at the help page of the command
            // family that is actually running.
            let help_command = match base64_config.action_type {
                ActionType::Encode => "encode base64",
                ActionType::Decode => "decode base64",
            };
            return Err(ShellError::GenericError(
                "value is not an accepted character set".to_string(),
                format!(
                    "{} is not a valid character-set.\nPlease use `help {}` to see a list of valid character sets.",
                    not_valid, help_command
                ),
                Some(config_character_set.span),
                None,
                Vec::new(),
            ));
        }
    };

    match input {
        Value::Binary { val, .. } => match base64_config.action_type {
            ActionType::Encode => Ok(Value::string(
                encode_config(val, base64_config_enum),
                *command_span,
            )),
            ActionType::Decode => Err(ShellError::UnsupportedInput(
                "Binary data can only support encoding".to_string(),
                *command_span,
            )),
        },
        Value::String {
            val,
            span: value_span,
        } => match base64_config.action_type {
            ActionType::Encode => Ok(Value::string(
                encode_config(val, base64_config_enum),
                *command_span,
            )),
            ActionType::Decode => {
                // The input may contain line breaks (e.g. wrapped base64); they carry no data,
                // so drop them before decoding. `replace` already returns a fresh `String`.
                let val = val.replace("\r\n", "").replace('\n', "");
                let decoded_value = decode_config(&val, base64_config_enum).map_err(|_| {
                    ShellError::GenericError(
                        "value could not be base64 decoded".to_string(),
                        format!(
                            "invalid base64 input for character set {}",
                            &config_character_set.item
                        ),
                        Some(*command_span),
                        None,
                        Vec::new(),
                    )
                })?;

                if output_binary {
                    return Ok(Value::binary(decoded_value, *command_span));
                }

                String::from_utf8(decoded_value)
                    .map(|string_value| Value::string(string_value, *command_span))
                    .map_err(|e| {
                        ShellError::GenericError(
                            "base64 payload isn't a valid utf-8 sequence".to_owned(),
                            e.to_string(),
                            Some(*value_span),
                            Some("consider using the `--binary` flag".to_owned()),
                            Vec::new(),
                        )
                    })
            }
        },
        other => Err(ShellError::TypeMismatch(
            format!("value is {}, not string", other.get_type()),
            other.span()?,
        )),
    }
}
#[cfg(test)]
mod tests {
    use super::{action, ActionType, Base64Config};
    use nu_protocol::{ShellError, Span, Spanned, Value};

    /// Calls `action` with the given character set and direction, using test spans throughout.
    fn run(
        input: &Value,
        output_binary: bool,
        character_set: &str,
        action_type: ActionType,
    ) -> Result<Value, ShellError> {
        action(
            input,
            output_binary,
            &Base64Config {
                character_set: Spanned {
                    item: character_set.to_string(),
                    span: Span::test_data(),
                },
                action_type,
            },
            &Span::test_data(),
        )
    }

    #[test]
    fn base64_encode_standard() {
        let word = Value::string("Some Data Padding", Span::test_data());
        let expected = Value::string("U29tZSBEYXRhIFBhZGRpbmc=", Span::test_data());

        let actual = run(&word, true, "standard", ActionType::Encode).unwrap();
        assert_eq!(actual, expected);
    }

    #[test]
    fn base64_encode_standard_no_padding() {
        let word = Value::string("Some Data Padding", Span::test_data());
        let expected = Value::string("U29tZSBEYXRhIFBhZGRpbmc", Span::test_data());

        let actual = run(&word, true, "standard-no-padding", ActionType::Encode).unwrap();
        assert_eq!(actual, expected);
    }

    #[test]
    fn base64_encode_url_safe() {
        let word = Value::string("this is for url", Span::test_data());
        let expected = Value::string("dGhpcyBpcyBmb3IgdXJs", Span::test_data());

        let actual = run(&word, true, "url-safe", ActionType::Encode).unwrap();
        assert_eq!(actual, expected);
    }

    #[test]
    fn base64_decode_binhex() {
        let word = Value::string("A5\"KC9jRB@IIF'8bF!", Span::test_data());
        let expected = Value::binary(b"a binhex test".as_slice(), Span::test_data());

        let actual = run(&word, true, "binhex", ActionType::Decode).unwrap();
        assert_eq!(actual, expected);
    }

    #[test]
    fn base64_decode_binhex_with_new_line_input() {
        let word = Value::string("A5\"KC9jRB\n@IIF'8bF!", Span::test_data());
        let expected = Value::binary(b"a binhex test".as_slice(), Span::test_data());

        let actual = run(&word, true, "binhex", ActionType::Decode).unwrap();
        assert_eq!(actual, expected);
    }

    #[test]
    fn base64_encode_binary() {
        let word = Value::Binary {
            val: vec![77, 97, 110],
            span: Span::test_data(),
        };
        let expected = Value::string("TWFu", Span::test_data());

        let actual = run(&word, true, "standard", ActionType::Encode).unwrap();
        assert_eq!(actual, expected);
    }

    #[test]
    fn base64_decode_binary_expect_error() {
        let word = Value::Binary {
            val: vec![77, 97, 110],
            span: Span::test_data(),
        };

        let actual = run(&word, true, "standard", ActionType::Decode);
        assert!(actual.is_err())
    }

    // The tests below cover the default (non-binary) decode output and the error paths that
    // the binary-only tests above never reach.

    #[test]
    fn base64_decode_standard_as_string() {
        let word = Value::string("U29tZSBEYXRh", Span::test_data());
        let expected = Value::string("Some Data", Span::test_data());

        let actual = run(&word, false, "standard", ActionType::Decode).unwrap();
        assert_eq!(actual, expected);
    }

    #[test]
    fn base64_decode_invalid_utf8_requires_binary_output() {
        // "//4=" is the standard base64 form of the bytes 0xFF 0xFE, which are not valid UTF-8.
        let word = Value::string("//4=", Span::test_data());

        let as_string = run(&word, false, "standard", ActionType::Decode);
        assert!(as_string.is_err());

        let expected = Value::binary(vec![0xff, 0xfe], Span::test_data());
        let as_binary = run(&word, true, "standard", ActionType::Decode).unwrap();
        assert_eq!(as_binary, expected);
    }

    #[test]
    fn base64_unknown_character_set_expect_error() {
        let word = Value::string("Some Data", Span::test_data());

        assert!(run(&word, false, "not-a-character-set", ActionType::Encode).is_err());
        assert!(run(&word, false, "not-a-character-set", ActionType::Decode).is_err());
    }
}

View File

@ -1,10 +1,9 @@
use encoding_rs::Encoding;
use nu_engine::CallExt;
use nu_protocol::ast::Call;
use nu_protocol::engine::{Command, EngineState, Stack};
use nu_protocol::{
Category, Example, IntoPipelineData, PipelineData, ShellError, Signature, Spanned, SyntaxShape,
Value,
Category, Example, IntoPipelineData, PipelineData, ShellError, Signature, Span, Spanned,
SyntaxShape, Value,
};
#[derive(Clone)]
@ -38,11 +37,21 @@ documentation link at https://docs.rs/encoding_rs/0.8.28/encoding_rs/#statics"#
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Decode the output of an external command",
example: "cat myfile.q | decode utf-8",
result: None,
}]
vec![
Example {
description: "Decode the output of an external command",
example: "^cat myfile.q | decode utf-8",
result: None,
},
Example {
description: "Decode an UTF-16 string into nushell UTF-8 string",
example: r#"0x[00 53 00 6F 00 6D 00 65 00 20 00 44 00 61 00 74 00 61] | decode utf-16be"#,
result: Some(Value::String {
val: "Some Data".to_owned(),
span: Span::test_data(),
}),
},
]
}
fn run(
@ -62,51 +71,10 @@ documentation link at https://docs.rs/encoding_rs/0.8.28/encoding_rs/#statics"#
..
} => {
let bytes: Vec<u8> = stream.into_bytes()?.item;
let encoding = match Encoding::for_label(encoding.item.as_bytes()) {
None => Err(ShellError::GenericError(
format!(
r#"{} is not a valid encoding, refer to https://docs.rs/encoding_rs/0.8.23/encoding_rs/#statics for a valid list of encodings"#,
encoding.item
),
"invalid encoding".into(),
Some(encoding.span),
None,
Vec::new(),
)),
Some(encoding) => Ok(encoding),
}?;
let result = encoding.decode(&bytes);
Ok(Value::String {
val: result.0.to_string(),
span: head,
}
.into_pipeline_data())
super::encoding::decode(head, encoding, &bytes).map(|val| val.into_pipeline_data())
}
PipelineData::Value(Value::Binary { val: bytes, .. }, ..) => {
let encoding = match Encoding::for_label(encoding.item.as_bytes()) {
None => Err(ShellError::GenericError(
format!(
r#"{} is not a valid encoding, refer to https://docs.rs/encoding_rs/0.8.23/encoding_rs/#statics for a valid list of encodings"#,
encoding.item
),
"invalid encoding".into(),
Some(encoding.span),
None,
Vec::new(),
)),
Some(encoding) => Ok(encoding),
}?;
let result = encoding.decode(&bytes);
Ok(Value::String {
val: result.0.to_string(),
span: head,
}
.into_pipeline_data())
super::encoding::decode(head, encoding, &bytes).map(|val| val.into_pipeline_data())
}
_ => Err(ShellError::UnsupportedInput(
"non-binary input".into(),

View File

@ -0,0 +1,90 @@
use super::base64::{operate, ActionType, CHARACTER_SET_DESC};
use nu_protocol::ast::Call;
use nu_protocol::engine::{Command, EngineState, Stack};
use nu_protocol::{
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
};
/// The `decode base64` command.
#[derive(Clone)]
pub struct DecodeBase64;

impl Command for DecodeBase64 {
    fn name(&self) -> &str {
        "decode base64"
    }

    fn usage(&self) -> &str {
        "base64 decode a value"
    }

    fn extra_usage(&self) -> &str {
        r#"Will attempt to decode binary payload as an UTF-8 string by default. Use the `--binary(-b)` argument to force binary output."#
    }

    /// Declares the `--character-set(-c)` option, the `--binary(-b)` switch and the optional
    /// cell paths to decode.
    fn signature(&self) -> Signature {
        let signature = Signature::build("decode base64");
        let signature = signature.named(
            "character-set",
            SyntaxShape::String,
            CHARACTER_SET_DESC,
            Some('c'),
        );
        let signature = signature.switch(
            "binary",
            "do not decode payload as UTF-8 and output binary",
            Some('b'),
        );
        let signature = signature.rest(
            "rest",
            SyntaxShape::CellPath,
            "optionally base64 decode data by column paths",
        );
        signature.category(Category::Hash)
    }

    fn input_type(&self) -> Type {
        Type::Any
    }

    fn output_type(&self) -> Type {
        Type::Any
    }

    fn examples(&self) -> Vec<Example> {
        let as_string = Example {
            description: "Base64 decode a value and output as UTF-8 string",
            example: "echo 'U29tZSBEYXRh' | decode base64",
            result: Some(Value::string("Some Data", Span::test_data())),
        };
        let as_binary = Example {
            description: "Base64 decode a value and output as binary",
            example: "echo 'U29tZSBEYXRh' | decode base64 --binary",
            // The raw bytes of "Some Data": 53 6f 6d 65 20 44 61 74 61.
            result: Some(Value::binary(b"Some Data".to_vec(), Span::test_data())),
        };
        vec![as_string, as_binary]
    }

    /// Delegates to the shared base64 `operate` routine in decode mode.
    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        operate(ActionType::Decode, engine_state, stack, call, input)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Hands the command to the crate-level `test_examples` helper.
    #[test]
    fn test_examples() {
        let command = DecodeBase64;
        crate::test_examples(command)
    }
}

View File

@ -0,0 +1,95 @@
use nu_engine::CallExt;
use nu_protocol::ast::Call;
use nu_protocol::engine::{Command, EngineState, Stack};
use nu_protocol::{
Category, Example, IntoPipelineData, PipelineData, ShellError, Signature, Span, Spanned,
SyntaxShape, Value,
};
/// The `encode` command: converts a string into bytes in a requested text encoding.
#[derive(Clone)]
pub struct Encode;

impl Command for Encode {
    fn name(&self) -> &str {
        "encode"
    }

    fn usage(&self) -> &str {
        "Encode an UTF-8 string into other kind of representations."
    }

    fn search_terms(&self) -> Vec<&str> {
        vec!["text", "encoding", "decoding"]
    }

    /// Declares the single required positional argument: the name of the target encoding.
    fn signature(&self) -> nu_protocol::Signature {
        Signature::build("encode")
            .required("encoding", SyntaxShape::String, "the text encoding to use")
            .category(Category::Strings)
    }

    fn extra_usage(&self) -> &str {
        r#"Multiple encodings are supported, here is an example of a few:
big5, euc-jp, euc-kr, gbk, iso-8859-1, cp1252, latin5
Note that since the Encoding Standard doesn't specify encoders for utf-16le and utf-16be, these are not yet supported.
For a more complete list of encodings please refer to the encoding_rs
documentation link at https://docs.rs/encoding_rs/0.8.28/encoding_rs/#statics"#
    }

    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "Encode an UTF-8 string into Shift-JIS",
            // The input must be the text whose Shift-JIS encoding is listed in `result`;
            // an empty string would encode to an empty binary and never match.
            example: r#"echo "負けると知って戦うのが、遥かに美しいのだ" | encode shift-jis"#,
            result: Some(Value::Binary {
                val: vec![
                    0x95, 0x89, 0x82, 0xaf, 0x82, 0xe9, 0x82, 0xc6, 0x92, 0x6d, 0x82, 0xc1, 0x82,
                    0xc4, 0x90, 0xed, 0x82, 0xa4, 0x82, 0xcc, 0x82, 0xaa, 0x81, 0x41, 0x97, 0x79,
                    0x82, 0xa9, 0x82, 0xc9, 0x94, 0xfc, 0x82, 0xb5, 0x82, 0xa2, 0x82, 0xcc, 0x82,
                    0xbe,
                ],
                span: Span::test_data(),
            }),
        }]
    }

    /// Encodes string input (a string value or the stdout of an external command) with the
    /// requested encoding.
    ///
    /// An external stream without stdout yields an empty pipeline; any other kind of input is
    /// rejected with `ShellError::UnsupportedInput`.
    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        let head = call.head;
        let encoding: Spanned<String> = call.req(engine_state, stack, 0)?;

        match input {
            PipelineData::ExternalStream { stdout: None, .. } => Ok(PipelineData::new(head)),
            PipelineData::ExternalStream {
                stdout: Some(stream),
                ..
            } => {
                let s = stream.into_string()?.item;
                super::encoding::encode(head, encoding, &s).map(|val| val.into_pipeline_data())
            }
            PipelineData::Value(Value::String { val: s, .. }, ..) => {
                super::encoding::encode(head, encoding, &s).map(|val| val.into_pipeline_data())
            }
            _ => Err(ShellError::UnsupportedInput(
                "non-string input".into(),
                head,
            )),
        }
    }
}
#[cfg(test)]
mod test {
    use super::*;

    /// Hands the command to the crate-level `test_examples` helper.
    #[test]
    fn test_examples() {
        let command = Encode;
        crate::test_examples(command)
    }
}

View File

@ -0,0 +1,78 @@
use super::base64::{operate, ActionType, CHARACTER_SET_DESC};
use nu_protocol::ast::Call;
use nu_protocol::engine::{Command, EngineState, Stack};
use nu_protocol::{
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
};
/// The `encode base64` command.
#[derive(Clone)]
pub struct EncodeBase64;

impl Command for EncodeBase64 {
    fn name(&self) -> &str {
        "encode base64"
    }

    fn usage(&self) -> &str {
        "base64 encode a value"
    }

    /// Declares the `--character-set(-c)` option and the optional cell paths to encode.
    fn signature(&self) -> Signature {
        let signature = Signature::build("encode base64");
        let signature = signature.named(
            "character-set",
            SyntaxShape::String,
            CHARACTER_SET_DESC,
            Some('c'),
        );
        let signature = signature.rest(
            "rest",
            SyntaxShape::CellPath,
            "optionally base64 encode data by column paths",
        );
        signature.category(Category::Hash)
    }

    fn input_type(&self) -> Type {
        Type::Any
    }

    fn output_type(&self) -> Type {
        Type::String
    }

    fn examples(&self) -> Vec<Example> {
        let default_settings = Example {
            description: "Base64 encode a string with default settings",
            example: "echo 'Some Data' | encode base64",
            result: Some(Value::string("U29tZSBEYXRh", Span::test_data())),
        };
        let binhex = Example {
            description: "Base64 encode a string with the binhex character set",
            example: "echo 'Some Data' | encode base64 --character-set binhex",
            result: Some(Value::string("7epXB5\"%A@4J", Span::test_data())),
        };
        vec![default_settings, binhex]
    }

    /// Delegates to the shared base64 `operate` routine in encode mode.
    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        operate(ActionType::Encode, engine_state, stack, call, input)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Hands the command to the crate-level `test_examples` helper.
    #[test]
    fn test_examples() {
        let command = EncodeBase64;
        crate::test_examples(command)
    }
}

View File

@ -0,0 +1,67 @@
use encoding_rs::Encoding;
use nu_protocol::{ShellError, Span, Spanned, Value};
/// Decodes `bytes` from the encoding named by `encoding` into a string value spanned at `head`.
///
/// Only the text component of the decoder's result is kept; the other components are ignored.
///
/// # Errors
///
/// Returns the error from `parse_encoding` when the encoding label is not recognised.
pub fn decode(head: Span, encoding: Spanned<String>, bytes: &[u8]) -> Result<Value, ShellError> {
    let codec = parse_encoding(encoding.span, encoding.item.as_str())?;
    let text = codec.decode(bytes).0;
    Ok(Value::String {
        val: text.into_owned(),
        span: head,
    })
}
/// Encodes `s` into the encoding named by `encoding`, returning a binary value spanned at `head`.
///
/// Only the byte component of the encoder's result is kept; the other components are ignored.
///
/// # Errors
///
/// Returns the error from `parse_encoding` when the encoding label is not recognised.
pub fn encode(head: Span, encoding: Spanned<String>, s: &str) -> Result<Value, ShellError> {
    let codec = parse_encoding(encoding.span, encoding.item.as_str())?;
    let bytes = codec.encode(s).0;
    Ok(Value::Binary {
        val: bytes.into_owned(),
        span: head,
    })
}
/// Resolves an encoding label to its `encoding_rs` encoding.
///
/// # Errors
///
/// Returns a `ShellError::GenericError` attached to `span` when
/// `Encoding::for_label_no_replacement` does not recognise `label`.
fn parse_encoding(span: Span, label: &str) -> Result<&'static Encoding, ShellError> {
    Encoding::for_label_no_replacement(label.as_bytes()).ok_or_else(|| {
        ShellError::GenericError(
            format!(
                r#"{} is not a valid encoding, refer to https://docs.rs/encoding_rs/0.8.23/encoding_rs/#statics for a valid list of encodings"#,
                label
            ),
            "invalid encoding".into(),
            Some(span),
            None,
            Vec::new(),
        )
    })
}
#[cfg(test)]
mod test {
    use super::*;
    use rstest::rstest;

    /// Round-trips each sample text through `encode` and `decode` with the same encoding label
    /// and checks that the original text comes back.
    #[rstest]
    #[case::big5("big5", "簡体字")]
    #[case::shift_jis("shift-jis", "何だと?……無駄な努力だ?……百も承知だ!")]
    #[case::euc_jp("euc-jp", "だがな、勝つ望みがある時ばかり、戦うのとは訳が違うぞ!")]
    #[case::euc_kr("euc-kr", "가셨어요?")]
    #[case::gbk("gbk", "簡体字")]
    #[case::iso_8859_1("iso-8859-1", "Some ¼½¿ Data µ¶·¸¹º")]
    #[case::cp1252("cp1252", "Some ¼½¿ Data")]
    #[case::latin5("latin5", "Some ¼½¿ Data µ¶·¸¹º")]
    fn smoke(#[case] encoding: String, #[case] expected: &str) {
        let span = Span::test_data();
        let label = Spanned {
            item: encoding,
            span,
        };

        // Keep the encoded `Value` alive in its own binding while its bytes are borrowed.
        let binary_value = encode(span, label.clone(), expected).unwrap();
        let bytes = binary_value.as_binary().unwrap();

        let string_value = decode(span, label, bytes).unwrap();
        let round_tripped = string_value.as_string().unwrap();

        assert_eq!(round_tripped, expected);
    }
}

View File

@ -0,0 +1,11 @@
mod base64;
mod decode;
mod decode_base64;
mod encode;
mod encode_base64;
mod encoding;
pub use self::decode::Decode;
pub use self::decode_base64::DecodeBase64;
pub use self::encode::Encode;
pub use self::encode_base64::EncodeBase64;

View File

@ -1,7 +1,7 @@
mod build_string;
mod char_;
mod decode;
mod detect_columns;
mod encode_decode;
mod format;
mod parse;
mod size;
@ -10,8 +10,8 @@ mod str_;
pub use build_string::BuildString;
pub use char_::Char;
pub use decode::*;
pub use detect_columns::*;
pub use encode_decode::*;
pub use format::*;
pub use parse::*;
pub use size::Size;