encode/decode for multiple alphabets (#13428)

Based on the discussion in #13419.


## Description

Reworks the `decode`/`encode` commands by adding/changing the following
bases:

- `base32`
- `base32hex`
- `hex`
- `new-base64`

The `hex` base is compatible with the previous version of `hex` out of
the box (it only adds more flags). `base64` isn't, so the PR adds a new
version and deprecates the old one.

All commands have a `string -> binary` signature for decoding and a `string
| binary -> string` signature for encoding. A few `base64` encodings
that are not part of
[RFC4648](https://datatracker.ietf.org/doc/html/rfc4648#section-6) have
been dropped.


## Example usage

```Nushell
~/fork/nushell> "string" | encode base32 | decode base32 | decode
string
```

```Nushell
~/fork/nushell> "ORSXG5A=" | decode base32
# `decode` always returns a binary value
Length: 4 (0x4) bytes | printable whitespace ascii_other non_ascii
00000000:   74 65 73 74                                          test
```


## User-Facing Changes

- New commands: `encode/decode base32/base32hex`.
- `encode hex` gets a `--lower` flag.
- `encode/decode base64` are deprecated in favor of `encode/decode
new-base64`.
This commit is contained in:
Andrej Kolčin
2024-08-23 19:18:51 +03:00
committed by GitHub
parent 39b0f3bdda
commit 0560826414
23 changed files with 1122 additions and 210 deletions

View File

@ -0,0 +1,180 @@
use data_encoding::Encoding;
use nu_engine::command_prelude::*;
/// Extra help text returned from `extra_description` by both Base32 commands in this file.
const EXTRA_USAGE: &str = r"The default alphabet is taken from RFC 4648, section 6.
Note this command will collect stream input.";
#[derive(Clone)]
pub struct DecodeBase32;
impl Command for DecodeBase32 {
fn name(&self) -> &str {
"decode base32"
}
fn signature(&self) -> Signature {
Signature::build("decode base32")
.input_output_types(vec![(Type::String, Type::Binary)])
.allow_variants_without_examples(true)
.switch("nopad", "Do not pad the output.", None)
.category(Category::Formats)
}
fn description(&self) -> &str {
"Decode a Base32 value."
}
fn extra_description(&self) -> &str {
EXTRA_USAGE
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Decode arbitrary binary data",
example: r#""AEBAGBAF" | decode base32"#,
result: Some(Value::test_binary(vec![1, 2, 3, 4, 5])),
},
Example {
description: "Decode an encoded string",
example: r#""NBUQ====" | decode base32 | decode"#,
result: None,
},
Example {
description: "Parse a string without padding",
example: r#""NBUQ" | decode base32 --nopad"#,
result: Some(Value::test_binary(vec![0x68, 0x69])),
},
]
}
fn is_const(&self) -> bool {
true
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let encoding = if call.has_flag(engine_state, stack, "nopad")? {
data_encoding::BASE32_NOPAD
} else {
data_encoding::BASE32
};
super::decode(encoding, call.span(), input)
}
fn run_const(
&self,
working_set: &StateWorkingSet,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let encoding = if call.has_flag_const(working_set, "nopad")? {
data_encoding::BASE32_NOPAD
} else {
data_encoding::BASE32
};
super::decode(encoding, call.span(), input)
}
}
#[derive(Clone)]
pub struct EncodeBase32;
impl Command for EncodeBase32 {
fn name(&self) -> &str {
"encode base32"
}
fn signature(&self) -> Signature {
Signature::build("encode base32")
.input_output_types(vec![
(Type::String, Type::String),
(Type::Binary, Type::String),
])
.switch("nopad", "Don't accept padding.", None)
.category(Category::Formats)
}
fn description(&self) -> &str {
"Encode a string or binary value using Base32."
}
fn extra_description(&self) -> &str {
EXTRA_USAGE
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Encode a binary value",
example: r#"0x[01 02 10] | encode base32"#,
result: Some(Value::test_string("AEBBA===")),
},
Example {
description: "Encode a string",
example: r#""hello there" | encode base32"#,
result: Some(Value::test_string("NBSWY3DPEB2GQZLSMU======")),
},
Example {
description: "Don't apply padding to the output",
example: r#""hi" | encode base32 --nopad"#,
result: Some(Value::test_string("NBUQ")),
},
]
}
fn is_const(&self) -> bool {
true
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let encoding = if call.has_flag(engine_state, stack, "nopad")? {
data_encoding::BASE32_NOPAD
} else {
data_encoding::BASE32
};
super::encode(encoding, call.span(), input)
}
fn run_const(
&self,
working_set: &StateWorkingSet,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let encoding = if call.has_flag_const(working_set, "nopad")? {
data_encoding::BASE32_NOPAD
} else {
data_encoding::BASE32
};
super::encode(encoding, call.span(), input)
}
}
#[cfg(test)]
mod tests {
    use super::{DecodeBase32, EncodeBase32};

    /// Hands the `decode base32` command to `crate::test_examples`.
    #[test]
    fn test_examples_decode() {
        let command = DecodeBase32;
        crate::test_examples(command)
    }

    /// Hands the `encode base32` command to `crate::test_examples`.
    #[test]
    fn test_examples_encode() {
        let command = EncodeBase32;
        crate::test_examples(command)
    }
}

View File

@ -0,0 +1,181 @@
use nu_engine::command_prelude::*;
/// Extra help text returned from `extra_description` by both base32hex commands in this file.
const EXTRA_USAGE: &str = r"This command uses an alternative Base32 alphabet, defined in RFC 4648, section 7.
Note this command will collect stream input.";
/// The `decode base32hex` command: turns a base32hex string into a binary value.
#[derive(Clone)]
pub struct DecodeBase32Hex;

impl DecodeBase32Hex {
    /// Selects the codec for the `--nopad` switch.
    ///
    /// Shared by `run` and `run_const` so both evaluation paths always agree.
    fn encoding(nopad: bool) -> data_encoding::Encoding {
        if nopad {
            data_encoding::BASE32HEX_NOPAD
        } else {
            data_encoding::BASE32HEX
        }
    }
}

impl Command for DecodeBase32Hex {
    fn name(&self) -> &str {
        "decode base32hex"
    }

    fn signature(&self) -> Signature {
        Signature::build("decode base32hex")
            .input_output_types(vec![(Type::String, Type::Binary)])
            .allow_variants_without_examples(true)
            .switch("nopad", "Reject input with padding.", None)
            .category(Category::Formats)
    }

    fn description(&self) -> &str {
        // This is the decoding half of the pair; the help text must say so.
        "Decode a base32hex value."
    }

    fn extra_description(&self) -> &str {
        EXTRA_USAGE
    }

    fn examples(&self) -> Vec<Example> {
        vec![
            Example {
                description: "Decode arbitrary binary data",
                example: r#""ATNAQ===" | decode base32hex"#,
                result: Some(Value::test_binary(vec![0x57, 0x6E, 0xAD])),
            },
            Example {
                description: "Decode an encoded string",
                example: r#""D1KG====" | decode base32hex | decode"#,
                result: None,
            },
            Example {
                description: "Parse a string without padding",
                example: r#""ATNAQ" | decode base32hex --nopad"#,
                result: Some(Value::test_binary(vec![0x57, 0x6E, 0xAD])),
            },
        ]
    }

    fn is_const(&self) -> bool {
        true
    }

    /// Runtime entry point: reads `--nopad` from the stack and decodes the input.
    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        let nopad = call.has_flag(engine_state, stack, "nopad")?;
        super::decode(Self::encoding(nopad), call.head, input)
    }

    /// Const-evaluation entry point: same behavior as `run`, flags read from the working set.
    fn run_const(
        &self,
        working_set: &StateWorkingSet,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        let nopad = call.has_flag_const(working_set, "nopad")?;
        super::decode(Self::encoding(nopad), call.head, input)
    }
}
/// The `encode base32hex` command: turns a string or binary value into base32hex text.
#[derive(Clone)]
pub struct EncodeBase32Hex;

impl EncodeBase32Hex {
    /// Maps the `--nopad` switch onto the matching `data_encoding` codec.
    fn encoding(nopad: bool) -> data_encoding::Encoding {
        match nopad {
            true => data_encoding::BASE32HEX_NOPAD,
            false => data_encoding::BASE32HEX,
        }
    }
}

impl Command for EncodeBase32Hex {
    fn name(&self) -> &str {
        "encode base32hex"
    }

    fn signature(&self) -> Signature {
        // Both textual and binary input produce a string.
        let io_types = vec![
            (Type::String, Type::String),
            (Type::Binary, Type::String),
        ];
        Signature::build("encode base32hex")
            .input_output_types(io_types)
            .switch("nopad", "Don't pad the output.", None)
            .category(Category::Formats)
    }

    fn description(&self) -> &str {
        "Encode a binary value or a string using base32hex."
    }

    fn extra_description(&self) -> &str {
        EXTRA_USAGE
    }

    fn examples(&self) -> Vec<Example> {
        let from_binary = Example {
            description: "Encode a binary value",
            example: r#"0x[57 6E AD] | encode base32hex"#,
            result: Some(Value::test_string("ATNAQ===")),
        };
        let from_string = Example {
            description: "Encode a string",
            example: r#""hello there" | encode base32hex"#,
            result: Some(Value::test_string("D1IMOR3F41Q6GPBICK======")),
        };
        let unpadded = Example {
            description: "Don't apply padding to the output",
            example: r#""hello there" | encode base32hex --nopad"#,
            result: Some(Value::test_string("D1IMOR3F41Q6GPBICK")),
        };
        vec![from_binary, from_string, unpadded]
    }

    fn is_const(&self) -> bool {
        true
    }

    /// Runtime entry point: the switch is looked up through the engine state and stack.
    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        let nopad = call.has_flag(engine_state, stack, "nopad")?;
        super::encode(Self::encoding(nopad), call.head, input)
    }

    /// Const-evaluation entry point: the switch is looked up through the working set.
    fn run_const(
        &self,
        working_set: &StateWorkingSet,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        let nopad = call.has_flag_const(working_set, "nopad")?;
        super::encode(Self::encoding(nopad), call.head, input)
    }
}
#[cfg(test)]
mod tests {
    use super::{DecodeBase32Hex, EncodeBase32Hex};

    /// Hands the `decode base32hex` command to `crate::test_examples`.
    #[test]
    fn test_examples_decode() {
        let command = DecodeBase32Hex;
        crate::test_examples(command)
    }

    /// Hands the `encode base32hex` command to `crate::test_examples`.
    #[test]
    fn test_examples_encode() {
        let command = EncodeBase32Hex;
        crate::test_examples(command)
    }
}

View File

@ -0,0 +1,193 @@
use data_encoding::Encoding;
use nu_engine::command_prelude::*;
/// Extra help text returned from `extra_description` by both Base64 commands in this file.
const EXTRA_USAGE: &str = r"The default alphabet is taken from RFC 4648, section 4. A URL-safe version is available.
Note this command will collect stream input.";
/// Chooses one of the four Base64 codecs from the two command switches.
///
/// `url` selects the URL-safe alphabet; `nopad` selects the unpadded variant.
fn get_encoding_from_flags(url: bool, nopad: bool) -> Encoding {
    if url {
        if nopad {
            data_encoding::BASE64URL_NOPAD
        } else {
            data_encoding::BASE64URL
        }
    } else if nopad {
        data_encoding::BASE64_NOPAD
    } else {
        data_encoding::BASE64
    }
}
/// Resolves the codec for a runtime call by reading `--url` and `--nopad` from the call.
///
/// Fails with whatever error the flag lookup reports.
fn get_encoding(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
) -> Result<Encoding, ShellError> {
    // Arguments are evaluated left to right, so `url` is still looked up first.
    Ok(get_encoding_from_flags(
        call.has_flag(engine_state, stack, "url")?,
        call.has_flag(engine_state, stack, "nopad")?,
    ))
}
/// Resolves the codec for a const-evaluated call by reading `--url` and `--nopad`
/// through the working set.
///
/// Fails with whatever error the flag lookup reports.
fn get_encoding_const(working_set: &StateWorkingSet, call: &Call) -> Result<Encoding, ShellError> {
    // Arguments are evaluated left to right, so `url` is still looked up first.
    Ok(get_encoding_from_flags(
        call.has_flag_const(working_set, "url")?,
        call.has_flag_const(working_set, "nopad")?,
    ))
}
/// The `decode new-base64` command: turns a Base64 string into a binary value.
#[derive(Clone)]
pub struct DecodeBase64;

impl Command for DecodeBase64 {
    fn name(&self) -> &str {
        "decode new-base64"
    }

    fn signature(&self) -> Signature {
        let io_types = vec![(Type::String, Type::Binary)];
        Signature::build("decode new-base64")
            .input_output_types(io_types)
            .allow_variants_without_examples(true)
            .switch("url", "Decode the URL-safe Base64 version.", None)
            .switch("nopad", "Reject padding.", None)
            .category(Category::Formats)
    }

    fn description(&self) -> &str {
        "Decode a Base64 value."
    }

    fn extra_description(&self) -> &str {
        EXTRA_USAGE
    }

    fn examples(&self) -> Vec<Example> {
        let text = Example {
            description: "Decode a Base64 string",
            example: r#""U29tZSBEYXRh" | decode new-base64 | decode"#,
            result: None,
        };
        let arbitrary = Example {
            description: "Decode arbitrary data",
            example: r#""/w==" | decode new-base64"#,
            result: Some(Value::test_binary(vec![0xFF])),
        };
        let url_safe = Example {
            description: "Decode a URL-safe Base64 string",
            example: r#""_w==" | decode new-base64 --url"#,
            result: Some(Value::test_binary(vec![0xFF])),
        };
        vec![text, arbitrary, url_safe]
    }

    fn is_const(&self) -> bool {
        true
    }

    /// Runtime entry point: resolves the codec from the call's flags, then decodes.
    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        get_encoding(engine_state, stack, call)
            .and_then(|encoding| super::decode(encoding, call.head, input))
    }

    /// Const-evaluation entry point: resolves the codec via the working set, then decodes.
    fn run_const(
        &self,
        working_set: &StateWorkingSet,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        get_encoding_const(working_set, call)
            .and_then(|encoding| super::decode(encoding, call.head, input))
    }
}
/// The `encode new-base64` command: turns a string or binary value into Base64 text.
#[derive(Clone)]
pub struct EncodeBase64;

impl Command for EncodeBase64 {
    fn name(&self) -> &str {
        "encode new-base64"
    }

    fn signature(&self) -> Signature {
        // Both textual and binary input produce a string.
        let io_types = vec![
            (Type::String, Type::String),
            (Type::Binary, Type::String),
        ];
        Signature::build("encode new-base64")
            .input_output_types(io_types)
            .switch("url", "Use the URL-safe Base64 version.", None)
            .switch("nopad", "Don't pad the output.", None)
            .category(Category::Formats)
    }

    fn description(&self) -> &str {
        "Encode a string or binary value using Base64."
    }

    fn extra_description(&self) -> &str {
        EXTRA_USAGE
    }

    fn examples(&self) -> Vec<Example> {
        let text = Example {
            description: "Encode a string with Base64",
            example: r#""Alphabet from A to Z" | encode new-base64"#,
            result: Some(Value::test_string("QWxwaGFiZXQgZnJvbSBBIHRvIFo=")),
        };
        let arbitrary = Example {
            description: "Encode arbitrary data",
            example: r#"0x[BE EE FF] | encode new-base64"#,
            result: Some(Value::test_string("vu7/")),
        };
        let url_safe = Example {
            description: "Use a URL-safe alphabet",
            example: r#"0x[BE EE FF] | encode new-base64 --url"#,
            result: Some(Value::test_string("vu7_")),
        };
        vec![text, arbitrary, url_safe]
    }

    fn is_const(&self) -> bool {
        true
    }

    /// Runtime entry point: resolves the codec from the call's flags, then encodes.
    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        get_encoding(engine_state, stack, call)
            .and_then(|encoding| super::encode(encoding, call.head, input))
    }

    /// Const-evaluation entry point: resolves the codec via the working set, then encodes.
    fn run_const(
        &self,
        working_set: &StateWorkingSet,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        get_encoding_const(working_set, call)
            .and_then(|encoding| super::encode(encoding, call.head, input))
    }
}
#[cfg(test)]
mod tests {
    use super::{DecodeBase64, EncodeBase64};

    /// Hands the `decode new-base64` command to `crate::test_examples`.
    #[test]
    fn test_examples_decode() {
        let command = DecodeBase64;
        crate::test_examples(command)
    }

    /// Hands the `encode new-base64` command to `crate::test_examples`.
    #[test]
    fn test_examples_encode() {
        let command = EncodeBase64;
        crate::test_examples(command)
    }
}

View File

@ -0,0 +1,151 @@
use nu_engine::command_prelude::*;
/// The `decode hex` command: turns a hexadecimal string into a binary value.
#[derive(Clone)]
pub struct DecodeHex;

impl Command for DecodeHex {
    fn name(&self) -> &str {
        "decode hex"
    }

    fn signature(&self) -> Signature {
        Signature::build("decode hex")
            .input_output_types(vec![(Type::String, Type::Binary)])
            .category(Category::Formats)
    }

    fn description(&self) -> &str {
        "Hex decode a value."
    }

    fn examples(&self) -> Vec<Example> {
        vec![
            Example {
                description: "Decode arbitrary binary data",
                example: r#""09FD" | decode hex"#,
                result: Some(Value::test_binary(vec![0x09, 0xFD])),
            },
            Example {
                description: "Lowercase Hex is also accepted",
                example: r#""09fd" | decode hex"#,
                result: Some(Value::test_binary(vec![0x09, 0xFD])),
            },
        ]
    }

    fn is_const(&self) -> bool {
        true
    }

    /// Runtime entry point. The command has no flags, so the engine state and
    /// stack are not consulted.
    fn run(
        &self,
        _engine_state: &EngineState,
        _stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        super::decode(data_encoding::HEXLOWER_PERMISSIVE, call.head, input)
    }

    /// Const-evaluation entry point.
    ///
    /// `is_const` advertises const support, so this must do real work instead
    /// of panicking; it mirrors `run` exactly because there are no flags to read.
    fn run_const(
        &self,
        _working_set: &StateWorkingSet,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        super::decode(data_encoding::HEXLOWER_PERMISSIVE, call.head, input)
    }
}
/// The `encode hex` command: turns a string or binary value into hexadecimal text.
#[derive(Clone)]
pub struct EncodeHex;

impl EncodeHex {
    /// Maps the `--lower` switch onto the matching `data_encoding` codec.
    fn encoding(lower: bool) -> data_encoding::Encoding {
        match lower {
            true => data_encoding::HEXLOWER,
            false => data_encoding::HEXUPPER,
        }
    }
}

impl Command for EncodeHex {
    fn name(&self) -> &str {
        "encode hex"
    }

    fn signature(&self) -> Signature {
        // Both textual and binary input produce a string.
        let io_types = vec![
            (Type::String, Type::String),
            (Type::Binary, Type::String),
        ];
        Signature::build("encode hex")
            .input_output_types(io_types)
            .switch("lower", "Encode to lowercase hex.", None)
            .category(Category::Formats)
    }

    fn description(&self) -> &str {
        "Hex encode a binary value or a string."
    }

    fn examples(&self) -> Vec<Example> {
        let from_binary = Example {
            description: "Encode a binary value",
            example: r#"0x[C3 06] | encode hex"#,
            result: Some(Value::test_string("C306")),
        };
        let from_string = Example {
            description: "Encode a string",
            example: r#""hello" | encode hex"#,
            result: Some(Value::test_string("68656C6C6F")),
        };
        let lowercase = Example {
            description: "Output a Lowercase version of the encoding",
            example: r#"0x[AD EF] | encode hex --lower"#,
            result: Some(Value::test_string("adef")),
        };
        vec![from_binary, from_string, lowercase]
    }

    fn is_const(&self) -> bool {
        true
    }

    /// Runtime entry point: the switch is looked up through the engine state and stack.
    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        let lower = call.has_flag(engine_state, stack, "lower")?;
        super::encode(Self::encoding(lower), call.head, input)
    }

    /// Const-evaluation entry point: the switch is looked up through the working set.
    fn run_const(
        &self,
        working_set: &StateWorkingSet,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        let lower = call.has_flag_const(working_set, "lower")?;
        super::encode(Self::encoding(lower), call.head, input)
    }
}
#[cfg(test)]
mod tests {
    use super::{DecodeHex, EncodeHex};

    /// Hands the `decode hex` command to `crate::test_examples`.
    #[test]
    fn test_examples_decode() {
        let command = DecodeHex;
        crate::test_examples(command)
    }

    /// Hands the `encode hex` command to `crate::test_examples`.
    #[test]
    fn test_examples_encode() {
        let command = EncodeHex;
        crate::test_examples(command)
    }
}

View File

@ -0,0 +1,99 @@
#![allow(unused)]
use data_encoding::Encoding;
use nu_engine::command_prelude::*;
mod base32;
mod base32hex;
mod base64;
mod hex;
pub use base32::{DecodeBase32, EncodeBase32};
pub use base32hex::{DecodeBase32Hex, EncodeBase32Hex};
pub use base64::{DecodeBase64, EncodeBase64};
pub use hex::{DecodeHex, EncodeHex};
/// Decodes string pipeline input with `encoding` and returns the bytes as a binary value.
///
/// The input's metadata is captured before the input is consumed and is
/// re-attached to the result. The produced value is tagged with `call_span`.
///
/// # Errors
///
/// Propagates any error from reading the input as a string, and reports a
/// decoding failure as `ShellError::IncorrectValue` pointing at the input span.
pub fn decode(
    encoding: Encoding,
    call_span: Span,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let metadata = input.metadata();
    let (text, text_span) = get_string(input, call_span)?;

    let bytes = encoding
        .decode(text.as_bytes())
        .map_err(|err| ShellError::IncorrectValue {
            msg: err.to_string(),
            val_span: text_span,
            call_span,
        })?;

    let value = Value::binary(bytes, call_span);
    Ok(value.into_pipeline_data_with_metadata(metadata))
}
/// Encodes string or binary pipeline input with `encoding` and returns the text as a string value.
///
/// The input's metadata is captured before the input is consumed and is
/// re-attached to the result. The produced value is tagged with `call_span`.
///
/// # Errors
///
/// Propagates any error from reading the input as bytes.
pub fn encode(
    encoding: Encoding,
    call_span: Span,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let metadata = input.metadata();
    // The span of the input is not needed here, only its bytes.
    let bytes = get_binary(input, call_span).map(|(bytes, _span)| bytes)?;

    let text = encoding.encode(bytes.as_slice());
    let value = Value::string(text, call_span);
    Ok(value.into_pipeline_data_with_metadata(metadata))
}
/// Extracts the pipeline input as a `String`, together with the span it came from.
///
/// A string value is returned as-is; a byte stream is collected and converted
/// with `into_string`.
///
/// # Errors
///
/// - `ShellError::OnlySupportsThisInputType` when the input is a non-string
///   value or a list stream (previously these cases panicked).
/// - `ShellError::PipelineEmpty` when there is no input at all.
/// - Any error produced while collecting a byte stream into a string.
fn get_string(input: PipelineData, call_span: Span) -> Result<(String, Span), ShellError> {
    match input {
        PipelineData::Value(val, ..) => {
            let span = val.span();
            match val {
                Value::String { val, .. } => Ok((val, span)),
                other => Err(ShellError::OnlySupportsThisInputType {
                    exp_input_type: "string".into(),
                    wrong_type: other.get_type().to_string(),
                    dst_span: call_span,
                    src_span: span,
                }),
            }
        }
        PipelineData::ListStream(stream, ..) => Err(ShellError::OnlySupportsThisInputType {
            exp_input_type: "string".into(),
            wrong_type: "list (stream)".into(),
            dst_span: call_span,
            src_span: stream.span(),
        }),
        PipelineData::ByteStream(stream, ..) => {
            // Grab the span first: `into_string` consumes the stream.
            let span = stream.span();
            Ok((stream.into_string()?, span))
        }
        PipelineData::Empty => Err(ShellError::PipelineEmpty {
            dst_span: call_span,
        }),
    }
}
fn get_binary(input: PipelineData, call_span: Span) -> Result<(Vec<u8>, Span), ShellError> {
match input {
PipelineData::Value(val, ..) => {
let span = val.span();
match val {
Value::Binary { val, .. } => Ok((val, span)),
Value::String { val, .. } => Ok((val.into_bytes(), span)),
_ => {
todo!("Invalid type")
}
}
}
PipelineData::ListStream(..) => {
todo!()
}
PipelineData::ByteStream(stream, ..) => {
let span = stream.span();
Ok((stream.into_bytes()?, span))
}
PipelineData::Empty => {
todo!("Can't have empty data");
}
}
}

View File

@ -1,10 +1,11 @@
use super::base64::{operate, ActionType, Base64CommandArguments, CHARACTER_SET_DESC};
use nu_engine::command_prelude::*;
use nu_protocol::{report_warning_new, ParseWarning};
#[derive(Clone)]
pub struct DecodeBase64;
pub struct DecodeBase64Old;
impl Command for DecodeBase64 {
impl Command for DecodeBase64Old {
fn name(&self) -> &str {
"decode base64"
}
@ -77,6 +78,16 @@ impl Command for DecodeBase64 {
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
report_warning_new(
engine_state,
&ParseWarning::DeprecatedWarning {
old_command: "decode base64".into(),
new_suggestion: "the new `decode new-base64` version".into(),
span: call.head,
url: "`help decode new-base64`".into(),
},
);
let character_set: Option<Spanned<String>> =
call.get_flag(engine_state, stack, "character-set")?;
let binary = call.has_flag(engine_state, stack, "binary")?;
@ -114,6 +125,6 @@ mod tests {
#[test]
fn test_examples() {
crate::test_examples(DecodeBase64)
crate::test_examples(DecodeBase64Old)
}
}

View File

@ -1,10 +1,11 @@
use super::base64::{operate, ActionType, Base64CommandArguments, CHARACTER_SET_DESC};
use nu_engine::command_prelude::*;
use nu_protocol::{report_warning_new, ParseWarning};
#[derive(Clone)]
pub struct EncodeBase64;
pub struct EncodeBase64Old;
impl Command for EncodeBase64 {
impl Command for EncodeBase64Old {
fn name(&self) -> &str {
"encode base64"
}
@ -81,6 +82,16 @@ impl Command for EncodeBase64 {
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
report_warning_new(
engine_state,
&ParseWarning::DeprecatedWarning {
old_command: "encode base64".into(),
new_suggestion: "the new `encode new-base64` version".into(),
span: call.head,
url: "`help encode new-base64`".into(),
},
);
let character_set: Option<Spanned<String>> =
call.get_flag(engine_state, stack, "character-set")?;
let binary = call.has_flag(engine_state, stack, "binary")?;
@ -118,6 +129,6 @@ mod tests {
#[test]
fn test_examples() {
crate::test_examples(EncodeBase64)
crate::test_examples(EncodeBase64Old)
}
}

View File

@ -6,6 +6,6 @@ mod encode_base64;
mod encoding;
pub use self::decode::Decode;
pub use self::decode_base64::DecodeBase64;
pub use self::decode_base64::DecodeBase64Old;
pub use self::encode::Encode;
pub use self::encode_base64::EncodeBase64;
pub use self::encode_base64::EncodeBase64Old;

View File

@ -1,3 +1,4 @@
mod base;
mod char_;
mod detect_columns;
mod encode_decode;
@ -7,6 +8,10 @@ mod parse;
mod split;
mod str_;
pub use base::{
DecodeBase32, DecodeBase32Hex, DecodeBase64, DecodeHex, EncodeBase32, EncodeBase32Hex,
EncodeBase64, EncodeHex,
};
pub use char_::Char;
pub use detect_columns::*;
pub use encode_decode::*;