From 81df42d63b6097c6c73fc49202a710846531659e Mon Sep 17 00:00:00 2001
From: WindSoilder
Date: Sat, 9 Jul 2022 10:42:31 +0800
Subject: [PATCH] add more bytes cmd (#5989)

---
 crates/nu-command/src/bytes/at.rs        | 295 +++++++++++++++++++++++
 crates/nu-command/src/bytes/index_of.rs  | 196 ++++++++++++++
 crates/nu-command/src/bytes/mod.rs       |   4 +
 crates/nu-command/src/default_context.rs |   4 +-
 4 files changed, 498 insertions(+), 1 deletion(-)
 create mode 100644 crates/nu-command/src/bytes/at.rs
 create mode 100644 crates/nu-command/src/bytes/index_of.rs

diff --git a/crates/nu-command/src/bytes/at.rs b/crates/nu-command/src/bytes/at.rs
new file mode 100644
index 000000000..ee7e4036a
--- /dev/null
+++ b/crates/nu-command/src/bytes/at.rs
@@ -0,0 +1,295 @@
+use super::{operate, BytesArgument};
+use nu_engine::CallExt;
+use nu_protocol::ast::Call;
+use nu_protocol::ast::CellPath;
+use nu_protocol::engine::{Command, EngineState, Stack};
+use nu_protocol::{
+    Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
+};
+use std::cmp::Ordering;
+
+/// The `bytes at` command: extracts a sub-range of bytes from binary input.
+#[derive(Clone)]
+pub struct BytesAt;
+
+/// Parsed arguments of a `bytes at` call.
+struct Arguments {
+    /// Inclusive start index; a negative value counts back from the end of the input.
+    start: isize,
+    /// Exclusive end index; negative counts back from the end, `isize::MAX` means "to the end".
+    end: isize,
+    /// Span of the range argument, used when reporting an invalid range.
+    arg_span: Span,
+    column_paths: Option<Vec<CellPath>>,
+}
+
+impl BytesArgument for Arguments {
+    fn take_column_paths(&mut self) -> Option<Vec<CellPath>> {
+        self.column_paths.take()
+    }
+}
+
+/// Ensure the given `range` is valid and return it as a `(start, end, span)` triple.
+///
+/// `range` is either a two-element list of ints/strings or a `"start,end"` string.
+/// An empty or `_` start means `0`; an empty or `_` end means `isize::MAX`.
+/// Any other input yields `ShellError::UnsupportedInput`.
+fn parse_range(range: Value, head: Span) -> Result<(isize, isize, Span), ShellError> {
+    let (start, end, span) = match range {
+        Value::List { mut vals, span } => {
+            if vals.len() != 2 {
+                return Err(ShellError::UnsupportedInput(
+                    "Expected exactly two indices".to_string(),
+                    span,
+                ));
+            } else {
+                let end = vals.pop().expect("Already check has size 2");
+                let end = match end {
+                    Value::Int { val, .. } => val.to_string(),
+                    Value::String { val, .. } => val,
+                    other => {
+                        return Err(ShellError::UnsupportedInput(
+                            "could not perform subbytes. Expecting a string or int".to_string(),
+                            other.span().unwrap_or(head),
+                        ))
+                    }
+                };
+                let start = vals.pop().expect("Already check has size 1");
+                let start = match start {
+                    Value::Int { val, .. } => val.to_string(),
+                    Value::String { val, .. } => val,
+                    other => {
+                        return Err(ShellError::UnsupportedInput(
+                            "could not perform subbytes. Expecting a string or int".to_string(),
+                            other.span().unwrap_or(head),
+                        ))
+                    }
+                };
+                (start, end, span)
+            }
+        }
+        Value::String { val, span } => {
+            let splitted_result = val.split_once(',');
+            match splitted_result {
+                Some((start, end)) => (start.to_string(), end.to_string(), span),
+                None => {
+                    return Err(ShellError::UnsupportedInput(
+                        "could not perform subbytes".to_string(),
+                        span,
+                    ))
+                }
+            }
+        }
+        other => {
+            return Err(ShellError::UnsupportedInput(
+                "could not perform subbytes".to_string(),
+                other.span().unwrap_or(head),
+            ))
+        }
+    };
+
+    let start: isize = if start.is_empty() || start == "_" {
+        0
+    } else {
+        match start.trim().parse() {
+            Ok(s) => s,
+            Err(_) => {
+                return Err(ShellError::UnsupportedInput(
+                    "could not perform subbytes".to_string(),
+                    span,
+                ))
+            }
+        }
+    };
+    let end: isize = if end.is_empty() || end == "_" {
+        isize::max_value()
+    } else {
+        match end.trim().parse() {
+            Ok(s) => s,
+            Err(_) => {
+                return Err(ShellError::UnsupportedInput(
+                    "could not perform subbytes".to_string(),
+                    span,
+                ))
+            }
+        }
+    };
+    Ok((start, end, span))
+}
+
+impl Command for BytesAt {
+    fn name(&self) -> &str {
+        "bytes at"
+    }
+
+    fn signature(&self) -> Signature {
+        Signature::build("bytes at")
+            .required("range", SyntaxShape::Any, "the indexes to get bytes")
+            .rest(
+                "rest",
+                SyntaxShape::CellPath,
+                "optionally get bytes by column paths",
+            )
+            .category(Category::Bytes)
+    }
+
+    fn usage(&self) -> &str {
+        "Get bytes defined by a range. Note that the start is included but the end is excluded, and that the first byte is index 0."
+    }
+
+    fn search_terms(&self) -> Vec<&str> {
+        vec!["slice"]
+    }
+
+    fn run(
+        &self,
+        engine_state: &EngineState,
+        stack: &mut Stack,
+        call: &Call,
+        input: PipelineData,
+    ) -> Result<PipelineData, ShellError> {
+        let range: Value = call.req(engine_state, stack, 0)?;
+        let (start, end, arg_span) = parse_range(range, call.head)?;
+        let column_paths: Vec<CellPath> = call.rest(engine_state, stack, 1)?;
+        let column_paths = if column_paths.is_empty() {
+            None
+        } else {
+            Some(column_paths)
+        };
+        let arg = Arguments {
+            start,
+            end,
+            arg_span,
+            column_paths,
+        };
+        operate(at, arg, input, call.head, engine_state.ctrlc.clone())
+    }
+
+    fn examples(&self) -> Vec<Example> {
+        vec![
+            Example {
+                description: "Get a subbytes `0x[10]` from the bytes `0x[33 44 55 10 01 13]`",
+                example: " 0x[33 44 55 10 01 13] | bytes at [3 4]",
+                result: Some(Value::Binary {
+                    val: vec![0x10],
+                    span: Span::test_data(),
+                }),
+            },
+            Example {
+                description: "Alternatively, you can use the form",
+                example: " 0x[33 44 55 10 01 13] | bytes at '3,4'",
+                result: Some(Value::Binary {
+                    val: vec![0x10],
+                    span: Span::test_data(),
+                }),
+            },
+            Example {
+                description: "Drop the last `n` bytes from the binary",
+                example: " 0x[33 44 55 10 01 13] | bytes at ',-3'",
+                result: Some(Value::Binary {
+                    val: vec![0x33, 0x44, 0x55],
+                    span: Span::test_data(),
+                }),
+            },
+            Example {
+                description: "Get the remaining bytes from a starting index",
+                example: " 0x[33 44 55 10 01 13] | bytes at '3,'",
+                result: Some(Value::Binary {
+                    val: vec![0x10, 0x01, 0x13],
+                    span: Span::test_data(),
+                }),
+            },
+            Example {
+                description: "Get the bytes from the beginning until ending index",
+                example: " 0x[33 44 55 10 01 13] | bytes at ',4'",
+                result: Some(Value::Binary {
+                    val: vec![0x33, 0x44, 0x55, 0x10],
+                    span: Span::test_data(),
+                }),
+            },
+            Example {
+                description: "Or the remaining bytes from a starting index inside a table",
+                example: r#" [[ColA ColB ColC]; [0x[11 12 13] 0x[14 15 16] 0x[17 18 19]]] | bytes at "1," ColB ColC"#,
+                result: Some(Value::List {
+                    vals: vec![Value::Record {
+                        cols: vec!["ColA".to_string(), "ColB".to_string(), "ColC".to_string()],
+                        vals: vec![
+                            Value::Binary {
+                                val: vec![0x11, 0x12, 0x13],
+                                span: Span::test_data(),
+                            },
+                            Value::Binary {
+                                val: vec![0x15, 0x16],
+                                span: Span::test_data(),
+                            },
+                            Value::Binary {
+                                val: vec![0x18, 0x19],
+                                span: Span::test_data(),
+                            },
+                        ],
+                        span: Span::test_data(),
+                    }],
+                    span: Span::test_data(),
+                }),
+            },
+        ]
+    }
+}
+
+/// Return the bytes of `input` selected by `arg`'s range.
+///
+/// Negative indices count back from the end of `input` and are clamped to `0`, so a start
+/// that reaches before the first byte selects from the beginning instead of wrapping around
+/// when converted to `usize`. Yields `Value::Error` when the resolved start is past the end
+/// index, and an empty binary when the start lies at or beyond the end of `input`.
+fn at(input: &[u8], arg: &Arguments, span: Span) -> Value {
+    let len: isize = input.len() as isize;
+
+    let start: isize = if arg.start < 0 {
+        std::cmp::max(len + arg.start, 0)
+    } else {
+        arg.start
+    };
+    let end: isize = if arg.end < 0 {
+        std::cmp::max(len + arg.end, 0)
+    } else {
+        arg.end
+    };
+
+    if start < len && end >= 0 {
+        match start.cmp(&end) {
+            Ordering::Equal => Value::Binary { val: vec![], span },
+            Ordering::Greater => Value::Error {
+                error: ShellError::UnsupportedInput(
+                    "End must be greater than or equal to Start".to_string(),
+                    arg.arg_span,
+                ),
+            },
+            Ordering::Less => Value::Binary {
+                val: {
+                    let input_iter = input.iter().copied().skip(start as usize);
+                    if end == isize::max_value() {
+                        input_iter.collect()
+                    } else {
+                        input_iter.take((end - start) as usize).collect()
+                    }
+                },
+                span,
+            },
+        }
+    } else {
+        Value::Binary { val: vec![], span }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_examples() {
+        use crate::test_examples;
+
+        test_examples(BytesAt {})
+    }
+}
diff --git a/crates/nu-command/src/bytes/index_of.rs b/crates/nu-command/src/bytes/index_of.rs
new file mode 100644
index 000000000..dc6fa677e
--- /dev/null
+++ b/crates/nu-command/src/bytes/index_of.rs
@@ -0,0 +1,196 @@
+use super::{operate, BytesArgument};
+use nu_engine::CallExt;
+use nu_protocol::ast::{Call, CellPath};
+use nu_protocol::engine::{Command, EngineState, Stack};
+use nu_protocol::{
+    Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
+};
+
+/// Parsed arguments of a `bytes index-of` call.
+struct Arguments {
+    /// Byte sequence to search for.
+    pattern: Vec<u8>,
+    /// Search from the end of the input (`--end`); ignored when `all` is set.
+    end: bool,
+    /// Report every non-overlapping match instead of a single index (`--all`).
+    all: bool,
+    column_paths: Option<Vec<CellPath>>,
+}
+
+impl BytesArgument for Arguments {
+    fn take_column_paths(&mut self) -> Option<Vec<CellPath>> {
+        self.column_paths.take()
+    }
+}
+
+/// The `bytes index-of` command: locates a byte pattern inside binary input.
+#[derive(Clone)]
+pub struct BytesIndexOf;
+
+impl Command for BytesIndexOf {
+    fn name(&self) -> &str {
+        "bytes index-of"
+    }
+
+    fn signature(&self) -> Signature {
+        Signature::build("bytes index-of")
+            .required(
+                "pattern",
+                SyntaxShape::Binary,
+                "the pattern to find index of",
+            )
+            .rest(
+                "rest",
+                SyntaxShape::CellPath,
+                "optionally returns index of pattern in string by column paths",
+            )
+            .switch("all", "returns all matched index", Some('a'))
+            .switch("end", "search from the end of the binary", Some('e'))
+            .category(Category::Bytes)
+    }
+
+    fn usage(&self) -> &str {
+        "Returns start index of first occurrence of pattern in bytes, or -1 if no match"
+    }
+
+    fn search_terms(&self) -> Vec<&str> {
+        vec!["pattern", "match", "find", "search", "index"]
+    }
+
+    fn run(
+        &self,
+        engine_state: &EngineState,
+        stack: &mut Stack,
+        call: &Call,
+        input: PipelineData,
+    ) -> Result<PipelineData, ShellError> {
+        let pattern: Vec<u8> = call.req(engine_state, stack, 0)?;
+        let column_paths: Vec<CellPath> = call.rest(engine_state, stack, 1)?;
+        let column_paths = if column_paths.is_empty() {
+            None
+        } else {
+            Some(column_paths)
+        };
+        let arg = Arguments {
+            pattern,
+            end: call.has_flag("end"),
+            all: call.has_flag("all"),
+            column_paths,
+        };
+        operate(index_of, arg, input, call.head, engine_state.ctrlc.clone())
+    }
+
+    fn examples(&self) -> Vec<Example> {
+        vec![
+            Example {
+                description: "Returns index of pattern in bytes",
+                example: " 0x[33 44 55 10 01 13 44 55] | bytes index-of 0x[44 55]",
+                result: Some(Value::test_int(1)),
+            },
+            Example {
+                description: "Returns index of pattern, search from end",
+                example: " 0x[33 44 55 10 01 13 44 55] | bytes index-of -e 0x[44 55]",
+                result: Some(Value::test_int(6)),
+            },
+            Example {
+                description: "Returns all matched index",
+                example: " 0x[33 44 55 10 01 33 44 33 44] | bytes index-of -a 0x[33 44]",
+                result: Some(Value::List {
+                    vals: vec![Value::test_int(0), Value::test_int(5), Value::test_int(7)],
+                    span: Span::test_data(),
+                }),
+            },
+            Example {
+                description: "Returns index of pattern for specific column",
+                example: r#" [[ColA ColB ColC]; [0x[11 12 13] 0x[14 15 16] 0x[17 18 19]]] | bytes index-of 0x[11] ColA ColC"#,
+                result: Some(Value::List {
+                    vals: vec![Value::Record {
+                        cols: vec!["ColA".to_string(), "ColB".to_string(), "ColC".to_string()],
+                        vals: vec![
+                            Value::test_int(0),
+                            Value::Binary {
+                                val: vec![0x14, 0x15, 0x16],
+                                span: Span::test_data(),
+                            },
+                            Value::test_int(-1),
+                        ],
+                        span: Span::test_data(),
+                    }],
+                    span: Span::test_data(),
+                }),
+            },
+        ]
+    }
+}
+
+/// Search `input` for `arg.pattern` and report where it occurs.
+///
+/// With `--all`, returns a list of the start indices of every non-overlapping match,
+/// scanning from the front. Otherwise returns the start index of the first match (or of the
+/// last match with `--end`), or `-1` when the pattern does not occur. An empty pattern is
+/// rejected with `ShellError::UnsupportedInput`.
+fn index_of(input: &[u8], arg: &Arguments, span: Span) -> Value {
+    // `slice::windows` panics on a window size of zero, and the `--all` scan below would
+    // never advance, so an empty pattern has to be rejected up front.
+    if arg.pattern.is_empty() {
+        return Value::Error {
+            error: ShellError::UnsupportedInput("pattern must not be empty".to_string(), span),
+        };
+    }
+
+    // currently, `--all` flag doesn't support finding from end.
+    if arg.all {
+        let mut result = vec![];
+        // doing find stuff.
+        let (mut left, mut right) = (0, arg.pattern.len());
+        let input_len = input.len();
+        let pattern_len = arg.pattern.len();
+        while right <= input_len {
+            if input[left..right] == arg.pattern {
+                result.push(Value::Int {
+                    val: left as i64,
+                    span,
+                });
+                left += pattern_len;
+                right += pattern_len;
+            } else {
+                left += 1;
+                right += 1;
+            }
+        }
+        Value::List { vals: result, span }
+    } else {
+        let mut iter = input.windows(arg.pattern.len());
+
+        if arg.end {
+            Value::Int {
+                val: iter
+                    .rev()
+                    .position(|sub_bytes| sub_bytes == arg.pattern)
+                    .map(|x| (input.len() - arg.pattern.len() - x) as i64)
+                    .unwrap_or(-1),
+                span,
+            }
+        } else {
+            Value::Int {
+                val: iter
+                    .position(|sub_bytes| sub_bytes == arg.pattern)
+                    .map(|x| x as i64)
+                    .unwrap_or(-1),
+                span,
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_examples() {
+        use crate::test_examples;
+
+        test_examples(BytesIndexOf {})
+    }
+}
diff --git a/crates/nu-command/src/bytes/mod.rs b/crates/nu-command/src/bytes/mod.rs
index 6f2674a0d..3aef3a4b0 100644
--- a/crates/nu-command/src/bytes/mod.rs
+++ b/crates/nu-command/src/bytes/mod.rs
@@ -1,6 +1,8 @@
 mod add;
+mod at;
 mod bytes_;
 mod ends_with;
+mod index_of;
 mod length;
 mod replace;
 mod reverse;
@@ -12,8 +14,10 @@ use std::sync::atomic::AtomicBool;
 use std::sync::Arc;
 
 pub use add::BytesAdd;
+pub use at::BytesAt;
 pub use bytes_::Bytes;
 pub use ends_with::BytesEndsWith;
+pub use index_of::BytesIndexOf;
 pub use length::BytesLen;
 pub use replace::BytesReplace;
 pub use reverse::BytesReverse;
diff --git a/crates/nu-command/src/default_context.rs b/crates/nu-command/src/default_context.rs
index f14966932..ea5b326ab 100644
--- a/crates/nu-command/src/default_context.rs
+++ b/crates/nu-command/src/default_context.rs
@@ -215,7 +215,9 @@ pub fn create_default_context(cwd: impl AsRef<Path>) -> EngineState {
             BytesEndsWith,
             BytesReverse,
             BytesReplace,
-            BytesAdd
+            BytesAdd,
+            BytesAt,
+            BytesIndexOf,
         }
 
         // FileSystem