From 87086262f356d1c63c320bddb0d6654132a50a28 Mon Sep 17 00:00:00 2001 From: mike <98623181+1Kinoti@users.noreply.github.com> Date: Sun, 2 Apr 2023 19:28:36 +0300 Subject: [PATCH] make `bytes at` use ranges (#8710) # Description follow up to #8660 # User-Facing Changes **BREAKING CHANGE** any scripts using the previous string or list syntax will BREAK --- crates/nu-command/src/bytes/at.rs | 294 ++++++------------ .../nu-command/src/strings/str_/substring.rs | 37 +-- crates/nu-command/src/util.rs | 41 ++- 3 files changed, 145 insertions(+), 227 deletions(-) diff --git a/crates/nu-command/src/bytes/at.rs b/crates/nu-command/src/bytes/at.rs index df81e33cd..89d9cb313 100644 --- a/crates/nu-command/src/bytes/at.rs +++ b/crates/nu-command/src/bytes/at.rs @@ -1,20 +1,19 @@ -use crate::input_handler::{operate, CmdArgument}; -use nu_engine::CallExt; -use nu_protocol::ast::Call; -use nu_protocol::ast::CellPath; -use nu_protocol::engine::{Command, EngineState, Stack}; -use nu_protocol::{ - Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value, +use crate::{ + input_handler::{operate, CmdArgument}, + util, +}; +use nu_engine::CallExt; +use nu_protocol::{ + ast::{Call, CellPath}, + engine::{Command, EngineState, Stack}, + Category, Example, PipelineData, Range, ShellError, Signature, Span, SyntaxShape, Type, Value, }; -use std::cmp::Ordering; #[derive(Clone)] pub struct BytesAt; struct Arguments { - start: isize, - end: isize, - arg_span: Span, + indexes: Subbytes, cell_paths: Option<Vec<CellPath>>, } @@ -24,104 +23,15 @@ impl CmdArgument for Arguments { } } -/// ensure given `range` is valid, and returns [start, end, val_span] pair. -fn parse_range(range: Value, head: Span) -> Result<(isize, isize, Span), ShellError> { - let (start, end, span) = match range { - Value::List { mut vals, span } => { - if vals.len() != 2 { - return Err(ShellError::UnsupportedInput( - "More than two indices in range".to_string(), - "value originates from here".to_string(), - head, - span, - )); - } else { - let end = vals.pop().expect("Already check has size 2"); - let end = match end { - Value::Int { val, .. } => val.to_string(), - Value::String { val, .. } => val, - // Explicitly propagate errors instead of dropping them. - Value::Error { error } => return Err(*error), - other => { - return Err(ShellError::UnsupportedInput( - "Only string or list<int> ranges are supported".into(), - format!("input type: {:?}", other.get_type()), - head, - other.expect_span(), - )) - } - }; - let start = vals.pop().expect("Already check has size 1"); - let start = match start { - Value::Int { val, .. } => val.to_string(), - Value::String { val, .. } => val, - // Explicitly propagate errors instead of dropping them. - Value::Error { error } => return Err(*error), - other => { - return Err(ShellError::UnsupportedInput( - "Only string or list<int> ranges are supported".into(), - format!("input type: {:?}", other.get_type()), - head, - other.expect_span(), - )) - } - }; - (start, end, span) - } - } - Value::String { val, span } => { - let split_result = val.split_once(','); - match split_result { - Some((start, end)) => (start.to_string(), end.to_string(), span), - None => { - return Err(ShellError::UnsupportedInput( - "could not perform subbytes".to_string(), - "with this range".to_string(), - head, - span, - )) - } - } - } - // Explicitly propagate errors instead of dropping them. - Value::Error { error } => return Err(*error), - other => { - return Err(ShellError::UnsupportedInput( - "could not perform subbytes".to_string(), - "with this range".to_string(), - head, - other.expect_span(), - )) - } - }; - - let start: isize = if start.is_empty() || start == "_" { - 0 - } else { - start.trim().parse().map_err(|_| { - ShellError::UnsupportedInput( - "could not perform subbytes".to_string(), - "with this range".to_string(), - head, - span, - ) - })? - }; - let end: isize = if end.is_empty() || end == "_" { - isize::max_value() - } else { - end.trim().parse().map_err(|_| { - ShellError::UnsupportedInput( - "could not perform subbytes".to_string(), - "with this range".to_string(), - head, - span, - ) - })? - }; - Ok((start, end, span)) +impl From<(isize, isize)> for Subbytes { + fn from(input: (isize, isize)) -> Self { + Self(input.0, input.1) + } } +#[derive(Clone, Copy)] +struct Subbytes(isize, isize); + impl Command for BytesAt { fn name(&self) -> &str { "bytes at" @@ -131,7 +41,7 @@ impl Command for BytesAt { Signature::build("bytes at") .input_output_types(vec![(Type::Binary, Type::Binary)]) .vectorizes_over_list(true) - .required("range", SyntaxShape::Any, "the indexes to get bytes") + .required("range", SyntaxShape::Range, "the range to get bytes") .rest( "rest", SyntaxShape::CellPath, @@ -141,7 +51,7 @@ impl Command for BytesAt { } fn usage(&self) -> &str { - "Get bytes defined by a range. Note that the start is included but the end is excluded, and that the first byte is index 0." + "Get bytes defined by a range" } fn search_terms(&self) -> Vec<&str> { @@ -155,48 +65,45 @@ impl Command for BytesAt { call: &Call, input: PipelineData, ) -> Result<PipelineData, ShellError> { - let range: Value = call.req(engine_state, stack, 0)?; - let (start, end, arg_span) = parse_range(range, call.head)?; + let range: Range = call.req(engine_state, stack, 0)?; + let indexes = match util::process_range(&range) { + Ok(idxs) => idxs.into(), + Err(processing_error) => { + return Err(processing_error("could not perform subbytes", call.head)); + } + }; + let cell_paths: Vec<CellPath> = call.rest(engine_state, stack, 1)?; let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths); - let arg = Arguments { - start, - end, - arg_span, + let args = Arguments { + indexes, cell_paths, }; - operate(at, arg, input, call.head, engine_state.ctrlc.clone()) + + operate(action, args, input, call.head, engine_state.ctrlc.clone()) } fn examples(&self) -> Vec<Example> { vec![ Example { description: "Get a subbytes `0x[10 01]` from the bytes `0x[33 44 55 10 01 13]`", - example: " 0x[33 44 55 10 01 13] | bytes at [3 4]", + example: " 0x[33 44 55 10 01 13] | bytes at 3..<4", result: Some(Value::Binary { val: vec![0x10], span: Span::test_data(), }), }, Example { - description: "Alternatively, you can use the form", - example: " 0x[33 44 55 10 01 13] | bytes at '3,4'", + description: "Get a subbytes `0x[10 01 13]` from the bytes `0x[33 44 55 10 01 13]`", + example: " 0x[33 44 55 10 01 13] | bytes at 3..6", result: Some(Value::Binary { - val: vec![0x10], - span: Span::test_data(), - }), - }, - Example { - description: "Drop the last `n` characters from the string", - example: " 0x[33 44 55 10 01 13] | bytes at ',-3'", - result: Some(Value::Binary { - val: vec![0x33, 0x44, 0x55], + val: vec![0x10, 0x01, 0x13], span: Span::test_data(), }), }, Example { description: "Get the remaining characters from a starting index", - example: " 0x[33 44 55 10 01 13] | bytes at '3,'", + example: " 0x[33 44 55 10 01 13] | bytes at 3..", result: Some(Value::Binary { val: vec![0x10, 0x01, 0x13], span: Span::test_data(), @@ -204,7 +111,7 @@ impl Command for BytesAt { }, Example { description: "Get the characters from the beginning until ending index", - example: " 0x[33 44 55 10 01 13] | bytes at ',4'", + example: " 0x[33 44 55 10 01 13] | bytes at ..<4", result: Some(Value::Binary { val: vec![0x33, 0x44, 0x55, 0x10], span: Span::test_data(), @@ -213,7 +120,7 @@ impl Command for BytesAt { Example { description: "Or the characters from the beginning until ending index inside a table", - example: r#" [[ColA ColB ColC]; [0x[11 12 13] 0x[14 15 16] 0x[17 18 19]]] | bytes at "1," ColB ColC"#, + example: r#" [[ColA ColB ColC]; [0x[11 12 13] 0x[14 15 16] 0x[17 18 19]]] | bytes at 1.. ColB ColC"#, result: Some(Value::List { vals: vec![Value::Record { cols: vec!["ColA".to_string(), "ColB".to_string(), "ColC".to_string()], @@ -240,73 +147,66 @@ impl Command for BytesAt { } } -fn at(val: &Value, args: &Arguments, span: Span) -> Value { - match val { - Value::Binary { - val, - span: val_span, - } => at_impl(val, args, *val_span), - // Propagate errors by explicitly matching them before the final case. - Value::Error { .. } => val.clone(), +fn action(input: &Value, args: &Arguments, head: Span) -> Value { + let range = &args.indexes; + match input { + Value::Binary { val, .. } => { + use std::cmp::{self, Ordering}; + let len = val.len() as isize; + + let start = if range.0 < 0 { range.0 + len } else { range.0 }; + + let end = if range.1 < 0 { + cmp::max(range.1 + len, 0) + } else { + range.1 + }; + + if start < len && end >= 0 { + match start.cmp(&end) { + Ordering::Equal => Value::Binary { + val: vec![], + span: head, + }, + Ordering::Greater => Value::Error { + error: Box::new(ShellError::TypeMismatch { + err_message: "End must be greater than or equal to Start".to_string(), + span: head, + }), + }, + Ordering::Less => Value::Binary { + val: { + if end == isize::max_value() { + val.iter().skip(start as usize).copied().collect() + } else { + val.iter() + .skip(start as usize) + .take((end - start) as usize) + .copied() + .collect() + } + }, + span: head, + }, + } + } else { + Value::Binary { + val: vec![], + span: head, + } + } + } + + Value::Error { .. } => input.clone(), + other => Value::Error { - error: Box::new(ShellError::OnlySupportsThisInputType { - exp_input_type: "binary".into(), - wrong_type: other.get_type().to_string(), - dst_span: span, - src_span: other.expect_span(), - }), + error: Box::new(ShellError::UnsupportedInput( + "Only binary values are supported".into(), + format!("input type: {:?}", other.get_type()), + head, + // This line requires the Value::Error match above. + other.expect_span(), + )), }, } } - -fn at_impl(input: &[u8], arg: &Arguments, span: Span) -> Value { - let len: isize = input.len() as isize; - - let start: isize = if arg.start < 0 { - arg.start + len - } else { - arg.start - }; - let end: isize = if arg.end < 0 { - std::cmp::max(len + arg.end, 0) - } else { - arg.end - }; - - if start < len && end >= 0 { - match start.cmp(&end) { - Ordering::Equal => Value::Binary { val: vec![], span }, - Ordering::Greater => Value::Error { - error: Box::new(ShellError::TypeMismatch { - err_message: "End must be greater than or equal to Start".to_string(), - span: arg.arg_span, - }), - }, - Ordering::Less => Value::Binary { - val: { - let input_iter = input.iter().copied().skip(start as usize); - if end == isize::max_value() { - input_iter.collect() - } else { - input_iter.take((end - start) as usize).collect() - } - }, - span, - }, - } - } else { - Value::Binary { val: vec![], span } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_examples() { - use crate::test_examples; - - test_examples(BytesAt {}) - } -} diff --git a/crates/nu-command/src/strings/str_/substring.rs b/crates/nu-command/src/strings/str_/substring.rs index 93e740026..020358a40 100644 --- a/crates/nu-command/src/strings/str_/substring.rs +++ b/crates/nu-command/src/strings/str_/substring.rs @@ -1,5 +1,5 @@ -use crate::grapheme_flags; use crate::input_handler::{operate, CmdArgument}; +use crate::{grapheme_flags, util}; use nu_engine::CallExt; use nu_protocol::ast::Call; use nu_protocol::ast::CellPath; @@ -81,7 +81,14 @@ impl Command for SubCommand { input: PipelineData, ) -> Result<PipelineData, ShellError> { let range: Range = call.req(engine_state, stack, 0)?; - let indexes: Substring = process_arguments(&range, call.head)?.into(); + + let indexes = match util::process_range(&range) { + Ok(idxs) => idxs.into(), + Err(processing_error) => { + return Err(processing_error("could not perform substring", call.head)) + } + }; + let cell_paths: Vec<CellPath> = call.rest(engine_state, stack, 1)?; let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths); let args = Arguments { @@ -186,32 +193,6 @@ fn action(input: &Value, args: &Arguments, head: Span) -> Value { } } -fn process_arguments(range: &Range, head: Span) -> Result<(isize, isize), ShellError> { - let start = match &range.from { - Value::Int { val, .. } => *val as isize, - Value::Nothing { .. } => 0, - _ => { - return Err(ShellError::TypeMismatch { - err_message: "could not perform substring".to_string(), - span: head, - }) - } - }; - - let end = match &range.to { - Value::Int { val, .. } => *val as isize, - Value::Nothing { .. } => isize::max_value(), - _ => { - return Err(ShellError::TypeMismatch { - err_message: "could not perform substring".to_string(), - span: head, - }) - } - }; - - Ok((start, end)) -} - #[cfg(test)] mod tests { use super::{action, Arguments, Span, SubCommand, Substring, Value}; diff --git a/crates/nu-command/src/util.rs b/crates/nu-command/src/util.rs index 41fe5ebdb..dff05b408 100644 --- a/crates/nu-command/src/util.rs +++ b/crates/nu-command/src/util.rs @@ -1,5 +1,8 @@ -use nu_protocol::engine::{EngineState, Stack, StateWorkingSet}; -use nu_protocol::CliError; +use nu_protocol::{ + ast::RangeInclusion, + engine::{EngineState, Stack, StateWorkingSet}, + CliError, Range, ShellError, Span, Value, +}; use std::path::PathBuf; pub fn report_error( @@ -38,3 +41,37 @@ pub fn get_guaranteed_cwd(engine_state: &EngineState, stack: &Stack) -> PathBuf crate::util::get_init_cwd() }) } + +type MakeRangeError = fn(&str, Span) -> ShellError; + +pub fn process_range(range: &Range) -> Result<(isize, isize), MakeRangeError> { + let start = match &range.from { + Value::Int { val, .. } => *val as isize, + Value::Nothing { .. } => 0, + _ => { + return Err(|msg, span| ShellError::TypeMismatch { + err_message: msg.to_string(), + span, + }) + } + }; + + let end = match &range.to { + Value::Int { val, .. } => { + if matches!(range.inclusion, RangeInclusion::Inclusive) { + *val as isize + } else { + *val as isize - 1 + } + } + Value::Nothing { .. } => isize::max_value(), + _ => { + return Err(|msg, span| ShellError::TypeMismatch { + err_message: msg.to_string(), + span, + }) + } + }; + + Ok((start, end)) +}