nushell/crates/nu-command/src/conversions/into/int.rs
Stefan Holderbach a52386e837
Box ShellError in Value::Error (#8375)
# Description

Our `ShellError` at the moment has a `std::mem::size_of::<ShellError>()` of
136 bytes (on AMD64). As a result, `Value`, which stores the struct directly,
also required 136 bytes (thanks to alignment requirements).

This change stores the `Value::Error` `ShellError` on the heap.

Pro:
- Value now needs just 80 bytes
- Should be 1 cacheline less (still at least 2 cachelines)

Con:
- More small heap allocations when dealing with `Value::Error`
  - More heap fragmentation
  - Potential for additional required memcopies

# Further code changes

Includes a small refactor of `try` due to a type mismatch in its large
match.

# User-Facing Changes

None for regular users.

Plugin authors may have to update their matches on `Value` if they use
`nu-protocol`

Needs benchmarking to see if there is a benefit in real-world workloads.
**Update:** small improvements in runtime for workloads with a high volume
of values. Significant reduction in maximum resident set size when many
values are held in memory.

# Tests + Formatting
2023-03-12 09:57:27 +01:00

541 lines
18 KiB
Rust

use chrono::{FixedOffset, TimeZone};
use crate::input_handler::{operate, CmdArgument};
use nu_engine::CallExt;
use nu_protocol::{
ast::{Call, CellPath},
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
};
/// Options gathered by `SubCommand::run` and passed to `action` for each value
/// that is converted.
struct Arguments {
/// Base used to interpret the digits of integer and string input; `run`
/// uses 10 when no `--radix` flag is given.
radix: u32,
/// Cell paths given as rest arguments, or `None` when there were none.
cell_paths: Option<Vec<CellPath>>,
/// Whether binary input is decoded as little-endian instead of big-endian.
little_endian: bool,
}
impl CmdArgument for Arguments {
/// Moves the stored cell paths out of the arguments, leaving `None` behind.
fn take_cell_paths(&mut self) -> Option<Vec<CellPath>> {
self.cell_paths.take()
}
}
/// The `into int` command; its behaviour lives in the `Command` implementation.
#[derive(Clone)]
pub struct SubCommand;
impl Command for SubCommand {
    /// Name under which the command is invoked.
    fn name(&self) -> &str {
        "into int"
    }

    /// Declares the accepted input/output type pairs, the `--radix` and
    /// `--little-endian` flags and the optional rest cell paths.
    fn signature(&self) -> Signature {
        Signature::build("into int")
            .input_output_types(vec![
                (Type::String, Type::Int),
                (Type::Number, Type::Int),
                (Type::Bool, Type::Int),
                // Unix timestamp in nanoseconds
                (Type::Date, Type::Int),
                // TODO: Users should do this by dividing a Filesize by a Filesize explicitly
                (Type::Filesize, Type::Int),
            ])
            .vectorizes_over_list(true)
            .named("radix", SyntaxShape::Number, "radix of integer", Some('r'))
            .switch("little-endian", "use little-endian byte decoding", None)
            .rest(
                "rest",
                SyntaxShape::CellPath,
                "for a data structure input, convert data at the given cell paths",
            )
            .category(Category::Conversions)
    }

    /// One-line description of the command.
    fn usage(&self) -> &str {
        "Convert value to integer."
    }

    /// Additional keywords under which the command can be found.
    fn search_terms(&self) -> Vec<&str> {
        vec!["convert", "number", "natural"]
    }

    /// Reads the flags and rest arguments, then applies `action` to the input
    /// through `operate`.
    ///
    /// # Errors
    ///
    /// Returns `ShellError::TypeMismatch` when `--radix` is not an integer in
    /// the range `[2, 36]`, and propagates any error raised while evaluating
    /// the rest arguments or the flag.
    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        let cell_paths = call.rest(engine_state, stack, 0)?;
        let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths);

        let radix: u32 = match call.get_flag::<Value>(engine_state, stack, "radix")? {
            None => 10,
            // The guard keeps the value within [2, 36], so the cast is lossless.
            Some(Value::Int { val, .. }) if (2..=36).contains(&val) => val as u32,
            Some(Value::Int { span, .. }) => {
                return Err(ShellError::TypeMismatch {
                    err_message: "Radix must lie in the range [2, 36]".to_string(),
                    span,
                });
            }
            // The flag's shape is `Number`, so a non-integer can reach this
            // point. Report it instead of silently converting with radix 10.
            Some(other) => {
                return Err(ShellError::TypeMismatch {
                    err_message: "Radix must be an integer in the range [2, 36]".to_string(),
                    span: other.expect_span(),
                });
            }
        };

        let args = Arguments {
            radix,
            little_endian: call.has_flag("little-endian"),
            cell_paths,
        };
        operate(action, args, input, call.head, engine_state.ctrlc.clone())
    }

    /// Usage examples; those with a `result` also serve as test cases.
    fn examples(&self) -> Vec<Example> {
        vec![
            Example {
                description: "Convert string to integer in table",
                example: "[[num]; ['-5'] [4] [1.5]] | into int num",
                result: None,
            },
            Example {
                description: "Convert string to integer",
                example: "'2' | into int",
                result: Some(Value::test_int(2)),
            },
            Example {
                description: "Convert decimal to integer",
                example: "5.9 | into int",
                result: Some(Value::test_int(5)),
            },
            Example {
                description: "Convert decimal string to integer",
                example: "'5.9' | into int",
                result: Some(Value::test_int(5)),
            },
            Example {
                description: "Convert file size to integer",
                example: "4KB | into int",
                result: Some(Value::test_int(4000)),
            },
            Example {
                description: "Convert bool to integer",
                example: "[false, true] | into int",
                result: Some(Value::List {
                    vals: vec![Value::test_int(0), Value::test_int(1)],
                    span: Span::test_data(),
                }),
            },
            Example {
                description: "Convert date to integer (Unix nanosecond timestamp)",
                example: "1983-04-13T12:09:14.123456789-05:00 | into int",
                result: Some(Value::test_int(419101754123456789)),
            },
            Example {
                description: "Convert to integer from binary",
                example: "'1101' | into int -r 2",
                result: Some(Value::test_int(13)),
            },
            Example {
                description: "Convert to integer from hex",
                example: "'FF' | into int -r 16",
                result: Some(Value::test_int(255)),
            },
            Example {
                description: "Convert octal string to integer",
                example: "'0o10132' | into int",
                result: Some(Value::test_int(4186)),
            },
            Example {
                description: "Convert 0 padded string to integer",
                example: "'0010132' | into int",
                result: Some(Value::test_int(10132)),
            },
            Example {
                description: "Convert 0 padded string to integer with radix",
                example: "'0010132' | into int -r 8",
                result: Some(Value::test_int(4186)),
            },
        ]
    }
}
fn action(input: &Value, args: &Arguments, span: Span) -> Value {
let radix = args.radix;
let little_endian = args.little_endian;
match input {
Value::Int { val: _, .. } => {
if radix == 10 {
input.clone()
} else {
convert_int(input, span, radix)
}
}
Value::Filesize { val, .. } => Value::Int { val: *val, span },
Value::Float { val, .. } => Value::Int {
val: {
if radix == 10 {
*val as i64
} else {
match convert_int(
&Value::Int {
val: *val as i64,
span,
},
span,
radix,
)
.as_i64()
{
Ok(v) => v,
_ => {
return Value::Error {
error: Box::new(ShellError::CantConvert {
to_type: "float".to_string(),
from_type: "integer".to_string(),
span,
help: None,
}),
}
}
}
}
},
span,
},
Value::String { val, .. } => {
if radix == 10 {
match int_from_string(val, span) {
Ok(val) => Value::Int { val, span },
Err(error) => Value::Error {
error: Box::new(error),
},
}
} else {
convert_int(input, span, radix)
}
}
Value::Bool { val, .. } => {
if *val {
Value::Int { val: 1, span }
} else {
Value::Int { val: 0, span }
}
}
Value::Date { val, .. } => {
if val
< &FixedOffset::east_opt(0)
.expect("constant")
.with_ymd_and_hms(1677, 9, 21, 0, 12, 44)
.unwrap()
|| val
> &FixedOffset::east_opt(0)
.expect("constant")
.with_ymd_and_hms(2262, 4, 11, 23, 47, 16)
.unwrap()
{
Value::Error {
error: Box::new(ShellError::IncorrectValue {
msg: "DateTime out of range for timestamp: 1677-09-21T00:12:43Z to 2262-04-11T23:47:16".to_string(),
span
}),
}
} else {
Value::Int {
val: val.timestamp_nanos(),
span,
}
}
}
Value::Duration { val, .. } => Value::Int { val: *val, span },
Value::Binary { val, span } => {
use byteorder::{BigEndian, ByteOrder, LittleEndian};
let mut val = val.to_vec();
if little_endian {
while val.len() < 8 {
val.push(0);
}
val.resize(8, 0);
Value::int(LittleEndian::read_i64(&val), *span)
} else {
while val.len() < 8 {
val.insert(0, 0);
}
val.resize(8, 0);
Value::int(BigEndian::read_i64(&val), *span)
}
}
// Propagate errors by explicitly matching them before the final case.
Value::Error { .. } => input.clone(),
other => Value::Error {
error: Box::new(ShellError::OnlySupportsThisInputType {
exp_input_type: "integer, float, filesize, date, string, binary, duration or bool"
.into(),
wrong_type: other.get_type().to_string(),
dst_span: span,
src_span: other.expect_span(),
}),
},
}
}
fn convert_int(input: &Value, head: Span, radix: u32) -> Value {
let i = match input {
Value::Int { val, .. } => val.to_string(),
Value::String { val, .. } => {
let val = val.trim();
if val.starts_with("0x") // hex
|| val.starts_with("0b") // binary
|| val.starts_with("0o")
// octal
{
match int_from_string(val, head) {
Ok(x) => return Value::int(x, head),
Err(e) => return Value::Error { error: Box::new(e) },
}
} else if val.starts_with("00") {
// It's a padded string
match i64::from_str_radix(val, radix) {
Ok(n) => return Value::int(n, head),
Err(e) => {
return Value::Error {
error: Box::new(ShellError::CantConvert {
to_type: "string".to_string(),
from_type: "int".to_string(),
span: head,
help: Some(e.to_string()),
}),
}
}
}
}
val.to_string()
}
// Propagate errors by explicitly matching them before the final case.
Value::Error { .. } => return input.clone(),
other => {
return Value::Error {
error: Box::new(ShellError::OnlySupportsThisInputType {
exp_input_type: "string and integer".into(),
wrong_type: other.get_type().to_string(),
dst_span: head,
src_span: other.expect_span(),
}),
};
}
};
match i64::from_str_radix(i.trim(), radix) {
Ok(n) => Value::int(n, head),
Err(_reason) => Value::Error {
error: Box::new(ShellError::CantConvert {
to_type: "string".to_string(),
from_type: "int".to_string(),
span: head,
help: None,
}),
},
}
}
/// Parses `a_string` as an `i64`, ignoring surrounding whitespace.
///
/// Accepted forms:
/// * `0b…`, `0x…` and `0o…` for binary, hexadecimal and octal digits. The
///   prefix is removed exactly once, so `0b0b1` is rejected.
/// * plain decimal integers;
/// * finite decimal floats, which are truncated with an `as i64` cast.
///
/// # Errors
///
/// Returns `ShellError::CantConvert` (string to int) carrying `span` and a
/// help text describing what was expected.
fn int_from_string(a_string: &str, span: Span) -> Result<i64, ShellError> {
    // Prefix, base and the help text shown when the digits are invalid.
    const PREFIXED_BASES: [(&str, u32, &str); 3] = [
        ("0b", 2, r#"digits following "0b" can only be 0 or 1"#),
        (
            "0x",
            16,
            r#"hexadecimal digits following "0x" should be in 0-9, a-f, or A-F"#,
        ),
        ("0o", 8, r#"octal digits following "0o" should be in 0-7"#),
    ];

    let trimmed = a_string.trim();

    // Every failure is the same error variant; only the help text differs.
    let cant_convert = |help: String| ShellError::CantConvert {
        to_type: "int".to_string(),
        from_type: "string".to_string(),
        span,
        help: Some(help),
    };

    for &(prefix, base, help) in PREFIXED_BASES.iter() {
        if let Some(digits) = trimmed.strip_prefix(prefix) {
            return i64::from_str_radix(digits, base).map_err(|_| cant_convert(help.to_string()));
        }
    }

    if let Ok(n) = trimmed.parse::<i64>() {
        return Ok(n);
    }

    // Fall back to a float so that "5.9" converts to 5. The trimmed text is
    // parsed so that whitespace is handled as in the integer case, and
    // NaN/infinity are rejected because they have no integer value.
    match trimmed.parse::<f64>() {
        Ok(f) if f.is_finite() => Ok(f as i64),
        _ => Err(cant_convert(format!(
            r#"string "{trimmed}" does not represent a valid integer"#
        ))),
    }
}
#[cfg(test)]
mod test {
    use chrono::{DateTime, FixedOffset};
    use rstest::rstest;

    use super::Value;
    use super::*;
    use nu_protocol::Type::Error;

    /// Runs `action` on `input` with the given radix, no cell paths and
    /// `little_endian` unset.
    fn run_action(input: &Value, radix: u32) -> Value {
        let args = Arguments {
            radix,
            cell_paths: None,
            little_endian: false,
        };
        action(input, &args, Span::test_data())
    }

    /// Checks every example of the command that declares a result.
    #[test]
    fn test_examples() {
        use crate::test_examples;

        test_examples(SubCommand {})
    }

    /// A decimal string converts to the integer it spells.
    #[test]
    fn turns_to_integer() {
        let actual = run_action(&Value::test_string("10"), 10);

        assert_eq!(actual, Value::test_int(10));
    }

    /// A `0b` prefix is honoured even with radix 10.
    #[test]
    fn turns_binary_to_integer() {
        let actual = run_action(&Value::test_string("0b101"), 10);

        assert_eq!(actual, Value::test_int(5));
    }

    /// A `0x` prefixed string converts with radix 16.
    #[test]
    fn turns_hex_to_integer() {
        let actual = run_action(&Value::test_string("0xFF"), 16);

        assert_eq!(actual, Value::test_int(255));
    }

    /// Text that merely starts with digits yields an error value.
    #[test]
    fn communicates_parsing_error_given_an_invalid_integerlike_string() {
        let actual = run_action(&Value::test_string("36anra"), 10);

        assert_eq!(actual.get_type(), Error)
    }

    /// Dates inside the supported range convert to nanosecond timestamps.
    #[rstest]
    #[case("2262-04-11T23:47:16+00:00", 0x7fff_ffff_ffff_ffff)]
    #[case("1970-01-01T00:00:00+00:00", 0)]
    #[case("1677-09-21T00:12:44+00:00", -0x7fff_ffff_ffff_ffff)]
    fn datetime_to_int_values_that_work(
        #[case] dt_in: DateTime<FixedOffset>,
        #[case] int_expected: i64,
    ) {
        let actual = run_action(&Value::test_date(dt_in), 10);

        // ignore fractional seconds -- I don't want to hard code test values that might vary due to leap nanoseconds.
        let whole_seconds = int_expected / 1_000_000_000;
        assert_eq!(actual, Value::test_int(whole_seconds * 1_000_000_000));
    }

    /// Dates one second outside the supported range are rejected.
    #[rstest]
    #[case("2262-04-11T23:47:17+00:00", "DateTime out of range for timestamp")]
    #[case("1677-09-21T00:12:43+00:00", "DateTime out of range for timestamp")]
    fn datetime_to_int_values_that_fail(
        #[case] dt_in: DateTime<FixedOffset>,
        #[case] err_expected: &str,
    ) {
        match run_action(&Value::test_date(dt_in), 10) {
            Value::Error { error } => match *error {
                ShellError::IncorrectValue { msg: e, .. } => assert!(
                    e.contains(err_expected),
                    "{e:?} doesn't contain {err_expected}"
                ),
                error => panic!("Unexpected error variant {error:?}"),
            },
            actual => panic!("Unexpected actual value {actual:?}"),
        }
    }
}