nushell/crates/nu-command/src/strings/char_.rs
Ian Manske 8da27a1a09
Create Record type (#10103)
# Description
This PR creates a new `Record` type to reduce duplicate code and
possibly bugs as well. (This is an edited version of #9648.)
- `Record` implements `FromIterator` and `IntoIterator` and so can be
iterated over or collected into. For example, this helps with
conversions to and from (hash)maps. (Also, no more
`cols.iter().zip(vals)`!)
- `Record` has a `push(col, val)` function to help insure that the
number of columns is equal to the number of values. I caught a few
potential bugs thanks to this (e.g. in the `ls` command).
- Finally, this PR also adds a `record!` macro that helps simplify
record creation. It is used like so:
   ```rust
   record! {
       "key1" => some_value,
       "key2" => Value::string("text", span),
       "key3" => Value::int(optional_int.unwrap_or(0), span),
       "key4" => Value::bool(config.setting, span),
   }
   ```
Since macros hinder formatting, etc., the right hand side values should
be relatively short and sweet like the examples above.

Where possible, prefer `record!` or `.collect()` on an iterator instead
of multiple `Record::push`s, since the first two automatically set the
record capacity and do less work overall.

# User-Facing Changes
Besides the changes in `nu-protocol` the only other breaking changes are
to `nu-table::{ExpandedTable::build_map, JustTable::kv_table}`.
2023-08-25 07:50:29 +12:00

355 lines
14 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

use indexmap::indexmap;
use indexmap::map::IndexMap;
use nu_engine::CallExt;
use nu_protocol::engine::{EngineState, Stack};
use nu_protocol::record;
use nu_protocol::{
ast::Call, engine::Command, Category, Example, IntoInterruptiblePipelineData, IntoPipelineData,
PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
};
use once_cell::sync::Lazy;
// Character used to separate directories in a Path Environment variable on windows is ";"
#[cfg(target_family = "windows")]
const ENV_PATH_SEPARATOR_CHAR: char = ';';
// Character used to separate directories in a Path Environment variable on linux/mac/unix is ":"
#[cfg(not(target_family = "windows"))]
const ENV_PATH_SEPARATOR_CHAR: char = ':';
#[derive(Clone)]
pub struct Char;
static CHAR_MAP: Lazy<IndexMap<&'static str, String>> = Lazy::new(|| {
indexmap! {
// These are some regular characters that either can't be used or
// it's just easier to use them like this.
// This are the "normal" characters section
"newline" => '\n'.to_string(),
"enter" => '\n'.to_string(),
"nl" => '\n'.to_string(),
"line_feed" => '\n'.to_string(),
"lf" => '\n'.to_string(),
"carriage_return" => '\r'.to_string(),
"cr" => '\r'.to_string(),
"crlf" => "\r\n".to_string(),
"tab" => '\t'.to_string(),
"sp" => ' '.to_string(),
"space" => ' '.to_string(),
"pipe" => '|'.to_string(),
"left_brace" => '{'.to_string(),
"lbrace" => '{'.to_string(),
"right_brace" => '}'.to_string(),
"rbrace" => '}'.to_string(),
"left_paren" => '('.to_string(),
"lp" => '('.to_string(),
"lparen" => '('.to_string(),
"right_paren" => ')'.to_string(),
"rparen" => ')'.to_string(),
"rp" => ')'.to_string(),
"left_bracket" => '['.to_string(),
"lbracket" => '['.to_string(),
"right_bracket" => ']'.to_string(),
"rbracket" => ']'.to_string(),
"single_quote" => '\''.to_string(),
"squote" => '\''.to_string(),
"sq" => '\''.to_string(),
"double_quote" => '\"'.to_string(),
"dquote" => '\"'.to_string(),
"dq" => '\"'.to_string(),
"path_sep" => std::path::MAIN_SEPARATOR.to_string(),
"psep" => std::path::MAIN_SEPARATOR.to_string(),
"separator" => std::path::MAIN_SEPARATOR.to_string(),
"esep" => ENV_PATH_SEPARATOR_CHAR.to_string(),
"env_sep" => ENV_PATH_SEPARATOR_CHAR.to_string(),
"tilde" => '~'.to_string(), // ~
"twiddle" => '~'.to_string(), // ~
"squiggly" => '~'.to_string(), // ~
"home" => '~'.to_string(), // ~
"hash" => '#'.to_string(), // #
"hashtag" => '#'.to_string(), // #
"pound_sign" => '#'.to_string(), // #
"sharp" => '#'.to_string(), // #
"root" => '#'.to_string(), // #
// This is the unicode section
// Unicode names came from https://www.compart.com/en/unicode
// Private Use Area (U+E000-U+F8FF)
// Unicode can't be mixed with Ansi or it will break width calculation
"nf_branch" => '\u{e0a0}'.to_string(), // 
"nf_segment" => '\u{e0b0}'.to_string(), // 
"nf_left_segment" => '\u{e0b0}'.to_string(), // 
"nf_left_segment_thin" => '\u{e0b1}'.to_string(), // 
"nf_right_segment" => '\u{e0b2}'.to_string(), // 
"nf_right_segment_thin" => '\u{e0b3}'.to_string(), // 
"nf_git" => '\u{f1d3}'.to_string(), // 
"nf_git_branch" => "\u{e709}\u{e0a0}".to_string(), // 
"nf_folder1" => '\u{f07c}'.to_string(), // 
"nf_folder2" => '\u{f115}'.to_string(), // 
"nf_house1" => '\u{f015}'.to_string(), // 
"nf_house2" => '\u{f7db}'.to_string(), // 
"identical_to" => '\u{2261}'.to_string(), // ≡
"hamburger" => '\u{2261}'.to_string(), // ≡
"not_identical_to" => '\u{2262}'.to_string(), // ≢
"branch_untracked" => '\u{2262}'.to_string(), // ≢
"strictly_equivalent_to" => '\u{2263}'.to_string(), // ≣
"branch_identical" => '\u{2263}'.to_string(), // ≣
"upwards_arrow" => '\u{2191}'.to_string(), // ↑
"branch_ahead" => '\u{2191}'.to_string(), // ↑
"downwards_arrow" => '\u{2193}'.to_string(), // ↓
"branch_behind" => '\u{2193}'.to_string(), // ↓
"up_down_arrow" => '\u{2195}'.to_string(), // ↕
"branch_ahead_behind" => '\u{2195}'.to_string(), // ↕
"black_right_pointing_triangle" => '\u{25b6}'.to_string(), // ▶
"prompt" => '\u{25b6}'.to_string(), // ▶
"vector_or_cross_product" => '\u{2a2f}'.to_string(), //
"failed" => '\u{2a2f}'.to_string(), //
"high_voltage_sign" => '\u{26a1}'.to_string(), // ⚡
"elevated" => '\u{26a1}'.to_string(), // ⚡
// This is the emoji section
// Weather symbols
// https://www.babelstone.co.uk/Unicode/whatisit.html
"sun" => "☀️".to_string(), //2600 + fe0f
"sunny" => "☀️".to_string(), //2600 + fe0f
"sunrise" => "☀️".to_string(), //2600 + fe0f
"moon" => "🌛".to_string(), //1f31b
"cloudy" => "☁️".to_string(), //2601 + fe0f
"cloud" => "☁️".to_string(), //2601 + fe0f
"clouds" => "☁️".to_string(), //2601 + fe0f
"rainy" => "🌦️".to_string(), //1f326 + fe0f
"rain" => "🌦️".to_string(), //1f326 + fe0f
"foggy" => "🌫️".to_string(), //1f32b + fe0f
"fog" => "🌫️".to_string(), //1f32b + fe0f
"mist" => '\u{2591}'.to_string(), //2591
"haze" => '\u{2591}'.to_string(), //2591
"snowy" => "❄️".to_string(), //2744 + fe0f
"snow" => "❄️".to_string(), //2744 + fe0f
"thunderstorm" => "🌩️".to_string(),//1f329 + fe0f
"thunder" => "🌩️".to_string(), //1f329 + fe0f
// This is the "other" section
"bel" => '\x07'.to_string(), // Terminal Bell
"backspace" => '\x08'.to_string(), // Backspace
// separators
"file_separator" => '\x1c'.to_string(),
"file_sep" => '\x1c'.to_string(),
"fs" => '\x1c'.to_string(),
"group_separator" => '\x1d'.to_string(),
"group_sep" => '\x1d'.to_string(),
"gs" => '\x1d'.to_string(),
"record_separator" => '\x1e'.to_string(),
"record_sep" => '\x1e'.to_string(),
"rs" => '\x1e'.to_string(),
"unit_separator" => '\x1f'.to_string(),
"unit_sep" => '\x1f'.to_string(),
"us" => '\x1f'.to_string(),
}
});
impl Command for Char {
fn name(&self) -> &str {
"char"
}
fn signature(&self) -> Signature {
Signature::build("char")
.input_output_types(vec![
(Type::Nothing, Type::String),
(Type::Nothing, Type::Table(vec![])),
])
.optional(
"character",
SyntaxShape::Any,
"the name of the character to output",
)
.rest("rest", SyntaxShape::Any, "multiple Unicode bytes")
.switch("list", "List all supported character names", Some('l'))
.switch("unicode", "Unicode string i.e. 1f378", Some('u'))
.switch("integer", "Create a codepoint from an integer", Some('i'))
.allow_variants_without_examples(true)
.category(Category::Strings)
}
fn usage(&self) -> &str {
"Output special characters (e.g., 'newline')."
}
fn search_terms(&self) -> Vec<&str> {
vec!["line break", "newline", "Unicode"]
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Output newline",
example: r#"char newline"#,
result: Some(Value::test_string("\n")),
},
Example {
description: "List available characters",
example: r#"char --list"#,
result: None,
},
Example {
description: "Output prompt character, newline and a hamburger menu character",
example: r#"(char prompt) + (char newline) + (char hamburger)"#,
result: Some(Value::test_string("\u{25b6}\n\u{2261}")),
},
Example {
description: "Output Unicode character",
example: r#"char -u 1f378"#,
result: Some(Value::test_string("\u{1f378}")),
},
Example {
description: "Create Unicode from integer codepoint values",
example: r#"char -i (0x60 + 1) (0x60 + 2)"#,
result: Some(Value::test_string("ab")),
},
Example {
description: "Output multi-byte Unicode character",
example: r#"char -u 1F468 200D 1F466 200D 1F466"#,
result: Some(Value::test_string(
"\u{1F468}\u{200D}\u{1F466}\u{200D}\u{1F466}",
)),
},
]
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
_input: PipelineData,
) -> Result<PipelineData, ShellError> {
let call_span = call.head;
// handle -l flag
if call.has_flag("list") {
return Ok(CHAR_MAP
.iter()
.map(move |(name, s)| {
let unicode = Value::string(
s.chars()
.map(|c| format!("{:x}", c as u32))
.collect::<Vec<String>>()
.join(" "),
call_span,
);
let record = record! {
"name" => Value::string(*name, call_span),
"character" => Value::string(s, call_span),
"unicode" => unicode,
};
Value::record(record, call_span)
})
.into_pipeline_data(engine_state.ctrlc.clone()));
}
// handle -u flag
if call.has_flag("integer") {
let args: Vec<i64> = call.rest(engine_state, stack, 0)?;
if args.is_empty() {
return Err(ShellError::MissingParameter {
param_name: "missing at least one unicode character".into(),
span: call_span,
});
}
let mut multi_byte = String::new();
for (i, &arg) in args.iter().enumerate() {
let span = call
.positional_nth(i)
.expect("Unexpected missing argument")
.span;
multi_byte.push(integer_to_unicode_char(arg, span)?)
}
Ok(Value::string(multi_byte, call_span).into_pipeline_data())
} else if call.has_flag("unicode") {
let args: Vec<String> = call.rest(engine_state, stack, 0)?;
if args.is_empty() {
return Err(ShellError::MissingParameter {
param_name: "missing at least one unicode character".into(),
span: call_span,
});
}
let mut multi_byte = String::new();
for (i, arg) in args.iter().enumerate() {
let span = call
.positional_nth(i)
.expect("Unexpected missing argument")
.span;
multi_byte.push(string_to_unicode_char(arg, span)?)
}
Ok(Value::string(multi_byte, call_span).into_pipeline_data())
} else {
let args: Vec<String> = call.rest(engine_state, stack, 0)?;
if args.is_empty() {
return Err(ShellError::MissingParameter {
param_name: "missing name of the character".into(),
span: call_span,
});
}
let special_character = str_to_character(&args[0]);
if let Some(output) = special_character {
Ok(Value::string(output, call_span).into_pipeline_data())
} else {
Err(ShellError::TypeMismatch {
err_message: "error finding named character".into(),
span: call
.positional_nth(0)
.expect("Unexpected missing argument")
.span,
})
}
}
}
}
fn integer_to_unicode_char(value: i64, t: Span) -> Result<char, ShellError> {
let decoded_char = value.try_into().ok().and_then(std::char::from_u32);
if let Some(ch) = decoded_char {
Ok(ch)
} else {
Err(ShellError::TypeMismatch {
err_message: "not a valid Unicode codepoint".into(),
span: t,
})
}
}
fn string_to_unicode_char(s: &str, t: Span) -> Result<char, ShellError> {
let decoded_char = u32::from_str_radix(s, 16)
.ok()
.and_then(std::char::from_u32);
if let Some(ch) = decoded_char {
Ok(ch)
} else {
Err(ShellError::TypeMismatch {
err_message: "error decoding Unicode character".into(),
span: t,
})
}
}
fn str_to_character(s: &str) -> Option<String> {
CHAR_MAP.get(s).map(|s| s.into())
}
#[cfg(test)]
mod tests {
use super::Char;
#[test]
fn examples_work_as_expected() {
use crate::test_examples;
test_examples(Char {})
}
}