Merge branch 'main' into polars_categorical_2

This commit is contained in:
Jack Wright 2025-03-31 08:28:44 -07:00
commit dbce0068b0
26 changed files with 460 additions and 228 deletions

View File

@ -10,4 +10,4 @@ jobs:
uses: actions/checkout@v4.1.7
- name: Check spelling
uses: crate-ci/typos@v1.29.10
uses: crate-ci/typos@v1.31.0

View File

@ -486,9 +486,10 @@ impl NuCompleter {
externals: bool,
strip: bool,
) -> Vec<SemanticSuggestion> {
let config = self.engine_state.get_config();
let mut command_completions = CommandCompletion {
internals,
externals,
externals: !internals || (externals && config.completions.external.enable),
};
let (new_span, prefix) = strip_placeholder_if_any(working_set, &span, strip);
let ctx = Context::new(working_set, new_span, prefix, offset);

View File

@ -864,7 +864,7 @@ fn do_auto_cd(
path.to_string_lossy().to_string()
};
if let PermissionResult::PermissionDenied(_) = have_permission(path.clone()) {
if let PermissionResult::PermissionDenied = have_permission(path.clone()) {
report_shell_error(
engine_state,
&ShellError::Io(IoError::new_with_additional_context(

View File

@ -10,7 +10,7 @@ use nu_cli::NuCompleter;
use nu_engine::eval_block;
use nu_parser::parse;
use nu_path::expand_tilde;
use nu_protocol::{debugger::WithoutDebug, engine::StateWorkingSet, PipelineData};
use nu_protocol::{debugger::WithoutDebug, engine::StateWorkingSet, Config, PipelineData};
use reedline::{Completer, Suggestion};
use rstest::{fixture, rstest};
use support::{
@ -367,7 +367,7 @@ export def say [
/// External command only if starts with `^`
#[test]
fn external_commands_only() {
fn external_commands() {
let engine = new_external_engine();
let mut completer = NuCompleter::new(
Arc::new(engine),
@ -390,6 +390,31 @@ fn external_commands_only() {
match_suggestions(&expected, &suggestions);
}
/// With external completions switched off in the config, external commands
/// are still suggested when the token is explicitly prefixed with `^`
#[test]
fn external_commands_disabled() {
    // Build an engine whose config has `completions.external.enable` turned off
    let mut engine = new_external_engine();
    let mut config = Config::default();
    config.completions.external.enable = false;
    engine.set_config(config);

    let mut completer = NuCompleter::new(
        Arc::new(engine),
        Arc::new(nu_protocol::engine::Stack::new()),
    );

    // An explicit `^` prefix must still yield the external command
    let input = "ls; ^sleep";
    let suggestions = completer.complete(input, input.len());
    let expected: Vec<_> = if cfg!(windows) {
        vec!["sleep.exe"]
    } else {
        vec!["sleep"]
    };
    match_suggestions(&expected, &suggestions);

    // Without the prefix the only suggestion is plain `sleep`
    // (on Windows the external one would show up as `sleep.exe`)
    let input = "sleep";
    let suggestions = completer.complete(input, input.len());
    let expected: Vec<_> = vec!["sleep"];
    match_suggestions(&expected, &suggestions);
}
/// Which completes both internals and externals
#[test]
fn which_command_completions() {

View File

@ -4,6 +4,9 @@ use human_date_parser::{from_human_time, ParseResult};
use nu_cmd_base::input_handler::{operate, CmdArgument};
use nu_engine::command_prelude::*;
/// Number of seconds in one hour; whole-hour zone offsets are multiplied by
/// this before being handed to `FixedOffset::east_opt` / `west_opt`
const HOUR: i32 = 60 * 60;
#[derive(Clone, Debug)]
struct Arguments {
zone_options: Option<Spanned<Zone>>,
format_options: Option<DatetimeFormat>,
@ -272,7 +275,7 @@ impl Command for IntoDatetime {
}
}
#[derive(Clone)]
#[derive(Clone, Debug)]
struct DatetimeFormat(String);
fn action(input: &Value, args: &Arguments, head: Span) -> Value {
@ -322,7 +325,6 @@ fn action(input: &Value, args: &Arguments, head: Span) -> Value {
};
}
}
const HOUR: i32 = 60 * 60;
// Check to see if input looks like a Unix timestamp (i.e. can it be parsed to an int?)
let timestamp = match input {
@ -403,10 +405,56 @@ fn action(input: &Value, args: &Arguments, head: Span) -> Value {
let parse_as_string = |val: &str| {
match dateformat {
Some(dt) => match DateTime::parse_from_str(val, &dt.0) {
Ok(d) => Value::date ( d, head ),
Some(dt_format) => match DateTime::parse_from_str(val, &dt_format.0) {
Ok(dt) => {
match timezone {
None => {
Value::date ( dt, head )
},
Some(Spanned { item, span }) => match item {
Zone::Utc => {
Value::date ( dt, head )
}
Zone::Local => {
Value::date(dt.with_timezone(&Local).into(), *span)
}
Zone::East(i) => match FixedOffset::east_opt((*i as i32) * HOUR) {
Some(eastoffset) => {
Value::date(dt.with_timezone(&eastoffset), *span)
}
None => Value::error(
ShellError::DatetimeParseError {
msg: input.to_abbreviated_string(&nu_protocol::Config::default()),
span: *span,
},
*span,
),
},
Zone::West(i) => match FixedOffset::west_opt((*i as i32) * HOUR) {
Some(westoffset) => {
Value::date(dt.with_timezone(&westoffset), *span)
}
None => Value::error(
ShellError::DatetimeParseError {
msg: input.to_abbreviated_string(&nu_protocol::Config::default()),
span: *span,
},
*span,
),
},
Zone::Error => Value::error(
// This is an argument error, not an input error
ShellError::TypeMismatch {
err_message: "Invalid timezone or offset".to_string(),
span: *span,
},
*span,
),
},
}
},
Err(reason) => {
match NaiveDateTime::parse_from_str(val, &dt.0) {
match NaiveDateTime::parse_from_str(val, &dt_format.0) {
Ok(d) => {
let dt_fixed =
Local.from_local_datetime(&d).single().unwrap_or_default();
@ -415,7 +463,7 @@ fn action(input: &Value, args: &Arguments, head: Span) -> Value {
}
Err(_) => {
Value::error (
ShellError::CantConvert { to_type: format!("could not parse as datetime using format '{}'", dt.0), from_type: reason.to_string(), span: head, help: Some("you can use `into datetime` without a format string to enable flexible parsing".to_string()) },
ShellError::CantConvert { to_type: format!("could not parse as datetime using format '{}'", dt_format.0), from_type: reason.to_string(), span: head, help: Some("you can use `into datetime` without a format string to enable flexible parsing".to_string()) },
head,
)
}
@ -629,6 +677,49 @@ mod tests {
assert_eq!(actual, expected)
}
/// An integer timestamp combined with a `%s` format and an east-of-UTC
/// offset of 8 hours should come back as the same instant at `+08:00`
#[test]
fn takes_timestamp_offset_as_int_with_formatting() {
    let args = Arguments {
        zone_options: Some(Spanned {
            item: Zone::East(8),
            span: Span::test_data(),
        }),
        format_options: Some(DatetimeFormat("%s".to_string())),
        cell_paths: None,
    };
    let input = Value::test_int(1_614_434_140);

    // 1_614_434_140 is 2021-02-27 13:55:40 UTC, i.e. 21:55:40 at +08:00
    let expected = Value::date(
        DateTime::parse_from_str("2021-02-27 21:55:40 +08:00", "%Y-%m-%d %H:%M:%S %z").unwrap(),
        Span::test_data(),
    );

    let actual = action(&input, &args, Span::test_data());
    assert_eq!(actual, expected)
}
/// An integer timestamp combined with a `%s` format and the local zone
/// should compare equal to the UTC date built from the same timestamp
#[test]
fn takes_timestamp_offset_as_int_with_local_timezone() {
    let args = Arguments {
        zone_options: Some(Spanned {
            item: Zone::Local,
            span: Span::test_data(),
        }),
        format_options: Some(DatetimeFormat("%s".to_string())),
        cell_paths: None,
    };
    let input = Value::test_int(1_614_434_140);

    // The expectation is built from the same epoch seconds in UTC
    let expected = Value::date(
        Utc.timestamp_opt(1_614_434_140, 0).unwrap().into(),
        Span::test_data(),
    );

    let actual = action(&input, &args, Span::test_data());
    assert_eq!(actual, expected)
}
#[test]
fn takes_timestamp() {
let date_str = Value::test_string("1614434140000000000");
@ -643,7 +734,7 @@ mod tests {
};
let actual = action(&date_str, &args, Span::test_data());
let expected = Value::date(
Local.timestamp_opt(1614434140, 0).unwrap().into(),
Local.timestamp_opt(1_614_434_140, 0).unwrap().into(),
Span::test_data(),
);
@ -662,7 +753,7 @@ mod tests {
cell_paths: None,
};
let expected = Value::date(
Local.timestamp_opt(1614434140, 0).unwrap().into(),
Local.timestamp_opt(1_614_434_140, 0).unwrap().into(),
Span::test_data(),
);
let actual = action(&expected, &args, Span::test_data());
@ -681,7 +772,7 @@ mod tests {
let actual = action(&date_str, &args, Span::test_data());
let expected = Value::date(
Utc.timestamp_opt(1614434140, 0).unwrap().into(),
Utc.timestamp_opt(1_614_434_140, 0).unwrap().into(),
Span::test_data(),
);

View File

@ -132,7 +132,7 @@ impl Command for Cd {
stack.set_cwd(path)?;
Ok(PipelineData::empty())
}
PermissionResult::PermissionDenied(_) => {
PermissionResult::PermissionDenied => {
Err(IoError::new(std::io::ErrorKind::PermissionDenied, call.head, path).into())
}
}

View File

@ -1,6 +1,5 @@
use crate::math::utils::ensure_bounded;
use nu_engine::command_prelude::*;
use nu_protocol::Range;
#[derive(Clone)]
pub struct MathAbs;
@ -57,10 +56,7 @@ impl Command for MathAbs {
..,
) = input
{
match &**val {
Range::IntRange(range) => ensure_bounded(range.end(), internal_span, head)?,
Range::FloatRange(range) => ensure_bounded(range.end(), internal_span, head)?,
}
ensure_bounded(val.as_ref(), internal_span, head)?;
}
input.map(move |value| abs_helper(value, head), engine_state.signals())
}
@ -80,10 +76,7 @@ impl Command for MathAbs {
..,
) = input
{
match &**val {
Range::IntRange(range) => ensure_bounded(range.end(), internal_span, head)?,
Range::FloatRange(range) => ensure_bounded(range.end(), internal_span, head)?,
}
ensure_bounded(val.as_ref(), internal_span, head)?;
}
input.map(
move |value| abs_helper(value, head),

View File

@ -1,6 +1,5 @@
use crate::math::utils::ensure_bounded;
use nu_engine::command_prelude::*;
use nu_protocol::Range;
#[derive(Clone)]
pub struct MathCeil;
@ -56,10 +55,7 @@ impl Command for MathCeil {
..,
) = input
{
match &**val {
Range::IntRange(range) => ensure_bounded(range.end(), internal_span, head)?,
Range::FloatRange(range) => ensure_bounded(range.end(), internal_span, head)?,
}
ensure_bounded(val.as_ref(), internal_span, head)?;
}
input.map(move |value| operate(value, head), engine_state.signals())
}
@ -83,10 +79,7 @@ impl Command for MathCeil {
..,
) = input
{
match &**val {
Range::IntRange(range) => ensure_bounded(range.end(), internal_span, head)?,
Range::FloatRange(range) => ensure_bounded(range.end(), internal_span, head)?,
}
ensure_bounded(val.as_ref(), internal_span, head)?;
}
input.map(
move |value| operate(value, head),

View File

@ -1,6 +1,5 @@
use crate::math::utils::ensure_bounded;
use nu_engine::command_prelude::*;
use nu_protocol::Range;
#[derive(Clone)]
pub struct MathFloor;
@ -56,10 +55,7 @@ impl Command for MathFloor {
..,
) = input
{
match &**val {
Range::IntRange(range) => ensure_bounded(range.end(), internal_span, head)?,
Range::FloatRange(range) => ensure_bounded(range.end(), internal_span, head)?,
}
ensure_bounded(val.as_ref(), internal_span, head)?;
}
input.map(move |value| operate(value, head), engine_state.signals())
}
@ -83,10 +79,7 @@ impl Command for MathFloor {
..,
) = input
{
match &**val {
Range::IntRange(range) => ensure_bounded(range.end(), internal_span, head)?,
Range::FloatRange(range) => ensure_bounded(range.end(), internal_span, head)?,
}
ensure_bounded(val.as_ref(), internal_span, head)?;
}
input.map(
move |value| operate(value, head),

View File

@ -1,6 +1,5 @@
use crate::math::utils::ensure_bounded;
use nu_engine::command_prelude::*;
use nu_protocol::Range;
use nu_protocol::Signals;
#[derive(Clone)]
@ -59,10 +58,7 @@ impl Command for MathLog {
..,
) = input
{
match &**val {
Range::IntRange(range) => ensure_bounded(range.end(), internal_span, head)?,
Range::FloatRange(range) => ensure_bounded(range.end(), internal_span, head)?,
}
ensure_bounded(val.as_ref(), internal_span, head)?;
}
log(base, call.head, input, engine_state.signals())
}
@ -83,10 +79,7 @@ impl Command for MathLog {
..,
) = input
{
match &**val {
Range::IntRange(range) => ensure_bounded(range.end(), internal_span, head)?,
Range::FloatRange(range) => ensure_bounded(range.end(), internal_span, head)?,
}
ensure_bounded(val.as_ref(), internal_span, head)?;
}
log(base, call.head, input, working_set.permanent().signals())
}

View File

@ -1,6 +1,5 @@
use crate::math::utils::ensure_bounded;
use nu_engine::command_prelude::*;
use nu_protocol::Range;
#[derive(Clone)]
pub struct MathRound;
@ -63,10 +62,7 @@ impl Command for MathRound {
..,
) = input
{
match &**val {
Range::IntRange(range) => ensure_bounded(range.end(), internal_span, head)?,
Range::FloatRange(range) => ensure_bounded(range.end(), internal_span, head)?,
}
ensure_bounded(val.as_ref(), internal_span, head)?;
}
input.map(
move |value| operate(value, head, precision_param),
@ -94,10 +90,7 @@ impl Command for MathRound {
..,
) = input
{
match &**val {
Range::IntRange(range) => ensure_bounded(range.end(), internal_span, head)?,
Range::FloatRange(range) => ensure_bounded(range.end(), internal_span, head)?,
}
ensure_bounded(val.as_ref(), internal_span, head)?;
}
input.map(
move |value| operate(value, head, precision_param),

View File

@ -1,6 +1,5 @@
use crate::math::utils::ensure_bounded;
use nu_engine::command_prelude::*;
use nu_protocol::Range;
#[derive(Clone)]
pub struct MathSqrt;
@ -56,10 +55,7 @@ impl Command for MathSqrt {
..,
) = input
{
match &**val {
Range::IntRange(range) => ensure_bounded(range.end(), internal_span, head)?,
Range::FloatRange(range) => ensure_bounded(range.end(), internal_span, head)?,
}
ensure_bounded(val.as_ref(), internal_span, head)?;
}
input.map(move |value| operate(value, head), engine_state.signals())
}
@ -83,10 +79,7 @@ impl Command for MathSqrt {
..,
) = input
{
match &**val {
Range::IntRange(range) => ensure_bounded(range.end(), internal_span, head)?,
Range::FloatRange(range) => ensure_bounded(range.end(), internal_span, head)?,
}
ensure_bounded(val.as_ref(), internal_span, head)?;
}
input.map(
move |value| operate(value, head),

View File

@ -1,4 +1,4 @@
use core::{ops::Bound, slice};
use core::slice;
use indexmap::IndexMap;
use nu_protocol::{
engine::Call, IntoPipelineData, PipelineData, Range, ShellError, Signals, Span, Value,
@ -93,10 +93,7 @@ pub fn calculate(
Ok(Value::record(record, span))
}
PipelineData::Value(Value::Range { val, .. }, ..) => {
match *val {
Range::IntRange(range) => ensure_bounded(range.end(), span, name)?,
Range::FloatRange(range) => ensure_bounded(range.end(), span, name)?,
}
ensure_bounded(val.as_ref(), span, name)?;
let new_vals: Result<Vec<Value>, ShellError> = val
.into_range_iter(span, Signals::empty())
.map(|val| mf(&[val], span, name))
@ -117,17 +114,13 @@ pub fn calculate(
}
}
pub fn ensure_bounded<T>(
bound: Bound<T>,
val_span: Span,
call_span: Span,
) -> Result<(), ShellError> {
match bound {
Bound::<T>::Unbounded => Err(ShellError::IncorrectValue {
pub fn ensure_bounded(range: &Range, val_span: Span, call_span: Span) -> Result<(), ShellError> {
if range.is_bounded() {
return Ok(());
}
Err(ShellError::IncorrectValue {
msg: "Range must be bounded".to_string(),
val_span,
call_span,
}),
_ => Ok(()),
}
})
}

View File

@ -82,6 +82,14 @@ fn catch_block_can_use_error_object() {
assert_eq!(output.out, "Division by zero.")
}
/// An input-type mismatch raised inside `try` must be catchable, and its
/// message must survive being rethrown through `error make`
#[test]
fn catch_input_type_mismatch_and_rethrow() {
    let outcome = nu!(
        "let x: any = 1; try { $x | get 1 } catch {|err| error make { msg: ($err | get msg) } }"
    );
    let stderr = &outcome.err;
    assert!(stderr.contains("Input type not supported"));
}
// This test is disabled on Windows because it causes a stack overflow in CI (but not locally!).
// For reasons we don't understand, the Windows CI runners are prone to stack overflow.
// TODO: investigate so we can enable on Windows

View File

@ -1026,8 +1026,7 @@ fn eval_call<D: DebugContext>(
// Set up redirect modes
let mut caller_stack = caller_stack.push_redirection(redirect_out.take(), redirect_err.take());
let result;
let result = (|| {
if let Some(block_id) = decl.block_id() {
// If the decl is a custom command
let block = engine_state.get_block(block_id);
@ -1052,15 +1051,17 @@ fn eval_call<D: DebugContext>(
// recoverable in Rust.
callee_stack.recursion_count += 1;
result = eval_block_with_early_return::<D>(engine_state, &mut callee_stack, block, input);
let result =
eval_block_with_early_return::<D>(engine_state, &mut callee_stack, block, input);
// Move environment variables back into the caller stack scope if requested to do so
if block.redirect_env {
redirect_env(engine_state, &mut caller_stack, &callee_stack);
}
result
} else {
check_input_types(&input, &decl.signature(), head)?;
// FIXME: precalculate this and save it somewhere
let span = Span::merge_many(
std::iter::once(head).chain(
@ -1081,8 +1082,9 @@ fn eval_call<D: DebugContext>(
};
// Run the call
result = decl.run(engine_state, &mut caller_stack, &(&call).into(), input);
};
decl.run(engine_state, &mut caller_stack, &(&call).into(), input)
}
})();
drop(caller_stack);

View File

@ -38,6 +38,17 @@ impl LanguageServer {
});
}
for warn in working_set.parse_warnings.iter() {
let message = warn.to_string();
diagnostics.diagnostics.push(Diagnostic {
range: span_to_range(&warn.span(), file, span.start),
severity: Some(DiagnosticSeverity::WARNING),
message,
..Default::default()
});
}
self.connection
.sender
.send(lsp_server::Message::Notification(

View File

@ -1,6 +1,7 @@
//! A Range is an iterator over integers or floats.
use crate::{ast::RangeInclusion, ShellError, Signals, Span, Value};
use core::ops::Bound;
use serde::{Deserialize, Serialize};
use std::{cmp::Ordering, fmt::Display};
@ -631,6 +632,13 @@ impl Range {
}
}
/// Returns `true` when the range has an end bound, i.e. its end is
/// anything other than [`Bound::Unbounded`]
pub fn is_bounded(&self) -> bool {
    let open_ended = match self {
        Range::IntRange(ints) => matches!(ints.end(), Bound::Unbounded),
        Range::FloatRange(floats) => matches!(floats.end(), Bound::Unbounded),
    };
    !open_ended
}
pub fn into_range_iter(self, span: Span, signals: Signals) -> Iter {
match self {
Range::IntRange(range) => Iter::IntIter(range.into_range_iter(signals), span),

View File

@ -11,6 +11,9 @@ export def copy [
--ansi (-a) # Copy ansi formatting
]: any -> nothing {
let input = $in | collect
if not $ansi {
$env.config.use_ansi_coloring = false
}
let text = match ($input | describe -d | get type) {
$type if $type in [ table, record, list ] => {
$input | table -e
@ -18,18 +21,7 @@ export def copy [
_ => {$input}
}
let do_strip_ansi = match $ansi {
true => {{||}}
false => {{|| ansi strip }}
}
let output = (
$text
| do $do_strip_ansi
| encode base64
)
print -n $'(ansi osc)52;c;($output)(ansi st)'
print -n $'(ansi osc)52;c;($text | encode base64)(ansi st)'
}
# Paste contents of system clipboard

View File

@ -1,29 +1,23 @@
#[cfg(unix)]
use nix::unistd::{access, AccessFlags};
#[cfg(any(windows, unix))]
use std::path::Path;
#[cfg(unix)]
use {
nix::{
sys::stat::{mode_t, Mode},
unistd::{Gid, Uid},
},
std::os::unix::fs::MetadataExt,
};
// The result of checking whether we have permission to cd to a directory
#[derive(Debug)]
pub enum PermissionResult<'a> {
pub enum PermissionResult {
PermissionOk,
PermissionDenied(&'a str),
PermissionDenied,
}
// TODO: Maybe we should use file_attributes() from https://doc.rust-lang.org/std/os/windows/fs/trait.MetadataExt.html
// More on that here: https://learn.microsoft.com/en-us/windows/win32/fileio/file-attribute-constants
#[cfg(windows)]
pub fn have_permission(dir: impl AsRef<Path>) -> PermissionResult<'static> {
pub fn have_permission(dir: impl AsRef<Path>) -> PermissionResult {
match dir.as_ref().read_dir() {
Err(e) => {
if matches!(e.kind(), std::io::ErrorKind::PermissionDenied) {
PermissionResult::PermissionDenied("Folder is unable to be read")
PermissionResult::PermissionDenied
} else {
PermissionResult::PermissionOk
}
@ -33,73 +27,15 @@ pub fn have_permission(dir: impl AsRef<Path>) -> PermissionResult<'static> {
}
#[cfg(unix)]
pub fn have_permission(dir: impl AsRef<Path>) -> PermissionResult<'static> {
match dir.as_ref().metadata() {
Ok(metadata) => {
let mode = Mode::from_bits_truncate(metadata.mode() as mode_t);
let current_user_uid = users::get_current_uid();
if current_user_uid.is_root() {
return PermissionResult::PermissionOk;
}
let current_user_gid = users::get_current_gid();
let owner_user = Uid::from_raw(metadata.uid());
let owner_group = Gid::from_raw(metadata.gid());
match (
current_user_uid == owner_user,
current_user_gid == owner_group,
) {
(true, _) => {
if mode.contains(Mode::S_IXUSR) {
PermissionResult::PermissionOk
} else {
PermissionResult::PermissionDenied(
"You are the owner but do not have execute permission",
)
}
}
(false, true) => {
if mode.contains(Mode::S_IXGRP) {
PermissionResult::PermissionOk
} else {
PermissionResult::PermissionDenied(
"You are in the group but do not have execute permission",
)
}
}
(false, false) => {
if mode.contains(Mode::S_IXOTH)
|| (mode.contains(Mode::S_IXGRP)
&& any_group(current_user_gid, owner_group))
{
PermissionResult::PermissionOk
} else {
PermissionResult::PermissionDenied(
"You are neither the owner, in the group, nor the super user and do not have permission",
)
}
}
}
}
Err(_) => PermissionResult::PermissionDenied("Could not retrieve file metadata"),
}
}
#[cfg(any(target_os = "linux", target_os = "freebsd", target_os = "android"))]
fn any_group(_current_user_gid: Gid, owner_group: Gid) -> bool {
users::current_user_groups()
.unwrap_or_default()
.contains(&owner_group)
}
#[cfg(all(
unix,
not(any(target_os = "linux", target_os = "freebsd", target_os = "android"))
))]
fn any_group(current_user_gid: Gid, owner_group: Gid) -> bool {
users::get_current_username()
.and_then(|name| users::get_user_groups(&name, current_user_gid))
.unwrap_or_default()
.contains(&owner_group)
/// Check that the process' user id has permissions to execute or
/// in the case of a directory traverse the particular directory
pub fn have_permission(dir: impl AsRef<Path>) -> PermissionResult {
// We check permissions for real user id, but that's fine, because in
// proper installations of nushell, effective UID (EUID) rarely differs
// from real UID (RUID). We strongly advise against setting the setuid bit
// on the nushell executable or shebang scripts starts with `#!/usr/bin/env nu` e.g.
// Most Unix systems ignore setuid on shebang by default anyway.
access(dir.as_ref(), AccessFlags::X_OK).into()
}
#[cfg(unix)]
@ -209,3 +145,12 @@ pub mod users {
}
}
}
/// Collapse any `Result` into a [`PermissionResult`]: success means the
/// permission check passed, any error is reported as a denial
impl<T, E> From<Result<T, E>> for PermissionResult {
    fn from(value: Result<T, E>) -> Self {
        // The payloads are irrelevant; only success vs. failure matters
        if value.is_ok() {
            Self::PermissionOk
        } else {
            Self::PermissionDenied
        }
    }
}

View File

@ -32,7 +32,7 @@ serde = { version = "1.0", features = ["derive"] }
sqlparser = { version = "0.53"}
polars-io = { version = "0.46", features = ["avro", "cloud", "aws"]}
polars-arrow = { version = "0.46"}
polars-ops = { version = "0.46", features = ["pivot"]}
polars-ops = { version = "0.46", features = ["pivot", "cutqcut"]}
polars-plan = { version = "0.46", features = ["regex"]}
polars-utils = { version = "0.46"}
typetag = "0.2"

View File

@ -0,0 +1,89 @@
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Type};
use polars::prelude::PlSmallStr;
use crate::{
values::{CustomValueSupport, NuDataFrame},
PolarsPlugin,
};
/// Plugin command `polars cut`: bins the values of a series into discrete
/// categories delimited by user-supplied break points
pub struct CutSeries;

impl PluginCommand for CutSeries {
    type Plugin = PolarsPlugin;

    /// Name under which the command is registered
    fn name(&self) -> &str {
        "polars cut"
    }

    /// One-line summary of the command
    fn description(&self) -> &str {
        "Bin continuous values into discrete categories for a series."
    }

    /// Declares the positional `breaks` argument, the optional `--labels`
    /// list and the `--left_closed` / `--include_breaks` switches
    fn signature(&self) -> nu_protocol::Signature {
        Signature::build(self.name())
            // The command reads this argument as a list of numbers
            // (`Vec<f64>`), so the help text describes a list rather than a dataframe
            .required("breaks", SyntaxShape::Any, "List of unique cut points.")
            .named(
                "labels",
                SyntaxShape::List(Box::new(SyntaxShape::String)),
                "Names of the categories. The number of labels must be equal to the number of cut points plus one.",
                Some('l'),
            )
            .switch("left_closed", "Set the intervals to be left-closed instead of right-closed.", Some('c'))
            .switch("include_breaks", "Include a column with the right endpoint of the bin each observation falls in. This will change the data type of the output from a Categorical to a Struct.", Some('b'))
            .input_output_type(
                Type::Custom("dataframe".into()),
                Type::Custom("dataframe".into()),
            )
            .category(Category::Custom("dataframe".into()))
    }

    /// Usage examples; no expected result is attached to the example
    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "Divide the column into three categories.",
            example: r#"[-2, -1, 0, 1, 2] | polars into-df | polars cut [-1, 1] --labels ["a", "b", "c"]"#,
            result: None,
        }]
    }

    /// Delegates to `command` and converts its `ShellError` into a
    /// `LabeledError`
    fn run(
        &self,
        plugin: &Self::Plugin,
        engine: &EngineInterface,
        call: &EvaluatedCall,
        input: PipelineData,
    ) -> Result<PipelineData, nu_protocol::LabeledError> {
        command(plugin, engine, call, input).map_err(|e| e.into())
    }
}
/// Implementation of `polars cut`: coerces the pipeline input into a
/// dataframe, takes it as a single series, bins it with
/// `polars_ops::series::cut` and returns the result as pipeline data
///
/// # Errors
/// Propagates argument/flag extraction and dataframe conversion errors; a
/// failure inside `cut` is reported as a `ShellError::GenericError`
/// pointing at the call head.
fn command(
    plugin: &PolarsPlugin,
    engine: &EngineInterface,
    call: &EvaluatedCall,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let head = call.head;

    let frame = NuDataFrame::try_from_pipeline_coerce(plugin, input, head)?;
    let series = frame.as_series(head)?;

    // Positional argument 0 holds the break points
    let breaks: Vec<f64> = call.req(0)?;

    // Optional category names, converted to the string type polars expects
    let labels: Option<Vec<PlSmallStr>> = call
        .get_flag::<Vec<String>>("labels")?
        .map(|names| names.into_iter().map(PlSmallStr::from).collect());

    let left_closed = call.has_flag("left_closed")?;
    let include_breaks = call.has_flag("include_breaks")?;

    let binned =
        match polars_ops::series::cut(&series, breaks, labels, left_closed, include_breaks) {
            Ok(result) => result,
            Err(e) => {
                return Err(ShellError::GenericError {
                    error: "Error cutting series".into(),
                    msg: e.to_string(),
                    span: Some(head),
                    help: None,
                    inner: vec![],
                })
            }
        };

    NuDataFrame::try_from_series(binned, head)?.to_pipeline_data(plugin, engine, head)
}

View File

@ -5,6 +5,7 @@ mod cast;
mod col;
mod collect;
mod concat;
mod cut;
mod drop;
mod drop_duplicates;
mod drop_nulls;
@ -22,6 +23,7 @@ mod last;
mod len;
mod lit;
mod pivot;
mod qcut;
mod query_df;
mod rename;
mod reverse;
@ -75,6 +77,7 @@ pub(crate) fn data_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlugin
vec![
Box::new(AppendDF),
Box::new(CastDF),
Box::new(cut::CutSeries),
Box::new(DropDF),
Box::new(concat::ConcatDF),
Box::new(DropDuplicates),
@ -108,6 +111,7 @@ pub(crate) fn data_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlugin
Box::new(LazySortBy),
Box::new(LazyFilter),
Box::new(Shift),
Box::new(qcut::QCutSeries),
Box::new(Unique),
Box::new(unnest::UnnestDF),
]

View File

@ -0,0 +1,98 @@
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Type};
use polars::prelude::PlSmallStr;
use crate::{
values::{CustomValueSupport, NuDataFrame},
PolarsPlugin,
};
/// Plugin command `polars qcut`: bins the values of a series into discrete
/// categories delimited by quantiles of the data
pub struct QCutSeries;

impl PluginCommand for QCutSeries {
    type Plugin = PolarsPlugin;

    /// Name under which the command is registered
    fn name(&self) -> &str {
        "polars qcut"
    }

    /// One-line summary of the command
    fn description(&self) -> &str {
        "Bin continuous values into discrete categories based on their quantiles for a series."
    }

    /// Declares the positional `quantiles` argument, the optional `--labels`
    /// list and the `--left_closed` / `--include_breaks` /
    /// `--allow_duplicates` switches
    fn signature(&self) -> nu_protocol::Signature {
        Signature::build(self.name())
            // The command reads this argument as `Vec<f64>` only, so the help
            // text must not advertise an integer bin count
            .required("quantiles", SyntaxShape::Any, "A list of quantile probabilities between 0 and 1.")
            .named(
                "labels",
                SyntaxShape::List(Box::new(SyntaxShape::String)),
                "Names of the categories. The number of labels must be equal to the number of cut points plus one.",
                Some('l'),
            )
            .switch("left_closed", "Set the intervals to be left-closed instead of right-closed.", Some('c'))
            .switch("include_breaks", "Include a column with the right endpoint of the bin each observation falls in. This will change the data type of the output from a Categorical to a Struct.", Some('b'))
            .switch("allow_duplicates", "If set, duplicates in the resulting quantiles are dropped, rather than raising an error. This can happen even with unique probabilities, depending on the data.", Some('d'))
            .input_output_type(
                Type::Custom("dataframe".into()),
                Type::Custom("dataframe".into()),
            )
            .category(Category::Custom("dataframe".into()))
    }

    /// Usage examples; no expected result is attached to the example
    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "Divide a column into three categories according to pre-defined quantile probabilities.",
            example: r#"[-2, -1, 0, 1, 2] | polars into-df | polars qcut [0.25, 0.75] --labels ["a", "b", "c"]"#,
            result: None,
        }]
    }

    /// Delegates to `command` and converts its `ShellError` into a
    /// `LabeledError`
    fn run(
        &self,
        plugin: &Self::Plugin,
        engine: &EngineInterface,
        call: &EvaluatedCall,
        input: PipelineData,
    ) -> Result<PipelineData, nu_protocol::LabeledError> {
        command(plugin, engine, call, input).map_err(|e| e.into())
    }
}
/// Implementation of `polars qcut`: coerces the pipeline input into a
/// dataframe, takes it as a single series, bins it with
/// `polars_ops::series::qcut` and returns the result as pipeline data
///
/// # Errors
/// Propagates argument/flag extraction and dataframe conversion errors; a
/// failure inside `qcut` is reported as a `ShellError::GenericError`
/// pointing at the call head.
fn command(
    plugin: &PolarsPlugin,
    engine: &EngineInterface,
    call: &EvaluatedCall,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let head = call.head;

    let frame = NuDataFrame::try_from_pipeline_coerce(plugin, input, head)?;
    let series = frame.as_series(head)?;

    // Positional argument 0 holds the quantile probabilities
    let quantiles: Vec<f64> = call.req(0)?;

    // Optional category names, converted to the string type polars expects
    let labels: Option<Vec<PlSmallStr>> = call
        .get_flag::<Vec<String>>("labels")?
        .map(|names| names.into_iter().map(PlSmallStr::from).collect());

    let left_closed = call.has_flag("left_closed")?;
    let include_breaks = call.has_flag("include_breaks")?;
    let allow_duplicates = call.has_flag("allow_duplicates")?;

    // Note the argument order: `allow_duplicates` precedes `include_breaks`
    let outcome = polars_ops::series::qcut(
        &series,
        quantiles,
        labels,
        left_closed,
        allow_duplicates,
        include_breaks,
    );
    let binned = match outcome {
        Ok(result) => result,
        Err(e) => {
            return Err(ShellError::GenericError {
                error: "Error cutting series".into(),
                msg: e.to_string(),
                span: Some(head),
                help: None,
                inner: vec![],
            })
        }
    };

    NuDataFrame::try_from_series(binned, head)?.to_pipeline_data(plugin, engine, head)
}

View File

@ -246,6 +246,11 @@ fn env_shlvl_commandstring_does_not_increment() {
// test run will likely hang, at least for some users.
// Instead, use -e / --execute with an `exit` to test REPL
// functionality as demonstrated below.
//
// We've also learned that `-e 'exit'` is not enough to
// prevent failures entirely. For now we're going to ignore
// these tests until we can find a better solution.
#[ignore = "Causing hangs when both tests overlap"]
#[test]
fn env_shlvl_in_repl() {
let actual = nu!("
@ -256,6 +261,7 @@ fn env_shlvl_in_repl() {
assert_eq!(actual.out, "6");
}
#[ignore = "Causing hangs when both tests overlap"]
#[test]
fn env_shlvl_in_exec_repl() {
let actual = nu!(r#"

View File

@ -22,6 +22,7 @@ extend-ignore-re = [
[type.rust.extend-words]
nd = "nd"
typ = "typ"
numer = "numer"
styl = "styl"