Move dataframes support to a plugin (#12220)

WIP

This PR covers migration crates/nu-cmd-dataframes to a new plugin
./crates/nu_plugin_polars

## TODO List

Other:
- [X] Fix examples
- [x] Fix Plugin Test Harness
- [X] Move Cache to Mutex<BTreeMap>
- [X] Logic for disabling/enabling plugin GC based off whether items are
cached.
- [x] NuExpression custom values
- [X] Optimize caching (don't cache every object creation). 
- [x] Fix dataframe operations (in NuDataFrameCustomValue::operations)
- [x] Added plugin_debug! macro that for checking an env variable
POLARS_PLUGIN_DEBUG

Fix duplicated commands:
- [x] There are two polars median commands, one for lazy and one for
expr.. there should only be one that works for both. I temporarily
called on polars expr-median (inside expressions_macros.rs)
- [x] polars quantile (lazy, and expr). the expr one is temporarily
expr-median
- [x] polars is-in (renamed one series-is-in)

Commands:
- [x] AppendDF
- [x] CastDF
- [X] ColumnsDF
- [x] DataTypes
- [x] Summary
- [x] DropDF
- [x] DropDuplicates
- [x] DropNulls
- [x] Dummies
- [x] FilterWith
- [X] FirstDF
- [x] GetDF
- [x] LastDF
- [X] ListDF
- [x] MeltDF
- [X] OpenDataFrame
- [x] QueryDf
- [x] RenameDF
- [x] SampleDF
- [x] SchemaDF
- [x] ShapeDF
- [x] SliceDF
- [x] TakeDF
- [X] ToArrow
- [x] ToAvro
- [X] ToCSV
- [X] ToDataFrame
- [X] ToNu
- [x] ToParquet
- [x] ToJsonLines
- [x] WithColumn
- [x] ExprAlias
- [x] ExprArgWhere
- [x] ExprCol
- [x] ExprConcatStr
- [x] ExprCount
- [x] ExprLit
- [x] ExprWhen
- [x] ExprOtherwise
- [x] ExprQuantile
- [x] ExprList
- [x] ExprAggGroups
- [x] ExprCount
- [x] ExprIsIn
- [x] ExprNot
- [x] ExprMax
- [x] ExprMin
- [x] ExprSum
- [x] ExprMean
- [x] ExprMedian
- [x] ExprStd
- [x] ExprVar
- [x] ExprDatePart
- [X] LazyAggregate
- [x] LazyCache
- [X] LazyCollect
- [x] LazyFetch
- [x] LazyFillNA
- [x] LazyFillNull
- [x] LazyFilter
- [x] LazyJoin
- [x] LazyQuantile
- [x] LazyMedian
- [x] LazyReverse
- [x] LazySelect
- [x] LazySortBy
- [x] ToLazyFrame
- [x] ToLazyGroupBy
- [x] LazyExplode
- [x] LazyFlatten
- [x] AllFalse
- [x] AllTrue
- [x] ArgMax
- [x] ArgMin
- [x] ArgSort
- [x] ArgTrue
- [x] ArgUnique
- [x] AsDate
- [x] AsDateTime
- [x] Concatenate
- [x] Contains
- [x] Cumulative
- [x] GetDay
- [x] GetHour
- [x] GetMinute
- [x] GetMonth
- [x] GetNanosecond
- [x] GetOrdinal
- [x] GetSecond
- [x] GetWeek
- [x] GetWeekDay
- [x] GetYear
- [x] IsDuplicated
- [x] IsIn
- [x] IsNotNull
- [x] IsNull
- [x] IsUnique
- [x] NNull
- [x] NUnique
- [x] NotSeries
- [x] Replace
- [x] ReplaceAll
- [x] Rolling
- [x] SetSeries
- [x] SetWithIndex
- [x] Shift
- [x] StrLengths
- [x] StrSlice
- [x] StrFTime
- [x] ToLowerCase
- [x] ToUpperCase
- [x] Unique
- [x] ValueCount

---------

Co-authored-by: Jack Wright <jack.wright@disqo.com>
This commit is contained in:
Jack Wright 2024-04-09 17:31:43 -07:00 committed by GitHub
parent cbbccaa722
commit efc1cfa939
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
137 changed files with 21181 additions and 379 deletions

823
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -47,6 +47,7 @@ members = [
"crates/nu_plugin_query",
"crates/nu_plugin_custom_values",
"crates/nu_plugin_formats",
"crates/nu_plugin_polars",
"crates/nu-std",
"crates/nu-table",
"crates/nu-term-grid",

View File

@ -48,7 +48,6 @@ impl Command for ToLazyFrame {
let df = NuDataFrame::try_from_iter(input.into_iter(), maybe_schema)?;
let lazy = NuLazyFrame::from_dataframe(df);
let value = Value::custom(Box::new(lazy), call.head);
Ok(PipelineData::Value(value, None))
}
}

View File

@ -0,0 +1,78 @@
[package]
authors = ["The Nushell Project Developers"]
description = "Nushell dataframe plugin commands based on polars."
edition = "2021"
license = "MIT"
name = "nu_plugin_polars"
repository = "https://github.com/nushell/nushell/tree/main/crates/nu-cmd-dataframe"
version = "0.92.2"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[[bin]]
name = "nu_plugin_polars"
bench = false
[lib]
bench = false
[dependencies]
nu-protocol = { path = "../nu-protocol", version = "0.92.2" }
nu-plugin = { path = "../nu-plugin", version = "0.92.2" }
# Potential dependencies for extras
chrono = { workspace = true, features = ["std", "unstable-locales"], default-features = false }
chrono-tz = "0.8"
fancy-regex = { workspace = true }
indexmap = { version = "2.2" }
num = {version = "0.4"}
serde = { version = "1.0", features = ["derive"] }
sqlparser = { version = "0.43"}
polars-io = { version = "0.37", features = ["avro"]}
polars-arrow = { version = "0.37"}
polars-ops = { version = "0.37"}
polars-plan = { version = "0.37", features = ["regex"]}
polars-utils = { version = "0.37"}
typetag = "0.2"
uuid = { version = "1.7", features = ["v4", "serde"] }
[dependencies.polars]
features = [
"arg_where",
"checked_arithmetic",
"concat_str",
"cross_join",
"csv",
"cum_agg",
"default",
"dtype-categorical",
"dtype-datetime",
"dtype-struct",
"dtype-i8",
"dtype-i16",
"dtype-u8",
"dtype-u16",
"dynamic_group_by",
"ipc",
"is_in",
"json",
"lazy",
"object",
"parquet",
"random",
"rolling_window",
"rows",
"serde",
"serde-lazy",
"strings",
"to_dummies",
]
optional = false
version = "0.37"
[dev-dependencies]
nu-cmd-lang = { path = "../nu-cmd-lang", version = "0.92.2" }
nu-engine = { path = "../nu-engine", version = "0.92.2" }
nu-parser = { path = "../nu-parser", version = "0.92.2" }
nu-command = { path = "../nu-command", version = "0.92.2" }
nu-plugin-test-support = { path = "../nu-plugin-test-support", version = "0.92.2" }

View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2019 - 2023 The Nushell Project Developers
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -0,0 +1,122 @@
use std::{
collections::HashMap,
sync::{Mutex, MutexGuard},
};
use nu_plugin::EngineInterface;
use nu_protocol::{LabeledError, ShellError};
use uuid::Uuid;
use crate::{plugin_debug, values::PolarsPluginObject, PolarsPlugin};
#[derive(Default)]
pub struct Cache {
cache: Mutex<HashMap<Uuid, PolarsPluginObject>>,
}
impl Cache {
fn lock(&self) -> Result<MutexGuard<HashMap<Uuid, PolarsPluginObject>>, ShellError> {
self.cache.lock().map_err(|e| ShellError::GenericError {
error: format!("error acquiring cache lock: {e}"),
msg: "".into(),
span: None,
help: None,
inner: vec![],
})
}
/// Removes an item from the plugin cache.
/// The maybe_engine parameter is required outside of testing
pub fn remove(
&self,
maybe_engine: Option<&EngineInterface>,
uuid: &Uuid,
) -> Result<Option<PolarsPluginObject>, ShellError> {
let mut lock = self.lock()?;
let removed = lock.remove(uuid);
plugin_debug!("PolarsPlugin: removing {uuid} from cache: {removed:?}");
// Once there are no more entries in the cache
// we can turn plugin gc back on
match maybe_engine {
Some(engine) if lock.is_empty() => {
plugin_debug!("PolarsPlugin: Cache is empty enabling GC");
engine.set_gc_disabled(false).map_err(LabeledError::from)?;
}
_ => (),
};
drop(lock);
Ok(removed)
}
/// Inserts an item into the plugin cache.
/// The maybe_engine parameter is required outside of testing
pub fn insert(
&self,
maybe_engine: Option<&EngineInterface>,
uuid: Uuid,
value: PolarsPluginObject,
) -> Result<Option<PolarsPluginObject>, ShellError> {
let mut lock = self.lock()?;
plugin_debug!("PolarsPlugin: Inserting {uuid} into cache: {value:?}");
// turn off plugin gc the first time an entry is added to the cache
// as we don't want the plugin to be garbage collected if there
// is any live data
match maybe_engine {
Some(engine) if lock.is_empty() => {
plugin_debug!("PolarsPlugin: Cache has values disabling GC");
engine.set_gc_disabled(true).map_err(LabeledError::from)?;
}
_ => (),
};
let result = lock.insert(uuid, value);
drop(lock);
Ok(result)
}
pub fn get(&self, uuid: &Uuid) -> Result<Option<PolarsPluginObject>, ShellError> {
let lock = self.lock()?;
let result = lock.get(uuid).cloned();
drop(lock);
Ok(result)
}
pub fn process_entries<F, T>(&self, mut func: F) -> Result<Vec<T>, ShellError>
where
F: FnMut((&Uuid, &PolarsPluginObject)) -> Result<T, ShellError>,
{
let lock = self.lock()?;
let mut vals: Vec<T> = Vec::new();
for entry in lock.iter() {
eprintln!("entry: {:?}", entry);
let val = func(entry)?;
vals.push(val);
}
drop(lock);
Ok(vals)
}
}
pub trait Cacheable: Sized + Clone {
fn cache_id(&self) -> &Uuid;
fn to_cache_value(&self) -> Result<PolarsPluginObject, ShellError>;
fn from_cache_value(cv: PolarsPluginObject) -> Result<Self, ShellError>;
fn cache(self, plugin: &PolarsPlugin, engine: &EngineInterface) -> Result<Self, ShellError> {
plugin.cache.insert(
Some(engine),
self.cache_id().to_owned(),
self.to_cache_value()?,
)?;
Ok(self)
}
fn get_cached(plugin: &PolarsPlugin, id: &Uuid) -> Result<Option<Self>, ShellError> {
if let Some(cache_value) = plugin.cache.get(id)? {
Ok(Some(Self::from_cache_value(cache_value)?))
} else {
Ok(None)
}
}
}

View File

@ -0,0 +1,12 @@
# Dataframe
This dataframe directory holds all of the definitions of the dataframe data structures and commands.
There are three sections of commands:
* [eager](./eager)
* [series](./series)
* [values](./values)
For more details see the
[Nushell book section on dataframes](https://www.nushell.sh/book/dataframes.html)

View File

@ -0,0 +1,144 @@
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use crate::{
values::{to_pipeline_data, Axis, Column, CustomValueSupport, NuDataFrame},
PolarsPlugin,
};
#[derive(Clone)]
pub struct AppendDF;
impl PluginCommand for AppendDF {
type Plugin = PolarsPlugin;
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("other", SyntaxShape::Any, "other dataframe to append")
.switch("col", "append as new columns instead of rows", None)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
fn name(&self) -> &str {
"polars append"
}
fn usage(&self) -> &str {
"Appends a new dataframe."
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Appends a dataframe as new columns",
example: r#"let a = ([[a b]; [1 2] [3 4]] | polars into-df);
$a | polars append $a"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
Column::new(
"a_x".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
),
Column::new(
"b_x".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Appends a dataframe merging at the end of columns",
example: r#"let a = ([[a b]; [1 2] [3 4]] | polars into-df); $a | polars append $a --col"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![
Value::test_int(1),
Value::test_int(3),
Value::test_int(1),
Value::test_int(3),
],
),
Column::new(
"b".to_string(),
vec![
Value::test_int(2),
Value::test_int(4),
Value::test_int(2),
Value::test_int(4),
],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let other: Value = call.req(0)?;
let axis = if call.has_flag("col")? {
Axis::Column
} else {
Axis::Row
};
let df_other = NuDataFrame::try_from_value_coerce(plugin, &other, call.head)?;
let df = NuDataFrame::try_from_pipeline(plugin, input, call.head)?;
let df = df.append_df(&df_other, axis, call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&AppendDF)
}
}

View File

@ -0,0 +1,202 @@
use crate::{
dataframe::values::{str_to_dtype, to_pipeline_data, NuExpression, NuLazyFrame},
values::{cant_convert_err, PolarsPluginObject, PolarsPluginType},
PolarsPlugin,
};
use super::super::values::NuDataFrame;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
record, Category, Example, LabeledError, PipelineData, ShellError, Signature, Span,
SyntaxShape, Type, Value,
};
use polars::prelude::*;
#[derive(Clone)]
pub struct CastDF;
impl PluginCommand for CastDF {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars cast"
}
fn usage(&self) -> &str {
"Cast a column to a different dtype."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_types(vec![
(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.required(
"dtype",
SyntaxShape::String,
"The dtype to cast the column to",
)
.optional(
"column",
SyntaxShape::String,
"The column to cast. Required when used with a dataframe.",
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Cast a column in a dataframe to a different dtype",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars cast u8 a | polars schema",
result: Some(Value::record(
record! {
"a" => Value::string("u8", Span::test_data()),
"b" => Value::string("i64", Span::test_data()),
},
Span::test_data(),
)),
},
Example {
description: "Cast a column in a lazy dataframe to a different dtype",
example:
"[[a b]; [1 2] [3 4]] | polars into-df | polars into-lazy | polars cast u8 a | polars schema",
result: Some(Value::record(
record! {
"a" => Value::string("u8", Span::test_data()),
"b" => Value::string("i64", Span::test_data()),
},
Span::test_data(),
)),
},
Example {
description: "Cast a column in a expression to a different dtype",
example: r#"[[a b]; [1 2] [1 4]] | polars into-df | polars group-by a | polars agg [ (polars col b | polars cast u8 | polars min | polars as "b_min") ] | polars schema"#,
result: None,
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuLazyFrame(lazy) => {
let (dtype, column_nm) = df_args(call)?;
command_lazy(plugin, engine, call, column_nm, dtype, lazy)
}
PolarsPluginObject::NuDataFrame(df) => {
let (dtype, column_nm) = df_args(call)?;
command_eager(plugin, engine, call, column_nm, dtype, df)
}
PolarsPluginObject::NuExpression(expr) => {
let dtype: String = call.req(0)?;
let dtype = str_to_dtype(&dtype, call.head)?;
let expr: NuExpression = expr.to_polars().cast(dtype).into();
to_pipeline_data(plugin, engine, call.head, expr)
}
_ => Err(cant_convert_err(
&value,
&[
PolarsPluginType::NuDataFrame,
PolarsPluginType::NuLazyFrame,
PolarsPluginType::NuExpression,
],
)),
}
.map_err(LabeledError::from)
}
}
fn df_args(call: &EvaluatedCall) -> Result<(DataType, String), ShellError> {
let dtype = dtype_arg(call)?;
let column_nm: String = call.opt(1)?.ok_or(ShellError::MissingParameter {
param_name: "column_name".into(),
span: call.head,
})?;
Ok((dtype, column_nm))
}
fn dtype_arg(call: &EvaluatedCall) -> Result<DataType, ShellError> {
let dtype: String = call.req(0)?;
str_to_dtype(&dtype, call.head)
}
fn command_lazy(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
column_nm: String,
dtype: DataType,
lazy: NuLazyFrame,
) -> Result<PipelineData, ShellError> {
let column = col(&column_nm).cast(dtype);
let lazy = lazy.to_polars().with_columns(&[column]);
let lazy = NuLazyFrame::new(false, lazy);
to_pipeline_data(plugin, engine, call.head, lazy)
}
fn command_eager(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
column_nm: String,
dtype: DataType,
nu_df: NuDataFrame,
) -> Result<PipelineData, ShellError> {
let mut df = (*nu_df.df).clone();
let column = df
.column(&column_nm)
.map_err(|e| ShellError::GenericError {
error: format!("{e}"),
msg: "".into(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let casted = column.cast(&dtype).map_err(|e| ShellError::GenericError {
error: format!("{e}"),
msg: "".into(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let _ = df
.with_column(casted)
.map_err(|e| ShellError::GenericError {
error: format!("{e}"),
msg: "".into(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let df = NuDataFrame::new(false, df);
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&CastDF)
}
}

View File

@ -0,0 +1,79 @@
use crate::PolarsPlugin;
use super::super::values::NuDataFrame;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
#[derive(Clone)]
pub struct ColumnsDF;
impl PluginCommand for ColumnsDF {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars columns"
}
fn usage(&self) -> &str {
"Show dataframe columns."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(Type::Custom("dataframe".into()), Type::Any)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Dataframe columns",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars columns",
result: Some(Value::list(
vec![Value::test_string("a"), Value::test_string("b")],
Span::test_data(),
)),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
_engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, call, input).map_err(|e| e.into())
}
}
fn command(
plugin: &PolarsPlugin,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let names: Vec<Value> = df
.as_ref()
.get_column_names()
.iter()
.map(|v| Value::string(*v, call.head))
.collect();
let names = Value::list(names, call.head);
Ok(PipelineData::Value(names, None))
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&ColumnsDF)
}
}

View File

@ -0,0 +1,127 @@
use crate::values::to_pipeline_data;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use crate::values::CustomValueSupport;
use crate::PolarsPlugin;
use super::super::values::utils::convert_columns;
use super::super::values::{Column, NuDataFrame};
#[derive(Clone)]
pub struct DropDF;
impl PluginCommand for DropDF {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars drop"
}
fn usage(&self) -> &str {
"Creates a new dataframe by dropping the selected columns."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.rest("rest", SyntaxShape::Any, "column names to be dropped")
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "drop column a",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars drop a",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let columns: Vec<Value> = call.rest(0)?;
let (col_string, col_span) = convert_columns(columns, call.head)?;
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let new_df = col_string
.first()
.ok_or_else(|| ShellError::GenericError {
error: "Empty names list".into(),
msg: "No column names were found".into(),
span: Some(col_span),
help: None,
inner: vec![],
})
.and_then(|col| {
df.as_ref()
.drop(&col.item)
.map_err(|e| ShellError::GenericError {
error: "Error dropping column".into(),
msg: e.to_string(),
span: Some(col.span),
help: None,
inner: vec![],
})
})?;
// If there are more columns in the drop selection list, these
// are added from the resulting dataframe
let polars_df = col_string.iter().skip(1).try_fold(new_df, |new_df, col| {
new_df
.drop(&col.item)
.map_err(|e| ShellError::GenericError {
error: "Error dropping column".into(),
msg: e.to_string(),
span: Some(col.span),
help: None,
inner: vec![],
})
})?;
let final_df = NuDataFrame::new(df.from_lazy, polars_df);
to_pipeline_data(plugin, engine, call.head, final_df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&DropDF)
}
}

View File

@ -0,0 +1,133 @@
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use polars::prelude::UniqueKeepStrategy;
use crate::values::{to_pipeline_data, CustomValueSupport};
use crate::PolarsPlugin;
use super::super::values::utils::convert_columns_string;
use super::super::values::{Column, NuDataFrame};
#[derive(Clone)]
pub struct DropDuplicates;
impl PluginCommand for DropDuplicates {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars drop-duplicates"
}
fn usage(&self) -> &str {
"Drops duplicate values in dataframe."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.optional(
"subset",
SyntaxShape::Table(vec![]),
"subset of columns to drop duplicates",
)
.switch("maintain", "maintain order", Some('m'))
.switch(
"last",
"keeps last duplicate value (by default keeps first)",
Some('l'),
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "drop duplicates",
example: "[[a b]; [1 2] [3 4] [1 2]] | polars into-df | polars drop-duplicates",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(3), Value::test_int(1)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(4), Value::test_int(2)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let columns: Option<Vec<Value>> = call.opt(0)?;
let (subset, col_span) = match columns {
Some(cols) => {
let (agg_string, col_span) = convert_columns_string(cols, call.head)?;
(Some(agg_string), col_span)
}
None => (None, call.head),
};
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let subset_slice = subset.as_ref().map(|cols| &cols[..]);
let keep_strategy = if call.has_flag("last")? {
UniqueKeepStrategy::Last
} else {
UniqueKeepStrategy::First
};
let polars_df = df
.as_ref()
.unique(subset_slice, keep_strategy, None)
.map_err(|e| ShellError::GenericError {
error: "Error dropping duplicates".into(),
msg: e.to_string(),
span: Some(col_span),
help: None,
inner: vec![],
})?;
let df = NuDataFrame::new(df.from_lazy, polars_df);
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use crate::test::test_polars_plugin_command;
use super::*;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&DropDuplicates)
}
}

View File

@ -0,0 +1,149 @@
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use crate::values::{to_pipeline_data, CustomValueSupport};
use crate::PolarsPlugin;
use super::super::values::utils::convert_columns_string;
use super::super::values::{Column, NuDataFrame};
#[derive(Clone)]
pub struct DropNulls;
impl PluginCommand for DropNulls {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars drop-nulls"
}
fn usage(&self) -> &str {
"Drops null values in dataframe."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.optional(
"subset",
SyntaxShape::Table(vec![]),
"subset of columns to drop nulls",
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "drop null values in dataframe",
example: r#"let df = ([[a b]; [1 2] [3 0] [1 2]] | polars into-df);
let res = ($df.b / $df.b);
let a = ($df | polars with-column $res --name res);
$a | polars drop-nulls"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(1)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(2)],
),
Column::new(
"res".to_string(),
vec![Value::test_int(1), Value::test_int(1)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "drop null values in dataframe",
example: r#"let s = ([1 2 0 0 3 4] | polars into-df);
($s / $s) | polars drop-nulls"#,
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"div_0_0".to_string(),
vec![
Value::test_int(1),
Value::test_int(1),
Value::test_int(1),
Value::test_int(1),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let columns: Option<Vec<Value>> = call.opt(0)?;
let (subset, col_span) = match columns {
Some(cols) => {
let (agg_string, col_span) = convert_columns_string(cols, call.head)?;
(Some(agg_string), col_span)
}
None => (None, call.head),
};
let subset_slice = subset.as_ref().map(|cols| &cols[..]);
let polars_df = df
.as_ref()
.drop_nulls(subset_slice)
.map_err(|e| ShellError::GenericError {
error: "Error dropping nulls".into(),
msg: e.to_string(),
span: Some(col_span),
help: None,
inner: vec![],
})?;
let df = NuDataFrame::new(df.from_lazy, polars_df);
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&DropNulls)
}
}

View File

@ -0,0 +1,111 @@
use crate::PolarsPlugin;
use super::super::values::{to_pipeline_data, Column, CustomValueSupport, NuDataFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
#[derive(Clone)]
pub struct DataTypes;
impl PluginCommand for DataTypes {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars dtypes"
}
fn usage(&self) -> &str {
"Show dataframe data types."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Dataframe dtypes",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars dtypes",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"column".to_string(),
vec![Value::test_string("a"), Value::test_string("b")],
),
Column::new(
"dtype".to_string(),
vec![Value::test_string("i64"), Value::test_string("i64")],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let mut dtypes: Vec<Value> = Vec::new();
let names: Vec<Value> = df
.as_ref()
.get_column_names()
.iter()
.map(|v| {
let dtype = df
.as_ref()
.column(v)
.expect("using name from list of names from dataframe")
.dtype();
let dtype_str = dtype.to_string();
dtypes.push(Value::string(dtype_str, call.head));
Value::string(*v, call.head)
})
.collect();
let names_col = Column::new("column".to_string(), names);
let dtypes_col = Column::new("dtype".to_string(), dtypes);
let df = NuDataFrame::try_from_columns(vec![names_col, dtypes_col], None)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&DataTypes)
}
}

View File

@ -0,0 +1,119 @@
use super::super::values::NuDataFrame;
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type,
};
use polars::{prelude::*, series::Series};
#[derive(Clone)]
pub struct Dummies;
impl PluginCommand for Dummies {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars dummies"
}
fn usage(&self) -> &str {
"Creates a new dataframe with dummy variables."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.switch("drop-first", "Drop first row", Some('d'))
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Create new dataframe with dummy variables from a dataframe",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars dummies",
result: Some(
NuDataFrame::try_from_series_vec(
vec![
Series::new("a_1", &[1_u8, 0]),
Series::new("a_3", &[0_u8, 1]),
Series::new("b_2", &[1_u8, 0]),
Series::new("b_4", &[0_u8, 1]),
],
Span::test_data(),
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Create new dataframe with dummy variables from a series",
example: "[1 2 2 3 3] | polars into-df | polars dummies",
result: Some(
NuDataFrame::try_from_series_vec(
vec![
Series::new("0_1", &[1_u8, 0, 0, 0, 0]),
Series::new("0_2", &[0_u8, 1, 1, 0, 0]),
Series::new("0_3", &[0_u8, 0, 0, 1, 1]),
],
Span::test_data(),
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let drop_first: bool = call.has_flag("drop-first")?;
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let polars_df =
df.as_ref()
.to_dummies(None, drop_first)
.map_err(|e| ShellError::GenericError {
error: "Error calculating dummies".into(),
msg: e.to_string(),
span: Some(call.head),
help: Some("The only allowed column types for dummies are String or Int".into()),
inner: vec![],
})?;
let df: NuDataFrame = polars_df.into();
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use crate::test::test_polars_plugin_command;
use super::*;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&Dummies)
}
}

View File

@ -0,0 +1,165 @@
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use polars::prelude::LazyFrame;
use crate::{
dataframe::values::{NuExpression, NuLazyFrame},
values::{
cant_convert_err, to_pipeline_data, CustomValueSupport, PolarsPluginObject,
PolarsPluginType,
},
PolarsPlugin,
};
use super::super::values::{Column, NuDataFrame};
#[derive(Clone)]
pub struct FilterWith;
impl PluginCommand for FilterWith {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars filter-with"
}
fn usage(&self) -> &str {
"Filters dataframe using a mask or expression as reference."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"mask or expression",
SyntaxShape::Any,
"boolean mask used to filter data",
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe or lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Filter dataframe using a bool mask",
example: r#"let mask = ([true false] | polars into-df);
[[a b]; [1 2] [3 4]] | polars into-df | polars filter-with $mask"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_int(1)]),
Column::new("b".to_string(), vec![Value::test_int(2)]),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Filter dataframe using an expression",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars filter-with ((polars col a) > 1)",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_int(3)]),
Column::new("b".to_string(), vec![Value::test_int(4)]),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuDataFrame(df) => command_eager(plugin, engine, call, df),
PolarsPluginObject::NuLazyFrame(lazy) => command_lazy(plugin, engine, call, lazy),
_ => Err(cant_convert_err(
&value,
&[PolarsPluginType::NuDataFrame, PolarsPluginType::NuLazyFrame],
)),
}
.map_err(LabeledError::from)
}
}
fn command_eager(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
df: NuDataFrame,
) -> Result<PipelineData, ShellError> {
let mask_value: Value = call.req(0)?;
let mask_span = mask_value.span();
if NuExpression::can_downcast(&mask_value) {
let expression = NuExpression::try_from_value(plugin, &mask_value)?;
let lazy = df.lazy();
let lazy = lazy.apply_with_expr(expression, LazyFrame::filter);
to_pipeline_data(plugin, engine, call.head, lazy)
} else {
let mask = NuDataFrame::try_from_value_coerce(plugin, &mask_value, mask_span)?
.as_series(mask_span)?;
let mask = mask.bool().map_err(|e| ShellError::GenericError {
error: "Error casting to bool".into(),
msg: e.to_string(),
span: Some(mask_span),
help: Some("Perhaps you want to use a series with booleans as mask".into()),
inner: vec![],
})?;
let polars_df = df
.as_ref()
.filter(mask)
.map_err(|e| ShellError::GenericError {
error: "Error filtering dataframe".into(),
msg: e.to_string(),
span: Some(call.head),
help: Some("The only allowed column types for dummies are String or Int".into()),
inner: vec![],
})?;
let df = NuDataFrame::new(df.from_lazy, polars_df);
to_pipeline_data(plugin, engine, call.head, df)
}
}
fn command_lazy(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
lazy: NuLazyFrame,
) -> Result<PipelineData, ShellError> {
let expr: Value = call.req(0)?;
let expr = NuExpression::try_from_value(plugin, &expr)?;
let lazy = lazy.apply_with_expr(expr, LazyFrame::filter);
to_pipeline_data(plugin, engine, call.head, lazy)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&FilterWith)
}
}

View File

@ -0,0 +1,137 @@
use crate::{
values::{to_pipeline_data, Column, CustomValueSupport},
PolarsPlugin,
};
use super::super::values::{NuDataFrame, NuExpression};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
#[derive(Clone)]
pub struct FirstDF;
impl PluginCommand for FirstDF {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars first"
}
fn usage(&self) -> &str {
"Show only the first number of rows or create a first expression"
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.optional(
"rows",
SyntaxShape::Int,
"starting from the front, the number of rows to return",
)
.input_output_types(vec![
(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Return the first row of a dataframe",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars first",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_int(1)]),
Column::new("b".to_string(), vec![Value::test_int(2)]),
],
None,
)
.expect("should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Return the first two rows of a dataframe",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars first 2",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
],
None,
)
.expect("should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Creates a first expression from a column",
example: "polars col a | polars first",
result: None,
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
if NuDataFrame::can_downcast(&value) {
let df = NuDataFrame::try_from_value(plugin, &value)?;
command(plugin, engine, call, df).map_err(|e| e.into())
} else {
let expr = NuExpression::try_from_value(plugin, &value)?;
let expr: NuExpression = expr.to_polars().first().into();
to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
}
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
df: NuDataFrame,
) -> Result<PipelineData, ShellError> {
let rows: Option<usize> = call.opt(0)?;
let rows = rows.unwrap_or(1);
let res = df.as_ref().head(Some(rows));
let res = NuDataFrame::new(false, res);
to_pipeline_data(plugin, engine, call.head, res)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&FirstDF)
}
}

View File

@ -0,0 +1,103 @@
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use crate::{
dataframe::values::utils::convert_columns_string,
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::values::{Column, NuDataFrame};
#[derive(Clone)]
pub struct GetDF;
impl PluginCommand for GetDF {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars get"
}
fn usage(&self) -> &str {
"Creates dataframe with the selected columns."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.rest("rest", SyntaxShape::Any, "column names to sort dataframe")
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns the selected column",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars get a",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let columns: Vec<Value> = call.rest(0)?;
let (col_string, col_span) = convert_columns_string(columns, call.head)?;
let df = NuDataFrame::try_from_pipeline(plugin, input, call.head)?;
let df = df
.as_ref()
.select(col_string)
.map_err(|e| ShellError::GenericError {
error: "Error selecting columns".into(),
msg: e.to_string(),
span: Some(col_span),
help: None,
inner: vec![],
})?;
let df = NuDataFrame::new(false, df);
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use crate::test::test_polars_plugin_command;
use super::*;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&GetDF)
}
}

View File

@ -0,0 +1,112 @@
use crate::{
values::{to_pipeline_data, Column, CustomValueSupport},
PolarsPlugin,
};
use super::super::values::{utils::DEFAULT_ROWS, NuDataFrame, NuExpression};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
#[derive(Clone)]
pub struct LastDF;
impl PluginCommand for LastDF {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars last"
}
fn usage(&self) -> &str {
"Creates new dataframe with tail rows or creates a last expression."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.optional("rows", SyntaxShape::Int, "Number of rows for tail")
.input_output_types(vec![
(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Create new dataframe with last rows",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars last 1",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_int(3)]),
Column::new("b".to_string(), vec![Value::test_int(4)]),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Creates a last expression from a column",
example: "polars col a | polars last",
result: None,
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
if NuDataFrame::can_downcast(&value) {
let df = NuDataFrame::try_from_value(plugin, &value)?;
command(plugin, engine, call, df).map_err(|e| e.into())
} else {
let expr = NuExpression::try_from_value(plugin, &value)?;
let expr: NuExpression = expr.to_polars().last().into();
to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
}
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
df: NuDataFrame,
) -> Result<PipelineData, ShellError> {
let rows: Option<usize> = call.opt(0)?;
let rows = rows.unwrap_or(DEFAULT_ROWS);
let res = df.as_ref().tail(Some(rows));
let res = NuDataFrame::new(false, res);
to_pipeline_data(plugin, engine, call.head, res)
}
#[cfg(test)]
mod test {
use crate::test::test_polars_plugin_command;
use super::*;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&LastDF)
}
}

View File

@ -0,0 +1,96 @@
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
record, Category, Example, IntoPipelineData, LabeledError, PipelineData, Signature, Value,
};
use crate::{values::PolarsPluginObject, PolarsPlugin};
#[derive(Clone)]
pub struct ListDF;
impl PluginCommand for ListDF {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars ls"
}
fn usage(&self) -> &str {
"Lists stored dataframes."
}
fn signature(&self) -> Signature {
Signature::build(self.name()).category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Creates a new dataframe and shows it in the dataframe list",
example: r#"let test = ([[a b];[1 2] [3 4]] | dfr into-df);
polars ls"#,
result: None,
}]
}
fn run(
&self,
plugin: &Self::Plugin,
_engine: &EngineInterface,
call: &EvaluatedCall,
_input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let vals = plugin.cache.process_entries(|(key, value)| match value {
PolarsPluginObject::NuDataFrame(df) => Ok(Some(Value::record(
record! {
"key" => Value::string(key.to_string(), call.head),
"columns" => Value::int(df.as_ref().width() as i64, call.head),
"rows" => Value::int(df.as_ref().height() as i64, call.head),
"type" => Value::string("NuDataFrame", call.head),
},
call.head,
))),
PolarsPluginObject::NuLazyFrame(lf) => {
let lf = lf.clone().collect(call.head)?;
Ok(Some(Value::record(
record! {
"key" => Value::string(key.to_string(), call.head),
"columns" => Value::int(lf.as_ref().width() as i64, call.head),
"rows" => Value::int(lf.as_ref().height() as i64, call.head),
"type" => Value::string("NuLazyFrame", call.head),
},
call.head,
)))
}
PolarsPluginObject::NuExpression(_) => Ok(Some(Value::record(
record! {
"key" => Value::string(key.to_string(), call.head),
"columns" => Value::nothing(call.head),
"rows" => Value::nothing(call.head),
"type" => Value::string("NuExpression", call.head),
},
call.head,
))),
PolarsPluginObject::NuLazyGroupBy(_) => Ok(Some(Value::record(
record! {
"key" => Value::string(key.to_string(), call.head),
"columns" => Value::nothing(call.head),
"rows" => Value::nothing(call.head),
"type" => Value::string("NuLazyGroupBy", call.head),
},
call.head,
))),
PolarsPluginObject::NuWhen(_) => Ok(Some(Value::record(
record! {
"key" => Value::string(key.to_string(), call.head),
"columns" => Value::nothing(call.head),
"rows" => Value::nothing(call.head),
"type" => Value::string("NuWhen", call.head),
},
call.head,
))),
})?;
let vals = vals.into_iter().flatten().collect();
let list = Value::list(vals, call.head);
Ok(list.into_pipeline_data())
}
}

View File

@ -0,0 +1,255 @@
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Spanned,
SyntaxShape, Type, Value,
};
use crate::{
dataframe::values::utils::convert_columns_string,
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::values::{Column, NuDataFrame};
#[derive(Clone)]
pub struct MeltDF;
impl PluginCommand for MeltDF {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars melt"
}
fn usage(&self) -> &str {
"Unpivot a DataFrame from wide to long format."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required_named(
"columns",
SyntaxShape::Table(vec![]),
"column names for melting",
Some('c'),
)
.required_named(
"values",
SyntaxShape::Table(vec![]),
"column names used as value columns",
Some('v'),
)
.named(
"variable-name",
SyntaxShape::String,
"optional name for variable column",
Some('r'),
)
.named(
"value-name",
SyntaxShape::String,
"optional name for value column",
Some('l'),
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "melt dataframe",
example:
"[[a b c d]; [x 1 4 a] [y 2 5 b] [z 3 6 c]] | polars into-df | polars melt -c [b c] -v [a d]",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"b".to_string(),
vec![
Value::test_int(1),
Value::test_int(2),
Value::test_int(3),
Value::test_int(1),
Value::test_int(2),
Value::test_int(3),
],
),
Column::new(
"c".to_string(),
vec![
Value::test_int(4),
Value::test_int(5),
Value::test_int(6),
Value::test_int(4),
Value::test_int(5),
Value::test_int(6),
],
),
Column::new(
"variable".to_string(),
vec![
Value::test_string("a"),
Value::test_string("a"),
Value::test_string("a"),
Value::test_string("d"),
Value::test_string("d"),
Value::test_string("d"),
],
),
Column::new(
"value".to_string(),
vec![
Value::test_string("x"),
Value::test_string("y"),
Value::test_string("z"),
Value::test_string("a"),
Value::test_string("b"),
Value::test_string("c"),
],
),
], None)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let id_col: Vec<Value> = call.get_flag("columns")?.expect("required value");
let val_col: Vec<Value> = call.get_flag("values")?.expect("required value");
let value_name: Option<Spanned<String>> = call.get_flag("value-name")?;
let variable_name: Option<Spanned<String>> = call.get_flag("variable-name")?;
let (id_col_string, id_col_span) = convert_columns_string(id_col, call.head)?;
let (val_col_string, val_col_span) = convert_columns_string(val_col, call.head)?;
let df = NuDataFrame::try_from_pipeline(plugin, input, call.head)?;
check_column_datatypes(df.as_ref(), &id_col_string, id_col_span)?;
check_column_datatypes(df.as_ref(), &val_col_string, val_col_span)?;
let mut res = df
.as_ref()
.melt(&id_col_string, &val_col_string)
.map_err(|e| ShellError::GenericError {
error: "Error calculating melt".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
if let Some(name) = &variable_name {
res.rename("variable", &name.item)
.map_err(|e| ShellError::GenericError {
error: "Error renaming column".into(),
msg: e.to_string(),
span: Some(name.span),
help: None,
inner: vec![],
})?;
}
if let Some(name) = &value_name {
res.rename("value", &name.item)
.map_err(|e| ShellError::GenericError {
error: "Error renaming column".into(),
msg: e.to_string(),
span: Some(name.span),
help: None,
inner: vec![],
})?;
}
let res = NuDataFrame::new(false, res);
to_pipeline_data(plugin, engine, call.head, res)
}
fn check_column_datatypes<T: AsRef<str>>(
df: &polars::prelude::DataFrame,
cols: &[T],
col_span: Span,
) -> Result<(), ShellError> {
if cols.is_empty() {
return Err(ShellError::GenericError {
error: "Merge error".into(),
msg: "empty column list".into(),
span: Some(col_span),
help: None,
inner: vec![],
});
}
// Checking if they are same type
if cols.len() > 1 {
for w in cols.windows(2) {
let l_series = df
.column(w[0].as_ref())
.map_err(|e| ShellError::GenericError {
error: "Error selecting columns".into(),
msg: e.to_string(),
span: Some(col_span),
help: None,
inner: vec![],
})?;
let r_series = df
.column(w[1].as_ref())
.map_err(|e| ShellError::GenericError {
error: "Error selecting columns".into(),
msg: e.to_string(),
span: Some(col_span),
help: None,
inner: vec![],
})?;
if l_series.dtype() != r_series.dtype() {
return Err(ShellError::GenericError {
error: "Merge error".into(),
msg: "found different column types in list".into(),
span: Some(col_span),
help: Some(format!(
"datatypes {} and {} are incompatible",
l_series.dtype(),
r_series.dtype()
)),
inner: vec![],
});
}
}
}
Ok(())
}
#[cfg(test)]
mod test {
use crate::test::test_polars_plugin_command;
use super::*;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&MeltDF)
}
}

View File

@ -0,0 +1,105 @@
mod append;
mod cast;
mod columns;
mod drop;
mod drop_duplicates;
mod drop_nulls;
mod dtypes;
mod dummies;
mod filter_with;
mod first;
mod get;
mod last;
mod list;
mod melt;
mod open;
mod query_df;
mod rename;
mod sample;
mod schema;
mod shape;
mod slice;
mod sql_context;
mod sql_expr;
mod summary;
mod take;
mod to_arrow;
mod to_avro;
mod to_csv;
mod to_df;
mod to_json_lines;
mod to_nu;
mod to_parquet;
mod with_column;
use crate::PolarsPlugin;
pub use self::open::OpenDataFrame;
pub use append::AppendDF;
pub use cast::CastDF;
pub use columns::ColumnsDF;
pub use drop::DropDF;
pub use drop_duplicates::DropDuplicates;
pub use drop_nulls::DropNulls;
pub use dtypes::DataTypes;
pub use dummies::Dummies;
pub use filter_with::FilterWith;
pub use first::FirstDF;
pub use get::GetDF;
pub use last::LastDF;
pub use list::ListDF;
pub use melt::MeltDF;
use nu_plugin::PluginCommand;
pub use query_df::QueryDf;
pub use rename::RenameDF;
pub use sample::SampleDF;
pub use schema::SchemaCmd;
pub use shape::ShapeDF;
pub use slice::SliceDF;
pub use sql_context::SQLContext;
pub use summary::Summary;
pub use take::TakeDF;
pub use to_arrow::ToArrow;
pub use to_avro::ToAvro;
pub use to_csv::ToCSV;
pub use to_df::ToDataFrame;
pub use to_json_lines::ToJsonLines;
pub use to_nu::ToNu;
pub use to_parquet::ToParquet;
pub use with_column::WithColumn;
pub(crate) fn eager_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlugin>>> {
vec![
Box::new(AppendDF),
Box::new(CastDF),
Box::new(ColumnsDF),
Box::new(DataTypes),
Box::new(DropDF),
Box::new(DropDuplicates),
Box::new(DropNulls),
Box::new(Dummies),
Box::new(FilterWith),
Box::new(GetDF),
Box::new(OpenDataFrame),
Box::new(MeltDF),
Box::new(Summary),
Box::new(FirstDF),
Box::new(LastDF),
Box::new(ListDF),
Box::new(RenameDF),
Box::new(SampleDF),
Box::new(ShapeDF),
Box::new(SliceDF),
Box::new(SchemaCmd),
Box::new(TakeDF),
Box::new(ToNu),
Box::new(ToArrow),
Box::new(ToAvro),
Box::new(ToDataFrame),
Box::new(ToCSV),
Box::new(ToJsonLines),
Box::new(ToParquet),
Box::new(QueryDf),
Box::new(WithColumn),
]
}

View File

@ -0,0 +1,531 @@
use crate::{
dataframe::values::NuSchema,
values::{cache_and_to_value, NuLazyFrame},
PolarsPlugin,
};
use super::super::values::NuDataFrame;
use nu_plugin::PluginCommand;
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Spanned, SyntaxShape,
Type, Value,
};
use std::{fs::File, io::BufReader, path::PathBuf};
use polars::prelude::{
CsvEncoding, CsvReader, IpcReader, JsonFormat, JsonReader, LazyCsvReader, LazyFileListReader,
LazyFrame, ParquetReader, ScanArgsIpc, ScanArgsParquet, SerReader,
};
use polars_io::{avro::AvroReader, prelude::ParallelStrategy};
#[derive(Clone)]
pub struct OpenDataFrame;
impl PluginCommand for OpenDataFrame {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars open"
}
fn usage(&self) -> &str {
"Opens CSV, JSON, JSON lines, arrow, avro, or parquet file to create dataframe."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"file",
SyntaxShape::Filepath,
"file path to load values from",
)
.switch("lazy", "creates a lazy dataframe", Some('l'))
.named(
"type",
SyntaxShape::String,
"File type: csv, tsv, json, parquet, arrow, avro. If omitted, derive from file extension",
Some('t'),
)
.named(
"delimiter",
SyntaxShape::String,
"file delimiter character. CSV file",
Some('d'),
)
.switch(
"no-header",
"Indicates if file doesn't have header. CSV file",
None,
)
.named(
"infer-schema",
SyntaxShape::Number,
"Number of rows to infer the schema of the file. CSV file",
None,
)
.named(
"skip-rows",
SyntaxShape::Number,
"Number of rows to skip from file. CSV file",
None,
)
.named(
"columns",
SyntaxShape::List(Box::new(SyntaxShape::String)),
"Columns to be selected from csv file. CSV and Parquet file",
None,
)
.named(
"schema",
SyntaxShape::Record(vec![]),
r#"Polars Schema in format [{name: str}]. CSV, JSON, and JSONL files"#,
Some('s')
)
.input_output_type(Type::Any, Type::Custom("dataframe".into()))
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Takes a file name and creates a dataframe",
example: "polars open test.csv",
result: None,
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &nu_plugin::EngineInterface,
call: &nu_plugin::EvaluatedCall,
_input: nu_protocol::PipelineData,
) -> Result<nu_protocol::PipelineData, LabeledError> {
command(plugin, engine, call).map_err(|e| e.into())
}
}
fn command(
plugin: &PolarsPlugin,
engine: &nu_plugin::EngineInterface,
call: &nu_plugin::EvaluatedCall,
) -> Result<PipelineData, ShellError> {
let file: Spanned<PathBuf> = call.req(0)?;
let type_option: Option<Spanned<String>> = call.get_flag("type")?;
let type_id = match &type_option {
Some(ref t) => Some((t.item.to_owned(), "Invalid type", t.span)),
None => file.item.extension().map(|e| {
(
e.to_string_lossy().into_owned(),
"Invalid extension",
file.span,
)
}),
};
match type_id {
Some((e, msg, blamed)) => match e.as_str() {
"csv" | "tsv" => from_csv(plugin, engine, call),
"parquet" | "parq" => from_parquet(plugin, engine, call),
"ipc" | "arrow" => from_ipc(plugin, engine, call),
"json" => from_json(plugin, engine, call),
"jsonl" => from_jsonl(plugin, engine, call),
"avro" => from_avro(plugin, engine, call),
_ => Err(ShellError::FileNotFoundCustom {
msg: format!(
"{msg}. Supported values: csv, tsv, parquet, ipc, arrow, json, jsonl, avro"
),
span: blamed,
}),
},
None => Err(ShellError::FileNotFoundCustom {
msg: "File without extension".into(),
span: file.span,
}),
}
.map(|value| PipelineData::Value(value, None))
}
fn from_parquet(
plugin: &PolarsPlugin,
engine: &nu_plugin::EngineInterface,
call: &nu_plugin::EvaluatedCall,
) -> Result<Value, ShellError> {
if call.has_flag("lazy")? {
let file: String = call.req(0)?;
let args = ScanArgsParquet {
n_rows: None,
cache: true,
parallel: ParallelStrategy::Auto,
rechunk: false,
row_index: None,
low_memory: false,
cloud_options: None,
use_statistics: false,
hive_partitioning: false,
};
let df: NuLazyFrame = LazyFrame::scan_parquet(file, args)
.map_err(|e| ShellError::GenericError {
error: "Parquet reader error".into(),
msg: format!("{e:?}"),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into();
cache_and_to_value(plugin, engine, call.head, df)
} else {
let file: Spanned<PathBuf> = call.req(0)?;
let columns: Option<Vec<String>> = call.get_flag("columns")?;
let r = File::open(&file.item).map_err(|e| ShellError::GenericError {
error: "Error opening file".into(),
msg: e.to_string(),
span: Some(file.span),
help: None,
inner: vec![],
})?;
let reader = ParquetReader::new(r);
let reader = match columns {
None => reader,
Some(columns) => reader.with_columns(Some(columns)),
};
let df: NuDataFrame = reader
.finish()
.map_err(|e| ShellError::GenericError {
error: "Parquet reader error".into(),
msg: format!("{e:?}"),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into();
cache_and_to_value(plugin, engine, call.head, df)
}
}
fn from_avro(
plugin: &PolarsPlugin,
engine: &nu_plugin::EngineInterface,
call: &nu_plugin::EvaluatedCall,
) -> Result<Value, ShellError> {
let file: Spanned<PathBuf> = call.req(0)?;
let columns: Option<Vec<String>> = call.get_flag("columns")?;
let r = File::open(&file.item).map_err(|e| ShellError::GenericError {
error: "Error opening file".into(),
msg: e.to_string(),
span: Some(file.span),
help: None,
inner: vec![],
})?;
let reader = AvroReader::new(r);
let reader = match columns {
None => reader,
Some(columns) => reader.with_columns(Some(columns)),
};
let df: NuDataFrame = reader
.finish()
.map_err(|e| ShellError::GenericError {
error: "Avro reader error".into(),
msg: format!("{e:?}"),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into();
cache_and_to_value(plugin, engine, call.head, df)
}
fn from_ipc(
plugin: &PolarsPlugin,
engine: &nu_plugin::EngineInterface,
call: &nu_plugin::EvaluatedCall,
) -> Result<Value, ShellError> {
if call.has_flag("lazy")? {
let file: String = call.req(0)?;
let args = ScanArgsIpc {
n_rows: None,
cache: true,
rechunk: false,
row_index: None,
memmap: true,
};
let df: NuLazyFrame = LazyFrame::scan_ipc(file, args)
.map_err(|e| ShellError::GenericError {
error: "IPC reader error".into(),
msg: format!("{e:?}"),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into();
cache_and_to_value(plugin, engine, call.head, df)
} else {
let file: Spanned<PathBuf> = call.req(0)?;
let columns: Option<Vec<String>> = call.get_flag("columns")?;
let r = File::open(&file.item).map_err(|e| ShellError::GenericError {
error: "Error opening file".into(),
msg: e.to_string(),
span: Some(file.span),
help: None,
inner: vec![],
})?;
let reader = IpcReader::new(r);
let reader = match columns {
None => reader,
Some(columns) => reader.with_columns(Some(columns)),
};
let df: NuDataFrame = reader
.finish()
.map_err(|e| ShellError::GenericError {
error: "IPC reader error".into(),
msg: format!("{e:?}"),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into();
cache_and_to_value(plugin, engine, call.head, df)
}
}
fn from_json(
plugin: &PolarsPlugin,
engine: &nu_plugin::EngineInterface,
call: &nu_plugin::EvaluatedCall,
) -> Result<Value, ShellError> {
let file: Spanned<PathBuf> = call.req(0)?;
let file = File::open(&file.item).map_err(|e| ShellError::GenericError {
error: "Error opening file".into(),
msg: e.to_string(),
span: Some(file.span),
help: None,
inner: vec![],
})?;
let maybe_schema = call
.get_flag("schema")?
.map(|schema| NuSchema::try_from(&schema))
.transpose()?;
let buf_reader = BufReader::new(file);
let reader = JsonReader::new(buf_reader);
let reader = match maybe_schema {
Some(schema) => reader.with_schema(schema.into()),
None => reader,
};
let df: NuDataFrame = reader
.finish()
.map_err(|e| ShellError::GenericError {
error: "Json reader error".into(),
msg: format!("{e:?}"),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into();
cache_and_to_value(plugin, engine, call.head, df)
}
fn from_jsonl(
plugin: &PolarsPlugin,
engine: &nu_plugin::EngineInterface,
call: &nu_plugin::EvaluatedCall,
) -> Result<Value, ShellError> {
let infer_schema: Option<usize> = call.get_flag("infer-schema")?;
let maybe_schema = call
.get_flag("schema")?
.map(|schema| NuSchema::try_from(&schema))
.transpose()?;
let file: Spanned<PathBuf> = call.req(0)?;
let file = File::open(&file.item).map_err(|e| ShellError::GenericError {
error: "Error opening file".into(),
msg: e.to_string(),
span: Some(file.span),
help: None,
inner: vec![],
})?;
let buf_reader = BufReader::new(file);
let reader = JsonReader::new(buf_reader)
.with_json_format(JsonFormat::JsonLines)
.infer_schema_len(infer_schema);
let reader = match maybe_schema {
Some(schema) => reader.with_schema(schema.into()),
None => reader,
};
let df: NuDataFrame = reader
.finish()
.map_err(|e| ShellError::GenericError {
error: "Json lines reader error".into(),
msg: format!("{e:?}"),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into();
cache_and_to_value(plugin, engine, call.head, df)
}
fn from_csv(
plugin: &PolarsPlugin,
engine: &nu_plugin::EngineInterface,
call: &nu_plugin::EvaluatedCall,
) -> Result<Value, ShellError> {
let delimiter: Option<Spanned<String>> = call.get_flag("delimiter")?;
let no_header: bool = call.has_flag("no-header")?;
let infer_schema: Option<usize> = call.get_flag("infer-schema")?;
let skip_rows: Option<usize> = call.get_flag("skip-rows")?;
let columns: Option<Vec<String>> = call.get_flag("columns")?;
let maybe_schema = call
.get_flag("schema")?
.map(|schema| NuSchema::try_from(&schema))
.transpose()?;
if call.has_flag("lazy")? {
let file: String = call.req(0)?;
let csv_reader = LazyCsvReader::new(file);
let csv_reader = match delimiter {
None => csv_reader,
Some(d) => {
if d.item.len() != 1 {
return Err(ShellError::GenericError {
error: "Incorrect delimiter".into(),
msg: "Delimiter has to be one character".into(),
span: Some(d.span),
help: None,
inner: vec![],
});
} else {
let delimiter = match d.item.chars().next() {
Some(d) => d as u8,
None => unreachable!(),
};
csv_reader.with_separator(delimiter)
}
}
};
let csv_reader = csv_reader.has_header(!no_header);
let csv_reader = match maybe_schema {
Some(schema) => csv_reader.with_schema(Some(schema.into())),
None => csv_reader,
};
let csv_reader = match infer_schema {
None => csv_reader,
Some(r) => csv_reader.with_infer_schema_length(Some(r)),
};
let csv_reader = match skip_rows {
None => csv_reader,
Some(r) => csv_reader.with_skip_rows(r),
};
let df: NuLazyFrame = csv_reader
.finish()
.map_err(|e| ShellError::GenericError {
error: "Parquet reader error".into(),
msg: format!("{e:?}"),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into();
cache_and_to_value(plugin, engine, call.head, df)
} else {
let file: Spanned<PathBuf> = call.req(0)?;
let csv_reader = CsvReader::from_path(&file.item)
.map_err(|e| ShellError::GenericError {
error: "Error creating CSV reader".into(),
msg: e.to_string(),
span: Some(file.span),
help: None,
inner: vec![],
})?
.with_encoding(CsvEncoding::LossyUtf8);
let csv_reader = match delimiter {
None => csv_reader,
Some(d) => {
if d.item.len() != 1 {
return Err(ShellError::GenericError {
error: "Incorrect delimiter".into(),
msg: "Delimiter has to be one character".into(),
span: Some(d.span),
help: None,
inner: vec![],
});
} else {
let delimiter = match d.item.chars().next() {
Some(d) => d as u8,
None => unreachable!(),
};
csv_reader.with_separator(delimiter)
}
}
};
let csv_reader = csv_reader.has_header(!no_header);
let csv_reader = match maybe_schema {
Some(schema) => csv_reader.with_schema(Some(schema.into())),
None => csv_reader,
};
let csv_reader = match infer_schema {
None => csv_reader,
Some(r) => csv_reader.infer_schema(Some(r)),
};
let csv_reader = match skip_rows {
None => csv_reader,
Some(r) => csv_reader.with_skip_rows(r),
};
let csv_reader = match columns {
None => csv_reader,
Some(columns) => csv_reader.with_columns(Some(columns)),
};
let df: NuDataFrame = csv_reader
.finish()
.map_err(|e| ShellError::GenericError {
error: "Parquet reader error".into(),
msg: format!("{e:?}"),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into();
cache_and_to_value(plugin, engine, call.head, df)
}
}

View File

@ -0,0 +1,108 @@
use super::super::values::NuDataFrame;
use crate::dataframe::values::Column;
use crate::dataframe::{eager::SQLContext, values::NuLazyFrame};
use crate::values::{to_pipeline_data, CustomValueSupport};
use crate::PolarsPlugin;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
// attribution:
// sql_context.rs, and sql_expr.rs were copied from polars-sql. thank you.
// maybe we should just use the crate at some point but it's not published yet.
// https://github.com/pola-rs/polars/tree/master/polars-sql
#[derive(Clone)]
pub struct QueryDf;
impl PluginCommand for QueryDf {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars query"
}
fn usage(&self) -> &str {
"Query dataframe using SQL. Note: The dataframe is always named 'df' in your query's from clause."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("sql", SyntaxShape::String, "sql query")
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn search_terms(&self) -> Vec<&str> {
vec!["dataframe", "sql", "search"]
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Query dataframe using SQL",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars query 'select a from df'",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let sql_query: String = call.req(0)?;
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let mut ctx = SQLContext::new();
ctx.register("df", &df.df);
let df_sql = ctx
.execute(&sql_query)
.map_err(|e| ShellError::GenericError {
error: "Dataframe Error".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let lazy = NuLazyFrame::new(!df.from_lazy, df_sql);
to_pipeline_data(plugin, engine, call.head, lazy)
}
#[cfg(test)]
mod test {
use crate::test::test_polars_plugin_command;
use super::*;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&QueryDf)
}
}

View File

@ -0,0 +1,203 @@
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use crate::{
dataframe::{utils::extract_strings, values::NuLazyFrame},
values::{to_pipeline_data, CustomValueSupport, PolarsPluginObject},
PolarsPlugin,
};
use super::super::values::{Column, NuDataFrame};
#[derive(Clone)]
pub struct RenameDF;
impl PluginCommand for RenameDF {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars rename"
}
fn usage(&self) -> &str {
"Rename a dataframe column."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"columns",
SyntaxShape::Any,
"Column(s) to be renamed. A string or list of strings",
)
.required(
"new names",
SyntaxShape::Any,
"New names for the selected column(s). A string or list of strings",
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe or lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Renames a series",
example: "[5 6 7 8] | polars into-df | polars rename '0' new_name",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"new_name".to_string(),
vec![
Value::test_int(5),
Value::test_int(6),
Value::test_int(7),
Value::test_int(8),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Renames a dataframe column",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars rename a a_new",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a_new".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Renames two dataframe columns",
example:
"[[a b]; [1 2] [3 4]] | polars into-df | polars rename [a b] [a_new b_new]",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a_new".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
),
Column::new(
"b_new".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
match PolarsPluginObject::try_from_value(plugin, &value).map_err(LabeledError::from)? {
PolarsPluginObject::NuDataFrame(df) => {
command_eager(plugin, engine, call, df).map_err(LabeledError::from)
}
PolarsPluginObject::NuLazyFrame(lazy) => {
command_lazy(plugin, engine, call, lazy).map_err(LabeledError::from)
}
_ => Err(LabeledError::new(format!("Unsupported type: {value:?}"))
.with_label("Unsupported Type", call.head)),
}
}
}
fn command_eager(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
df: NuDataFrame,
) -> Result<PipelineData, ShellError> {
let columns: Value = call.req(0)?;
let columns = extract_strings(columns)?;
let new_names: Value = call.req(1)?;
let new_names = extract_strings(new_names)?;
let mut polars_df = df.to_polars();
for (from, to) in columns.iter().zip(new_names.iter()) {
polars_df
.rename(from, to)
.map_err(|e| ShellError::GenericError {
error: "Error renaming".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
}
let df = NuDataFrame::new(false, polars_df);
to_pipeline_data(plugin, engine, call.head, df)
}
fn command_lazy(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
lazy: NuLazyFrame,
) -> Result<PipelineData, ShellError> {
let columns: Value = call.req(0)?;
let columns = extract_strings(columns)?;
let new_names: Value = call.req(1)?;
let new_names = extract_strings(new_names)?;
if columns.len() != new_names.len() {
let value: Value = call.req(1)?;
return Err(ShellError::IncompatibleParametersSingle {
msg: "New name list has different size to column list".into(),
span: value.span(),
});
}
let lazy = lazy.to_polars();
let lazy: NuLazyFrame = lazy.rename(&columns, &new_names).into();
to_pipeline_data(plugin, engine, call.head, lazy)
}
#[cfg(test)]
mod test {
use crate::test::test_polars_plugin_command;
use super::*;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&RenameDF)
}
}

View File

@ -0,0 +1,138 @@
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Spanned, SyntaxShape,
Type,
};
use polars::prelude::NamedFrom;
use polars::series::Series;
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::values::NuDataFrame;
#[derive(Clone)]
pub struct SampleDF;
impl PluginCommand for SampleDF {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars sample"
}
fn usage(&self) -> &str {
"Create sample dataframe."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.named(
"n-rows",
SyntaxShape::Int,
"number of rows to be taken from dataframe",
Some('n'),
)
.named(
"fraction",
SyntaxShape::Number,
"fraction of dataframe to be taken",
Some('f'),
)
.named(
"seed",
SyntaxShape::Number,
"seed for the selection",
Some('s'),
)
.switch("replace", "sample with replace", Some('e'))
.switch("shuffle", "shuffle sample", Some('u'))
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Sample rows from dataframe",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars sample --n-rows 1",
result: None, // No expected value because sampling is random
},
Example {
description: "Shows sample row using fraction and replace",
example:
"[[a b]; [1 2] [3 4] [5 6]] | polars into-df | polars sample --fraction 0.5 --replace",
result: None, // No expected value because sampling is random
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let rows: Option<Spanned<i64>> = call.get_flag("n-rows")?;
let fraction: Option<Spanned<f64>> = call.get_flag("fraction")?;
let seed: Option<u64> = call.get_flag::<i64>("seed")?.map(|val| val as u64);
let replace: bool = call.has_flag("replace")?;
let shuffle: bool = call.has_flag("shuffle")?;
let df = NuDataFrame::try_from_pipeline(plugin, input, call.head)?;
let df = match (rows, fraction) {
(Some(rows), None) => df
.as_ref()
.sample_n(&Series::new("s", &[rows.item]), replace, shuffle, seed)
.map_err(|e| ShellError::GenericError {
error: "Error creating sample".into(),
msg: e.to_string(),
span: Some(rows.span),
help: None,
inner: vec![],
}),
(None, Some(frac)) => df
.as_ref()
.sample_frac(&Series::new("frac", &[frac.item]), replace, shuffle, seed)
.map_err(|e| ShellError::GenericError {
error: "Error creating sample".into(),
msg: e.to_string(),
span: Some(frac.span),
help: None,
inner: vec![],
}),
(Some(_), Some(_)) => Err(ShellError::GenericError {
error: "Incompatible flags".into(),
msg: "Only one selection criterion allowed".into(),
span: Some(call.head),
help: None,
inner: vec![],
}),
(None, None) => Err(ShellError::GenericError {
error: "No selection".into(),
msg: "No selection criterion was found".into(),
span: Some(call.head),
help: Some("Perhaps you want to use the flag -n or -f".into()),
inner: vec![],
}),
};
let df = NuDataFrame::new(false, df?);
to_pipeline_data(plugin, engine, call.head, df)
}

View File

@ -0,0 +1,133 @@
use crate::{values::PolarsPluginObject, PolarsPlugin};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
record, Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
#[derive(Clone)]
pub struct SchemaCmd;
impl PluginCommand for SchemaCmd {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars schema"
}
fn usage(&self) -> &str {
"Show schema for a dataframe."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.switch("datatype-list", "creates a lazy dataframe", Some('l'))
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Dataframe schema",
example: r#"[[a b]; [1 "foo"] [3 "bar"]] | polars into-df | polars schema"#,
result: Some(Value::record(
record! {
"a" => Value::string("i64", Span::test_data()),
"b" => Value::string("str", Span::test_data()),
},
Span::test_data(),
)),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
if call.has_flag("datatype-list")? {
Ok(PipelineData::Value(datatype_list(Span::unknown()), None))
} else {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
}
fn command(
plugin: &PolarsPlugin,
_engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
match PolarsPluginObject::try_from_pipeline(plugin, input, call.head)? {
PolarsPluginObject::NuDataFrame(df) => {
let schema = df.schema();
let value: Value = schema.into();
Ok(PipelineData::Value(value, None))
}
PolarsPluginObject::NuLazyFrame(lazy) => {
let schema = lazy.schema()?;
let value: Value = schema.into();
Ok(PipelineData::Value(value, None))
}
_ => Err(ShellError::GenericError {
error: "Must be a dataframe or lazy dataframe".into(),
msg: "".into(),
span: Some(call.head),
help: None,
inner: vec![],
}),
}
}
fn datatype_list(span: Span) -> Value {
let types: Vec<Value> = [
("null", ""),
("bool", ""),
("u8", ""),
("u16", ""),
("u32", ""),
("u64", ""),
("i8", ""),
("i16", ""),
("i32", ""),
("i64", ""),
("f32", ""),
("f64", ""),
("str", ""),
("binary", ""),
("date", ""),
("datetime<time_unit: (ms, us, ns) timezone (optional)>", "Time Unit can be: milliseconds: ms, microseconds: us, nanoseconds: ns. Timezone wildcard is *. Other Timezone examples: UTC, America/Los_Angeles."),
("duration<time_unit: (ms, us, ns)>", "Time Unit can be: milliseconds: ms, microseconds: us, nanoseconds: ns."),
("time", ""),
("object", ""),
("unknown", ""),
("list<dtype>", ""),
]
.iter()
.map(|(dtype, note)| {
Value::record(record! {
"dtype" => Value::string(*dtype, span),
"note" => Value::string(*note, span),
},
span)
})
.collect();
Value::list(types, span)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&SchemaCmd)
}
}

View File

@ -0,0 +1,94 @@
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
use crate::{
dataframe::values::Column,
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::values::NuDataFrame;
#[derive(Clone)]
pub struct ShapeDF;
impl PluginCommand for ShapeDF {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars shape"
}
fn usage(&self) -> &str {
"Shows column and row size for a dataframe."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Shows row and column shape",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars shape",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new("rows".to_string(), vec![Value::test_int(2)]),
Column::new("columns".to_string(), vec![Value::test_int(2)]),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let rows = Value::int(df.as_ref().height() as i64, call.head);
let cols = Value::int(df.as_ref().width() as i64, call.head);
let rows_col = Column::new("rows".to_string(), vec![rows]);
let cols_col = Column::new("columns".to_string(), vec![cols]);
let df = NuDataFrame::try_from_columns(vec![rows_col, cols_col], None)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&ShapeDF)
}
}

View File

@ -0,0 +1,95 @@
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use crate::{
dataframe::values::Column,
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::values::NuDataFrame;
#[derive(Clone)]
pub struct SliceDF;
impl PluginCommand for SliceDF {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars slice"
}
fn usage(&self) -> &str {
"Creates new dataframe from a slice of rows."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("offset", SyntaxShape::Int, "start of slice")
.required("size", SyntaxShape::Int, "size of slice")
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Create new dataframe from a slice of the rows",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars slice 0 1",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_int(1)]),
Column::new("b".to_string(), vec![Value::test_int(2)]),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let offset: i64 = call.req(0)?;
let size: usize = call.req(1)?;
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let res = df.as_ref().slice(offset, size);
let res = NuDataFrame::new(false, res);
to_pipeline_data(plugin, engine, call.head, res)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&SliceDF)
}
}

View File

@ -0,0 +1,228 @@
use crate::dataframe::eager::sql_expr::parse_sql_expr;
use polars::error::{ErrString, PolarsError};
use polars::prelude::{col, DataFrame, DataType, IntoLazy, LazyFrame};
use sqlparser::ast::{
Expr as SqlExpr, GroupByExpr, Select, SelectItem, SetExpr, Statement, TableFactor,
Value as SQLValue,
};
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;
use std::collections::HashMap;
#[derive(Default)]
pub struct SQLContext {
table_map: HashMap<String, LazyFrame>,
dialect: GenericDialect,
}
impl SQLContext {
pub fn new() -> Self {
Self {
table_map: HashMap::new(),
dialect: GenericDialect,
}
}
pub fn register(&mut self, name: &str, df: &DataFrame) {
self.table_map.insert(name.to_owned(), df.clone().lazy());
}
fn execute_select(&self, select_stmt: &Select) -> Result<LazyFrame, PolarsError> {
// Determine involved dataframe
// Implicit join require some more work in query parsers, Explicit join are preferred for now.
let tbl = select_stmt.from.first().ok_or_else(|| {
PolarsError::ComputeError(ErrString::from("No table found in select statement"))
})?;
let mut alias_map = HashMap::new();
let tbl_name = match &tbl.relation {
TableFactor::Table { name, alias, .. } => {
let tbl_name = name
.0
.first()
.ok_or_else(|| {
PolarsError::ComputeError(ErrString::from(
"No table found in select statement",
))
})?
.value
.to_string();
if self.table_map.contains_key(&tbl_name) {
if let Some(alias) = alias {
alias_map.insert(alias.name.value.clone(), tbl_name.to_owned());
};
tbl_name
} else {
return Err(PolarsError::ComputeError(
format!("Table name {tbl_name} was not found").into(),
));
}
}
// Support bare table, optional with alias for now
_ => return Err(PolarsError::ComputeError("Not implemented".into())),
};
let df = &self.table_map[&tbl_name];
let mut raw_projection_before_alias: HashMap<String, usize> = HashMap::new();
let mut contain_wildcard = false;
// Filter Expression
let df = match select_stmt.selection.as_ref() {
Some(expr) => {
let filter_expression = parse_sql_expr(expr)?;
df.clone().filter(filter_expression)
}
None => df.clone(),
};
// Column Projections
let projection = select_stmt
.projection
.iter()
.enumerate()
.map(|(i, select_item)| {
Ok(match select_item {
SelectItem::UnnamedExpr(expr) => {
let expr = parse_sql_expr(expr)?;
raw_projection_before_alias.insert(format!("{expr:?}"), i);
expr
}
SelectItem::ExprWithAlias { expr, alias } => {
let expr = parse_sql_expr(expr)?;
raw_projection_before_alias.insert(format!("{expr:?}"), i);
expr.alias(&alias.value)
}
SelectItem::QualifiedWildcard(_, _) | SelectItem::Wildcard(_) => {
contain_wildcard = true;
col("*")
}
})
})
.collect::<Result<Vec<_>, PolarsError>>()?;
// Check for group by
// After projection since there might be number.
let group_by = match &select_stmt.group_by {
GroupByExpr::All =>
Err(
PolarsError::ComputeError("Group-By Error: Only positive number or expression are supported, not all".into())
)?,
GroupByExpr::Expressions(expressions) => expressions
}
.iter()
.map(
|e|match e {
SqlExpr::Value(SQLValue::Number(idx, _)) => {
let idx = match idx.parse::<usize>() {
Ok(0)| Err(_) => Err(
PolarsError::ComputeError(
format!("Group-By Error: Only positive number or expression are supported, got {idx}").into()
)),
Ok(idx) => Ok(idx)
}?;
Ok(projection[idx].clone())
}
SqlExpr::Value(_) => Err(
PolarsError::ComputeError("Group-By Error: Only positive number or expression are supported".into())
),
_ => parse_sql_expr(e)
}
)
.collect::<Result<Vec<_>, PolarsError>>()?;
let df = if group_by.is_empty() {
df.select(projection)
} else {
// check groupby and projection due to difference between SQL and polars
// Return error on wild card, shouldn't process this
if contain_wildcard {
return Err(PolarsError::ComputeError(
"Group-By Error: Can't process wildcard in group-by".into(),
));
}
// Default polars group by will have group by columns at the front
// need some container to contain position of group by columns and its position
// at the final agg projection, check the schema for the existence of group by column
// and its projections columns, keeping the original index
let (exclude_expr, groupby_pos): (Vec<_>, Vec<_>) = group_by
.iter()
.map(|expr| raw_projection_before_alias.get(&format!("{expr:?}")))
.enumerate()
.filter(|(_, proj_p)| proj_p.is_some())
.map(|(gb_p, proj_p)| (*proj_p.unwrap_or(&0), (*proj_p.unwrap_or(&0), gb_p)))
.unzip();
let (agg_projection, agg_proj_pos): (Vec<_>, Vec<_>) = projection
.iter()
.enumerate()
.filter(|(i, _)| !exclude_expr.contains(i))
.enumerate()
.map(|(agg_pj, (proj_p, expr))| (expr.clone(), (proj_p, agg_pj + group_by.len())))
.unzip();
let agg_df = df.group_by(group_by).agg(agg_projection);
let mut final_proj_pos = groupby_pos
.into_iter()
.chain(agg_proj_pos)
.collect::<Vec<_>>();
final_proj_pos.sort_by(|(proj_pa, _), (proj_pb, _)| proj_pa.cmp(proj_pb));
let final_proj = final_proj_pos
.into_iter()
.map(|(_, shm_p)| {
col(agg_df
.clone()
// FIXME: had to do this mess to get get_index to work, not sure why. need help
.collect()
.unwrap_or_default()
.schema()
.get_at_index(shm_p)
.unwrap_or((&"".into(), &DataType::Null))
.0)
})
.collect::<Vec<_>>();
agg_df.select(final_proj)
};
Ok(df)
}
pub fn execute(&self, query: &str) -> Result<LazyFrame, PolarsError> {
let ast = Parser::parse_sql(&self.dialect, query)
.map_err(|e| PolarsError::ComputeError(format!("{e:?}").into()))?;
if ast.len() != 1 {
Err(PolarsError::ComputeError(
"One and only one statement at a time please".into(),
))
} else {
let ast = ast
.first()
.ok_or_else(|| PolarsError::ComputeError(ErrString::from("No statement found")))?;
Ok(match ast {
Statement::Query(query) => {
let rs = match &*query.body {
SetExpr::Select(select_stmt) => self.execute_select(select_stmt)?,
_ => {
return Err(PolarsError::ComputeError(
"INSERT, UPDATE is not supported for polars".into(),
))
}
};
match &query.limit {
Some(SqlExpr::Value(SQLValue::Number(nrow, _))) => {
let nrow = nrow.parse().map_err(|err| {
PolarsError::ComputeError(
format!("Conversion Error: {err:?}").into(),
)
})?;
rs.limit(nrow)
}
None => rs,
_ => {
return Err(PolarsError::ComputeError(
"Only support number argument to LIMIT clause".into(),
))
}
}
}
_ => {
return Err(PolarsError::ComputeError(
format!("Statement type {ast:?} is not supported").into(),
))
}
})
}
}
}

View File

@ -0,0 +1,200 @@
use polars::error::PolarsError;
use polars::prelude::{col, lit, DataType, Expr, LiteralValue, PolarsResult as Result, TimeUnit};
use sqlparser::ast::{
ArrayElemTypeDef, BinaryOperator as SQLBinaryOperator, DataType as SQLDataType,
Expr as SqlExpr, Function as SQLFunction, Value as SqlValue, WindowType,
};
fn map_sql_polars_datatype(data_type: &SQLDataType) -> Result<DataType> {
Ok(match data_type {
SQLDataType::Char(_)
| SQLDataType::Varchar(_)
| SQLDataType::Uuid
| SQLDataType::Clob(_)
| SQLDataType::Text
| SQLDataType::String(_) => DataType::String,
SQLDataType::Float(_) => DataType::Float32,
SQLDataType::Real => DataType::Float32,
SQLDataType::Double => DataType::Float64,
SQLDataType::TinyInt(_) => DataType::Int8,
SQLDataType::UnsignedTinyInt(_) => DataType::UInt8,
SQLDataType::SmallInt(_) => DataType::Int16,
SQLDataType::UnsignedSmallInt(_) => DataType::UInt16,
SQLDataType::Int(_) => DataType::Int32,
SQLDataType::UnsignedInt(_) => DataType::UInt32,
SQLDataType::BigInt(_) => DataType::Int64,
SQLDataType::UnsignedBigInt(_) => DataType::UInt64,
SQLDataType::Boolean => DataType::Boolean,
SQLDataType::Date => DataType::Date,
SQLDataType::Time(_, _) => DataType::Time,
SQLDataType::Timestamp(_, _) => DataType::Datetime(TimeUnit::Microseconds, None),
SQLDataType::Interval => DataType::Duration(TimeUnit::Microseconds),
SQLDataType::Array(array_type_def) => match array_type_def {
ArrayElemTypeDef::AngleBracket(inner_type)
| ArrayElemTypeDef::SquareBracket(inner_type) => {
DataType::List(Box::new(map_sql_polars_datatype(inner_type)?))
}
_ => {
return Err(PolarsError::ComputeError(
"SQL Datatype Array(None) was not supported in polars-sql yet!".into(),
))
}
},
_ => {
return Err(PolarsError::ComputeError(
format!("SQL Datatype {data_type:?} was not supported in polars-sql yet!").into(),
))
}
})
}
fn cast_(expr: Expr, data_type: &SQLDataType) -> Result<Expr> {
let polars_type = map_sql_polars_datatype(data_type)?;
Ok(expr.cast(polars_type))
}
fn binary_op_(left: Expr, right: Expr, op: &SQLBinaryOperator) -> Result<Expr> {
Ok(match op {
SQLBinaryOperator::Plus => left + right,
SQLBinaryOperator::Minus => left - right,
SQLBinaryOperator::Multiply => left * right,
SQLBinaryOperator::Divide => left / right,
SQLBinaryOperator::Modulo => left % right,
SQLBinaryOperator::StringConcat => {
left.cast(DataType::String) + right.cast(DataType::String)
}
SQLBinaryOperator::Gt => left.gt(right),
SQLBinaryOperator::Lt => left.lt(right),
SQLBinaryOperator::GtEq => left.gt_eq(right),
SQLBinaryOperator::LtEq => left.lt_eq(right),
SQLBinaryOperator::Eq => left.eq(right),
SQLBinaryOperator::NotEq => left.eq(right).not(),
SQLBinaryOperator::And => left.and(right),
SQLBinaryOperator::Or => left.or(right),
SQLBinaryOperator::Xor => left.xor(right),
_ => {
return Err(PolarsError::ComputeError(
format!("SQL Operator {op:?} was not supported in polars-sql yet!").into(),
))
}
})
}
fn literal_expr(value: &SqlValue) -> Result<Expr> {
Ok(match value {
SqlValue::Number(s, _) => {
// Check for existence of decimal separator dot
if s.contains('.') {
s.parse::<f64>().map(lit).map_err(|_| {
PolarsError::ComputeError(format!("Can't parse literal {s:?}").into())
})
} else {
s.parse::<i64>().map(lit).map_err(|_| {
PolarsError::ComputeError(format!("Can't parse literal {s:?}").into())
})
}?
}
SqlValue::SingleQuotedString(s) => lit(s.clone()),
SqlValue::NationalStringLiteral(s) => lit(s.clone()),
SqlValue::HexStringLiteral(s) => lit(s.clone()),
SqlValue::DoubleQuotedString(s) => lit(s.clone()),
SqlValue::Boolean(b) => lit(*b),
SqlValue::Null => Expr::Literal(LiteralValue::Null),
_ => {
return Err(PolarsError::ComputeError(
format!("Parsing SQL Value {value:?} was not supported in polars-sql yet!").into(),
))
}
})
}
pub fn parse_sql_expr(expr: &SqlExpr) -> Result<Expr> {
Ok(match expr {
SqlExpr::Identifier(e) => col(&e.value),
SqlExpr::BinaryOp { left, op, right } => {
let left = parse_sql_expr(left)?;
let right = parse_sql_expr(right)?;
binary_op_(left, right, op)?
}
SqlExpr::Function(sql_function) => parse_sql_function(sql_function)?,
SqlExpr::Cast {
expr,
data_type,
format: _,
} => cast_(parse_sql_expr(expr)?, data_type)?,
SqlExpr::Nested(expr) => parse_sql_expr(expr)?,
SqlExpr::Value(value) => literal_expr(value)?,
_ => {
return Err(PolarsError::ComputeError(
format!("Expression: {expr:?} was not supported in polars-sql yet!").into(),
))
}
})
}
fn apply_window_spec(expr: Expr, window_type: Option<&WindowType>) -> Result<Expr> {
Ok(match &window_type {
Some(wtype) => match wtype {
WindowType::WindowSpec(window_spec) => {
// Process for simple window specification, partition by first
let partition_by = window_spec
.partition_by
.iter()
.map(parse_sql_expr)
.collect::<Result<Vec<_>>>()?;
expr.over(partition_by)
// Order by and Row range may not be supported at the moment
}
// TODO: make NamedWindow work
WindowType::NamedWindow(_named) => {
return Err(PolarsError::ComputeError(
format!("Expression: {expr:?} was not supported in polars-sql yet!").into(),
))
}
},
None => expr,
})
}
fn parse_sql_function(sql_function: &SQLFunction) -> Result<Expr> {
use sqlparser::ast::{FunctionArg, FunctionArgExpr};
// Function name mostly do not have name space, so it mostly take the first args
let function_name = sql_function.name.0[0].value.to_ascii_lowercase();
let args = sql_function
.args
.iter()
.map(|arg| match arg {
FunctionArg::Named { arg, .. } => arg,
FunctionArg::Unnamed(arg) => arg,
})
.collect::<Vec<_>>();
Ok(
match (
function_name.as_str(),
args.as_slice(),
sql_function.distinct,
) {
("sum", [FunctionArgExpr::Expr(expr)], false) => {
apply_window_spec(parse_sql_expr(expr)?, sql_function.over.as_ref())?.sum()
}
("count", [FunctionArgExpr::Expr(expr)], false) => {
apply_window_spec(parse_sql_expr(expr)?, sql_function.over.as_ref())?.count()
}
("count", [FunctionArgExpr::Expr(expr)], true) => {
apply_window_spec(parse_sql_expr(expr)?, sql_function.over.as_ref())?.n_unique()
}
// Special case for wildcard args to count function.
("count", [FunctionArgExpr::Wildcard], false) => lit(1i32).count(),
_ => {
return Err(PolarsError::ComputeError(
format!(
"Function {function_name:?} with args {args:?} was not supported in polars-sql yet!"
)
.into(),
))
}
},
)
}

View File

@ -0,0 +1,293 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::values::{Column, NuDataFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use polars::{
chunked_array::ChunkedArray,
prelude::{
AnyValue, DataFrame, DataType, Float64Type, IntoSeries, NewChunkedArray,
QuantileInterpolOptions, Series, StringType,
},
};
#[derive(Clone)]
pub struct Summary;
impl PluginCommand for Summary {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars summary"
}
fn usage(&self) -> &str {
"For a dataframe, produces descriptive statistics (summary statistics) for its numeric columns."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.category(Category::Custom("dataframe".into()))
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.named(
"quantiles",
SyntaxShape::Table(vec![]),
"provide optional quantiles",
Some('q'),
)
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "list dataframe descriptives",
example: "[[a b]; [1 1] [1 1]] | polars into-df | polars summary",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"descriptor".to_string(),
vec![
Value::test_string("count"),
Value::test_string("sum"),
Value::test_string("mean"),
Value::test_string("median"),
Value::test_string("std"),
Value::test_string("min"),
Value::test_string("25%"),
Value::test_string("50%"),
Value::test_string("75%"),
Value::test_string("max"),
],
),
Column::new(
"a (i64)".to_string(),
vec![
Value::test_float(2.0),
Value::test_float(2.0),
Value::test_float(1.0),
Value::test_float(1.0),
Value::test_float(0.0),
Value::test_float(1.0),
Value::test_float(1.0),
Value::test_float(1.0),
Value::test_float(1.0),
Value::test_float(1.0),
],
),
Column::new(
"b (i64)".to_string(),
vec![
Value::test_float(2.0),
Value::test_float(2.0),
Value::test_float(1.0),
Value::test_float(1.0),
Value::test_float(0.0),
Value::test_float(1.0),
Value::test_float(1.0),
Value::test_float(1.0),
Value::test_float(1.0),
Value::test_float(1.0),
],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let quantiles: Option<Vec<Value>> = call.get_flag("quantiles")?;
let quantiles = quantiles.map(|values| {
values
.iter()
.map(|value| {
let span = value.span();
match value {
Value::Float { val, .. } => {
if (&0.0..=&1.0).contains(&val) {
Ok(*val)
} else {
Err(ShellError::GenericError {
error: "Incorrect value for quantile".into(),
msg: "value should be between 0 and 1".into(),
span: Some(span),
help: None,
inner: vec![],
})
}
}
Value::Error { error, .. } => Err(*error.clone()),
_ => Err(ShellError::GenericError {
error: "Incorrect value for quantile".into(),
msg: "value should be a float".into(),
span: Some(span),
help: None,
inner: vec![],
}),
}
})
.collect::<Result<Vec<f64>, ShellError>>()
});
let quantiles = match quantiles {
Some(quantiles) => quantiles?,
None => vec![0.25, 0.50, 0.75],
};
let mut quantiles_labels = quantiles
.iter()
.map(|q| Some(format!("{}%", q * 100.0)))
.collect::<Vec<Option<String>>>();
let mut labels = vec![
Some("count".to_string()),
Some("sum".to_string()),
Some("mean".to_string()),
Some("median".to_string()),
Some("std".to_string()),
Some("min".to_string()),
];
labels.append(&mut quantiles_labels);
labels.push(Some("max".to_string()));
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let names = ChunkedArray::<StringType>::from_slice_options("descriptor", &labels).into_series();
let head = std::iter::once(names);
let tail = df
.as_ref()
.get_columns()
.iter()
.filter(|col| !matches!(col.dtype(), &DataType::Object("object", _)))
.map(|col| {
let count = col.len() as f64;
let sum = col.sum_as_series().ok().and_then(|series| {
series
.cast(&DataType::Float64)
.ok()
.and_then(|ca| match ca.get(0) {
Ok(AnyValue::Float64(v)) => Some(v),
_ => None,
})
});
let mean = match col.mean_as_series().get(0) {
Ok(AnyValue::Float64(v)) => Some(v),
_ => None,
};
let median = match col.median_as_series() {
Ok(v) => match v.get(0) {
Ok(AnyValue::Float64(v)) => Some(v),
_ => None,
},
_ => None,
};
let std = match col.std_as_series(0) {
Ok(v) => match v.get(0) {
Ok(AnyValue::Float64(v)) => Some(v),
_ => None,
},
_ => None,
};
let min = col.min_as_series().ok().and_then(|series| {
series
.cast(&DataType::Float64)
.ok()
.and_then(|ca| match ca.get(0) {
Ok(AnyValue::Float64(v)) => Some(v),
_ => None,
})
});
let mut quantiles = quantiles
.clone()
.into_iter()
.map(|q| {
col.quantile_as_series(q, QuantileInterpolOptions::default())
.ok()
.and_then(|ca| ca.cast(&DataType::Float64).ok())
.and_then(|ca| match ca.get(0) {
Ok(AnyValue::Float64(v)) => Some(v),
_ => None,
})
})
.collect::<Vec<Option<f64>>>();
let max = col.max_as_series().ok().and_then(|series| {
series
.cast(&DataType::Float64)
.ok()
.and_then(|ca| match ca.get(0) {
Ok(AnyValue::Float64(v)) => Some(v),
_ => None,
})
});
let mut descriptors = vec![Some(count), sum, mean, median, std, min];
descriptors.append(&mut quantiles);
descriptors.push(max);
let name = format!("{} ({})", col.name(), col.dtype());
ChunkedArray::<Float64Type>::from_slice_options(&name, &descriptors).into_series()
});
let res = head.chain(tail).collect::<Vec<Series>>();
let polars_df = DataFrame::new(res).map_err(|e| ShellError::GenericError {
error: "Dataframe Error".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let df = NuDataFrame::new(df.from_lazy, polars_df);
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use crate::test::test_polars_plugin_command;
use super::*;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&Summary)
}
}

View File

@ -0,0 +1,162 @@
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use polars::prelude::DataType;
use crate::{
dataframe::values::Column,
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::values::NuDataFrame;
#[derive(Clone)]
pub struct TakeDF;
impl PluginCommand for TakeDF {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars take"
}
fn usage(&self) -> &str {
"Creates new dataframe using the given indices."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"indices",
SyntaxShape::Any,
"list of indices used to take data",
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Takes selected rows from dataframe",
example: r#"let df = ([[a b]; [4 1] [5 2] [4 3]] | polars into-df);
let indices = ([0 2] | polars into-df);
$df | polars take $indices"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(4), Value::test_int(4)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Takes selected rows from series",
example: r#"let series = ([4 1 5 2 4 3] | polars into-df);
let indices = ([0 2] | polars into-df);
$series | polars take $indices"#,
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![Value::test_int(4), Value::test_int(5)],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let index_value: Value = call.req(0)?;
let index_span = index_value.span();
let index = NuDataFrame::try_from_value(plugin, &index_value)?.as_series(index_span)?;
let casted = match index.dtype() {
DataType::UInt32 | DataType::UInt64 | DataType::Int32 | DataType::Int64 => index
.cast(&DataType::UInt32)
.map_err(|e| ShellError::GenericError {
error: "Error casting index list".into(),
msg: e.to_string(),
span: Some(index_span),
help: None,
inner: vec![],
}),
_ => Err(ShellError::GenericError {
error: "Incorrect type".into(),
msg: "Series with incorrect type".into(),
span: Some(call.head),
help: Some("Consider using a Series with type int type".into()),
inner: vec![],
}),
}?;
let indices = casted.u32().map_err(|e| ShellError::GenericError {
error: "Error casting index list".into(),
msg: e.to_string(),
span: Some(index_span),
help: None,
inner: vec![],
})?;
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let polars_df = df
.to_polars()
.take(indices)
.map_err(|e| ShellError::GenericError {
error: "Error taking values".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let df = NuDataFrame::new(df.from_lazy, polars_df);
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&TakeDF)
}
}

View File

@ -0,0 +1,87 @@
use std::{fs::File, path::PathBuf};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Spanned, SyntaxShape,
Type, Value,
};
use polars::prelude::{IpcWriter, SerWriter};
use crate::PolarsPlugin;
use super::super::values::NuDataFrame;
#[derive(Clone)]
pub struct ToArrow;
impl PluginCommand for ToArrow {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars to-arrow"
}
fn usage(&self) -> &str {
"Saves dataframe to arrow file."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("file", SyntaxShape::Filepath, "file path to save dataframe")
.input_output_type(Type::Custom("dataframe".into()), Type::Any)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Saves dataframe to arrow file",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars to-arrow test.arrow",
result: None,
}]
}
fn run(
&self,
plugin: &Self::Plugin,
_engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, call, input).map_err(|e| e.into())
}
}
fn command(
plugin: &PolarsPlugin,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let file_name: Spanned<PathBuf> = call.req(0)?;
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let mut file = File::create(&file_name.item).map_err(|e| ShellError::GenericError {
error: "Error with file name".into(),
msg: e.to_string(),
span: Some(file_name.span),
help: None,
inner: vec![],
})?;
IpcWriter::new(&mut file)
.finish(&mut df.to_polars())
.map_err(|e| ShellError::GenericError {
error: "Error saving file".into(),
msg: e.to_string(),
span: Some(file_name.span),
help: None,
inner: vec![],
})?;
let file_value = Value::string(format!("saved {:?}", &file_name.item), file_name.span);
Ok(PipelineData::Value(
Value::list(vec![file_value], call.head),
None,
))
}

View File

@ -0,0 +1,117 @@
use std::{fs::File, path::PathBuf};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Spanned, SyntaxShape,
Type, Value,
};
use polars_io::avro::{AvroCompression, AvroWriter};
use polars_io::SerWriter;
use crate::PolarsPlugin;
use super::super::values::NuDataFrame;
#[derive(Clone)]
pub struct ToAvro;
impl PluginCommand for ToAvro {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars to-avro"
}
fn usage(&self) -> &str {
"Saves dataframe to avro file."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.named(
"compression",
SyntaxShape::String,
"use compression, supports deflate or snappy",
Some('c'),
)
.required("file", SyntaxShape::Filepath, "file path to save dataframe")
.input_output_type(Type::Custom("dataframe".into()), Type::Any)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Saves dataframe to avro file",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars to-avro test.avro",
result: None,
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn get_compression(call: &EvaluatedCall) -> Result<Option<AvroCompression>, ShellError> {
if let Some((compression, span)) = call
.get_flag_value("compression")
.map(|e| e.as_str().map(|s| (s.to_owned(), e.span())))
.transpose()?
{
match compression.as_ref() {
"snappy" => Ok(Some(AvroCompression::Snappy)),
"deflate" => Ok(Some(AvroCompression::Deflate)),
_ => Err(ShellError::IncorrectValue {
msg: "compression must be one of deflate or snappy".to_string(),
val_span: span,
call_span: span,
}),
}
} else {
Ok(None)
}
}
fn command(
plugin: &PolarsPlugin,
_engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let file_name: Spanned<PathBuf> = call.req(0)?;
let compression = get_compression(call)?;
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let file = File::create(&file_name.item).map_err(|e| ShellError::GenericError {
error: "Error with file name".into(),
msg: e.to_string(),
span: Some(file_name.span),
help: None,
inner: vec![],
})?;
AvroWriter::new(file)
.with_compression(compression)
.finish(&mut df.to_polars())
.map_err(|e| ShellError::GenericError {
error: "Error saving file".into(),
msg: e.to_string(),
span: Some(file_name.span),
help: None,
inner: vec![],
})?;
let file_value = Value::string(format!("saved {:?}", &file_name.item), file_name.span);
Ok(PipelineData::Value(
Value::list(vec![file_value], call.head),
None,
))
}

View File

@ -0,0 +1,133 @@
use std::{fs::File, path::PathBuf};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Spanned, SyntaxShape,
Type, Value,
};
use polars::prelude::{CsvWriter, SerWriter};
use crate::PolarsPlugin;
use super::super::values::NuDataFrame;
#[derive(Clone)]
pub struct ToCSV;
impl PluginCommand for ToCSV {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars to-csv"
}
fn usage(&self) -> &str {
"Saves dataframe to CSV file."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("file", SyntaxShape::Filepath, "file path to save dataframe")
.named(
"delimiter",
SyntaxShape::String,
"file delimiter character",
Some('d'),
)
.switch("no-header", "Indicates if file doesn't have header", None)
.input_output_type(Type::Custom("dataframe".into()), Type::Any)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Saves dataframe to CSV file",
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr to-csv test.csv",
result: None,
},
Example {
description: "Saves dataframe to CSV file using other delimiter",
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr to-csv test.csv --delimiter '|'",
result: None,
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
_engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, call, input).map_err(|e| e.into())
}
}
fn command(
plugin: &PolarsPlugin,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let file_name: Spanned<PathBuf> = call.req(0)?;
let delimiter: Option<Spanned<String>> = call.get_flag("delimiter")?;
let no_header: bool = call.has_flag("no-header")?;
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let mut file = File::create(&file_name.item).map_err(|e| ShellError::GenericError {
error: "Error with file name".into(),
msg: e.to_string(),
span: Some(file_name.span),
help: None,
inner: vec![],
})?;
let writer = CsvWriter::new(&mut file);
let writer = if no_header {
writer.include_header(false)
} else {
writer.include_header(true)
};
let mut writer = match delimiter {
None => writer,
Some(d) => {
if d.item.len() != 1 {
return Err(ShellError::GenericError {
error: "Incorrect delimiter".into(),
msg: "Delimiter has to be one char".into(),
span: Some(d.span),
help: None,
inner: vec![],
});
} else {
let delimiter = match d.item.chars().next() {
Some(d) => d as u8,
None => unreachable!(),
};
writer.with_separator(delimiter)
}
}
};
writer
.finish(&mut df.to_polars())
.map_err(|e| ShellError::GenericError {
error: "Error writing to file".into(),
msg: e.to_string(),
span: Some(file_name.span),
help: None,
inner: vec![],
})?;
let file_value = Value::string(format!("saved {:?}", &file_name.item), file_name.span);
Ok(PipelineData::Value(
Value::list(vec![file_value], call.head),
None,
))
}

View File

@ -0,0 +1,201 @@
use crate::{
dataframe::values::NuSchema,
values::{to_pipeline_data, Column, CustomValueSupport},
PolarsPlugin,
};
use super::super::values::NuDataFrame;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value,
};
use polars::{
prelude::{AnyValue, DataType, Field, NamedFrom},
series::Series,
};
#[derive(Clone)]
pub struct ToDataFrame;
impl PluginCommand for ToDataFrame {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars into-df"
}
fn usage(&self) -> &str {
"Converts a list, table or record into a dataframe."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.named(
"schema",
SyntaxShape::Record(vec![]),
r#"Polars Schema in format [{name: str}]. CSV, JSON, and JSONL files"#,
Some('s'),
)
.input_output_type(Type::Any, Type::Custom("dataframe".into()))
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Takes a dictionary and creates a dataframe",
example: "[[a b];[1 2] [3 4]] | polars into-df",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Takes a list of tables and creates a dataframe",
example: "[[1 2 a] [3 4 b] [5 6 c]] | polars into-df",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"0".to_string(),
vec![Value::test_int(1), Value::test_int(3), Value::test_int(5)],
),
Column::new(
"1".to_string(),
vec![Value::test_int(2), Value::test_int(4), Value::test_int(6)],
),
Column::new(
"2".to_string(),
vec![
Value::test_string("a"),
Value::test_string("b"),
Value::test_string("c"),
],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Takes a list and creates a dataframe",
example: "[a b c] | polars into-df",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
Value::test_string("a"),
Value::test_string("b"),
Value::test_string("c"),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Takes a list of booleans and creates a dataframe",
example: "[true true false] | polars into-df",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(false),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Convert to a dataframe and provide a schema",
example: "{a: 1, b: {a: [1 2 3]}, c: [a b c]}| polars into-df -s {a: u8, b: {a: list<u64>}, c: list<str>}",
result: Some(
NuDataFrame::try_from_series_vec(vec![
Series::new("a", &[1u8]),
{
let dtype = DataType::Struct(vec![Field::new("a", DataType::List(Box::new(DataType::UInt64)))]);
let vals = vec![AnyValue::StructOwned(
Box::new((vec![AnyValue::List(Series::new("a", &[1u64, 2, 3]))], vec![Field::new("a", DataType::String)]))); 1];
Series::from_any_values_and_dtype("b", &vals, &dtype, false)
.expect("Struct series should not fail")
},
{
let dtype = DataType::List(Box::new(DataType::String));
let vals = vec![AnyValue::List(Series::new("c", &["a", "b", "c"]))];
Series::from_any_values_and_dtype("c", &vals, &dtype, false)
.expect("List series should not fail")
}
], Span::test_data())
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Convert to a dataframe and provide a schema that adds a new column",
example: r#"[[a b]; [1 "foo"] [2 "bar"]] | polars into-df -s {a: u8, b:str, c:i64} | polars fill-null 3"#,
result: Some(NuDataFrame::try_from_series_vec(vec![
Series::new("a", [1u8, 2]),
Series::new("b", ["foo", "bar"]),
Series::new("c", [3i64, 3]),
], Span::test_data())
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let maybe_schema = call
.get_flag("schema")?
.map(|schema| NuSchema::try_from(&schema))
.transpose()?;
let df = NuDataFrame::try_from_iter(plugin, input.into_iter(), maybe_schema.clone())?;
to_pipeline_data(plugin, engine, call.head, df).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod test {
use crate::test::test_polars_plugin_command;
use super::*;
use nu_protocol::ShellError;
#[test]
fn test_into_df() -> Result<(), ShellError> {
test_polars_plugin_command(&ToDataFrame)
}
}

View File

@ -0,0 +1,89 @@
use std::{fs::File, io::BufWriter, path::PathBuf};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Spanned, SyntaxShape,
Type, Value,
};
use polars::prelude::{JsonWriter, SerWriter};
use crate::PolarsPlugin;
use super::super::values::NuDataFrame;
#[derive(Clone)]
pub struct ToJsonLines;
impl PluginCommand for ToJsonLines {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars to-jsonl"
}
fn usage(&self) -> &str {
"Saves dataframe to a JSON lines file."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("file", SyntaxShape::Filepath, "file path to save dataframe")
.input_output_type(Type::Custom("dataframe".into()), Type::Any)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Saves dataframe to JSON lines file",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars to-jsonl test.jsonl",
result: None,
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
_engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let file_name: Spanned<PathBuf> = call.req(0)?;
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let file = File::create(&file_name.item).map_err(|e| ShellError::GenericError {
error: "Error with file name".into(),
msg: e.to_string(),
span: Some(file_name.span),
help: None,
inner: vec![],
})?;
let buf_writer = BufWriter::new(file);
JsonWriter::new(buf_writer)
.finish(&mut df.to_polars())
.map_err(|e| ShellError::GenericError {
error: "Error saving file".into(),
msg: e.to_string(),
span: Some(file_name.span),
help: None,
inner: vec![],
})?;
let file_value = Value::string(format!("saved {:?}", &file_name.item), file_name.span);
Ok(PipelineData::Value(
Value::list(vec![file_value], call.head),
None,
))
}

View File

@ -0,0 +1,144 @@
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
record, Category, Example, LabeledError, PipelineData, ShellError, Signature, Span,
SyntaxShape, Type, Value,
};
use crate::{dataframe::values::NuExpression, values::CustomValueSupport, PolarsPlugin};
use super::super::values::NuDataFrame;
#[derive(Clone)]
pub struct ToNu;
impl PluginCommand for ToNu {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars into-nu"
}
fn usage(&self) -> &str {
"Converts a dataframe or an expression into into nushell value for access and exploration."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.named(
"rows",
SyntaxShape::Number,
"number of rows to be shown",
Some('n'),
)
.switch("tail", "shows tail rows", Some('t'))
.input_output_types(vec![
(Type::Custom("expression".into()), Type::Any),
(Type::Custom("dataframe".into()), Type::Table(vec![])),
])
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
let rec_1 = Value::test_record(record! {
"index" => Value::test_int(0),
"a" => Value::test_int(1),
"b" => Value::test_int(2),
});
let rec_2 = Value::test_record(record! {
"index" => Value::test_int(1),
"a" => Value::test_int(3),
"b" => Value::test_int(4),
});
let rec_3 = Value::test_record(record! {
"index" => Value::test_int(2),
"a" => Value::test_int(3),
"b" => Value::test_int(4),
});
vec![
Example {
description: "Shows head rows from dataframe",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars into-nu",
result: Some(Value::list(vec![rec_1, rec_2], Span::test_data())),
},
Example {
description: "Shows tail rows from dataframe",
example:
"[[a b]; [1 2] [5 6] [3 4]] | polars into-df | polars into-nu --tail --rows 1",
result: Some(Value::list(vec![rec_3], Span::test_data())),
},
Example {
description: "Convert a col expression into a nushell value",
example: "polars col a | polars into-nu",
result: Some(Value::test_record(record! {
"expr" => Value::test_string("column"),
"value" => Value::test_string("a"),
})),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
_engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
if NuDataFrame::can_downcast(&value) {
dataframe_command(plugin, call, value)
} else {
expression_command(plugin, call, value)
}
.map_err(|e| e.into())
}
}
fn dataframe_command(
plugin: &PolarsPlugin,
call: &EvaluatedCall,
input: Value,
) -> Result<PipelineData, ShellError> {
let rows: Option<usize> = call.get_flag("rows")?;
let tail: bool = call.has_flag("tail")?;
let df = NuDataFrame::try_from_value(plugin, &input)?;
let values = if tail {
df.tail(rows, call.head)?
} else {
// if rows is specified, return those rows, otherwise return everything
if rows.is_some() {
df.head(rows, call.head)?
} else {
df.head(Some(df.height()), call.head)?
}
};
let value = Value::list(values, call.head);
Ok(PipelineData::Value(value, None))
}
fn expression_command(
plugin: &PolarsPlugin,
call: &EvaluatedCall,
input: Value,
) -> Result<PipelineData, ShellError> {
let expr = NuExpression::try_from_value(plugin, &input)?;
let value = expr.to_value(call.head)?;
Ok(PipelineData::Value(value, None))
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&ToNu)
}
}

View File

@ -0,0 +1,87 @@
use std::{fs::File, path::PathBuf};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Spanned, SyntaxShape,
Type, Value,
};
use polars::prelude::ParquetWriter;
use crate::PolarsPlugin;
use super::super::values::NuDataFrame;
#[derive(Clone)]
pub struct ToParquet;
impl PluginCommand for ToParquet {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars to-parquet"
}
fn usage(&self) -> &str {
"Saves dataframe to parquet file."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("file", SyntaxShape::Filepath, "file path to save dataframe")
.input_output_type(Type::Custom("dataframe".into()), Type::Any)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Saves dataframe to parquet file",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars to-parquet test.parquet",
result: None,
}]
}
fn run(
&self,
plugin: &Self::Plugin,
_engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let file_name: Spanned<PathBuf> = call.req(0)?;
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let file = File::create(&file_name.item).map_err(|e| ShellError::GenericError {
error: "Error with file name".into(),
msg: e.to_string(),
span: Some(file_name.span),
help: None,
inner: vec![],
})?;
let mut polars_df = df.to_polars();
ParquetWriter::new(file)
.finish(&mut polars_df)
.map_err(|e| ShellError::GenericError {
error: "Error saving file".into(),
msg: e.to_string(),
span: Some(file_name.span),
help: None,
inner: vec![],
})?;
let file_value = Value::string(format!("saved {:?}", &file_name.item), file_name.span);
Ok(PipelineData::Value(
Value::list(vec![file_value], call.head),
None,
))
}

View File

@ -0,0 +1,195 @@
use super::super::values::{Column, NuDataFrame};
use crate::{
dataframe::values::{NuExpression, NuLazyFrame},
values::{to_pipeline_data, CustomValueSupport, PolarsPluginObject},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
#[derive(Clone)]
pub struct WithColumn;
impl PluginCommand for WithColumn {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars with-column"
}
fn usage(&self) -> &str {
"Adds a series to the dataframe."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.named("name", SyntaxShape::String, "new column name", Some('n'))
.rest(
"series or expressions",
SyntaxShape::Any,
"series to be added or expressions used to define the new columns",
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe or lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Adds a series to the dataframe",
example: r#"[[a b]; [1 2] [3 4]]
| polars into-df
| polars with-column ([5 6] | polars into-df) --name c"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
Column::new(
"c".to_string(),
vec![Value::test_int(5), Value::test_int(6)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Adds a series to the dataframe",
example: r#"[[a b]; [1 2] [3 4]]
| polars into-lazy
| polars with-column [
((polars col a) * 2 | polars as "c")
((polars col a) * 3 | polars as "d")
]
| polars collect"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
Column::new(
"c".to_string(),
vec![Value::test_int(2), Value::test_int(6)],
),
Column::new(
"d".to_string(),
vec![Value::test_int(3), Value::test_int(9)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuDataFrame(df) => command_eager(plugin, engine, call, df),
PolarsPluginObject::NuLazyFrame(lazy) => command_lazy(plugin, engine, call, lazy),
_ => Err(ShellError::CantConvert {
to_type: "lazy or eager dataframe".into(),
from_type: value.get_type().to_string(),
span: value.span(),
help: None,
}),
}
.map_err(LabeledError::from)
}
}
fn command_eager(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
df: NuDataFrame,
) -> Result<PipelineData, ShellError> {
let new_column: Value = call.req(0)?;
let column_span = new_column.span();
if NuExpression::can_downcast(&new_column) {
let vals: Vec<Value> = call.rest(0)?;
let value = Value::list(vals, call.head);
let expressions = NuExpression::extract_exprs(plugin, value)?;
let lazy = NuLazyFrame::new(true, df.lazy().to_polars().with_columns(&expressions));
let df = lazy.collect(call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
} else {
let mut other = NuDataFrame::try_from_value(plugin, &new_column)?.as_series(column_span)?;
let name = match call.get_flag::<String>("name")? {
Some(name) => name,
None => other.name().to_string(),
};
let series = other.rename(&name).clone();
let mut polars_df = df.to_polars();
polars_df
.with_column(series)
.map_err(|e| ShellError::GenericError {
error: "Error adding column to dataframe".into(),
msg: e.to_string(),
span: Some(column_span),
help: None,
inner: vec![],
})?;
let df = NuDataFrame::new(df.from_lazy, polars_df);
to_pipeline_data(plugin, engine, call.head, df)
}
}
fn command_lazy(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
lazy: NuLazyFrame,
) -> Result<PipelineData, ShellError> {
let vals: Vec<Value> = call.rest(0)?;
let value = Value::list(vals, call.head);
let expressions = NuExpression::extract_exprs(plugin, value)?;
let lazy: NuLazyFrame = lazy.to_polars().with_columns(&expressions).into();
to_pipeline_data(plugin, engine, call.head, lazy)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&WithColumn)
}
}

View File

@ -0,0 +1,88 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::values::NuExpression;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
record, Category, Example, LabeledError, PipelineData, Signature, SyntaxShape, Type, Value,
};
#[derive(Clone)]
pub struct ExprAlias;
impl PluginCommand for ExprAlias {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars as"
}
fn usage(&self) -> &str {
"Creates an alias expression."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"Alias name",
SyntaxShape::String,
"Alias name for the expression",
)
.input_output_type(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
)
.category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Creates and alias expression",
example: "polars col a | polars as new_a | polars into-nu",
result: {
let record = Value::test_record(record! {
"expr" => Value::test_record(record! {
"expr" => Value::test_string("column"),
"value" => Value::test_string("a"),
}),
"alias" => Value::test_string("new_a"),
});
Some(record)
},
}]
}
fn search_terms(&self) -> Vec<&str> {
vec!["aka", "abbr", "otherwise"]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let alias: String = call.req(0)?;
let expr = NuExpression::try_from_pipeline(plugin, input, call.head)?;
let expr: NuExpression = expr.to_polars().alias(alias.as_str()).into();
to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), nu_protocol::ShellError> {
test_polars_plugin_command(&ExprAlias)
}
}

View File

@ -0,0 +1,79 @@
use crate::{
dataframe::values::{Column, NuDataFrame, NuExpression},
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value,
};
use polars::prelude::arg_where;
#[derive(Clone)]
pub struct ExprArgWhere;
impl PluginCommand for ExprArgWhere {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars arg-where"
}
fn usage(&self) -> &str {
"Creates an expression that returns the arguments where expression is true."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("column name", SyntaxShape::Any, "Expression to evaluate")
.input_output_type(Type::Any, Type::Custom("expression".into()))
.category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Return a dataframe where the value match the expression",
example: "let df = ([[a b]; [one 1] [two 2] [three 3]] | polars into-df);
$df | polars select (polars arg-where ((polars col b) >= 2) | polars as b_arg)",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"b_arg".to_string(),
vec![Value::test_int(1), Value::test_int(2)],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn search_terms(&self) -> Vec<&str> {
vec!["condition", "match", "if"]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
_input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value: Value = call.req(0)?;
let expr = NuExpression::try_from_value(plugin, &value)?;
let expr: NuExpression = arg_where(expr.to_polars()).into();
to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), nu_protocol::ShellError> {
test_polars_plugin_command(&ExprArgWhere)
}
}

View File

@ -0,0 +1,70 @@
use crate::{dataframe::values::NuExpression, values::to_pipeline_data, PolarsPlugin};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
record, Category, Example, LabeledError, PipelineData, Signature, SyntaxShape, Type, Value,
};
use polars::prelude::col;
#[derive(Clone)]
pub struct ExprCol;
impl PluginCommand for ExprCol {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars col"
}
fn usage(&self) -> &str {
"Creates a named column expression."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"column name",
SyntaxShape::String,
"Name of column to be used",
)
.input_output_type(Type::Any, Type::Custom("expression".into()))
.category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Creates a named column expression and converts it to a nu object",
example: "polars col a | polars into-nu",
result: Some(Value::test_record(record! {
"expr" => Value::test_string("column"),
"value" => Value::test_string("a"),
})),
}]
}
fn search_terms(&self) -> Vec<&str> {
vec!["create"]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
_input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let name: String = call.req(0)?;
let expr: NuExpression = col(name.as_str()).into();
to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), nu_protocol::ShellError> {
test_polars_plugin_command(&ExprCol)
}
}

View File

@ -0,0 +1,109 @@
use crate::{
dataframe::values::{Column, NuDataFrame, NuExpression},
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value,
};
use polars::prelude::concat_str;
#[derive(Clone)]
pub struct ExprConcatStr;
impl PluginCommand for ExprConcatStr {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars concat-str"
}
fn usage(&self) -> &str {
"Creates a concat string expression."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"separator",
SyntaxShape::String,
"Separator used during the concatenation",
)
.required(
"concat expressions",
SyntaxShape::List(Box::new(SyntaxShape::Any)),
"Expression(s) that define the string concatenation",
)
.input_output_type(Type::Any, Type::Custom("expression".into()))
.category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Creates a concat string expression",
example: r#"let df = ([[a b c]; [one two 1] [three four 2]] | polars into-df);
$df | polars with-column ((polars concat-str "-" [(polars col a) (polars col b) ((polars col c) * 2)]) | polars as concat)"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_string("one"), Value::test_string("three")],
),
Column::new(
"b".to_string(),
vec![Value::test_string("two"), Value::test_string("four")],
),
Column::new(
"c".to_string(),
vec![Value::test_int(1), Value::test_int(2)],
),
Column::new(
"concat".to_string(),
vec![
Value::test_string("one-two-2"),
Value::test_string("three-four-4"),
],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn search_terms(&self) -> Vec<&str> {
vec!["join", "connect", "update"]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
_input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let separator: String = call.req(0)?;
let value: Value = call.req(1)?;
let expressions = NuExpression::extract_exprs(plugin, value)?;
let expr: NuExpression = concat_str(expressions, &separator, false).into();
to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod test {
use crate::test::test_polars_plugin_command;
use super::*;
#[test]
fn test_examples() -> Result<(), nu_protocol::ShellError> {
test_polars_plugin_command(&ExprConcatStr)
}
}

View File

@ -0,0 +1,165 @@
use super::super::values::NuExpression;
use crate::{
dataframe::values::{Column, NuDataFrame},
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use chrono::{DateTime, FixedOffset};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Spanned,
SyntaxShape, Type, Value,
};
use polars::{
datatypes::{DataType, TimeUnit},
prelude::NamedFrom,
series::Series,
};
#[derive(Clone)]
pub struct ExprDatePart;
impl PluginCommand for ExprDatePart {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars datepart"
}
fn usage(&self) -> &str {
"Creates an expression for capturing the specified datepart in a column."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"Datepart name",
SyntaxShape::String,
"Part of the date to capture. Possible values are year, quarter, month, week, weekday, day, hour, minute, second, millisecond, microsecond, nanosecond",
)
.input_output_type(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
)
.category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
let dt = DateTime::<FixedOffset>::parse_from_str(
"2021-12-30T01:02:03.123456789 +0000",
"%Y-%m-%dT%H:%M:%S.%9f %z",
)
.expect("date calculation should not fail in test");
vec![
Example {
description: "Creates an expression to capture the year date part",
example: r#"[["2021-12-30T01:02:03.123456789"]] | polars into-df | polars as-datetime "%Y-%m-%dT%H:%M:%S.%9f" | polars with-column [(polars col datetime | polars datepart year | polars as datetime_year )]"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new("datetime".to_string(), vec![Value::test_date(dt)]),
Column::new("datetime_year".to_string(), vec![Value::test_int(2021)]),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Creates an expression to capture multiple date parts",
example: r#"[["2021-12-30T01:02:03.123456789"]] | polars into-df | polars as-datetime "%Y-%m-%dT%H:%M:%S.%9f" |
polars with-column [ (polars col datetime | polars datepart year | polars as datetime_year ),
(polars col datetime | polars datepart month | polars as datetime_month ),
(polars col datetime | polars datepart day | polars as datetime_day ),
(polars col datetime | polars datepart hour | polars as datetime_hour ),
(polars col datetime | polars datepart minute | polars as datetime_minute ),
(polars col datetime | polars datepart second | polars as datetime_second ),
(polars col datetime | polars datepart nanosecond | polars as datetime_ns ) ]"#,
result: Some(
NuDataFrame::try_from_series_vec(
vec![
Series::new("datetime", &[dt.timestamp_nanos_opt()])
.cast(&DataType::Datetime(TimeUnit::Nanoseconds, None))
.expect("Error casting to datetime type"),
Series::new("datetime_year", &[2021_i64]), // i32 was coerced to i64
Series::new("datetime_month", &[12_i8]),
Series::new("datetime_day", &[30_i8]),
Series::new("datetime_hour", &[1_i8]),
Series::new("datetime_minute", &[2_i8]),
Series::new("datetime_second", &[3_i8]),
Series::new("datetime_ns", &[123456789_i64]), // i32 was coerced to i64
],
Span::test_data(),
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn search_terms(&self) -> Vec<&str> {
vec![
"year",
"month",
"week",
"weekday",
"quarter",
"day",
"hour",
"minute",
"second",
"millisecond",
"microsecond",
"nanosecond",
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let part: Spanned<String> = call.req(0)?;
let expr = NuExpression::try_from_pipeline(plugin, input, call.head)?;
let expr_dt = expr.to_polars().dt();
let expr: NuExpression = match part.item.as_str() {
"year" => expr_dt.year(),
"quarter" => expr_dt.quarter(),
"month" => expr_dt.month(),
"week" => expr_dt.week(),
"day" => expr_dt.day(),
"hour" => expr_dt.hour(),
"minute" => expr_dt.minute(),
"second" => expr_dt.second(),
"millisecond" => expr_dt.millisecond(),
"microsecond" => expr_dt.microsecond(),
"nanosecond" => expr_dt.nanosecond(),
_ => {
return Err(LabeledError::from(ShellError::UnsupportedInput {
msg: format!("{} is not a valid datepart, expected one of year, month, day, hour, minute, second, millisecond, microsecond, nanosecond", part.item),
input: "value originates from here".to_string(),
msg_span: call.head,
input_span: part.span,
}))
}
}.into();
to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&ExprDatePart)
}
}

View File

@ -0,0 +1,645 @@
/// Definition of multiple Expression commands using a macro rule
/// All of these expressions have an identical body and only require
/// to have a change in the name, description and expression function
use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame};
use crate::values::{to_pipeline_data, CustomValueSupport};
use crate::PolarsPlugin;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
// The structs defined in this file are structs that form part of other commands
// since they share a similar name
macro_rules! expr_command {
($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident) => {
#[derive(Clone)]
pub struct $command;
impl PluginCommand for $command {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
$name
}
fn usage(&self) -> &str {
$desc
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.usage($desc)
.input_output_type(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
)
.category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
$examples
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let expr = NuExpression::try_from_pipeline(plugin, input, call.head)
.map_err(LabeledError::from)?;
let expr: NuExpression = expr.to_polars().$func().into();
to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod $test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&$command)
}
}
};
($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident, $ddof: expr) => {
#[derive(Clone)]
pub struct $command;
impl PluginCommand for $command {
type Plugin = PolarsPlugin;
fn signature(&self) -> Signature {
Signature::build(self.name())
.usage($desc)
.input_output_type(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
)
.category(Category::Custom("expression".into()))
.plugin_examples($examples)
}
fn run(
&self,
_plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let expr = NuExpression::try_from_pipeline(input, call.head)
.map_err(LabeledError::from)?;
let expr: NuExpression = expr.into_polars().$func($ddof).into();
to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod $test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&$command)
}
}
};
}
// The structs defined in this file are structs that form part of other commands
// since they share a similar name
macro_rules! lazy_expr_command {
($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident) => {
#[derive(Clone)]
pub struct $command;
impl PluginCommand for $command {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
$name
}
fn usage(&self) -> &str {
$desc
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.usage($desc)
.input_output_types(vec![
(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
$examples
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
if NuDataFrame::can_downcast(&value) || NuLazyFrame::can_downcast(&value) {
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)
.map_err(LabeledError::from)?;
let lazy = NuLazyFrame::new(
lazy.from_eager,
lazy.to_polars()
.$func()
.map_err(|e| ShellError::GenericError {
error: "Dataframe Error".into(),
msg: e.to_string(),
help: None,
span: None,
inner: vec![],
})
.map_err(LabeledError::from)?,
);
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
} else {
let expr =
NuExpression::try_from_value(plugin, &value).map_err(LabeledError::from)?;
let expr: NuExpression = expr.to_polars().$func().into();
to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
}
}
}
#[cfg(test)]
mod $test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&$command)
}
}
};
($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident, $ddof: expr) => {
#[derive(Clone)]
pub struct $command;
impl PluginCommand for $command {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
$name
}
fn usage(&self) -> &str {
$desc
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_types(vec![
(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
$examples
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
if NuDataFrame::can_downcast(&value) || NuLazyFrame::can_downcast(&value) {
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)
.map_err(LabeledError::from)?;
let lazy = NuLazyFrame::new(
lazy.from_eager,
lazy.to_polars()
.$func($ddof)
.map_err(|e| ShellError::GenericError {
error: "Dataframe Error".into(),
msg: e.to_string(),
help: None,
span: None,
inner: vec![],
})
.map_err(LabeledError::from)?,
);
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
} else {
let expr = NuExpression::try_from_value(plugin, &value)?;
let expr: NuExpression = expr.to_polars().$func($ddof).into();
to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
}
}
}
#[cfg(test)]
mod $test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&$command)
}
}
};
}
// ExprList command
// Expands to a command definition for a list expression
expr_command!(
ExprList,
"polars implode",
"Aggregates a group to a Series.",
vec![Example {
description: "",
example: "",
result: None,
}],
implode,
test_implode
);
// ExprAggGroups command
// Expands to a command definition for a agg groups expression
expr_command!(
ExprAggGroups,
"polars agg-groups",
"Creates an agg_groups expression.",
vec![Example {
description: "",
example: "",
result: None,
}],
agg_groups,
test_groups
);
// ExprCount command
// Expands to a command definition for a count expression
expr_command!(
ExprCount,
"polars count",
"Creates a count expression.",
vec![Example {
description: "",
example: "",
result: None,
}],
count,
test_count
);
// ExprNot command
// Expands to a command definition for a not expression
expr_command!(
ExprNot,
"polars expr-not",
"Creates a not expression.",
vec![Example {
description: "Creates a not expression",
example: "(polars col a) > 2) | polars expr-not",
result: None,
},],
not,
test_not
);
// ExprMax command
// Expands to a command definition for max aggregation
lazy_expr_command!(
ExprMax,
"polars max",
"Creates a max expression or aggregates columns to their max value.",
vec![
Example {
description: "Max value from columns in a dataframe",
example: "[[a b]; [6 2] [1 4] [4 1]] | polars into-df | polars max",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_int(6)],),
Column::new("b".to_string(), vec![Value::test_int(4)],),
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Max aggregation for a group-by",
example: r#"[[a b]; [one 2] [one 4] [two 1]]
| polars into-df
| polars group-by a
| polars agg (polars col b | polars max)"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")],
),
Column::new(
"b".to_string(),
vec![Value::test_int(4), Value::test_int(1)],
),
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
],
max,
test_max
);
// ExprMin command
// Expands to a command definition for min aggregation
lazy_expr_command!(
ExprMin,
"polars min",
"Creates a min expression or aggregates columns to their min value.",
vec![
Example {
description: "Min value from columns in a dataframe",
example: "[[a b]; [6 2] [1 4] [4 1]] | polars into-df | polars min",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_int(1)],),
Column::new("b".to_string(), vec![Value::test_int(1)],),
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Min aggregation for a group-by",
example: r#"[[a b]; [one 2] [one 4] [two 1]]
| polars into-df
| polars group-by a
| polars agg (polars col b | polars min)"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(1)],
),
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
],
min,
test_min
);
// ExprSum command
// Expands to a command definition for sum aggregation
lazy_expr_command!(
ExprSum,
"polars sum",
"Creates a sum expression for an aggregation or aggregates columns to their sum value.",
vec![
Example {
description: "Sums all columns in a dataframe",
example: "[[a b]; [6 2] [1 4] [4 1]] | polars into-df | polars sum",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_int(11)],),
Column::new("b".to_string(), vec![Value::test_int(7)],),
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Sum aggregation for a group-by",
example: r#"[[a b]; [one 2] [one 4] [two 1]]
| polars into-df
| polars group-by a
| polars agg (polars col b | polars sum)"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")],
),
Column::new(
"b".to_string(),
vec![Value::test_int(6), Value::test_int(1)],
),
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
],
sum,
test_sum
);
// ExprMean command
// Expands to a command definition for mean aggregation
lazy_expr_command!(
ExprMean,
"polars mean",
"Creates a mean expression for an aggregation or aggregates columns to their mean value.",
vec![
Example {
description: "Mean value from columns in a dataframe",
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars mean",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_float(4.0)],),
Column::new("b".to_string(), vec![Value::test_float(2.0)],),
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Mean aggregation for a group-by",
example: r#"[[a b]; [one 2] [one 4] [two 1]]
| polars into-df
| polars group-by a
| polars agg (polars col b | polars mean)"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")],
),
Column::new(
"b".to_string(),
vec![Value::test_float(3.0), Value::test_float(1.0)],
),
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
],
mean,
test_mean
);
// ExprStd command
// Expands to a command definition for std aggregation
lazy_expr_command!(
ExprStd,
"polars std",
"Creates a std expression for an aggregation of std value from columns in a dataframe.",
vec![
Example {
description: "Std value from columns in a dataframe",
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars std",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_float(2.0)],),
Column::new("b".to_string(), vec![Value::test_float(0.0)],),
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Std aggregation for a group-by",
example: r#"[[a b]; [one 2] [one 2] [two 1] [two 1]]
| polars into-df
| polars group-by a
| polars agg (polars col b | polars std)"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")],
),
Column::new(
"b".to_string(),
vec![Value::test_float(0.0), Value::test_float(0.0)],
),
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
],
std,
test_std,
1
);
// ExprVar command
// Expands to a command definition for var aggregation
lazy_expr_command!(
ExprVar,
"polars var",
"Create a var expression for an aggregation.",
vec![
Example {
description:
"Var value from columns in a dataframe or aggregates columns to their var value",
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars var",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_float(4.0)],),
Column::new("b".to_string(), vec![Value::test_float(0.0)],),
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Var aggregation for a group-by",
example: r#"[[a b]; [one 2] [one 2] [two 1] [two 1]]
| polars into-df
| polars group-by a
| polars agg (polars col b | polars var)"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")],
),
Column::new(
"b".to_string(),
vec![Value::test_float(0.0), Value::test_float(0.0)],
),
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
],
var,
test_var,
1
);

View File

@ -0,0 +1,200 @@
use crate::{
dataframe::values::{Column, NuDataFrame, NuExpression},
values::{
cant_convert_err, to_pipeline_data, CustomValueSupport, PolarsPluginObject,
PolarsPluginType,
},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use polars::prelude::{is_in, lit, DataType, IntoSeries};
#[derive(Clone)]
pub struct ExprIsIn;
impl PluginCommand for ExprIsIn {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars is-in"
}
fn usage(&self) -> &str {
"Creates an is-in expression or checks to see if the elements are contained in the right series"
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("list", SyntaxShape::Any, "List to check if values are in")
.input_output_types(vec![
(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Creates a is-in expression",
example: r#"let df = ([[a b]; [one 1] [two 2] [three 3]] | polars into-df);
$df | polars with-column (polars col a | polars is-in [one two] | polars as a_in)"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![
Value::test_string("one"),
Value::test_string("two"),
Value::test_string("three"),
],
),
Column::new(
"b".to_string(),
vec![Value::test_int(1), Value::test_int(2), Value::test_int(3)],
),
Column::new(
"a_in".to_string(),
vec![
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(false),
],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Checks if elements from a series are contained in right series",
example: r#"let other = ([1 3 6] | polars into-df);
[5 6 6 6 8 8 8] | polars into-df | polars is-in $other"#,
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"is_in".to_string(),
vec![
Value::test_bool(false),
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(false),
Value::test_bool(false),
Value::test_bool(false),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn search_terms(&self) -> Vec<&str> {
vec!["check", "contained", "is-contain", "match"]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuDataFrame(df) => command_df(plugin, engine, call, df),
PolarsPluginObject::NuLazyFrame(lazy) => {
command_df(plugin, engine, call, lazy.collect(call.head)?)
}
PolarsPluginObject::NuExpression(expr) => command_expr(plugin, engine, call, expr),
_ => Err(cant_convert_err(
&value,
&[
PolarsPluginType::NuDataFrame,
PolarsPluginType::NuLazyFrame,
PolarsPluginType::NuExpression,
],
)),
}
.map_err(LabeledError::from)
}
}
fn command_expr(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
expr: NuExpression,
) -> Result<PipelineData, ShellError> {
let list: Vec<Value> = call.req(0)?;
let values = NuDataFrame::try_from_columns(vec![Column::new("list".to_string(), list)], None)?;
let list = values.as_series(call.head)?;
if matches!(list.dtype(), DataType::Object(..)) {
return Err(ShellError::IncompatibleParametersSingle {
msg: "Cannot use a mixed list as argument".into(),
span: call.head,
});
}
let expr: NuExpression = expr.to_polars().is_in(lit(list)).into();
to_pipeline_data(plugin, engine, call.head, expr)
}
fn command_df(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
df: NuDataFrame,
) -> Result<PipelineData, ShellError> {
let other_value: Value = call.req(0)?;
let other_span = other_value.span();
let other_df = NuDataFrame::try_from_value(plugin, &other_value)?;
let other = other_df.as_series(other_span)?;
let series = df.as_series(call.head)?;
let mut res = is_in(&series, &other)
.map_err(|e| ShellError::GenericError {
error: "Error finding in other".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into_series();
res.rename("is_in");
let df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&ExprIsIn)
}
}

View File

@ -0,0 +1,73 @@
use crate::{
dataframe::values::NuExpression,
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
record, Category, Example, LabeledError, PipelineData, Signature, SyntaxShape, Type, Value,
};
#[derive(Clone)]
pub struct ExprLit;
impl PluginCommand for ExprLit {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars lit"
}
fn usage(&self) -> &str {
"Creates a literal expression."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"literal",
SyntaxShape::Any,
"literal to construct the expression",
)
.input_output_type(Type::Any, Type::Custom("expression".into()))
.category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Created a literal expression and converts it to a nu object",
example: "polars lit 2 | polars into-nu",
result: Some(Value::test_record(record! {
"expr" => Value::test_string("literal"),
"value" => Value::test_string("2"),
})),
}]
}
fn search_terms(&self) -> Vec<&str> {
vec!["string", "literal", "expression"]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
_input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let literal: Value = call.req(0)?;
let expr = NuExpression::try_from_value(plugin, &literal)?;
to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), nu_protocol::ShellError> {
test_polars_plugin_command(&ExprLit)
}
}

View File

@ -0,0 +1,48 @@
mod alias;
mod arg_where;
mod col;
mod concat_str;
mod datepart;
mod expressions_macro;
mod is_in;
mod lit;
mod otherwise;
mod when;
use nu_plugin::PluginCommand;
pub use crate::dataframe::expressions::alias::ExprAlias;
pub use crate::dataframe::expressions::arg_where::ExprArgWhere;
pub use crate::dataframe::expressions::col::ExprCol;
pub use crate::dataframe::expressions::concat_str::ExprConcatStr;
pub use crate::dataframe::expressions::datepart::ExprDatePart;
pub use crate::dataframe::expressions::expressions_macro::*;
pub use crate::dataframe::expressions::is_in::ExprIsIn;
pub use crate::dataframe::expressions::lit::ExprLit;
pub use crate::dataframe::expressions::otherwise::ExprOtherwise;
pub use crate::dataframe::expressions::when::ExprWhen;
use crate::PolarsPlugin;
pub(crate) fn expr_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlugin>>> {
vec![
Box::new(ExprAlias),
Box::new(ExprArgWhere),
Box::new(ExprAggGroups),
Box::new(ExprCol),
Box::new(ExprConcatStr),
Box::new(ExprCount),
Box::new(ExprDatePart),
Box::new(ExprIsIn),
Box::new(ExprList),
Box::new(ExprLit),
Box::new(ExprNot),
Box::new(ExprMax),
Box::new(ExprMin),
Box::new(ExprOtherwise),
Box::new(ExprSum),
Box::new(ExprMean),
Box::new(ExprStd),
Box::new(ExprVar),
Box::new(ExprWhen),
]
}

View File

@ -0,0 +1,122 @@
use crate::{
dataframe::values::{Column, NuDataFrame, NuExpression, NuWhen, NuWhenType},
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value,
};
#[derive(Clone)]
pub struct ExprOtherwise;
impl PluginCommand for ExprOtherwise {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars otherwise"
}
fn usage(&self) -> &str {
"Completes a when expression."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"otherwise expression",
SyntaxShape::Any,
"expression to apply when no when predicate matches",
)
.input_output_type(Type::Any, Type::Custom("expression".into()))
.category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Create a when conditions",
example: "polars when ((polars col a) > 2) 4 | polars otherwise 5",
result: None,
},
Example {
description: "Create a when conditions",
example:
"polars when ((polars col a) > 2) 4 | polars when ((polars col a) < 0) 6 | polars otherwise 0",
result: None,
},
Example {
description: "Create a new column for the dataframe",
example: r#"[[a b]; [6 2] [1 4] [4 1]]
| polars into-lazy
| polars with-column (
polars when ((polars col a) > 2) 4 | polars otherwise 5 | polars as c
)
| polars with-column (
polars when ((polars col a) > 5) 10 | polars when ((polars col a) < 2) 6 | polars otherwise 0 | polars as d
)
| polars collect"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(6), Value::test_int(1), Value::test_int(4)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4), Value::test_int(1)],
),
Column::new(
"c".to_string(),
vec![Value::test_int(4), Value::test_int(5), Value::test_int(4)],
),
Column::new(
"d".to_string(),
vec![Value::test_int(10), Value::test_int(6), Value::test_int(0)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn search_terms(&self) -> Vec<&str> {
vec!["condition", "else"]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let otherwise_predicate: Value = call.req(0)?;
let otherwise_predicate = NuExpression::try_from_value(plugin, &otherwise_predicate)?;
let value = input.into_value(call.head);
let complete: NuExpression = match NuWhen::try_from_value(plugin, &value)?.when_type {
NuWhenType::Then(then) => then.otherwise(otherwise_predicate.to_polars()).into(),
NuWhenType::ChainedThen(chained_when) => chained_when
.otherwise(otherwise_predicate.to_polars())
.into(),
};
to_pipeline_data(plugin, engine, call.head, complete).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), nu_protocol::ShellError> {
test_polars_plugin_command(&ExprOtherwise)
}
}

View File

@ -0,0 +1,144 @@
use crate::{
dataframe::values::{Column, NuDataFrame, NuExpression, NuWhen},
values::{to_pipeline_data, CustomValueSupport, NuWhenType},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value,
};
use polars::prelude::when;
#[derive(Clone)]
pub struct ExprWhen;
impl PluginCommand for ExprWhen {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars when"
}
fn usage(&self) -> &str {
"Creates and modifies a when expression."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"when expression",
SyntaxShape::Any,
"when expression used for matching",
)
.required(
"then expression",
SyntaxShape::Any,
"expression that will be applied when predicate is true",
)
.input_output_type(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
)
.category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Create a when conditions",
example: "polars when ((polars col a) > 2) 4",
result: None,
},
Example {
description: "Create a when conditions",
example: "polars when ((polars col a) > 2) 4 | polars when ((polars col a) < 0) 6",
result: None,
},
Example {
description: "Create a new column for the dataframe",
example: r#"[[a b]; [6 2] [1 4] [4 1]]
| polars into-lazy
| polars with-column (
polars when ((polars col a) > 2) 4 | polars otherwise 5 | polars as c
)
| polars with-column (
polars when ((polars col a) > 5) 10 | polars when ((polars col a) < 2) 6 | polars otherwise 0 | polars as d
)
| polars collect"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(6), Value::test_int(1), Value::test_int(4)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4), Value::test_int(1)],
),
Column::new(
"c".to_string(),
vec![Value::test_int(4), Value::test_int(5), Value::test_int(4)],
),
Column::new(
"d".to_string(),
vec![Value::test_int(10), Value::test_int(6), Value::test_int(0)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn search_terms(&self) -> Vec<&str> {
vec!["condition", "match", "if", "else"]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let when_predicate: Value = call.req(0)?;
let when_predicate = NuExpression::try_from_value(plugin, &when_predicate)?;
let then_predicate: Value = call.req(1)?;
let then_predicate = NuExpression::try_from_value(plugin, &then_predicate)?;
let value = input.into_value(call.head);
let when_then: NuWhen = match value {
Value::Nothing { .. } => when(when_predicate.to_polars())
.then(then_predicate.to_polars())
.into(),
v => match NuWhen::try_from_value(plugin, &v)?.when_type {
NuWhenType::Then(when_then) => when_then
.when(when_predicate.to_polars())
.then(then_predicate.to_polars())
.into(),
NuWhenType::ChainedThen(when_then_then) => when_then_then
.when(when_predicate.to_polars())
.then(then_predicate.to_polars())
.into(),
},
};
to_pipeline_data(plugin, engine, call.head, when_then).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), nu_protocol::ShellError> {
test_polars_plugin_command(&ExprWhen)
}
}

View File

@ -0,0 +1,210 @@
use crate::{
dataframe::values::{NuExpression, NuLazyFrame, NuLazyGroupBy},
values::{to_pipeline_data, Column, CustomValueSupport, NuDataFrame},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use polars::{datatypes::DataType, prelude::Expr};
#[derive(Clone)]
pub struct LazyAggregate;
impl PluginCommand for LazyAggregate {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars agg"
}
fn usage(&self) -> &str {
"Performs a series of aggregations from a group-by."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.rest(
"Group-by expressions",
SyntaxShape::Any,
"Expression(s) that define the aggregations to be applied",
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Group by and perform an aggregation",
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
| polars into-df
| polars group-by a
| polars agg [
(polars col b | polars min | polars as "b_min")
(polars col b | polars max | polars as "b_max")
(polars col b | polars sum | polars as "b_sum")
]"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(2)],
),
Column::new(
"b_min".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
Column::new(
"b_max".to_string(),
vec![Value::test_int(4), Value::test_int(6)],
),
Column::new(
"b_sum".to_string(),
vec![Value::test_int(6), Value::test_int(10)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Group by and perform an aggregation",
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
| polars into-lazy
| polars group-by a
| polars agg [
(polars col b | polars min | polars as "b_min")
(polars col b | polars max | polars as "b_max")
(polars col b | polars sum | polars as "b_sum")
]
| polars collect"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(2)],
),
Column::new(
"b_min".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
Column::new(
"b_max".to_string(),
vec![Value::test_int(4), Value::test_int(6)],
),
Column::new(
"b_sum".to_string(),
vec![Value::test_int(6), Value::test_int(10)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let vals: Vec<Value> = call.rest(0)?;
let value = Value::list(vals, call.head);
let expressions = NuExpression::extract_exprs(plugin, value)?;
let group_by = NuLazyGroupBy::try_from_pipeline(plugin, input, call.head)?;
for expr in expressions.iter() {
if let Some(name) = get_col_name(expr) {
let dtype = group_by.schema.schema.get(name.as_str());
if matches!(dtype, Some(DataType::Object(..))) {
return Err(ShellError::GenericError {
error: "Object type column not supported for aggregation".into(),
msg: format!("Column '{name}' is type Object"),
span: Some(call.head),
help: Some("Aggregations cannot be performed on Object type columns. Use dtype command to check column types".into()),
inner: vec![],
}).map_err(|e| e.into());
}
}
}
let polars = group_by.to_polars();
let lazy = NuLazyFrame::new(false, polars.agg(&expressions));
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
}
}
fn get_col_name(expr: &Expr) -> Option<String> {
match expr {
Expr::Column(column) => Some(column.to_string()),
Expr::Agg(agg) => match agg {
polars::prelude::AggExpr::Min { input: e, .. }
| polars::prelude::AggExpr::Max { input: e, .. }
| polars::prelude::AggExpr::Median(e)
| polars::prelude::AggExpr::NUnique(e)
| polars::prelude::AggExpr::First(e)
| polars::prelude::AggExpr::Last(e)
| polars::prelude::AggExpr::Mean(e)
| polars::prelude::AggExpr::Implode(e)
| polars::prelude::AggExpr::Count(e, _)
| polars::prelude::AggExpr::Sum(e)
| polars::prelude::AggExpr::AggGroups(e)
| polars::prelude::AggExpr::Std(e, _)
| polars::prelude::AggExpr::Var(e, _) => get_col_name(e.as_ref()),
polars::prelude::AggExpr::Quantile { expr, .. } => get_col_name(expr.as_ref()),
},
Expr::Filter { input: expr, .. }
| Expr::Slice { input: expr, .. }
| Expr::Cast { expr, .. }
| Expr::Sort { expr, .. }
| Expr::Gather { expr, .. }
| Expr::SortBy { expr, .. }
| Expr::Exclude(expr, _)
| Expr::Alias(expr, _)
| Expr::KeepName(expr)
| Expr::Explode(expr) => get_col_name(expr.as_ref()),
Expr::Ternary { .. }
| Expr::AnonymousFunction { .. }
| Expr::Function { .. }
| Expr::Columns(_)
| Expr::DtypeColumn(_)
| Expr::Literal(_)
| Expr::BinaryExpr { .. }
| Expr::Window { .. }
| Expr::Wildcard
| Expr::RenameAlias { .. }
| Expr::Len
| Expr::Nth(_)
| Expr::SubPlan(_, _)
| Expr::Selector(_) => None,
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&LazyAggregate)
}
}

View File

@ -0,0 +1,98 @@
use crate::{
dataframe::values::{Column, NuDataFrame},
values::{cant_convert_err, CustomValueSupport, PolarsPluginObject, PolarsPluginType},
Cacheable, PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{Category, Example, LabeledError, PipelineData, Signature, Span, Type, Value};
#[derive(Clone)]
pub struct LazyCollect;
impl PluginCommand for LazyCollect {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars collect"
}
fn usage(&self) -> &str {
"Collect lazy dataframe into eager dataframe."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "drop duplicates",
example: "[[a b]; [1 2] [3 4]] | polars into-lazy | polars collect",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuLazyFrame(lazy) => {
let eager = lazy.collect(call.head)?;
Ok(PipelineData::Value(
eager.cache(plugin, engine)?.into_value(call.head),
None,
))
}
PolarsPluginObject::NuDataFrame(df) => {
// just return the dataframe, add to cache again to be safe
Ok(PipelineData::Value(
df.cache(plugin, engine)?.into_value(call.head),
None,
))
}
_ => Err(cant_convert_err(
&value,
&[PolarsPluginType::NuLazyFrame, PolarsPluginType::NuDataFrame],
)),
}
.map_err(LabeledError::from)
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), nu_protocol::ShellError> {
test_polars_plugin_command(&LazyCollect)
}
}

View File

@ -0,0 +1,175 @@
use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame};
use crate::values::{to_pipeline_data, CustomValueSupport, PolarsPluginObject};
use crate::PolarsPlugin;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
#[derive(Clone)]
pub struct LazyExplode;
impl PluginCommand for LazyExplode {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars explode"
}
fn usage(&self) -> &str {
"Explodes a dataframe or creates a explode expression."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.rest(
"columns",
SyntaxShape::String,
"columns to explode, only applicable for dataframes",
)
.input_output_types(vec![
(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Explode the specified dataframe",
example: "[[id name hobbies]; [1 Mercy [Cycling Knitting]] [2 Bob [Skiing Football]]] | polars into-df | polars explode hobbies | polars collect",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"id".to_string(),
vec![
Value::test_int(1),
Value::test_int(1),
Value::test_int(2),
Value::test_int(2),
]),
Column::new(
"name".to_string(),
vec![
Value::test_string("Mercy"),
Value::test_string("Mercy"),
Value::test_string("Bob"),
Value::test_string("Bob"),
]),
Column::new(
"hobbies".to_string(),
vec![
Value::test_string("Cycling"),
Value::test_string("Knitting"),
Value::test_string("Skiing"),
Value::test_string("Football"),
]),
], None)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
)
},
Example {
description: "Select a column and explode the values",
example: "[[id name hobbies]; [1 Mercy [Cycling Knitting]] [2 Bob [Skiing Football]]] | polars into-df | polars select (polars col hobbies | polars explode)",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"hobbies".to_string(),
vec![
Value::test_string("Cycling"),
Value::test_string("Knitting"),
Value::test_string("Skiing"),
Value::test_string("Football"),
]),
], None)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
explode(plugin, engine, call, input).map_err(LabeledError::from)
}
}
pub(crate) fn explode(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let value = input.into_value(call.head);
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuDataFrame(df) => {
let lazy = df.lazy();
explode_lazy(plugin, engine, call, lazy)
}
PolarsPluginObject::NuLazyFrame(lazy) => explode_lazy(plugin, engine, call, lazy),
PolarsPluginObject::NuExpression(expr) => explode_expr(plugin, engine, call, expr),
_ => Err(ShellError::CantConvert {
to_type: "dataframe or expression".into(),
from_type: value.get_type().to_string(),
span: call.head,
help: None,
}),
}
}
pub(crate) fn explode_lazy(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
lazy: NuLazyFrame,
) -> Result<PipelineData, ShellError> {
let columns = call
.positional
.iter()
.map(|e| e.as_str().map(|s| s.to_string()))
.collect::<Result<Vec<String>, ShellError>>()?;
let exploded = lazy
.to_polars()
.explode(columns.iter().map(AsRef::as_ref).collect::<Vec<&str>>());
let lazy = NuLazyFrame::from(exploded);
to_pipeline_data(plugin, engine, call.head, lazy)
}
pub(crate) fn explode_expr(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
expr: NuExpression,
) -> Result<PipelineData, ShellError> {
let expr: NuExpression = expr.to_polars().explode().into();
to_pipeline_data(plugin, engine, call.head, expr)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&LazyExplode)
}
}

View File

@ -0,0 +1,100 @@
use crate::dataframe::values::{Column, NuDataFrame};
use crate::values::{to_pipeline_data, CustomValueSupport, NuLazyFrame};
use crate::PolarsPlugin;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
#[derive(Clone)]
pub struct LazyFetch;
impl PluginCommand for LazyFetch {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars fetch"
}
fn usage(&self) -> &str {
"Collects the lazyframe to the selected rows."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"rows",
SyntaxShape::Int,
"number of rows to be fetched from lazyframe",
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Fetch a rows from the dataframe",
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars fetch 2",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(6), Value::test_int(4)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(2)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let rows: i64 = call.req(0)?;
let value = input.into_value(call.head);
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?;
let mut eager: NuDataFrame = lazy
.to_polars()
.fetch(rows as usize)
.map_err(|e| ShellError::GenericError {
error: "Error fetching rows".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into();
// mark this as not from lazy so it doesn't get converted back to a lazy frame
eager.from_lazy = false;
to_pipeline_data(plugin, engine, call.head, eager).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&LazyFetch)
}
}

View File

@ -0,0 +1,189 @@
use crate::{
dataframe::values::{Column, NuDataFrame, NuExpression},
values::{
cant_convert_err, to_pipeline_data, CustomValueSupport, PolarsPluginObject,
PolarsPluginType,
},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
#[derive(Clone)]
pub struct LazyFillNA;
impl PluginCommand for LazyFillNA {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars fill-nan"
}
fn usage(&self) -> &str {
"Replaces NaN values with the given expression."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"fill",
SyntaxShape::Any,
"Expression to use to fill the NAN values",
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Fills the NaN values with 0",
example: "[1 2 NaN 3 NaN] | polars into-df | polars fill-nan 0",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
Value::test_int(1),
Value::test_int(2),
Value::test_int(0),
Value::test_int(3),
Value::test_int(0),
],
)],
None,
)
.expect("Df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Fills the NaN values of a whole dataframe",
example: "[[a b]; [0.2 1] [0.1 NaN]] | polars into-df | polars fill-nan 0",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_float(0.2), Value::test_float(0.1)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(1), Value::test_int(0)],
),
],
None,
)
.expect("Df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let fill: Value = call.req(0)?;
let value = input.into_value(call.head);
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuDataFrame(df) => {
cmd_df(plugin, engine, call, df, fill, value.span())
}
PolarsPluginObject::NuLazyFrame(lazy) => cmd_df(
plugin,
engine,
call,
lazy.collect(value.span())?,
fill,
value.span(),
),
PolarsPluginObject::NuExpression(expr) => {
Ok(cmd_expr(plugin, engine, call, expr, fill)?)
}
_ => Err(cant_convert_err(
&value,
&[
PolarsPluginType::NuDataFrame,
PolarsPluginType::NuLazyFrame,
PolarsPluginType::NuExpression,
],
)),
}
.map_err(LabeledError::from)
}
}
fn cmd_df(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
frame: NuDataFrame,
fill: Value,
val_span: Span,
) -> Result<PipelineData, ShellError> {
let columns = frame.columns(val_span)?;
let dataframe = columns
.into_iter()
.map(|column| {
let column_name = column.name().to_string();
let values = column
.into_iter()
.map(|value| {
let span = value.span();
match value {
Value::Float { val, .. } => {
if val.is_nan() {
fill.clone()
} else {
value
}
}
Value::List { vals, .. } => {
NuDataFrame::fill_list_nan(vals, span, fill.clone())
}
_ => value,
}
})
.collect::<Vec<Value>>();
Column::new(column_name, values)
})
.collect::<Vec<Column>>();
let df = NuDataFrame::try_from_columns(dataframe, None)?;
to_pipeline_data(plugin, engine, call.head, df)
}
fn cmd_expr(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
expr: NuExpression,
fill: Value,
) -> Result<PipelineData, ShellError> {
let fill = NuExpression::try_from_value(plugin, &fill)?.to_polars();
let expr: NuExpression = expr.to_polars().fill_nan(fill).into();
to_pipeline_data(plugin, engine, call.head, expr)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&LazyFillNA)
}
}

View File

@ -0,0 +1,127 @@
use crate::{
dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame},
values::{
cant_convert_err, to_pipeline_data, CustomValueSupport, PolarsPluginObject,
PolarsPluginType,
},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
#[derive(Clone)]
pub struct LazyFillNull;
impl PluginCommand for LazyFillNull {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars fill-null"
}
fn usage(&self) -> &str {
"Replaces NULL values with the given expression."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"fill",
SyntaxShape::Any,
"Expression to use to fill the null values",
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Fills the null values by 0",
example: "[1 2 2 3 3] | polars into-df | polars shift 2 | polars fill-null 0",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
Value::test_int(0),
Value::test_int(0),
Value::test_int(1),
Value::test_int(2),
Value::test_int(2),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let fill: Value = call.req(0)?;
let value = input.into_value(call.head);
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuDataFrame(df) => cmd_lazy(plugin, engine, call, df.lazy(), fill),
PolarsPluginObject::NuLazyFrame(lazy) => cmd_lazy(plugin, engine, call, lazy, fill),
PolarsPluginObject::NuExpression(expr) => cmd_expr(plugin, engine, call, expr, fill),
_ => Err(cant_convert_err(
&value,
&[
PolarsPluginType::NuDataFrame,
PolarsPluginType::NuLazyFrame,
PolarsPluginType::NuExpression,
],
)),
}
.map_err(LabeledError::from)
}
}
fn cmd_lazy(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
lazy: NuLazyFrame,
fill: Value,
) -> Result<PipelineData, ShellError> {
let expr = NuExpression::try_from_value(plugin, &fill)?.to_polars();
let lazy = NuLazyFrame::new(lazy.from_eager, lazy.to_polars().fill_null(expr));
to_pipeline_data(plugin, engine, call.head, lazy)
}
fn cmd_expr(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
expr: NuExpression,
fill: Value,
) -> Result<PipelineData, ShellError> {
let fill = NuExpression::try_from_value(plugin, &fill)?.to_polars();
let expr: NuExpression = expr.to_polars().fill_null(fill).into();
to_pipeline_data(plugin, engine, call.head, expr)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&LazyFillNull)
}
}

View File

@ -0,0 +1,104 @@
use crate::{
dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame},
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
#[derive(Clone)]
pub struct LazyFilter;
impl PluginCommand for LazyFilter {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars filter"
}
fn usage(&self) -> &str {
"Filter dataframe based in expression."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"filter expression",
SyntaxShape::Any,
"Expression that define the column selection",
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Filter dataframe using an expression",
example:
"[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars filter ((polars col a) >= 4)",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(6), Value::test_int(4)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(2)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let expr_value: Value = call.req(0)?;
let filter_expr = NuExpression::try_from_value(plugin, &expr_value)?;
let pipeline_value = input.into_value(call.head);
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?;
command(plugin, engine, call, lazy, filter_expr).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
lazy: NuLazyFrame,
filter_expr: NuExpression,
) -> Result<PipelineData, ShellError> {
let lazy = NuLazyFrame::new(
lazy.from_eager,
lazy.to_polars().filter(filter_expr.to_polars()),
);
to_pipeline_data(plugin, engine, call.head, lazy)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&LazyFilter)
}
}

View File

@ -0,0 +1,125 @@
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value,
};
use crate::{
dataframe::values::{Column, NuDataFrame},
values::CustomValueSupport,
PolarsPlugin,
};
use super::explode::explode;
#[derive(Clone)]
pub struct LazyFlatten;
impl PluginCommand for LazyFlatten {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars flatten"
}
fn usage(&self) -> &str {
"An alias for polars explode."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.rest(
"columns",
SyntaxShape::String,
"columns to flatten, only applicable for dataframes",
)
.input_output_types(vec![
(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Flatten the specified dataframe",
example: "[[id name hobbies]; [1 Mercy [Cycling Knitting]] [2 Bob [Skiing Football]]] | polars into-df | polars flatten hobbies | polars collect",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"id".to_string(),
vec![
Value::test_int(1),
Value::test_int(1),
Value::test_int(2),
Value::test_int(2),
]),
Column::new(
"name".to_string(),
vec![
Value::test_string("Mercy"),
Value::test_string("Mercy"),
Value::test_string("Bob"),
Value::test_string("Bob"),
]),
Column::new(
"hobbies".to_string(),
vec![
Value::test_string("Cycling"),
Value::test_string("Knitting"),
Value::test_string("Skiing"),
Value::test_string("Football"),
]),
], None)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
)
},
Example {
description: "Select a column and flatten the values",
example: "[[id name hobbies]; [1 Mercy [Cycling Knitting]] [2 Bob [Skiing Football]]] | polars into-df | polars select (polars col hobbies | polars flatten)",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"hobbies".to_string(),
vec![
Value::test_string("Cycling"),
Value::test_string("Knitting"),
Value::test_string("Skiing"),
Value::test_string("Football"),
]),
], None)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
explode(plugin, engine, call, input).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), nu_protocol::ShellError> {
test_polars_plugin_command(&LazyFlatten)
}
}

View File

@ -0,0 +1,168 @@
use crate::{
dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame, NuLazyGroupBy},
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use polars::prelude::Expr;
#[derive(Clone)]
pub struct ToLazyGroupBy;
impl PluginCommand for ToLazyGroupBy {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars group-by"
}
fn usage(&self) -> &str {
"Creates a group-by object that can be used for other aggregations."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.rest(
"Group-by expressions",
SyntaxShape::Any,
"Expression(s) that define the lazy group-by",
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Group by and perform an aggregation",
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
| polars into-df
| polars group-by a
| polars agg [
(polars col b | polars min | polars as "b_min")
(polars col b | polars max | polars as "b_max")
(polars col b | polars sum | polars as "b_sum")
]"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(2)],
),
Column::new(
"b_min".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
Column::new(
"b_max".to_string(),
vec![Value::test_int(4), Value::test_int(6)],
),
Column::new(
"b_sum".to_string(),
vec![Value::test_int(6), Value::test_int(10)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Group by and perform an aggregation",
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
| polars into-lazy
| polars group-by a
| polars agg [
(polars col b | polars min | polars as "b_min")
(polars col b | polars max | polars as "b_max")
(polars col b | polars sum | polars as "b_sum")
]
| polars collect"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(2)],
),
Column::new(
"b_min".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
Column::new(
"b_max".to_string(),
vec![Value::test_int(4), Value::test_int(6)],
),
Column::new(
"b_sum".to_string(),
vec![Value::test_int(6), Value::test_int(10)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let vals: Vec<Value> = call.rest(0)?;
let expr_value = Value::list(vals, call.head);
let expressions = NuExpression::extract_exprs(plugin, expr_value)?;
if expressions
.iter()
.any(|expr| !matches!(expr, Expr::Column(..)))
{
let value: Value = call.req(0)?;
Err(ShellError::IncompatibleParametersSingle {
msg: "Expected only Col expressions".into(),
span: value.span(),
})?;
}
let pipeline_value = input.into_value(call.head);
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?;
command(plugin, engine, call, lazy, expressions).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
lazy: NuLazyFrame,
expressions: Vec<Expr>,
) -> Result<PipelineData, ShellError> {
let group_by = lazy.to_polars().group_by(expressions);
let group_by = NuLazyGroupBy::new(group_by, lazy.from_eager, lazy.schema()?);
to_pipeline_data(plugin, engine, call.head, group_by)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&ToLazyGroupBy)
}
}

View File

@ -0,0 +1,260 @@
use crate::{
dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame},
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use polars::prelude::{Expr, JoinType};
#[derive(Clone)]
pub struct LazyJoin;
impl PluginCommand for LazyJoin {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars join"
}
fn usage(&self) -> &str {
"Joins a lazy frame with other lazy frame."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("other", SyntaxShape::Any, "LazyFrame to join with")
.required("left_on", SyntaxShape::Any, "Left column(s) to join on")
.required("right_on", SyntaxShape::Any, "Right column(s) to join on")
.switch(
"inner",
"inner joining between lazyframes (default)",
Some('i'),
)
.switch("left", "left join between lazyframes", Some('l'))
.switch("outer", "outer join between lazyframes", Some('o'))
.switch("cross", "cross join between lazyframes", Some('c'))
.named(
"suffix",
SyntaxShape::String,
"Suffix to use on columns with same name",
Some('s'),
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Join two lazy dataframes",
example: r#"let df_a = ([[a b c];[1 "a" 0] [2 "b" 1] [1 "c" 2] [1 "c" 3]] | polars into-lazy);
let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | polars into-lazy);
$df_a | polars join $df_b a foo | polars collect"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![
Value::test_int(1),
Value::test_int(2),
Value::test_int(1),
Value::test_int(1),
],
),
Column::new(
"b".to_string(),
vec![
Value::test_string("a"),
Value::test_string("b"),
Value::test_string("c"),
Value::test_string("c"),
],
),
Column::new(
"c".to_string(),
vec![
Value::test_int(0),
Value::test_int(1),
Value::test_int(2),
Value::test_int(3),
],
),
Column::new(
"bar".to_string(),
vec![
Value::test_string("a"),
Value::test_string("c"),
Value::test_string("a"),
Value::test_string("a"),
],
),
Column::new(
"ham".to_string(),
vec![
Value::test_string("let"),
Value::test_string("var"),
Value::test_string("let"),
Value::test_string("let"),
],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Join one eager dataframe with a lazy dataframe",
example: r#"let df_a = ([[a b c];[1 "a" 0] [2 "b" 1] [1 "c" 2] [1 "c" 3]] | polars into-df);
let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | polars into-lazy);
$df_a | polars join $df_b a foo"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![
Value::test_int(1),
Value::test_int(2),
Value::test_int(1),
Value::test_int(1),
],
),
Column::new(
"b".to_string(),
vec![
Value::test_string("a"),
Value::test_string("b"),
Value::test_string("c"),
Value::test_string("c"),
],
),
Column::new(
"c".to_string(),
vec![
Value::test_int(0),
Value::test_int(1),
Value::test_int(2),
Value::test_int(3),
],
),
Column::new(
"bar".to_string(),
vec![
Value::test_string("a"),
Value::test_string("c"),
Value::test_string("a"),
Value::test_string("a"),
],
),
Column::new(
"ham".to_string(),
vec![
Value::test_string("let"),
Value::test_string("var"),
Value::test_string("let"),
Value::test_string("let"),
],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let left = call.has_flag("left")?;
let outer = call.has_flag("outer")?;
let cross = call.has_flag("cross")?;
let how = if left {
JoinType::Left
} else if outer {
JoinType::Outer { coalesce: true }
} else if cross {
JoinType::Cross
} else {
JoinType::Inner
};
let other: Value = call.req(0)?;
let other = NuLazyFrame::try_from_value_coerce(plugin, &other)?;
let other = other.to_polars();
let left_on: Value = call.req(1)?;
let left_on = NuExpression::extract_exprs(plugin, left_on)?;
let right_on: Value = call.req(2)?;
let right_on = NuExpression::extract_exprs(plugin, right_on)?;
if left_on.len() != right_on.len() {
let right_on: Value = call.req(2)?;
Err(ShellError::IncompatibleParametersSingle {
msg: "The right column list has a different size to the left column list".into(),
span: right_on.span(),
})?;
}
// Checking that both list of expressions are made out of col expressions or strings
for (index, list) in &[(1usize, &left_on), (2, &left_on)] {
if list.iter().any(|expr| !matches!(expr, Expr::Column(..))) {
let value: Value = call.req(*index)?;
Err(ShellError::IncompatibleParametersSingle {
msg: "Expected only a string, col expressions or list of strings".into(),
span: value.span(),
})?;
}
}
let suffix: Option<String> = call.get_flag("suffix")?;
let suffix = suffix.unwrap_or_else(|| "_x".into());
let value = input.into_value(call.head);
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?;
let from_eager = lazy.from_eager;
let lazy = lazy.to_polars();
let lazy = lazy
.join_builder()
.with(other)
.left_on(left_on)
.right_on(right_on)
.how(how)
.force_parallel(true)
.suffix(suffix)
.finish();
let lazy = NuLazyFrame::new(from_eager, lazy);
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&LazyJoin)
}
}

View File

@ -0,0 +1,225 @@
/// Definition of multiple lazyframe commands using a macro rule
/// All of these commands have an identical body and only require
/// to have a change in the name, description and function
use crate::dataframe::values::{Column, NuDataFrame, NuLazyFrame};
use crate::values::{to_pipeline_data, CustomValueSupport};
use crate::PolarsPlugin;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{Category, Example, LabeledError, PipelineData, Signature, Span, Type, Value};
macro_rules! lazy_command {
($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident) => {
#[derive(Clone)]
pub struct $command;
impl PluginCommand for $command {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
$name
}
fn usage(&self) -> &str {
$desc
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.usage($desc)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
$examples
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let lazy = NuLazyFrame::try_from_pipeline_coerce(plugin, input, call.head)
.map_err(LabeledError::from)?;
let lazy = NuLazyFrame::new(lazy.from_eager, lazy.to_polars().$func());
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod $test {
use super::*;
use crate::test::test_polars_plugin_command;
use nu_protocol::ShellError;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&$command)
}
}
};
($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident, $ddot: expr) => {
#[derive(Clone)]
pub struct $command;
impl PluginCommand for $command {
type Plugin = PolarsPlugin;
fn signature(&self) -> Signature {
Signature::build($name)
.usage($desc)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
.plugin_examples($examples)
}
fn run(
&self,
_plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let lazy = NuLazyFrame::try_from_pipeline_coerce(plugin, input, call.head)
.map_err(LabeledError::from)?;
let lazy = NuLazyFrame::new(lazy.from_eager, lazy.into_polars().$func($ddot));
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod $test {
use super::*;
use crate::test::test_polars_plugin_command;
use nu_protocol::ShellError;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&$command)
}
}
};
($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident?, $test: ident) => {
#[derive(Clone)]
pub struct $command;
impl PluginCommand for $command {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
$name
}
fn usage(&self) -> &str {
$desc
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
$examples
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let lazy = NuLazyFrame::try_from_pipeline_coerce(plugin, input, call.head)
.map_err(LabeledError::from)?;
let lazy = NuLazyFrame::new(
lazy.from_eager,
lazy.to_polars()
.$func()
.map_err(|e| ShellError::GenericError {
error: "Dataframe Error".into(),
msg: e.to_string(),
help: None,
span: None,
inner: vec![],
})
.map_err(LabeledError::from)?,
);
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod $test {
use super::*;
use crate::test::test_polars_plugin_command;
use nu_protocol::ShellError;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&$command)
}
}
};
}
// LazyReverse command
// Expands to a command definition for reverse
lazy_command!(
LazyReverse,
"polars reverse",
"Reverses the LazyFrame",
vec![Example {
description: "Reverses the dataframe.",
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars reverse",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(2), Value::test_int(4), Value::test_int(6),],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(2), Value::test_int(2),],
),
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},],
reverse,
test_reverse
);
// LazyCache command
// Expands to a command definition for cache
lazy_command!(
LazyCache,
"polars cache",
"Caches operations in a new LazyFrame.",
vec![Example {
description: "Caches the result into a new LazyFrame",
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars reverse | polars cache",
result: None,
}],
cache,
test_cache
);

View File

@ -0,0 +1,143 @@
use crate::{
dataframe::values::{Column, NuDataFrame, NuLazyFrame},
values::{
cant_convert_err, to_pipeline_data, CustomValueSupport, NuExpression, PolarsPluginObject,
PolarsPluginType,
},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
#[derive(Clone)]
pub struct LazyMedian;
impl PluginCommand for LazyMedian {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars median"
}
fn usage(&self) -> &str {
"Median value from columns in a dataframe or creates expression for an aggregation"
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_types(vec![
(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Median aggregation for a group-by",
example: r#"[[a b]; [one 2] [one 4] [two 1]]
| polars into-df
| polars group-by a
| polars agg (polars col b | polars median)"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")],
),
Column::new(
"b".to_string(),
vec![Value::test_float(3.0), Value::test_float(1.0)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Median value from columns in a dataframe",
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars median",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_float(4.0)]),
Column::new("b".to_string(), vec![Value::test_float(2.0)]),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuDataFrame(df) => command(plugin, engine, call, df.lazy()),
PolarsPluginObject::NuLazyFrame(lazy) => command(plugin, engine, call, lazy),
PolarsPluginObject::NuExpression(expr) => {
let expr: NuExpression = expr.to_polars().median().into();
to_pipeline_data(plugin, engine, call.head, expr)
}
_ => Err(cant_convert_err(
&value,
&[
PolarsPluginType::NuDataFrame,
PolarsPluginType::NuLazyFrame,
PolarsPluginType::NuExpression,
],
)),
}
.map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
lazy: NuLazyFrame,
) -> Result<PipelineData, ShellError> {
let polars_lazy = lazy
.to_polars()
.median()
.map_err(|e| ShellError::GenericError {
error: format!("Error in median operation: {e}"),
msg: "".into(),
help: None,
span: None,
inner: vec![],
})?;
let lazy = NuLazyFrame::new(lazy.from_eager, polars_lazy);
to_pipeline_data(plugin, engine, call.head, lazy)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&LazyMedian)
}
}

View File

@ -0,0 +1,57 @@
mod aggregate;
mod collect;
mod explode;
mod fetch;
mod fill_nan;
mod fill_null;
mod filter;
mod flatten;
pub mod groupby;
mod join;
mod macro_commands;
mod median;
mod quantile;
mod select;
mod sort_by_expr;
mod to_lazy;
use nu_plugin::PluginCommand;
pub use crate::dataframe::lazy::aggregate::LazyAggregate;
pub use crate::dataframe::lazy::collect::LazyCollect;
use crate::dataframe::lazy::fetch::LazyFetch;
use crate::dataframe::lazy::fill_nan::LazyFillNA;
pub use crate::dataframe::lazy::fill_null::LazyFillNull;
use crate::dataframe::lazy::filter::LazyFilter;
use crate::dataframe::lazy::groupby::ToLazyGroupBy;
use crate::dataframe::lazy::join::LazyJoin;
pub(crate) use crate::dataframe::lazy::macro_commands::*;
use crate::dataframe::lazy::quantile::LazyQuantile;
pub(crate) use crate::dataframe::lazy::select::LazySelect;
use crate::dataframe::lazy::sort_by_expr::LazySortBy;
pub use crate::dataframe::lazy::to_lazy::ToLazyFrame;
use crate::PolarsPlugin;
pub use explode::LazyExplode;
pub use flatten::LazyFlatten;
pub(crate) fn lazy_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlugin>>> {
vec![
Box::new(LazyAggregate),
Box::new(LazyCache),
Box::new(LazyCollect),
Box::new(LazyExplode),
Box::new(LazyFetch),
Box::new(LazyFillNA),
Box::new(LazyFillNull),
Box::new(LazyFilter),
Box::new(LazyFlatten),
Box::new(LazyJoin),
Box::new(median::LazyMedian),
Box::new(LazyReverse),
Box::new(LazySelect),
Box::new(LazySortBy),
Box::new(LazyQuantile),
Box::new(ToLazyFrame),
Box::new(ToLazyGroupBy),
]
}

View File

@ -0,0 +1,160 @@
use crate::{
dataframe::values::{Column, NuDataFrame, NuLazyFrame},
values::{
cant_convert_err, to_pipeline_data, CustomValueSupport, NuExpression, PolarsPluginObject,
PolarsPluginType,
},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use polars::prelude::{lit, QuantileInterpolOptions};
#[derive(Clone)]
pub struct LazyQuantile;
impl PluginCommand for LazyQuantile {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars quantile"
}
fn usage(&self) -> &str {
"Aggregates the columns to the selected quantile."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"quantile",
SyntaxShape::Number,
"quantile value for quantile operation",
)
.input_output_types(vec![
(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "quantile value from columns in a dataframe",
example: "[[a b]; [6 2] [1 4] [4 1]] | polars into-df | polars quantile 0.5",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_float(4.0)]),
Column::new("b".to_string(), vec![Value::test_float(2.0)]),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Quantile aggregation for a group-by",
example: r#"[[a b]; [one 2] [one 4] [two 1]]
| polars into-df
| polars group-by a
| polars agg (polars col b | polars quantile 0.5)"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")],
),
Column::new(
"b".to_string(),
vec![Value::test_float(4.0), Value::test_float(1.0)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
let quantile: f64 = call.req(0)?;
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuDataFrame(df) => {
command(plugin, engine, call, df.lazy(), quantile)
}
PolarsPluginObject::NuLazyFrame(lazy) => command(plugin, engine, call, lazy, quantile),
PolarsPluginObject::NuExpression(expr) => {
let expr: NuExpression = expr
.to_polars()
.quantile(lit(quantile), QuantileInterpolOptions::default())
.into();
to_pipeline_data(plugin, engine, call.head, expr)
}
_ => Err(cant_convert_err(
&value,
&[
PolarsPluginType::NuDataFrame,
PolarsPluginType::NuLazyFrame,
PolarsPluginType::NuExpression,
],
)),
}
.map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
lazy: NuLazyFrame,
quantile: f64,
) -> Result<PipelineData, ShellError> {
let lazy = NuLazyFrame::new(
lazy.from_eager,
lazy.to_polars()
.quantile(lit(quantile), QuantileInterpolOptions::default())
.map_err(|e| ShellError::GenericError {
error: "Dataframe Error".into(),
msg: e.to_string(),
help: None,
span: None,
inner: vec![],
})?,
);
to_pipeline_data(plugin, engine, call.head, lazy)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&LazyQuantile)
}
}

View File

@ -0,0 +1,85 @@
use crate::{
dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame},
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value,
};
#[derive(Clone)]
pub struct LazySelect;
impl PluginCommand for LazySelect {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars select"
}
fn usage(&self) -> &str {
"Selects columns from lazyframe."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.rest(
"select expressions",
SyntaxShape::Any,
"Expression(s) that define the column selection",
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Select a column from the dataframe",
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars select a",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"a".to_string(),
vec![Value::test_int(6), Value::test_int(4), Value::test_int(2)],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let vals: Vec<Value> = call.rest(0)?;
let expr_value = Value::list(vals, call.head);
let expressions = NuExpression::extract_exprs(plugin, expr_value)?;
let pipeline_value = input.into_value(call.head);
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?;
let lazy = NuLazyFrame::new(lazy.from_eager, lazy.to_polars().select(&expressions));
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod test {
use crate::test::test_polars_plugin_command;
use super::*;
#[test]
fn test_examples() -> Result<(), nu_protocol::ShellError> {
test_polars_plugin_command(&LazySelect)
}
}

View File

@ -0,0 +1,160 @@
use super::super::values::NuLazyFrame;
use crate::{
dataframe::values::{Column, NuDataFrame, NuExpression},
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
#[derive(Clone)]
pub struct LazySortBy;
impl PluginCommand for LazySortBy {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars sort-by"
}
fn usage(&self) -> &str {
"Sorts a lazy dataframe based on expression(s)."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.rest(
"sort expression",
SyntaxShape::Any,
"sort expression for the dataframe",
)
.named(
"reverse",
SyntaxShape::List(Box::new(SyntaxShape::Boolean)),
"Reverse sorting. Default is false",
Some('r'),
)
.switch(
"nulls-last",
"nulls are shown last in the dataframe",
Some('n'),
)
.switch("maintain-order", "Maintains order during sort", Some('m'))
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Sort dataframe by one column",
example: "[[a b]; [6 2] [1 4] [4 1]] | polars into-df | polars sort-by a",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(4), Value::test_int(6)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(4), Value::test_int(1), Value::test_int(2)],
),
], None)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Sort column using two columns",
example:
"[[a b]; [6 2] [1 1] [1 4] [2 4]] | polars into-df | polars sort-by [a b] -r [false true]",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"a".to_string(),
vec![
Value::test_int(1),
Value::test_int(1),
Value::test_int(2),
Value::test_int(6),
],
),
Column::new(
"b".to_string(),
vec![
Value::test_int(4),
Value::test_int(1),
Value::test_int(4),
Value::test_int(2),
],
),
], None)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let vals: Vec<Value> = call.rest(0)?;
let expr_value = Value::list(vals, call.head);
let expressions = NuExpression::extract_exprs(plugin, expr_value)?;
let nulls_last = call.has_flag("nulls-last")?;
let maintain_order = call.has_flag("maintain-order")?;
let reverse: Option<Vec<bool>> = call.get_flag("reverse")?;
let reverse = match reverse {
Some(list) => {
if expressions.len() != list.len() {
let span = call
.get_flag::<Value>("reverse")?
.expect("already checked and it exists")
.span();
Err(ShellError::GenericError {
error: "Incorrect list size".into(),
msg: "Size doesn't match expression list".into(),
span: Some(span),
help: None,
inner: vec![],
})?
} else {
list
}
}
None => expressions.iter().map(|_| false).collect::<Vec<bool>>(),
};
let pipeline_value = input.into_value(call.head);
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?;
let lazy = NuLazyFrame::new(
lazy.from_eager,
lazy.to_polars()
.sort_by_exprs(&expressions, reverse, nulls_last, maintain_order),
);
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&LazySortBy)
}
}

View File

@ -0,0 +1,61 @@
use crate::{dataframe::values::NuSchema, values::CustomValueSupport, Cacheable, PolarsPlugin};
use super::super::values::{NuDataFrame, NuLazyFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{Category, Example, LabeledError, PipelineData, Signature, SyntaxShape, Type};
#[derive(Clone)]
pub struct ToLazyFrame;
impl PluginCommand for ToLazyFrame {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars into-lazy"
}
fn usage(&self) -> &str {
"Converts a dataframe into a lazy dataframe."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.named(
"schema",
SyntaxShape::Record(vec![]),
r#"Polars Schema in format [{name: str}]. CSV, JSON, and JSONL files"#,
Some('s'),
)
.input_output_type(Type::Any, Type::Custom("dataframe".into()))
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Takes a dictionary and creates a lazy dataframe",
example: "[[a b];[1 2] [3 4]] | polars into-lazy",
result: None,
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let maybe_schema = call
.get_flag("schema")?
.map(|schema| NuSchema::try_from(&schema))
.transpose()?;
let df = NuDataFrame::try_from_iter(plugin, input.into_iter(), maybe_schema)?;
let lazy = NuLazyFrame::from_dataframe(df);
Ok(PipelineData::Value(
lazy.cache(plugin, engine)?.into_value(call.head),
None,
))
}
}

View File

@ -0,0 +1,19 @@
use nu_protocol::{ShellError, Span};
pub mod eager;
pub mod expressions;
pub mod lazy;
pub mod series;
pub mod stub;
mod utils;
pub mod values;
pub fn missing_flag_error(flag: &str, span: Span) -> ShellError {
ShellError::GenericError {
error: format!("Missing flag: {flag}"),
msg: "".into(),
span: Some(span),
help: None,
inner: vec![],
}
}

View File

@ -0,0 +1,119 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::values::{Column, NuDataFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
#[derive(Clone)]
pub struct AllFalse;
impl PluginCommand for AllFalse {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars all-false"
}
fn usage(&self) -> &str {
"Returns true if all values are false."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Returns true if all values are false",
example: "[false false false] | polars into-df | polars all-false",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"all_false".to_string(),
vec![Value::test_bool(true)],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Checks the result from a comparison",
example: r#"let s = ([5 6 2 10] | polars into-df);
let res = ($s > 9);
$res | polars all-false"#,
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"all_false".to_string(),
vec![Value::test_bool(false)],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
let bool = series.bool().map_err(|_| ShellError::GenericError {
error: "Error converting to bool".into(),
msg: "all-false only works with series of type bool".into(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let value = Value::bool(!bool.any(), call.head);
let df = NuDataFrame::try_from_columns(
vec![Column::new("all_false".to_string(), vec![value])],
None,
)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&AllFalse)
}
}

View File

@ -0,0 +1,119 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::values::{Column, NuDataFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
#[derive(Clone)]
pub struct AllTrue;
impl PluginCommand for AllTrue {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars all-true"
}
fn usage(&self) -> &str {
"Returns true if all values are true."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Returns true if all values are true",
example: "[true true true] | polars into-df | polars all-true",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"all_true".to_string(),
vec![Value::test_bool(true)],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Checks the result from a comparison",
example: r#"let s = ([5 6 2 8] | polars into-df);
let res = ($s > 9);
$res | polars all-true"#,
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"all_true".to_string(),
vec![Value::test_bool(false)],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
let bool = series.bool().map_err(|_| ShellError::GenericError {
error: "Error converting to bool".into(),
msg: "all-false only works with series of type bool".into(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let value = Value::bool(bool.all(), call.head);
let df = NuDataFrame::try_from_columns(
vec![Column::new("all_true".to_string(), vec![value])],
None,
)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&AllTrue)
}
}

View File

@ -0,0 +1,96 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::values::{Column, NuDataFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
use polars::prelude::{ArgAgg, IntoSeries, NewChunkedArray, UInt32Chunked};
#[derive(Clone)]
pub struct ArgMax;
impl PluginCommand for ArgMax {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars arg-max"
}
fn usage(&self) -> &str {
"Return index for max value in series."
}
fn search_terms(&self) -> Vec<&str> {
vec!["argmax", "maximum", "most", "largest", "greatest"]
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns index for max value",
example: "[1 3 2] | polars into-df | polars arg-max",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new("arg_max".to_string(), vec![Value::test_int(1)])],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
let res = series.arg_max();
let chunked = match res {
Some(index) => UInt32Chunked::from_slice("arg_max", &[index as u32]),
None => UInt32Chunked::from_slice("arg_max", &[]),
};
let res = chunked.into_series();
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&ArgMax)
}
}

View File

@ -0,0 +1,96 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::values::{Column, NuDataFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
use polars::prelude::{ArgAgg, IntoSeries, NewChunkedArray, UInt32Chunked};
#[derive(Clone)]
pub struct ArgMin;
impl PluginCommand for ArgMin {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars arg-min"
}
fn usage(&self) -> &str {
"Return index for min value in series."
}
fn search_terms(&self) -> Vec<&str> {
vec!["argmin", "minimum", "least", "smallest", "lowest"]
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns index for min value",
example: "[1 3 2] | polars into-df | polars arg-min",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new("arg_min".to_string(), vec![Value::test_int(0)])],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
let res = series.arg_min();
let chunked = match res {
Some(index) => UInt32Chunked::from_slice("arg_min", &[index as u32]),
None => UInt32Chunked::from_slice("arg_min", &[]),
};
let res = chunked.into_series();
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&ArgMin)
}
}

View File

@ -0,0 +1,159 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::values::{Column, NuDataFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Spanned,
SyntaxShape, Type, Value,
};
use polars::prelude::{DataType, IntoSeries};
use polars_ops::prelude::{cum_max, cum_min, cum_sum};
enum CumulativeType {
Min,
Max,
Sum,
}
impl CumulativeType {
fn from_str(roll_type: &str, span: Span) -> Result<Self, ShellError> {
match roll_type {
"min" => Ok(Self::Min),
"max" => Ok(Self::Max),
"sum" => Ok(Self::Sum),
_ => Err(ShellError::GenericError {
error: "Wrong operation".into(),
msg: "Operation not valid for cumulative".into(),
span: Some(span),
help: Some("Allowed values: max, min, sum".into()),
inner: vec![],
}),
}
}
fn to_str(&self) -> &'static str {
match self {
CumulativeType::Min => "cumulative_min",
CumulativeType::Max => "cumulative_max",
CumulativeType::Sum => "cumulative_sum",
}
}
}
#[derive(Clone)]
pub struct Cumulative;
impl PluginCommand for Cumulative {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars cumulative"
}
fn usage(&self) -> &str {
"Cumulative calculation for a series."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("type", SyntaxShape::String, "rolling operation")
.switch("reverse", "Reverse cumulative calculation", Some('r'))
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Cumulative sum for a series",
example: "[1 2 3 4 5] | polars into-df | polars cumulative sum",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"0_cumulative_sum".to_string(),
vec![
Value::test_int(1),
Value::test_int(3),
Value::test_int(6),
Value::test_int(10),
Value::test_int(15),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let cum_type: Spanned<String> = call.req(0)?;
let reverse = call.has_flag("reverse")?;
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
if let DataType::Object(..) = series.dtype() {
return Err(ShellError::GenericError {
error: "Found object series".into(),
msg: "Series of type object cannot be used for cumulative operation".into(),
span: Some(call.head),
help: None,
inner: vec![],
});
}
let cum_type = CumulativeType::from_str(&cum_type.item, cum_type.span)?;
let mut res = match cum_type {
CumulativeType::Max => cum_max(&series, reverse),
CumulativeType::Min => cum_min(&series, reverse),
CumulativeType::Sum => cum_sum(&series, reverse),
}
.map_err(|e| ShellError::GenericError {
error: "Error creating cumulative".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let name = format!("{}_{}", series.name(), cum_type.to_str());
res.rename(&name);
let df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&Cumulative)
}
}

View File

@ -0,0 +1,101 @@
use crate::{values::to_pipeline_data, PolarsPlugin};
use super::super::super::values::NuDataFrame;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, SyntaxShape, Type,
};
use polars::prelude::{IntoSeries, StringMethods};
#[derive(Clone)]
pub struct AsDate;
impl PluginCommand for AsDate {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars as-date"
}
fn usage(&self) -> &str {
r#"Converts string to date."#
}
fn extra_usage(&self) -> &str {
r#"Format example:
"%Y-%m-%d" => 2021-12-31
"%d-%m-%Y" => 31-12-2021
"%Y%m%d" => 2021319 (2021-03-19)"#
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("format", SyntaxShape::String, "formatting date string")
.switch("not-exact", "the format string may be contained in the date (e.g. foo-2021-01-01-bar could match 2021-01-01)", Some('n'))
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Converts string to date",
example: r#"["2021-12-30" "2021-12-31"] | polars into-df | polars as-date "%Y-%m-%d""#,
result: None,
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let format: String = call.req(0)?;
let not_exact = call.has_flag("not-exact")?;
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
let casted = series.str().map_err(|e| ShellError::GenericError {
error: "Error casting to string".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let res = if not_exact {
casted.as_date_not_exact(Some(format.as_str()))
} else {
casted.as_date(Some(format.as_str()), false)
};
let mut res = res
.map_err(|e| ShellError::GenericError {
error: "Error creating datetime".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into_series();
res.rename("date");
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}

View File

@ -0,0 +1,198 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::super::values::{Column, NuDataFrame};
use chrono::DateTime;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use polars::prelude::{IntoSeries, StringMethods, TimeUnit};
#[derive(Clone)]
pub struct AsDateTime;
impl PluginCommand for AsDateTime {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars as-datetime"
}
fn usage(&self) -> &str {
r#"Converts string to datetime."#
}
fn extra_usage(&self) -> &str {
r#"Format example:
"%y/%m/%d %H:%M:%S" => 21/12/31 12:54:98
"%y-%m-%d %H:%M:%S" => 2021-12-31 24:58:01
"%y/%m/%d %H:%M:%S" => 21/12/31 24:58:01
"%y%m%d %H:%M:%S" => 210319 23:58:50
"%Y/%m/%d %H:%M:%S" => 2021/12/31 12:54:98
"%Y-%m-%d %H:%M:%S" => 2021-12-31 24:58:01
"%Y/%m/%d %H:%M:%S" => 2021/12/31 24:58:01
"%Y%m%d %H:%M:%S" => 20210319 23:58:50
"%FT%H:%M:%S" => 2019-04-18T02:45:55
"%FT%H:%M:%S.%6f" => microseconds
"%FT%H:%M:%S.%9f" => nanoseconds"#
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("format", SyntaxShape::String, "formatting date time string")
.switch("not-exact", "the format string may be contained in the date (e.g. foo-2021-01-01-bar could match 2021-01-01)", Some('n'))
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Converts string to datetime",
example: r#"["2021-12-30 00:00:00" "2021-12-31 00:00:00"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S""#,
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"datetime".to_string(),
vec![
Value::date(
DateTime::parse_from_str(
"2021-12-30 00:00:00 +0000",
"%Y-%m-%d %H:%M:%S %z",
)
.expect("date calculation should not fail in test"),
Span::test_data(),
),
Value::date(
DateTime::parse_from_str(
"2021-12-31 00:00:00 +0000",
"%Y-%m-%d %H:%M:%S %z",
)
.expect("date calculation should not fail in test"),
Span::test_data(),
),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Converts string to datetime with high resolutions",
example: r#"["2021-12-30 00:00:00.123456789" "2021-12-31 00:00:00.123456789"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S.%9f""#,
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"datetime".to_string(),
vec![
Value::date(
DateTime::parse_from_str(
"2021-12-30 00:00:00.123456789 +0000",
"%Y-%m-%d %H:%M:%S.%9f %z",
)
.expect("date calculation should not fail in test"),
Span::test_data(),
),
Value::date(
DateTime::parse_from_str(
"2021-12-31 00:00:00.123456789 +0000",
"%Y-%m-%d %H:%M:%S.%9f %z",
)
.expect("date calculation should not fail in test"),
Span::test_data(),
),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let format: String = call.req(0)?;
let not_exact = call.has_flag("not-exact")?;
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
let casted = series.str().map_err(|e| ShellError::GenericError {
error: "Error casting to string".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let res = if not_exact {
casted.as_datetime_not_exact(
Some(format.as_str()),
TimeUnit::Nanoseconds,
false,
None,
&Default::default(),
)
} else {
casted.as_datetime(
Some(format.as_str()),
TimeUnit::Nanoseconds,
false,
false,
None,
&Default::default(),
)
};
let mut res = res
.map_err(|e| ShellError::GenericError {
error: "Error creating datetime".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into_series();
res.rename("datetime");
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&AsDateTime)
}
}

View File

@ -0,0 +1,105 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::super::values::NuDataFrame;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type,
};
use polars::{
prelude::{DatetimeMethods, IntoSeries, NamedFrom},
series::Series,
};
#[derive(Clone)]
pub struct GetDay;
impl PluginCommand for GetDay {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars get-day"
}
fn usage(&self) -> &str {
"Gets day from date."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns day from a date",
example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC');
let df = ([$dt $dt] | polars into-df);
$df | polars get-day"#,
result: Some(
NuDataFrame::try_from_series(Series::new("0", &[4i8, 4]), Span::test_data())
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
fn extra_usage(&self) -> &str {
""
}
fn search_terms(&self) -> Vec<&str> {
vec![]
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
let casted = series.datetime().map_err(|e| ShellError::GenericError {
error: "Error casting to datetime type".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let res = casted.day().into_series();
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&GetDay)
}
}

View File

@ -0,0 +1,97 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::super::values::NuDataFrame;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type,
};
use polars::{
prelude::{DatetimeMethods, IntoSeries, NamedFrom},
series::Series,
};
#[derive(Clone)]
pub struct GetHour;
impl PluginCommand for GetHour {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars get-hour"
}
fn usage(&self) -> &str {
"Gets hour from date."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns hour from a date",
example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC');
let df = ([$dt $dt] | polars into-df);
$df | polars get-hour"#,
result: Some(
NuDataFrame::try_from_series(Series::new("0", &[16i8, 16]), Span::test_data())
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
let casted = series.datetime().map_err(|e| ShellError::GenericError {
error: "Error casting to datetime type".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let res = casted.hour().into_series();
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&GetHour)
}
}

View File

@ -0,0 +1,95 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use polars::{prelude::NamedFrom, series::Series};
use super::super::super::values::NuDataFrame;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type,
};
use polars::prelude::{DatetimeMethods, IntoSeries};
#[derive(Clone)]
pub struct GetMinute;
impl PluginCommand for GetMinute {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars get-minute"
}
fn usage(&self) -> &str {
"Gets minute from date."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns minute from a date",
example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC');
let df = ([$dt $dt] | polars into-df);
$df | polars get-minute"#,
result: Some(
NuDataFrame::try_from_series(Series::new("0", &[39i8, 39]), Span::test_data())
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
let casted = series.datetime().map_err(|e| ShellError::GenericError {
error: "Error casting to datetime type".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let res = casted.minute().into_series();
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&GetMinute)
}
}

View File

@ -0,0 +1,97 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::super::values::NuDataFrame;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type,
};
use polars::{
prelude::{DatetimeMethods, IntoSeries, NamedFrom},
series::Series,
};
#[derive(Clone)]
pub struct GetMonth;
impl PluginCommand for GetMonth {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars get-month"
}
fn usage(&self) -> &str {
"Gets month from date."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns month from a date",
example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC');
let df = ([$dt $dt] | polars into-df);
$df | polars get-month"#,
result: Some(
NuDataFrame::try_from_series(Series::new("0", &[8i8, 8]), Span::test_data())
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
let casted = series.datetime().map_err(|e| ShellError::GenericError {
error: "Error casting to datetime type".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let res = casted.month().into_series();
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&GetMonth)
}
}

View File

@ -0,0 +1,97 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::super::values::NuDataFrame;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type,
};
use polars::{
prelude::{DatetimeMethods, IntoSeries, NamedFrom},
series::Series,
};
#[derive(Clone)]
pub struct GetNanosecond;
impl PluginCommand for GetNanosecond {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars get-nanosecond"
}
fn usage(&self) -> &str {
"Gets nanosecond from date."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns nanosecond from a date",
example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC');
let df = ([$dt $dt] | polars into-df);
$df | polars get-nanosecond"#,
result: Some(
NuDataFrame::try_from_series(Series::new("0", &[0i32, 0]), Span::test_data())
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
let casted = series.datetime().map_err(|e| ShellError::GenericError {
error: "Error casting to datetime type".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let res = casted.nanosecond().into_series();
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&GetNanosecond)
}
}

View File

@ -0,0 +1,97 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::super::values::NuDataFrame;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type,
};
use polars::{
prelude::{DatetimeMethods, IntoSeries, NamedFrom},
series::Series,
};
#[derive(Clone)]
pub struct GetOrdinal;
impl PluginCommand for GetOrdinal {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars get-ordinal"
}
fn usage(&self) -> &str {
"Gets ordinal from date."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns ordinal from a date",
example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC');
let df = ([$dt $dt] | polars into-df);
$df | polars get-ordinal"#,
result: Some(
NuDataFrame::try_from_series(Series::new("0", &[217i16, 217]), Span::test_data())
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
let casted = series.datetime().map_err(|e| ShellError::GenericError {
error: "Error casting to datetime type".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let res = casted.ordinal().into_series();
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&GetOrdinal)
}
}

View File

@ -0,0 +1,97 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::super::values::NuDataFrame;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type,
};
use polars::{
prelude::{DatetimeMethods, IntoSeries, NamedFrom},
series::Series,
};
#[derive(Clone)]
pub struct GetSecond;
impl PluginCommand for GetSecond {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars get-second"
}
fn usage(&self) -> &str {
"Gets second from date."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns second from a date",
example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC');
let df = ([$dt $dt] | polars into-df);
$df | polars get-second"#,
result: Some(
NuDataFrame::try_from_series(Series::new("0", &[18i8, 18]), Span::test_data())
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
let casted = series.datetime().map_err(|e| ShellError::GenericError {
error: "Error casting to datetime type".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let res = casted.second().into_series();
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&GetSecond)
}
}

View File

@ -0,0 +1,97 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::super::values::NuDataFrame;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type,
};
use polars::{
prelude::{DatetimeMethods, IntoSeries, NamedFrom},
series::Series,
};
#[derive(Clone)]
pub struct GetWeek;
impl PluginCommand for GetWeek {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars get-week"
}
fn usage(&self) -> &str {
"Gets week from date."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns week from a date",
example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC');
let df = ([$dt $dt] | polars into-df);
$df | polars get-week"#,
result: Some(
NuDataFrame::try_from_series(Series::new("0", &[32i8, 32]), Span::test_data())
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
let casted = series.datetime().map_err(|e| ShellError::GenericError {
error: "Error casting to datetime type".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let res = casted.week().into_series();
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&GetWeek)
}
}

View File

@ -0,0 +1,97 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::super::values::NuDataFrame;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type,
};
use polars::{
prelude::{DatetimeMethods, IntoSeries, NamedFrom},
series::Series,
};
#[derive(Clone)]
pub struct GetWeekDay;
impl PluginCommand for GetWeekDay {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars get-weekday"
}
fn usage(&self) -> &str {
"Gets weekday from date."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns weekday from a date",
example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC');
let df = ([$dt $dt] | polars into-df);
$df | polars get-weekday"#,
result: Some(
NuDataFrame::try_from_series(Series::new("0", &[2i8, 2]), Span::test_data())
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
let casted = series.datetime().map_err(|e| ShellError::GenericError {
error: "Error casting to datetime type".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let res = casted.weekday().into_series();
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&GetWeekDay)
}
}

View File

@ -0,0 +1,97 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::super::values::NuDataFrame;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type,
};
use polars::{
prelude::{DatetimeMethods, IntoSeries, NamedFrom},
series::Series,
};
#[derive(Clone)]
pub struct GetYear;
impl PluginCommand for GetYear {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars get-year"
}
fn usage(&self) -> &str {
"Gets year from date."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns year from a date",
example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC');
let df = ([$dt $dt] | polars into-df);
$df | polars get-year"#,
result: Some(
NuDataFrame::try_from_series(Series::new("0", &[2020i32, 2020]), Span::test_data())
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
let casted = series.datetime().map_err(|e| ShellError::GenericError {
error: "Error casting to datetime type".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let res = casted.year().into_series();
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&GetYear)
}
}

View File

@ -0,0 +1,25 @@
mod as_date;
mod as_datetime;
mod get_day;
mod get_hour;
mod get_minute;
mod get_month;
mod get_nanosecond;
mod get_ordinal;
mod get_second;
mod get_week;
mod get_weekday;
mod get_year;
pub use as_date::AsDate;
pub use as_datetime::AsDateTime;
pub use get_day::GetDay;
pub use get_hour::GetHour;
pub use get_minute::GetMinute;
pub use get_month::GetMonth;
pub use get_nanosecond::GetNanosecond;
pub use get_ordinal::GetOrdinal;
pub use get_second::GetSecond;
pub use get_week::GetWeek;
pub use get_weekday::GetWeekDay;
pub use get_year::GetYear;

View File

@ -0,0 +1,140 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::super::values::{Column, NuDataFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
use polars::prelude::{IntoSeries, SortOptions};
#[derive(Clone)]
pub struct ArgSort;
impl PluginCommand for ArgSort {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars arg-sort"
}
fn usage(&self) -> &str {
"Returns indexes for a sorted series."
}
fn search_terms(&self) -> Vec<&str> {
vec!["argsort", "order", "arrange"]
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.switch("reverse", "reverse order", Some('r'))
.switch("nulls-last", "nulls ordered last", Some('n'))
.switch(
"maintain-order",
"maintain order on sorted items",
Some('m'),
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Returns indexes for a sorted series",
example: "[1 2 2 3 3] | polars into-df | polars arg-sort",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"arg_sort".to_string(),
vec![
Value::test_int(0),
Value::test_int(1),
Value::test_int(2),
Value::test_int(3),
Value::test_int(4),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Returns indexes for a sorted series",
example: "[1 2 2 3 3] | polars into-df | polars arg-sort --reverse",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"arg_sort".to_string(),
vec![
Value::test_int(3),
Value::test_int(4),
Value::test_int(1),
Value::test_int(2),
Value::test_int(0),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let sort_options = SortOptions {
descending: call.has_flag("reverse")?,
nulls_last: call.has_flag("nulls-last")?,
multithreaded: true,
maintain_order: call.has_flag("maintain-order")?,
};
let mut res = df
.as_series(call.head)?
.arg_sort(sort_options)
.into_series();
res.rename("arg_sort");
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&ArgSort)
}
}

View File

@ -0,0 +1,126 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::super::values::{Column, NuDataFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
use polars::prelude::{arg_where, col, IntoLazy};
#[derive(Clone)]
pub struct ArgTrue;
impl PluginCommand for ArgTrue {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars arg-true"
}
fn usage(&self) -> &str {
"Returns indexes where values are true."
}
fn search_terms(&self) -> Vec<&str> {
vec!["argtrue", "truth", "boolean-true"]
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns indexes where values are true",
example: "[false true false] | polars into-df | polars arg-true",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"arg_true".to_string(),
vec![Value::test_int(1)],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let columns = df.as_ref().get_column_names();
if columns.len() > 1 {
return Err(ShellError::GenericError {
error: "Error using as series".into(),
msg: "dataframe has more than one column".into(),
span: Some(call.head),
help: None,
inner: vec![],
});
}
match columns.first() {
Some(column) => {
let expression = arg_where(col(column).eq(true)).alias("arg_true");
let res: NuDataFrame = df
.as_ref()
.clone()
.lazy()
.select(&[expression])
.collect()
.map_err(|err| ShellError::GenericError {
error: "Error creating index column".into(),
msg: err.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into();
to_pipeline_data(plugin, engine, call.head, res)
}
_ => Err(ShellError::UnsupportedInput {
msg: "Expected the dataframe to have a column".to_string(),
input: "".to_string(),
msg_span: call.head,
input_span: call.head,
}),
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&ArgTrue)
}
}

View File

@ -0,0 +1,104 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::super::values::{Column, NuDataFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
use polars::prelude::IntoSeries;
#[derive(Clone)]
pub struct ArgUnique;
impl PluginCommand for ArgUnique {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars arg-unique"
}
fn usage(&self) -> &str {
"Returns indexes for unique values."
}
fn search_terms(&self) -> Vec<&str> {
vec!["argunique", "distinct", "noduplicate", "unrepeated"]
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns indexes for unique values",
example: "[1 2 2 3 3] | polars into-df | polars arg-unique",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"arg_unique".to_string(),
vec![Value::test_int(0), Value::test_int(1), Value::test_int(3)],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let mut res = df
.as_series(call.head)?
.arg_unique()
.map_err(|e| ShellError::GenericError {
error: "Error extracting unique values".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into_series();
res.rename("arg_unique");
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&ArgUnique)
}
}

View File

@ -0,0 +1,9 @@
mod arg_sort;
mod arg_true;
mod arg_unique;
mod set_with_idx;
pub use arg_sort::ArgSort;
pub use arg_true::ArgTrue;
pub use arg_unique::ArgUnique;
pub use set_with_idx::SetWithIndex;

View File

@ -0,0 +1,227 @@
use crate::{
missing_flag_error,
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::super::values::{Column, NuDataFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use polars::prelude::{ChunkSet, DataType, IntoSeries};
#[derive(Clone)]
pub struct SetWithIndex;
impl PluginCommand for SetWithIndex {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars set-with-idx"
}
fn usage(&self) -> &str {
"Sets value in the given index."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("value", SyntaxShape::Any, "value to be inserted in series")
.required_named(
"indices",
SyntaxShape::Any,
"list of indices indicating where to set the value",
Some('i'),
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Set value in selected rows from series",
example: r#"let series = ([4 1 5 2 4 3] | polars into-df);
let indices = ([0 2] | polars into-df);
$series | polars set-with-idx 6 --indices $indices"#,
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
Value::test_int(6),
Value::test_int(1),
Value::test_int(6),
Value::test_int(2),
Value::test_int(4),
Value::test_int(3),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let value: Value = call.req(0)?;
let indices_value: Value = call
.get_flag("indices")?
.ok_or_else(|| missing_flag_error("indices", call.head))?;
let indices_span = indices_value.span();
let indices = NuDataFrame::try_from_value_coerce(plugin, &indices_value, call.head)?
.as_series(indices_span)?;
let casted = match indices.dtype() {
DataType::UInt32 | DataType::UInt64 | DataType::Int32 | DataType::Int64 => indices
.as_ref()
.cast(&DataType::UInt32)
.map_err(|e| ShellError::GenericError {
error: "Error casting indices".into(),
msg: e.to_string(),
span: Some(indices_span),
help: None,
inner: vec![],
}),
_ => Err(ShellError::GenericError {
error: "Incorrect type".into(),
msg: "Series with incorrect type".into(),
span: Some(indices_span),
help: Some("Consider using a Series with type int type".into()),
inner: vec![],
}),
}?;
let indices = casted
.u32()
.map_err(|e| ShellError::GenericError {
error: "Error casting indices".into(),
msg: e.to_string(),
span: Some(indices_span),
help: None,
inner: vec![],
})?
.into_iter()
.flatten();
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
let span = value.span();
let res = match value {
Value::Int { val, .. } => {
let chunked = series.i64().map_err(|e| ShellError::GenericError {
error: "Error casting to i64".into(),
msg: e.to_string(),
span: Some(span),
help: None,
inner: vec![],
})?;
let res = chunked.scatter_single(indices, Some(val)).map_err(|e| {
ShellError::GenericError {
error: "Error setting value".into(),
msg: e.to_string(),
span: Some(span),
help: None,
inner: vec![],
}
})?;
NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)
}
Value::Float { val, .. } => {
let chunked = series.f64().map_err(|e| ShellError::GenericError {
error: "Error casting to f64".into(),
msg: e.to_string(),
span: Some(span),
help: None,
inner: vec![],
})?;
let res = chunked.scatter_single(indices, Some(val)).map_err(|e| {
ShellError::GenericError {
error: "Error setting value".into(),
msg: e.to_string(),
span: Some(span),
help: None,
inner: vec![],
}
})?;
NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)
}
Value::String { val, .. } => {
let chunked = series.str().map_err(|e| ShellError::GenericError {
error: "Error casting to string".into(),
msg: e.to_string(),
span: Some(span),
help: None,
inner: vec![],
})?;
let res = chunked
.scatter_single(indices, Some(val.as_ref()))
.map_err(|e| ShellError::GenericError {
error: "Error setting value".into(),
msg: e.to_string(),
span: Some(span),
help: None,
inner: vec![],
})?;
let mut res = res.into_series();
res.rename("string");
NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)
}
_ => Err(ShellError::GenericError {
error: "Incorrect value type".into(),
msg: format!(
"this value cannot be set in a series of type '{}'",
series.dtype()
),
span: Some(span),
help: None,
inner: vec![],
}),
}?;
to_pipeline_data(plugin, engine, call.head, res)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&SetWithIndex)
}
}

View File

@ -0,0 +1,133 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::super::values::{Column, NuDataFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
use polars::prelude::IntoSeries;
#[derive(Clone)]
pub struct IsDuplicated;
impl PluginCommand for IsDuplicated {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars is-duplicated"
}
fn usage(&self) -> &str {
"Creates mask indicating duplicated values."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Create mask indicating duplicated values",
example: "[5 6 6 6 8 8 8] | polars into-df | polars is-duplicated",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"is_duplicated".to_string(),
vec![
Value::test_bool(false),
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(true),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Create mask indicating duplicated rows in a dataframe",
example:
"[[a, b]; [1 2] [1 2] [3 3] [3 3] [1 1]] | polars into-df | polars is-duplicated",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"is_duplicated".to_string(),
vec![
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(false),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let mut res = df
.as_ref()
.is_duplicated()
.map_err(|e| ShellError::GenericError {
error: "Error finding duplicates".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into_series();
res.rename("is_duplicated");
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&IsDuplicated)
}
}

View File

@ -0,0 +1,130 @@
use crate::{
values::{
cant_convert_err, to_pipeline_data, CustomValueSupport, PolarsPluginObject,
PolarsPluginType,
},
PolarsPlugin,
};
use super::super::super::values::{Column, NuDataFrame, NuExpression};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
use polars::prelude::IntoSeries;
#[derive(Clone)]
pub struct IsNotNull;
impl PluginCommand for IsNotNull {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars is-not-null"
}
fn usage(&self) -> &str {
"Creates mask where value is not null."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_types(vec![
(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Create mask where values are not null",
example: r#"let s = ([5 6 0 8] | polars into-df);
let res = ($s / $s);
$res | polars is-not-null"#,
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"is_not_null".to_string(),
vec![
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(false),
Value::test_bool(true),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Creates a is not null expression from a column",
example: "polars col a | polars is-not-null",
result: None,
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuDataFrame(df) => command(plugin, engine, call, df),
PolarsPluginObject::NuLazyFrame(lazy) => {
command(plugin, engine, call, lazy.collect(call.head)?)
}
PolarsPluginObject::NuExpression(expr) => {
let expr: NuExpression = expr.to_polars().is_not_null().into();
to_pipeline_data(plugin, engine, call.head, expr)
}
_ => Err(cant_convert_err(
&value,
&[
PolarsPluginType::NuDataFrame,
PolarsPluginType::NuLazyFrame,
PolarsPluginType::NuExpression,
],
)),
}
.map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
df: NuDataFrame,
) -> Result<PipelineData, ShellError> {
let mut res = df.as_series(call.head)?.is_not_null();
res.rename("is_not_null");
let df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&IsNotNull)
}
}

View File

@ -0,0 +1,130 @@
use crate::{
values::{
cant_convert_err, to_pipeline_data, CustomValueSupport, NuExpression, PolarsPluginObject,
PolarsPluginType,
},
PolarsPlugin,
};
use super::super::super::values::{Column, NuDataFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
use polars::prelude::IntoSeries;
#[derive(Clone)]
pub struct IsNull;
impl PluginCommand for IsNull {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars is-null"
}
fn usage(&self) -> &str {
"Creates mask where value is null."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_types(vec![
(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Create mask where values are null",
example: r#"let s = ([5 6 0 8] | polars into-df);
let res = ($s / $s);
$res | polars is-null"#,
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"is_null".to_string(),
vec![
Value::test_bool(false),
Value::test_bool(false),
Value::test_bool(true),
Value::test_bool(false),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Creates a is null expression from a column",
example: "polars col a | polars is-null",
result: None,
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuDataFrame(df) => command(plugin, engine, call, df),
PolarsPluginObject::NuLazyFrame(lazy) => {
command(plugin, engine, call, lazy.collect(call.head)?)
}
PolarsPluginObject::NuExpression(expr) => {
let expr: NuExpression = expr.to_polars().is_null().into();
to_pipeline_data(plugin, engine, call.head, expr)
}
_ => Err(cant_convert_err(
&value,
&[
PolarsPluginType::NuDataFrame,
PolarsPluginType::NuLazyFrame,
PolarsPluginType::NuExpression,
],
)),
}
.map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
df: NuDataFrame,
) -> Result<PipelineData, ShellError> {
let mut res = df.as_series(call.head)?.is_null();
res.rename("is_null");
let df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&IsNull)
}
}

View File

@ -0,0 +1,133 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::super::values::{Column, NuDataFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
use polars::prelude::IntoSeries;
#[derive(Clone)]
pub struct IsUnique;
impl PluginCommand for IsUnique {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars is-unique"
}
fn usage(&self) -> &str {
"Creates mask indicating unique values."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Create mask indicating unique values",
example: "[5 6 6 6 8 8 8] | polars into-df | polars is-unique",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"is_unique".to_string(),
vec![
Value::test_bool(true),
Value::test_bool(false),
Value::test_bool(false),
Value::test_bool(false),
Value::test_bool(false),
Value::test_bool(false),
Value::test_bool(false),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Create mask indicating duplicated rows in a dataframe",
example:
"[[a, b]; [1 2] [1 2] [3 3] [3 3] [1 1]] | polars into-df | polars is-unique",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"is_unique".to_string(),
vec![
Value::test_bool(false),
Value::test_bool(false),
Value::test_bool(false),
Value::test_bool(false),
Value::test_bool(true),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let mut res = df
.as_ref()
.is_unique()
.map_err(|e| ShellError::GenericError {
error: "Error finding unique values".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into_series();
res.rename("is_unique");
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&IsUnique)
}
}

View File

@ -0,0 +1,13 @@
mod is_duplicated;
mod is_not_null;
mod is_null;
mod is_unique;
mod not;
mod set;
pub use is_duplicated::IsDuplicated;
pub use is_not_null::IsNotNull;
pub use is_null::IsNull;
pub use is_unique::IsUnique;
pub use not::NotSeries;
pub use set::SetSeries;

View File

@ -0,0 +1,103 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::super::values::{Column, NuDataFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
use polars::prelude::IntoSeries;
use std::ops::Not;
#[derive(Clone)]
pub struct NotSeries;
impl PluginCommand for NotSeries {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars not"
}
fn usage(&self) -> &str {
"Inverts boolean mask."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Inverts boolean mask",
example: "[true false true] | polars into-df | polars not",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
Value::test_bool(false),
Value::test_bool(true),
Value::test_bool(false),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
command(plugin, engine, call, df).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
df: NuDataFrame,
) -> Result<PipelineData, ShellError> {
let series = df.as_series(call.head)?;
let bool = series.bool().map_err(|e| ShellError::GenericError {
error: "Error inverting mask".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let res = bool.not();
let df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&NotSeries)
}
}

View File

@ -0,0 +1,210 @@
use crate::{
missing_flag_error,
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::super::values::{Column, NuDataFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use polars::prelude::{ChunkSet, DataType, IntoSeries};
#[derive(Clone)]
pub struct SetSeries;
impl PluginCommand for SetSeries {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars set"
}
fn usage(&self) -> &str {
"Sets value where given mask is true."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("value", SyntaxShape::Any, "value to be inserted in series")
.required_named(
"mask",
SyntaxShape::Any,
"mask indicating insertions",
Some('m'),
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Shifts the values by a given period",
example: r#"let s = ([1 2 2 3 3] | polars into-df | polars shift 2);
let mask = ($s | polars is-null);
$s | polars set 0 --mask $mask"#,
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
Value::test_int(0),
Value::test_int(0),
Value::test_int(1),
Value::test_int(2),
Value::test_int(2),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let value: Value = call.req(0)?;
let mask_value: Value = call
.get_flag("mask")?
.ok_or_else(|| missing_flag_error("mask", call.head))?;
let mask_span = mask_value.span();
let mask =
NuDataFrame::try_from_value_coerce(plugin, &mask_value, call.head)?.as_series(mask_span)?;
let bool_mask = match mask.dtype() {
DataType::Boolean => mask.bool().map_err(|e| ShellError::GenericError {
error: "Error casting to bool".into(),
msg: e.to_string(),
span: Some(mask_span),
help: None,
inner: vec![],
}),
_ => Err(ShellError::GenericError {
error: "Incorrect type".into(),
msg: "can only use bool series as mask".into(),
span: Some(mask_span),
help: None,
inner: vec![],
}),
}?;
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
let span = value.span();
let res = match value {
Value::Int { val, .. } => {
let chunked = series.i64().map_err(|e| ShellError::GenericError {
error: "Error casting to i64".into(),
msg: e.to_string(),
span: Some(span),
help: None,
inner: vec![],
})?;
let res = chunked
.set(bool_mask, Some(val))
.map_err(|e| ShellError::GenericError {
error: "Error setting value".into(),
msg: e.to_string(),
span: Some(span),
help: None,
inner: vec![],
})?;
NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)
}
Value::Float { val, .. } => {
let chunked = series.f64().map_err(|e| ShellError::GenericError {
error: "Error casting to f64".into(),
msg: e.to_string(),
span: Some(span),
help: None,
inner: vec![],
})?;
let res = chunked
.set(bool_mask, Some(val))
.map_err(|e| ShellError::GenericError {
error: "Error setting value".into(),
msg: e.to_string(),
span: Some(span),
help: None,
inner: vec![],
})?;
NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)
}
Value::String { val, .. } => {
let chunked = series.str().map_err(|e| ShellError::GenericError {
error: "Error casting to string".into(),
msg: e.to_string(),
span: Some(span),
help: None,
inner: vec![],
})?;
let res = chunked.set(bool_mask, Some(val.as_ref())).map_err(|e| {
ShellError::GenericError {
error: "Error setting value".into(),
msg: e.to_string(),
span: Some(span),
help: None,
inner: vec![],
}
})?;
let mut res = res.into_series();
res.rename("string");
NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)
}
_ => Err(ShellError::GenericError {
error: "Incorrect value type".into(),
msg: format!(
"this value cannot be set in a series of type '{}'",
series.dtype()
),
span: Some(span),
help: None,
inner: vec![],
}),
}?;
to_pipeline_data(plugin, engine, call.head, res)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&SetSeries)
}
}

Some files were not shown because too many files have changed in this diff Show More