mirror of
https://github.com/nushell/nushell.git
synced 2024-11-21 16:03:19 +01:00
Move dataframes support to a plugin (#12220)
WIP This PR covers migration crates/nu-cmd-dataframes to a new plugin ./crates/nu_plugin_polars ## TODO List Other: - [X] Fix examples - [x] Fix Plugin Test Harness - [X] Move Cache to Mutex<BTreeMap> - [X] Logic for disabling/enabling plugin GC based off whether items are cached. - [x] NuExpression custom values - [X] Optimize caching (don't cache every object creation). - [x] Fix dataframe operations (in NuDataFrameCustomValue::operations) - [x] Added plugin_debug! macro that for checking an env variable POLARS_PLUGIN_DEBUG Fix duplicated commands: - [x] There are two polars median commands, one for lazy and one for expr.. there should only be one that works for both. I temporarily called on polars expr-median (inside expressions_macros.rs) - [x] polars quantile (lazy, and expr). the expr one is temporarily expr-median - [x] polars is-in (renamed one series-is-in) Commands: - [x] AppendDF - [x] CastDF - [X] ColumnsDF - [x] DataTypes - [x] Summary - [x] DropDF - [x] DropDuplicates - [x] DropNulls - [x] Dummies - [x] FilterWith - [X] FirstDF - [x] GetDF - [x] LastDF - [X] ListDF - [x] MeltDF - [X] OpenDataFrame - [x] QueryDf - [x] RenameDF - [x] SampleDF - [x] SchemaDF - [x] ShapeDF - [x] SliceDF - [x] TakeDF - [X] ToArrow - [x] ToAvro - [X] ToCSV - [X] ToDataFrame - [X] ToNu - [x] ToParquet - [x] ToJsonLines - [x] WithColumn - [x] ExprAlias - [x] ExprArgWhere - [x] ExprCol - [x] ExprConcatStr - [x] ExprCount - [x] ExprLit - [x] ExprWhen - [x] ExprOtherwise - [x] ExprQuantile - [x] ExprList - [x] ExprAggGroups - [x] ExprCount - [x] ExprIsIn - [x] ExprNot - [x] ExprMax - [x] ExprMin - [x] ExprSum - [x] ExprMean - [x] ExprMedian - [x] ExprStd - [x] ExprVar - [x] ExprDatePart - [X] LazyAggregate - [x] LazyCache - [X] LazyCollect - [x] LazyFetch - [x] LazyFillNA - [x] LazyFillNull - [x] LazyFilter - [x] LazyJoin - [x] LazyQuantile - [x] LazyMedian - [x] LazyReverse - [x] LazySelect - [x] LazySortBy - [x] ToLazyFrame - [x] ToLazyGroupBy - [x] LazyExplode - [x] LazyFlatten - [x] AllFalse - [x] AllTrue - [x] ArgMax - [x] ArgMin - [x] ArgSort - [x] ArgTrue - [x] ArgUnique - [x] AsDate - [x] AsDateTime - [x] Concatenate - [x] Contains - [x] Cumulative - [x] GetDay - [x] GetHour - [x] GetMinute - [x] GetMonth - [x] GetNanosecond - [x] GetOrdinal - [x] GetSecond - [x] GetWeek - [x] GetWeekDay - [x] GetYear - [x] IsDuplicated - [x] IsIn - [x] IsNotNull - [x] IsNull - [x] IsUnique - [x] NNull - [x] NUnique - [x] NotSeries - [x] Replace - [x] ReplaceAll - [x] Rolling - [x] SetSeries - [x] SetWithIndex - [x] Shift - [x] StrLengths - [x] StrSlice - [x] StrFTime - [x] ToLowerCase - [x] ToUpperCase - [x] Unique - [x] ValueCount --------- Co-authored-by: Jack Wright <jack.wright@disqo.com>
This commit is contained in:
parent
cbbccaa722
commit
efc1cfa939
823
Cargo.lock
generated
823
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@ -47,6 +47,7 @@ members = [
|
||||
"crates/nu_plugin_query",
|
||||
"crates/nu_plugin_custom_values",
|
||||
"crates/nu_plugin_formats",
|
||||
"crates/nu_plugin_polars",
|
||||
"crates/nu-std",
|
||||
"crates/nu-table",
|
||||
"crates/nu-term-grid",
|
||||
|
@ -48,7 +48,6 @@ impl Command for ToLazyFrame {
|
||||
let df = NuDataFrame::try_from_iter(input.into_iter(), maybe_schema)?;
|
||||
let lazy = NuLazyFrame::from_dataframe(df);
|
||||
let value = Value::custom(Box::new(lazy), call.head);
|
||||
|
||||
Ok(PipelineData::Value(value, None))
|
||||
}
|
||||
}
|
||||
|
78
crates/nu_plugin_polars/Cargo.toml
Normal file
78
crates/nu_plugin_polars/Cargo.toml
Normal file
@ -0,0 +1,78 @@
|
||||
[package]
|
||||
authors = ["The Nushell Project Developers"]
|
||||
description = "Nushell dataframe plugin commands based on polars."
|
||||
edition = "2021"
|
||||
license = "MIT"
|
||||
name = "nu_plugin_polars"
|
||||
repository = "https://github.com/nushell/nushell/tree/main/crates/nu-cmd-dataframe"
|
||||
version = "0.92.2"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[[bin]]
|
||||
name = "nu_plugin_polars"
|
||||
bench = false
|
||||
|
||||
[lib]
|
||||
bench = false
|
||||
|
||||
[dependencies]
|
||||
nu-protocol = { path = "../nu-protocol", version = "0.92.2" }
|
||||
nu-plugin = { path = "../nu-plugin", version = "0.92.2" }
|
||||
|
||||
# Potential dependencies for extras
|
||||
chrono = { workspace = true, features = ["std", "unstable-locales"], default-features = false }
|
||||
chrono-tz = "0.8"
|
||||
fancy-regex = { workspace = true }
|
||||
indexmap = { version = "2.2" }
|
||||
num = {version = "0.4"}
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
sqlparser = { version = "0.43"}
|
||||
polars-io = { version = "0.37", features = ["avro"]}
|
||||
polars-arrow = { version = "0.37"}
|
||||
polars-ops = { version = "0.37"}
|
||||
polars-plan = { version = "0.37", features = ["regex"]}
|
||||
polars-utils = { version = "0.37"}
|
||||
typetag = "0.2"
|
||||
uuid = { version = "1.7", features = ["v4", "serde"] }
|
||||
|
||||
[dependencies.polars]
|
||||
features = [
|
||||
"arg_where",
|
||||
"checked_arithmetic",
|
||||
"concat_str",
|
||||
"cross_join",
|
||||
"csv",
|
||||
"cum_agg",
|
||||
"default",
|
||||
"dtype-categorical",
|
||||
"dtype-datetime",
|
||||
"dtype-struct",
|
||||
"dtype-i8",
|
||||
"dtype-i16",
|
||||
"dtype-u8",
|
||||
"dtype-u16",
|
||||
"dynamic_group_by",
|
||||
"ipc",
|
||||
"is_in",
|
||||
"json",
|
||||
"lazy",
|
||||
"object",
|
||||
"parquet",
|
||||
"random",
|
||||
"rolling_window",
|
||||
"rows",
|
||||
"serde",
|
||||
"serde-lazy",
|
||||
"strings",
|
||||
"to_dummies",
|
||||
]
|
||||
optional = false
|
||||
version = "0.37"
|
||||
|
||||
[dev-dependencies]
|
||||
nu-cmd-lang = { path = "../nu-cmd-lang", version = "0.92.2" }
|
||||
nu-engine = { path = "../nu-engine", version = "0.92.2" }
|
||||
nu-parser = { path = "../nu-parser", version = "0.92.2" }
|
||||
nu-command = { path = "../nu-command", version = "0.92.2" }
|
||||
nu-plugin-test-support = { path = "../nu-plugin-test-support", version = "0.92.2" }
|
21
crates/nu_plugin_polars/LICENSE
Normal file
21
crates/nu_plugin_polars/LICENSE
Normal file
@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2019 - 2023 The Nushell Project Developers
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
122
crates/nu_plugin_polars/src/cache.rs
Normal file
122
crates/nu_plugin_polars/src/cache.rs
Normal file
@ -0,0 +1,122 @@
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
sync::{Mutex, MutexGuard},
|
||||
};
|
||||
|
||||
use nu_plugin::EngineInterface;
|
||||
use nu_protocol::{LabeledError, ShellError};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::{plugin_debug, values::PolarsPluginObject, PolarsPlugin};
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct Cache {
|
||||
cache: Mutex<HashMap<Uuid, PolarsPluginObject>>,
|
||||
}
|
||||
|
||||
impl Cache {
|
||||
fn lock(&self) -> Result<MutexGuard<HashMap<Uuid, PolarsPluginObject>>, ShellError> {
|
||||
self.cache.lock().map_err(|e| ShellError::GenericError {
|
||||
error: format!("error acquiring cache lock: {e}"),
|
||||
msg: "".into(),
|
||||
span: None,
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})
|
||||
}
|
||||
|
||||
/// Removes an item from the plugin cache.
|
||||
/// The maybe_engine parameter is required outside of testing
|
||||
pub fn remove(
|
||||
&self,
|
||||
maybe_engine: Option<&EngineInterface>,
|
||||
uuid: &Uuid,
|
||||
) -> Result<Option<PolarsPluginObject>, ShellError> {
|
||||
let mut lock = self.lock()?;
|
||||
let removed = lock.remove(uuid);
|
||||
plugin_debug!("PolarsPlugin: removing {uuid} from cache: {removed:?}");
|
||||
// Once there are no more entries in the cache
|
||||
// we can turn plugin gc back on
|
||||
match maybe_engine {
|
||||
Some(engine) if lock.is_empty() => {
|
||||
plugin_debug!("PolarsPlugin: Cache is empty enabling GC");
|
||||
engine.set_gc_disabled(false).map_err(LabeledError::from)?;
|
||||
}
|
||||
_ => (),
|
||||
};
|
||||
drop(lock);
|
||||
Ok(removed)
|
||||
}
|
||||
|
||||
/// Inserts an item into the plugin cache.
|
||||
/// The maybe_engine parameter is required outside of testing
|
||||
pub fn insert(
|
||||
&self,
|
||||
maybe_engine: Option<&EngineInterface>,
|
||||
uuid: Uuid,
|
||||
value: PolarsPluginObject,
|
||||
) -> Result<Option<PolarsPluginObject>, ShellError> {
|
||||
let mut lock = self.lock()?;
|
||||
plugin_debug!("PolarsPlugin: Inserting {uuid} into cache: {value:?}");
|
||||
// turn off plugin gc the first time an entry is added to the cache
|
||||
// as we don't want the plugin to be garbage collected if there
|
||||
// is any live data
|
||||
match maybe_engine {
|
||||
Some(engine) if lock.is_empty() => {
|
||||
plugin_debug!("PolarsPlugin: Cache has values disabling GC");
|
||||
engine.set_gc_disabled(true).map_err(LabeledError::from)?;
|
||||
}
|
||||
_ => (),
|
||||
};
|
||||
let result = lock.insert(uuid, value);
|
||||
drop(lock);
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub fn get(&self, uuid: &Uuid) -> Result<Option<PolarsPluginObject>, ShellError> {
|
||||
let lock = self.lock()?;
|
||||
let result = lock.get(uuid).cloned();
|
||||
drop(lock);
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub fn process_entries<F, T>(&self, mut func: F) -> Result<Vec<T>, ShellError>
|
||||
where
|
||||
F: FnMut((&Uuid, &PolarsPluginObject)) -> Result<T, ShellError>,
|
||||
{
|
||||
let lock = self.lock()?;
|
||||
let mut vals: Vec<T> = Vec::new();
|
||||
for entry in lock.iter() {
|
||||
eprintln!("entry: {:?}", entry);
|
||||
let val = func(entry)?;
|
||||
vals.push(val);
|
||||
}
|
||||
drop(lock);
|
||||
Ok(vals)
|
||||
}
|
||||
}
|
||||
|
||||
pub trait Cacheable: Sized + Clone {
|
||||
fn cache_id(&self) -> &Uuid;
|
||||
|
||||
fn to_cache_value(&self) -> Result<PolarsPluginObject, ShellError>;
|
||||
|
||||
fn from_cache_value(cv: PolarsPluginObject) -> Result<Self, ShellError>;
|
||||
|
||||
fn cache(self, plugin: &PolarsPlugin, engine: &EngineInterface) -> Result<Self, ShellError> {
|
||||
plugin.cache.insert(
|
||||
Some(engine),
|
||||
self.cache_id().to_owned(),
|
||||
self.to_cache_value()?,
|
||||
)?;
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
fn get_cached(plugin: &PolarsPlugin, id: &Uuid) -> Result<Option<Self>, ShellError> {
|
||||
if let Some(cache_value) = plugin.cache.get(id)? {
|
||||
Ok(Some(Self::from_cache_value(cache_value)?))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
}
|
12
crates/nu_plugin_polars/src/dataframe/README.md
Normal file
12
crates/nu_plugin_polars/src/dataframe/README.md
Normal file
@ -0,0 +1,12 @@
|
||||
# Dataframe
|
||||
|
||||
This dataframe directory holds all of the definitions of the dataframe data structures and commands.
|
||||
|
||||
There are three sections of commands:
|
||||
|
||||
* [eager](./eager)
|
||||
* [series](./series)
|
||||
* [values](./values)
|
||||
|
||||
For more details see the
|
||||
[Nushell book section on dataframes](https://www.nushell.sh/book/dataframes.html)
|
144
crates/nu_plugin_polars/src/dataframe/eager/append.rs
Normal file
144
crates/nu_plugin_polars/src/dataframe/eager/append.rs
Normal file
@ -0,0 +1,144 @@
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
|
||||
use crate::{
|
||||
values::{to_pipeline_data, Axis, Column, CustomValueSupport, NuDataFrame},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct AppendDF;
|
||||
|
||||
impl PluginCommand for AppendDF {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required("other", SyntaxShape::Any, "other dataframe to append")
|
||||
.switch("col", "append as new columns instead of rows", None)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars append"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Appends a new dataframe."
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Appends a dataframe as new columns",
|
||||
example: r#"let a = ([[a b]; [1 2] [3 4]] | polars into-df);
|
||||
$a | polars append $a"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
Column::new(
|
||||
"a_x".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
),
|
||||
Column::new(
|
||||
"b_x".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Appends a dataframe merging at the end of columns",
|
||||
example: r#"let a = ([[a b]; [1 2] [3 4]] | polars into-df); $a | polars append $a --col"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![
|
||||
Value::test_int(1),
|
||||
Value::test_int(3),
|
||||
Value::test_int(1),
|
||||
Value::test_int(3),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![
|
||||
Value::test_int(2),
|
||||
Value::test_int(4),
|
||||
Value::test_int(2),
|
||||
Value::test_int(4),
|
||||
],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let other: Value = call.req(0)?;
|
||||
|
||||
let axis = if call.has_flag("col")? {
|
||||
Axis::Column
|
||||
} else {
|
||||
Axis::Row
|
||||
};
|
||||
|
||||
let df_other = NuDataFrame::try_from_value_coerce(plugin, &other, call.head)?;
|
||||
let df = NuDataFrame::try_from_pipeline(plugin, input, call.head)?;
|
||||
let df = df.append_df(&df_other, axis, call.head)?;
|
||||
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&AppendDF)
|
||||
}
|
||||
}
|
202
crates/nu_plugin_polars/src/dataframe/eager/cast.rs
Normal file
202
crates/nu_plugin_polars/src/dataframe/eager/cast.rs
Normal file
@ -0,0 +1,202 @@
|
||||
use crate::{
|
||||
dataframe::values::{str_to_dtype, to_pipeline_data, NuExpression, NuLazyFrame},
|
||||
values::{cant_convert_err, PolarsPluginObject, PolarsPluginType},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::values::NuDataFrame;
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
record, Category, Example, LabeledError, PipelineData, ShellError, Signature, Span,
|
||||
SyntaxShape, Type, Value,
|
||||
};
|
||||
use polars::prelude::*;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct CastDF;
|
||||
|
||||
impl PluginCommand for CastDF {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars cast"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Cast a column to a different dtype."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_output_types(vec![
|
||||
(
|
||||
Type::Custom("expression".into()),
|
||||
Type::Custom("expression".into()),
|
||||
),
|
||||
(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
),
|
||||
])
|
||||
.required(
|
||||
"dtype",
|
||||
SyntaxShape::String,
|
||||
"The dtype to cast the column to",
|
||||
)
|
||||
.optional(
|
||||
"column",
|
||||
SyntaxShape::String,
|
||||
"The column to cast. Required when used with a dataframe.",
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Cast a column in a dataframe to a different dtype",
|
||||
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars cast u8 a | polars schema",
|
||||
result: Some(Value::record(
|
||||
record! {
|
||||
"a" => Value::string("u8", Span::test_data()),
|
||||
"b" => Value::string("i64", Span::test_data()),
|
||||
},
|
||||
Span::test_data(),
|
||||
)),
|
||||
},
|
||||
Example {
|
||||
description: "Cast a column in a lazy dataframe to a different dtype",
|
||||
example:
|
||||
"[[a b]; [1 2] [3 4]] | polars into-df | polars into-lazy | polars cast u8 a | polars schema",
|
||||
result: Some(Value::record(
|
||||
record! {
|
||||
"a" => Value::string("u8", Span::test_data()),
|
||||
"b" => Value::string("i64", Span::test_data()),
|
||||
},
|
||||
Span::test_data(),
|
||||
)),
|
||||
},
|
||||
Example {
|
||||
description: "Cast a column in a expression to a different dtype",
|
||||
example: r#"[[a b]; [1 2] [1 4]] | polars into-df | polars group-by a | polars agg [ (polars col b | polars cast u8 | polars min | polars as "b_min") ] | polars schema"#,
|
||||
result: None,
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let value = input.into_value(call.head);
|
||||
match PolarsPluginObject::try_from_value(plugin, &value)? {
|
||||
PolarsPluginObject::NuLazyFrame(lazy) => {
|
||||
let (dtype, column_nm) = df_args(call)?;
|
||||
command_lazy(plugin, engine, call, column_nm, dtype, lazy)
|
||||
}
|
||||
PolarsPluginObject::NuDataFrame(df) => {
|
||||
let (dtype, column_nm) = df_args(call)?;
|
||||
command_eager(plugin, engine, call, column_nm, dtype, df)
|
||||
}
|
||||
PolarsPluginObject::NuExpression(expr) => {
|
||||
let dtype: String = call.req(0)?;
|
||||
let dtype = str_to_dtype(&dtype, call.head)?;
|
||||
let expr: NuExpression = expr.to_polars().cast(dtype).into();
|
||||
to_pipeline_data(plugin, engine, call.head, expr)
|
||||
}
|
||||
_ => Err(cant_convert_err(
|
||||
&value,
|
||||
&[
|
||||
PolarsPluginType::NuDataFrame,
|
||||
PolarsPluginType::NuLazyFrame,
|
||||
PolarsPluginType::NuExpression,
|
||||
],
|
||||
)),
|
||||
}
|
||||
.map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn df_args(call: &EvaluatedCall) -> Result<(DataType, String), ShellError> {
|
||||
let dtype = dtype_arg(call)?;
|
||||
let column_nm: String = call.opt(1)?.ok_or(ShellError::MissingParameter {
|
||||
param_name: "column_name".into(),
|
||||
span: call.head,
|
||||
})?;
|
||||
Ok((dtype, column_nm))
|
||||
}
|
||||
|
||||
fn dtype_arg(call: &EvaluatedCall) -> Result<DataType, ShellError> {
|
||||
let dtype: String = call.req(0)?;
|
||||
str_to_dtype(&dtype, call.head)
|
||||
}
|
||||
|
||||
fn command_lazy(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
column_nm: String,
|
||||
dtype: DataType,
|
||||
lazy: NuLazyFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let column = col(&column_nm).cast(dtype);
|
||||
let lazy = lazy.to_polars().with_columns(&[column]);
|
||||
let lazy = NuLazyFrame::new(false, lazy);
|
||||
to_pipeline_data(plugin, engine, call.head, lazy)
|
||||
}
|
||||
|
||||
fn command_eager(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
column_nm: String,
|
||||
dtype: DataType,
|
||||
nu_df: NuDataFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let mut df = (*nu_df.df).clone();
|
||||
let column = df
|
||||
.column(&column_nm)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: format!("{e}"),
|
||||
msg: "".into(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let casted = column.cast(&dtype).map_err(|e| ShellError::GenericError {
|
||||
error: format!("{e}"),
|
||||
msg: "".into(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let _ = df
|
||||
.with_column(casted)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: format!("{e}"),
|
||||
msg: "".into(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let df = NuDataFrame::new(false, df);
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&CastDF)
|
||||
}
|
||||
}
|
79
crates/nu_plugin_polars/src/dataframe/eager/columns.rs
Normal file
79
crates/nu_plugin_polars/src/dataframe/eager/columns.rs
Normal file
@ -0,0 +1,79 @@
|
||||
use crate::PolarsPlugin;
|
||||
|
||||
use super::super::values::NuDataFrame;
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ColumnsDF;
|
||||
|
||||
impl PluginCommand for ColumnsDF {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars columns"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Show dataframe columns."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_output_type(Type::Custom("dataframe".into()), Type::Any)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Dataframe columns",
|
||||
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars columns",
|
||||
result: Some(Value::list(
|
||||
vec![Value::test_string("a"), Value::test_string("b")],
|
||||
Span::test_data(),
|
||||
)),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
_engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, call, input).map_err(|e| e.into())
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
|
||||
let names: Vec<Value> = df
|
||||
.as_ref()
|
||||
.get_column_names()
|
||||
.iter()
|
||||
.map(|v| Value::string(*v, call.head))
|
||||
.collect();
|
||||
|
||||
let names = Value::list(names, call.head);
|
||||
|
||||
Ok(PipelineData::Value(names, None))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&ColumnsDF)
|
||||
}
|
||||
}
|
127
crates/nu_plugin_polars/src/dataframe/eager/drop.rs
Normal file
127
crates/nu_plugin_polars/src/dataframe/eager/drop.rs
Normal file
@ -0,0 +1,127 @@
|
||||
use crate::values::to_pipeline_data;
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
|
||||
use crate::values::CustomValueSupport;
|
||||
use crate::PolarsPlugin;
|
||||
|
||||
use super::super::values::utils::convert_columns;
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct DropDF;
|
||||
|
||||
impl PluginCommand for DropDF {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars drop"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates a new dataframe by dropping the selected columns."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.rest("rest", SyntaxShape::Any, "column names to be dropped")
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "drop column a",
|
||||
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars drop a",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let columns: Vec<Value> = call.rest(0)?;
|
||||
let (col_string, col_span) = convert_columns(columns, call.head)?;
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
|
||||
let new_df = col_string
|
||||
.first()
|
||||
.ok_or_else(|| ShellError::GenericError {
|
||||
error: "Empty names list".into(),
|
||||
msg: "No column names were found".into(),
|
||||
span: Some(col_span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})
|
||||
.and_then(|col| {
|
||||
df.as_ref()
|
||||
.drop(&col.item)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error dropping column".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(col.span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})
|
||||
})?;
|
||||
|
||||
// If there are more columns in the drop selection list, these
|
||||
// are added from the resulting dataframe
|
||||
let polars_df = col_string.iter().skip(1).try_fold(new_df, |new_df, col| {
|
||||
new_df
|
||||
.drop(&col.item)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error dropping column".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(col.span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})
|
||||
})?;
|
||||
|
||||
let final_df = NuDataFrame::new(df.from_lazy, polars_df);
|
||||
|
||||
to_pipeline_data(plugin, engine, call.head, final_df)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&DropDF)
|
||||
}
|
||||
}
|
133
crates/nu_plugin_polars/src/dataframe/eager/drop_duplicates.rs
Normal file
133
crates/nu_plugin_polars/src/dataframe/eager/drop_duplicates.rs
Normal file
@ -0,0 +1,133 @@
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
use polars::prelude::UniqueKeepStrategy;
|
||||
|
||||
use crate::values::{to_pipeline_data, CustomValueSupport};
|
||||
use crate::PolarsPlugin;
|
||||
|
||||
use super::super::values::utils::convert_columns_string;
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct DropDuplicates;
|
||||
|
||||
impl PluginCommand for DropDuplicates {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars drop-duplicates"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Drops duplicate values in dataframe."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.optional(
|
||||
"subset",
|
||||
SyntaxShape::Table(vec![]),
|
||||
"subset of columns to drop duplicates",
|
||||
)
|
||||
.switch("maintain", "maintain order", Some('m'))
|
||||
.switch(
|
||||
"last",
|
||||
"keeps last duplicate value (by default keeps first)",
|
||||
Some('l'),
|
||||
)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "drop duplicates",
|
||||
example: "[[a b]; [1 2] [3 4] [1 2]] | polars into-df | polars drop-duplicates",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(3), Value::test_int(1)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(4), Value::test_int(2)],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let columns: Option<Vec<Value>> = call.opt(0)?;
|
||||
let (subset, col_span) = match columns {
|
||||
Some(cols) => {
|
||||
let (agg_string, col_span) = convert_columns_string(cols, call.head)?;
|
||||
(Some(agg_string), col_span)
|
||||
}
|
||||
None => (None, call.head),
|
||||
};
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
|
||||
let subset_slice = subset.as_ref().map(|cols| &cols[..]);
|
||||
|
||||
let keep_strategy = if call.has_flag("last")? {
|
||||
UniqueKeepStrategy::Last
|
||||
} else {
|
||||
UniqueKeepStrategy::First
|
||||
};
|
||||
|
||||
let polars_df = df
|
||||
.as_ref()
|
||||
.unique(subset_slice, keep_strategy, None)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error dropping duplicates".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(col_span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let df = NuDataFrame::new(df.from_lazy, polars_df);
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&DropDuplicates)
|
||||
}
|
||||
}
|
149
crates/nu_plugin_polars/src/dataframe/eager/drop_nulls.rs
Normal file
149
crates/nu_plugin_polars/src/dataframe/eager/drop_nulls.rs
Normal file
@ -0,0 +1,149 @@
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
|
||||
use crate::values::{to_pipeline_data, CustomValueSupport};
|
||||
use crate::PolarsPlugin;
|
||||
|
||||
use super::super::values::utils::convert_columns_string;
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct DropNulls;
|
||||
|
||||
impl PluginCommand for DropNulls {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars drop-nulls"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Drops null values in dataframe."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.optional(
|
||||
"subset",
|
||||
SyntaxShape::Table(vec![]),
|
||||
"subset of columns to drop nulls",
|
||||
)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "drop null values in dataframe",
|
||||
example: r#"let df = ([[a b]; [1 2] [3 0] [1 2]] | polars into-df);
|
||||
let res = ($df.b / $df.b);
|
||||
let a = ($df | polars with-column $res --name res);
|
||||
$a | polars drop-nulls"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(1)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(2)],
|
||||
),
|
||||
Column::new(
|
||||
"res".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(1)],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "drop null values in dataframe",
|
||||
example: r#"let s = ([1 2 0 0 3 4] | polars into-df);
|
||||
($s / $s) | polars drop-nulls"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"div_0_0".to_string(),
|
||||
vec![
|
||||
Value::test_int(1),
|
||||
Value::test_int(1),
|
||||
Value::test_int(1),
|
||||
Value::test_int(1),
|
||||
],
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
|
||||
let columns: Option<Vec<Value>> = call.opt(0)?;
|
||||
|
||||
let (subset, col_span) = match columns {
|
||||
Some(cols) => {
|
||||
let (agg_string, col_span) = convert_columns_string(cols, call.head)?;
|
||||
(Some(agg_string), col_span)
|
||||
}
|
||||
None => (None, call.head),
|
||||
};
|
||||
|
||||
let subset_slice = subset.as_ref().map(|cols| &cols[..]);
|
||||
|
||||
let polars_df = df
|
||||
.as_ref()
|
||||
.drop_nulls(subset_slice)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error dropping nulls".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(col_span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
let df = NuDataFrame::new(df.from_lazy, polars_df);
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&DropNulls)
|
||||
}
|
||||
}
|
111
crates/nu_plugin_polars/src/dataframe/eager/dtypes.rs
Normal file
111
crates/nu_plugin_polars/src/dataframe/eager/dtypes.rs
Normal file
@ -0,0 +1,111 @@
|
||||
use crate::PolarsPlugin;
|
||||
|
||||
use super::super::values::{to_pipeline_data, Column, CustomValueSupport, NuDataFrame};
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct DataTypes;
|
||||
|
||||
impl PluginCommand for DataTypes {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars dtypes"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Show dataframe data types."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Dataframe dtypes",
|
||||
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars dtypes",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"column".to_string(),
|
||||
vec![Value::test_string("a"), Value::test_string("b")],
|
||||
),
|
||||
Column::new(
|
||||
"dtype".to_string(),
|
||||
vec![Value::test_string("i64"), Value::test_string("i64")],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
|
||||
let mut dtypes: Vec<Value> = Vec::new();
|
||||
let names: Vec<Value> = df
|
||||
.as_ref()
|
||||
.get_column_names()
|
||||
.iter()
|
||||
.map(|v| {
|
||||
let dtype = df
|
||||
.as_ref()
|
||||
.column(v)
|
||||
.expect("using name from list of names from dataframe")
|
||||
.dtype();
|
||||
|
||||
let dtype_str = dtype.to_string();
|
||||
|
||||
dtypes.push(Value::string(dtype_str, call.head));
|
||||
|
||||
Value::string(*v, call.head)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let names_col = Column::new("column".to_string(), names);
|
||||
let dtypes_col = Column::new("dtype".to_string(), dtypes);
|
||||
|
||||
let df = NuDataFrame::try_from_columns(vec![names_col, dtypes_col], None)?;
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&DataTypes)
|
||||
}
|
||||
}
|
119
crates/nu_plugin_polars/src/dataframe/eager/dummies.rs
Normal file
119
crates/nu_plugin_polars/src/dataframe/eager/dummies.rs
Normal file
@ -0,0 +1,119 @@
|
||||
use super::super::values::NuDataFrame;
|
||||
use crate::{
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type,
|
||||
};
|
||||
use polars::{prelude::*, series::Series};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Dummies;
|
||||
|
||||
impl PluginCommand for Dummies {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars dummies"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates a new dataframe with dummy variables."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.switch("drop-first", "Drop first row", Some('d'))
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Create new dataframe with dummy variables from a dataframe",
|
||||
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars dummies",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_series_vec(
|
||||
vec![
|
||||
Series::new("a_1", &[1_u8, 0]),
|
||||
Series::new("a_3", &[0_u8, 1]),
|
||||
Series::new("b_2", &[1_u8, 0]),
|
||||
Series::new("b_4", &[0_u8, 1]),
|
||||
],
|
||||
Span::test_data(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Create new dataframe with dummy variables from a series",
|
||||
example: "[1 2 2 3 3] | polars into-df | polars dummies",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_series_vec(
|
||||
vec![
|
||||
Series::new("0_1", &[1_u8, 0, 0, 0, 0]),
|
||||
Series::new("0_2", &[0_u8, 1, 1, 0, 0]),
|
||||
Series::new("0_3", &[0_u8, 0, 0, 1, 1]),
|
||||
],
|
||||
Span::test_data(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let drop_first: bool = call.has_flag("drop-first")?;
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
|
||||
let polars_df =
|
||||
df.as_ref()
|
||||
.to_dummies(None, drop_first)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error calculating dummies".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(call.head),
|
||||
help: Some("The only allowed column types for dummies are String or Int".into()),
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let df: NuDataFrame = polars_df.into();
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&Dummies)
|
||||
}
|
||||
}
|
165
crates/nu_plugin_polars/src/dataframe/eager/filter_with.rs
Normal file
165
crates/nu_plugin_polars/src/dataframe/eager/filter_with.rs
Normal file
@ -0,0 +1,165 @@
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
use polars::prelude::LazyFrame;
|
||||
|
||||
use crate::{
|
||||
dataframe::values::{NuExpression, NuLazyFrame},
|
||||
values::{
|
||||
cant_convert_err, to_pipeline_data, CustomValueSupport, PolarsPluginObject,
|
||||
PolarsPluginType,
|
||||
},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct FilterWith;
|
||||
|
||||
impl PluginCommand for FilterWith {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars filter-with"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Filters dataframe using a mask or expression as reference."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"mask or expression",
|
||||
SyntaxShape::Any,
|
||||
"boolean mask used to filter data",
|
||||
)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe or lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Filter dataframe using a bool mask",
|
||||
example: r#"let mask = ([true false] | polars into-df);
|
||||
[[a b]; [1 2] [3 4]] | polars into-df | polars filter-with $mask"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("a".to_string(), vec![Value::test_int(1)]),
|
||||
Column::new("b".to_string(), vec![Value::test_int(2)]),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Filter dataframe using an expression",
|
||||
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars filter-with ((polars col a) > 1)",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("a".to_string(), vec![Value::test_int(3)]),
|
||||
Column::new("b".to_string(), vec![Value::test_int(4)]),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let value = input.into_value(call.head);
|
||||
match PolarsPluginObject::try_from_value(plugin, &value)? {
|
||||
PolarsPluginObject::NuDataFrame(df) => command_eager(plugin, engine, call, df),
|
||||
PolarsPluginObject::NuLazyFrame(lazy) => command_lazy(plugin, engine, call, lazy),
|
||||
_ => Err(cant_convert_err(
|
||||
&value,
|
||||
&[PolarsPluginType::NuDataFrame, PolarsPluginType::NuLazyFrame],
|
||||
)),
|
||||
}
|
||||
.map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command_eager(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
df: NuDataFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let mask_value: Value = call.req(0)?;
|
||||
let mask_span = mask_value.span();
|
||||
|
||||
if NuExpression::can_downcast(&mask_value) {
|
||||
let expression = NuExpression::try_from_value(plugin, &mask_value)?;
|
||||
let lazy = df.lazy();
|
||||
let lazy = lazy.apply_with_expr(expression, LazyFrame::filter);
|
||||
|
||||
to_pipeline_data(plugin, engine, call.head, lazy)
|
||||
} else {
|
||||
let mask = NuDataFrame::try_from_value_coerce(plugin, &mask_value, mask_span)?
|
||||
.as_series(mask_span)?;
|
||||
let mask = mask.bool().map_err(|e| ShellError::GenericError {
|
||||
error: "Error casting to bool".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(mask_span),
|
||||
help: Some("Perhaps you want to use a series with booleans as mask".into()),
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let polars_df = df
|
||||
.as_ref()
|
||||
.filter(mask)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error filtering dataframe".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(call.head),
|
||||
help: Some("The only allowed column types for dummies are String or Int".into()),
|
||||
inner: vec![],
|
||||
})?;
|
||||
let df = NuDataFrame::new(df.from_lazy, polars_df);
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
}
|
||||
|
||||
fn command_lazy(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
lazy: NuLazyFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let expr: Value = call.req(0)?;
|
||||
let expr = NuExpression::try_from_value(plugin, &expr)?;
|
||||
let lazy = lazy.apply_with_expr(expr, LazyFrame::filter);
|
||||
to_pipeline_data(plugin, engine, call.head, lazy)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&FilterWith)
|
||||
}
|
||||
}
|
137
crates/nu_plugin_polars/src/dataframe/eager/first.rs
Normal file
137
crates/nu_plugin_polars/src/dataframe/eager/first.rs
Normal file
@ -0,0 +1,137 @@
|
||||
use crate::{
|
||||
values::{to_pipeline_data, Column, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::values::{NuDataFrame, NuExpression};
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct FirstDF;
|
||||
|
||||
impl PluginCommand for FirstDF {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars first"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Show only the first number of rows or create a first expression"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.optional(
|
||||
"rows",
|
||||
SyntaxShape::Int,
|
||||
"starting from the front, the number of rows to return",
|
||||
)
|
||||
.input_output_types(vec![
|
||||
(
|
||||
Type::Custom("expression".into()),
|
||||
Type::Custom("expression".into()),
|
||||
),
|
||||
(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
),
|
||||
])
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Return the first row of a dataframe",
|
||||
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars first",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("a".to_string(), vec![Value::test_int(1)]),
|
||||
Column::new("b".to_string(), vec![Value::test_int(2)]),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Return the first two rows of a dataframe",
|
||||
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars first 2",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Creates a first expression from a column",
|
||||
example: "polars col a | polars first",
|
||||
result: None,
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let value = input.into_value(call.head);
|
||||
if NuDataFrame::can_downcast(&value) {
|
||||
let df = NuDataFrame::try_from_value(plugin, &value)?;
|
||||
command(plugin, engine, call, df).map_err(|e| e.into())
|
||||
} else {
|
||||
let expr = NuExpression::try_from_value(plugin, &value)?;
|
||||
let expr: NuExpression = expr.to_polars().first().into();
|
||||
|
||||
to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
df: NuDataFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let rows: Option<usize> = call.opt(0)?;
|
||||
let rows = rows.unwrap_or(1);
|
||||
|
||||
let res = df.as_ref().head(Some(rows));
|
||||
let res = NuDataFrame::new(false, res);
|
||||
|
||||
to_pipeline_data(plugin, engine, call.head, res)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&FirstDF)
|
||||
}
|
||||
}
|
103
crates/nu_plugin_polars/src/dataframe/eager/get.rs
Normal file
103
crates/nu_plugin_polars/src/dataframe/eager/get.rs
Normal file
@ -0,0 +1,103 @@
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
|
||||
use crate::{
|
||||
dataframe::values::utils::convert_columns_string,
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct GetDF;
|
||||
|
||||
impl PluginCommand for GetDF {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars get"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates dataframe with the selected columns."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.rest("rest", SyntaxShape::Any, "column names to sort dataframe")
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Returns the selected column",
|
||||
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars get a",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let columns: Vec<Value> = call.rest(0)?;
|
||||
let (col_string, col_span) = convert_columns_string(columns, call.head)?;
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline(plugin, input, call.head)?;
|
||||
|
||||
let df = df
|
||||
.as_ref()
|
||||
.select(col_string)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error selecting columns".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(col_span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
let df = NuDataFrame::new(false, df);
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&GetDF)
|
||||
}
|
||||
}
|
112
crates/nu_plugin_polars/src/dataframe/eager/last.rs
Normal file
112
crates/nu_plugin_polars/src/dataframe/eager/last.rs
Normal file
@ -0,0 +1,112 @@
|
||||
use crate::{
|
||||
values::{to_pipeline_data, Column, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::values::{utils::DEFAULT_ROWS, NuDataFrame, NuExpression};
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct LastDF;
|
||||
|
||||
impl PluginCommand for LastDF {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars last"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates new dataframe with tail rows or creates a last expression."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.optional("rows", SyntaxShape::Int, "Number of rows for tail")
|
||||
.input_output_types(vec![
|
||||
(
|
||||
Type::Custom("expression".into()),
|
||||
Type::Custom("expression".into()),
|
||||
),
|
||||
(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
),
|
||||
])
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Create new dataframe with last rows",
|
||||
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars last 1",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("a".to_string(), vec![Value::test_int(3)]),
|
||||
Column::new("b".to_string(), vec![Value::test_int(4)]),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Creates a last expression from a column",
|
||||
example: "polars col a | polars last",
|
||||
result: None,
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let value = input.into_value(call.head);
|
||||
if NuDataFrame::can_downcast(&value) {
|
||||
let df = NuDataFrame::try_from_value(plugin, &value)?;
|
||||
command(plugin, engine, call, df).map_err(|e| e.into())
|
||||
} else {
|
||||
let expr = NuExpression::try_from_value(plugin, &value)?;
|
||||
let expr: NuExpression = expr.to_polars().last().into();
|
||||
|
||||
to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
df: NuDataFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let rows: Option<usize> = call.opt(0)?;
|
||||
let rows = rows.unwrap_or(DEFAULT_ROWS);
|
||||
|
||||
let res = df.as_ref().tail(Some(rows));
|
||||
let res = NuDataFrame::new(false, res);
|
||||
to_pipeline_data(plugin, engine, call.head, res)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&LastDF)
|
||||
}
|
||||
}
|
96
crates/nu_plugin_polars/src/dataframe/eager/list.rs
Normal file
96
crates/nu_plugin_polars/src/dataframe/eager/list.rs
Normal file
@ -0,0 +1,96 @@
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
record, Category, Example, IntoPipelineData, LabeledError, PipelineData, Signature, Value,
|
||||
};
|
||||
|
||||
use crate::{values::PolarsPluginObject, PolarsPlugin};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ListDF;
|
||||
|
||||
impl PluginCommand for ListDF {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars ls"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Lists stored dataframes."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name()).category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Creates a new dataframe and shows it in the dataframe list",
|
||||
example: r#"let test = ([[a b];[1 2] [3 4]] | dfr into-df);
|
||||
polars ls"#,
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
_engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
_input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let vals = plugin.cache.process_entries(|(key, value)| match value {
|
||||
PolarsPluginObject::NuDataFrame(df) => Ok(Some(Value::record(
|
||||
record! {
|
||||
"key" => Value::string(key.to_string(), call.head),
|
||||
"columns" => Value::int(df.as_ref().width() as i64, call.head),
|
||||
"rows" => Value::int(df.as_ref().height() as i64, call.head),
|
||||
"type" => Value::string("NuDataFrame", call.head),
|
||||
},
|
||||
call.head,
|
||||
))),
|
||||
PolarsPluginObject::NuLazyFrame(lf) => {
|
||||
let lf = lf.clone().collect(call.head)?;
|
||||
Ok(Some(Value::record(
|
||||
record! {
|
||||
"key" => Value::string(key.to_string(), call.head),
|
||||
"columns" => Value::int(lf.as_ref().width() as i64, call.head),
|
||||
"rows" => Value::int(lf.as_ref().height() as i64, call.head),
|
||||
"type" => Value::string("NuLazyFrame", call.head),
|
||||
},
|
||||
call.head,
|
||||
)))
|
||||
}
|
||||
PolarsPluginObject::NuExpression(_) => Ok(Some(Value::record(
|
||||
record! {
|
||||
"key" => Value::string(key.to_string(), call.head),
|
||||
"columns" => Value::nothing(call.head),
|
||||
"rows" => Value::nothing(call.head),
|
||||
"type" => Value::string("NuExpression", call.head),
|
||||
},
|
||||
call.head,
|
||||
))),
|
||||
PolarsPluginObject::NuLazyGroupBy(_) => Ok(Some(Value::record(
|
||||
record! {
|
||||
"key" => Value::string(key.to_string(), call.head),
|
||||
"columns" => Value::nothing(call.head),
|
||||
"rows" => Value::nothing(call.head),
|
||||
"type" => Value::string("NuLazyGroupBy", call.head),
|
||||
},
|
||||
call.head,
|
||||
))),
|
||||
PolarsPluginObject::NuWhen(_) => Ok(Some(Value::record(
|
||||
record! {
|
||||
"key" => Value::string(key.to_string(), call.head),
|
||||
"columns" => Value::nothing(call.head),
|
||||
"rows" => Value::nothing(call.head),
|
||||
"type" => Value::string("NuWhen", call.head),
|
||||
},
|
||||
call.head,
|
||||
))),
|
||||
})?;
|
||||
let vals = vals.into_iter().flatten().collect();
|
||||
let list = Value::list(vals, call.head);
|
||||
Ok(list.into_pipeline_data())
|
||||
}
|
||||
}
|
255
crates/nu_plugin_polars/src/dataframe/eager/melt.rs
Normal file
255
crates/nu_plugin_polars/src/dataframe/eager/melt.rs
Normal file
@ -0,0 +1,255 @@
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Spanned,
|
||||
SyntaxShape, Type, Value,
|
||||
};
|
||||
|
||||
use crate::{
|
||||
dataframe::values::utils::convert_columns_string,
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct MeltDF;
|
||||
|
||||
impl PluginCommand for MeltDF {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars melt"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Unpivot a DataFrame from wide to long format."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required_named(
|
||||
"columns",
|
||||
SyntaxShape::Table(vec![]),
|
||||
"column names for melting",
|
||||
Some('c'),
|
||||
)
|
||||
.required_named(
|
||||
"values",
|
||||
SyntaxShape::Table(vec![]),
|
||||
"column names used as value columns",
|
||||
Some('v'),
|
||||
)
|
||||
.named(
|
||||
"variable-name",
|
||||
SyntaxShape::String,
|
||||
"optional name for variable column",
|
||||
Some('r'),
|
||||
)
|
||||
.named(
|
||||
"value-name",
|
||||
SyntaxShape::String,
|
||||
"optional name for value column",
|
||||
Some('l'),
|
||||
)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "melt dataframe",
|
||||
example:
|
||||
"[[a b c d]; [x 1 4 a] [y 2 5 b] [z 3 6 c]] | polars into-df | polars melt -c [b c] -v [a d]",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(3),
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(3),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"c".to_string(),
|
||||
vec![
|
||||
Value::test_int(4),
|
||||
Value::test_int(5),
|
||||
Value::test_int(6),
|
||||
Value::test_int(4),
|
||||
Value::test_int(5),
|
||||
Value::test_int(6),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"variable".to_string(),
|
||||
vec![
|
||||
Value::test_string("a"),
|
||||
Value::test_string("a"),
|
||||
Value::test_string("a"),
|
||||
Value::test_string("d"),
|
||||
Value::test_string("d"),
|
||||
Value::test_string("d"),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"value".to_string(),
|
||||
vec![
|
||||
Value::test_string("x"),
|
||||
Value::test_string("y"),
|
||||
Value::test_string("z"),
|
||||
Value::test_string("a"),
|
||||
Value::test_string("b"),
|
||||
Value::test_string("c"),
|
||||
],
|
||||
),
|
||||
], None)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let id_col: Vec<Value> = call.get_flag("columns")?.expect("required value");
|
||||
let val_col: Vec<Value> = call.get_flag("values")?.expect("required value");
|
||||
|
||||
let value_name: Option<Spanned<String>> = call.get_flag("value-name")?;
|
||||
let variable_name: Option<Spanned<String>> = call.get_flag("variable-name")?;
|
||||
|
||||
let (id_col_string, id_col_span) = convert_columns_string(id_col, call.head)?;
|
||||
let (val_col_string, val_col_span) = convert_columns_string(val_col, call.head)?;
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline(plugin, input, call.head)?;
|
||||
|
||||
check_column_datatypes(df.as_ref(), &id_col_string, id_col_span)?;
|
||||
check_column_datatypes(df.as_ref(), &val_col_string, val_col_span)?;
|
||||
|
||||
let mut res = df
|
||||
.as_ref()
|
||||
.melt(&id_col_string, &val_col_string)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error calculating melt".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
if let Some(name) = &variable_name {
|
||||
res.rename("variable", &name.item)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error renaming column".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(name.span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
}
|
||||
|
||||
if let Some(name) = &value_name {
|
||||
res.rename("value", &name.item)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error renaming column".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(name.span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
}
|
||||
|
||||
let res = NuDataFrame::new(false, res);
|
||||
to_pipeline_data(plugin, engine, call.head, res)
|
||||
}
|
||||
|
||||
fn check_column_datatypes<T: AsRef<str>>(
|
||||
df: &polars::prelude::DataFrame,
|
||||
cols: &[T],
|
||||
col_span: Span,
|
||||
) -> Result<(), ShellError> {
|
||||
if cols.is_empty() {
|
||||
return Err(ShellError::GenericError {
|
||||
error: "Merge error".into(),
|
||||
msg: "empty column list".into(),
|
||||
span: Some(col_span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
});
|
||||
}
|
||||
|
||||
// Checking if they are same type
|
||||
if cols.len() > 1 {
|
||||
for w in cols.windows(2) {
|
||||
let l_series = df
|
||||
.column(w[0].as_ref())
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error selecting columns".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(col_span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let r_series = df
|
||||
.column(w[1].as_ref())
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error selecting columns".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(col_span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
if l_series.dtype() != r_series.dtype() {
|
||||
return Err(ShellError::GenericError {
|
||||
error: "Merge error".into(),
|
||||
msg: "found different column types in list".into(),
|
||||
span: Some(col_span),
|
||||
help: Some(format!(
|
||||
"datatypes {} and {} are incompatible",
|
||||
l_series.dtype(),
|
||||
r_series.dtype()
|
||||
)),
|
||||
inner: vec![],
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&MeltDF)
|
||||
}
|
||||
}
|
105
crates/nu_plugin_polars/src/dataframe/eager/mod.rs
Normal file
105
crates/nu_plugin_polars/src/dataframe/eager/mod.rs
Normal file
@ -0,0 +1,105 @@
|
||||
mod append;
|
||||
mod cast;
|
||||
mod columns;
|
||||
mod drop;
|
||||
mod drop_duplicates;
|
||||
mod drop_nulls;
|
||||
mod dtypes;
|
||||
mod dummies;
|
||||
mod filter_with;
|
||||
mod first;
|
||||
mod get;
|
||||
mod last;
|
||||
mod list;
|
||||
mod melt;
|
||||
mod open;
|
||||
mod query_df;
|
||||
mod rename;
|
||||
mod sample;
|
||||
mod schema;
|
||||
mod shape;
|
||||
mod slice;
|
||||
mod sql_context;
|
||||
mod sql_expr;
|
||||
mod summary;
|
||||
mod take;
|
||||
mod to_arrow;
|
||||
mod to_avro;
|
||||
mod to_csv;
|
||||
mod to_df;
|
||||
mod to_json_lines;
|
||||
mod to_nu;
|
||||
mod to_parquet;
|
||||
mod with_column;
|
||||
|
||||
use crate::PolarsPlugin;
|
||||
|
||||
pub use self::open::OpenDataFrame;
|
||||
pub use append::AppendDF;
|
||||
pub use cast::CastDF;
|
||||
pub use columns::ColumnsDF;
|
||||
pub use drop::DropDF;
|
||||
pub use drop_duplicates::DropDuplicates;
|
||||
pub use drop_nulls::DropNulls;
|
||||
pub use dtypes::DataTypes;
|
||||
pub use dummies::Dummies;
|
||||
pub use filter_with::FilterWith;
|
||||
pub use first::FirstDF;
|
||||
pub use get::GetDF;
|
||||
pub use last::LastDF;
|
||||
pub use list::ListDF;
|
||||
pub use melt::MeltDF;
|
||||
use nu_plugin::PluginCommand;
|
||||
pub use query_df::QueryDf;
|
||||
pub use rename::RenameDF;
|
||||
pub use sample::SampleDF;
|
||||
pub use schema::SchemaCmd;
|
||||
pub use shape::ShapeDF;
|
||||
pub use slice::SliceDF;
|
||||
pub use sql_context::SQLContext;
|
||||
pub use summary::Summary;
|
||||
pub use take::TakeDF;
|
||||
pub use to_arrow::ToArrow;
|
||||
pub use to_avro::ToAvro;
|
||||
pub use to_csv::ToCSV;
|
||||
pub use to_df::ToDataFrame;
|
||||
pub use to_json_lines::ToJsonLines;
|
||||
pub use to_nu::ToNu;
|
||||
pub use to_parquet::ToParquet;
|
||||
pub use with_column::WithColumn;
|
||||
|
||||
pub(crate) fn eager_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlugin>>> {
|
||||
vec![
|
||||
Box::new(AppendDF),
|
||||
Box::new(CastDF),
|
||||
Box::new(ColumnsDF),
|
||||
Box::new(DataTypes),
|
||||
Box::new(DropDF),
|
||||
Box::new(DropDuplicates),
|
||||
Box::new(DropNulls),
|
||||
Box::new(Dummies),
|
||||
Box::new(FilterWith),
|
||||
Box::new(GetDF),
|
||||
Box::new(OpenDataFrame),
|
||||
Box::new(MeltDF),
|
||||
Box::new(Summary),
|
||||
Box::new(FirstDF),
|
||||
Box::new(LastDF),
|
||||
Box::new(ListDF),
|
||||
Box::new(RenameDF),
|
||||
Box::new(SampleDF),
|
||||
Box::new(ShapeDF),
|
||||
Box::new(SliceDF),
|
||||
Box::new(SchemaCmd),
|
||||
Box::new(TakeDF),
|
||||
Box::new(ToNu),
|
||||
Box::new(ToArrow),
|
||||
Box::new(ToAvro),
|
||||
Box::new(ToDataFrame),
|
||||
Box::new(ToCSV),
|
||||
Box::new(ToJsonLines),
|
||||
Box::new(ToParquet),
|
||||
Box::new(QueryDf),
|
||||
Box::new(WithColumn),
|
||||
]
|
||||
}
|
531
crates/nu_plugin_polars/src/dataframe/eager/open.rs
Normal file
531
crates/nu_plugin_polars/src/dataframe/eager/open.rs
Normal file
@ -0,0 +1,531 @@
|
||||
use crate::{
|
||||
dataframe::values::NuSchema,
|
||||
values::{cache_and_to_value, NuLazyFrame},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::values::NuDataFrame;
|
||||
use nu_plugin::PluginCommand;
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Spanned, SyntaxShape,
|
||||
Type, Value,
|
||||
};
|
||||
|
||||
use std::{fs::File, io::BufReader, path::PathBuf};
|
||||
|
||||
use polars::prelude::{
|
||||
CsvEncoding, CsvReader, IpcReader, JsonFormat, JsonReader, LazyCsvReader, LazyFileListReader,
|
||||
LazyFrame, ParquetReader, ScanArgsIpc, ScanArgsParquet, SerReader,
|
||||
};
|
||||
|
||||
use polars_io::{avro::AvroReader, prelude::ParallelStrategy};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct OpenDataFrame;
|
||||
|
||||
impl PluginCommand for OpenDataFrame {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars open"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Opens CSV, JSON, JSON lines, arrow, avro, or parquet file to create dataframe."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"file",
|
||||
SyntaxShape::Filepath,
|
||||
"file path to load values from",
|
||||
)
|
||||
.switch("lazy", "creates a lazy dataframe", Some('l'))
|
||||
.named(
|
||||
"type",
|
||||
SyntaxShape::String,
|
||||
"File type: csv, tsv, json, parquet, arrow, avro. If omitted, derive from file extension",
|
||||
Some('t'),
|
||||
)
|
||||
.named(
|
||||
"delimiter",
|
||||
SyntaxShape::String,
|
||||
"file delimiter character. CSV file",
|
||||
Some('d'),
|
||||
)
|
||||
.switch(
|
||||
"no-header",
|
||||
"Indicates if file doesn't have header. CSV file",
|
||||
None,
|
||||
)
|
||||
.named(
|
||||
"infer-schema",
|
||||
SyntaxShape::Number,
|
||||
"Number of rows to infer the schema of the file. CSV file",
|
||||
None,
|
||||
)
|
||||
.named(
|
||||
"skip-rows",
|
||||
SyntaxShape::Number,
|
||||
"Number of rows to skip from file. CSV file",
|
||||
None,
|
||||
)
|
||||
.named(
|
||||
"columns",
|
||||
SyntaxShape::List(Box::new(SyntaxShape::String)),
|
||||
"Columns to be selected from csv file. CSV and Parquet file",
|
||||
None,
|
||||
)
|
||||
.named(
|
||||
"schema",
|
||||
SyntaxShape::Record(vec![]),
|
||||
r#"Polars Schema in format [{name: str}]. CSV, JSON, and JSONL files"#,
|
||||
Some('s')
|
||||
)
|
||||
.input_output_type(Type::Any, Type::Custom("dataframe".into()))
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Takes a file name and creates a dataframe",
|
||||
example: "polars open test.csv",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &nu_plugin::EngineInterface,
|
||||
call: &nu_plugin::EvaluatedCall,
|
||||
_input: nu_protocol::PipelineData,
|
||||
) -> Result<nu_protocol::PipelineData, LabeledError> {
|
||||
command(plugin, engine, call).map_err(|e| e.into())
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &nu_plugin::EngineInterface,
|
||||
call: &nu_plugin::EvaluatedCall,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let file: Spanned<PathBuf> = call.req(0)?;
|
||||
|
||||
let type_option: Option<Spanned<String>> = call.get_flag("type")?;
|
||||
|
||||
let type_id = match &type_option {
|
||||
Some(ref t) => Some((t.item.to_owned(), "Invalid type", t.span)),
|
||||
None => file.item.extension().map(|e| {
|
||||
(
|
||||
e.to_string_lossy().into_owned(),
|
||||
"Invalid extension",
|
||||
file.span,
|
||||
)
|
||||
}),
|
||||
};
|
||||
|
||||
match type_id {
|
||||
Some((e, msg, blamed)) => match e.as_str() {
|
||||
"csv" | "tsv" => from_csv(plugin, engine, call),
|
||||
"parquet" | "parq" => from_parquet(plugin, engine, call),
|
||||
"ipc" | "arrow" => from_ipc(plugin, engine, call),
|
||||
"json" => from_json(plugin, engine, call),
|
||||
"jsonl" => from_jsonl(plugin, engine, call),
|
||||
"avro" => from_avro(plugin, engine, call),
|
||||
_ => Err(ShellError::FileNotFoundCustom {
|
||||
msg: format!(
|
||||
"{msg}. Supported values: csv, tsv, parquet, ipc, arrow, json, jsonl, avro"
|
||||
),
|
||||
span: blamed,
|
||||
}),
|
||||
},
|
||||
None => Err(ShellError::FileNotFoundCustom {
|
||||
msg: "File without extension".into(),
|
||||
span: file.span,
|
||||
}),
|
||||
}
|
||||
.map(|value| PipelineData::Value(value, None))
|
||||
}
|
||||
|
||||
fn from_parquet(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &nu_plugin::EngineInterface,
|
||||
call: &nu_plugin::EvaluatedCall,
|
||||
) -> Result<Value, ShellError> {
|
||||
if call.has_flag("lazy")? {
|
||||
let file: String = call.req(0)?;
|
||||
let args = ScanArgsParquet {
|
||||
n_rows: None,
|
||||
cache: true,
|
||||
parallel: ParallelStrategy::Auto,
|
||||
rechunk: false,
|
||||
row_index: None,
|
||||
low_memory: false,
|
||||
cloud_options: None,
|
||||
use_statistics: false,
|
||||
hive_partitioning: false,
|
||||
};
|
||||
|
||||
let df: NuLazyFrame = LazyFrame::scan_parquet(file, args)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Parquet reader error".into(),
|
||||
msg: format!("{e:?}"),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?
|
||||
.into();
|
||||
|
||||
cache_and_to_value(plugin, engine, call.head, df)
|
||||
} else {
|
||||
let file: Spanned<PathBuf> = call.req(0)?;
|
||||
let columns: Option<Vec<String>> = call.get_flag("columns")?;
|
||||
|
||||
let r = File::open(&file.item).map_err(|e| ShellError::GenericError {
|
||||
error: "Error opening file".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(file.span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
let reader = ParquetReader::new(r);
|
||||
|
||||
let reader = match columns {
|
||||
None => reader,
|
||||
Some(columns) => reader.with_columns(Some(columns)),
|
||||
};
|
||||
|
||||
let df: NuDataFrame = reader
|
||||
.finish()
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Parquet reader error".into(),
|
||||
msg: format!("{e:?}"),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?
|
||||
.into();
|
||||
|
||||
cache_and_to_value(plugin, engine, call.head, df)
|
||||
}
|
||||
}
|
||||
|
||||
fn from_avro(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &nu_plugin::EngineInterface,
|
||||
call: &nu_plugin::EvaluatedCall,
|
||||
) -> Result<Value, ShellError> {
|
||||
let file: Spanned<PathBuf> = call.req(0)?;
|
||||
let columns: Option<Vec<String>> = call.get_flag("columns")?;
|
||||
|
||||
let r = File::open(&file.item).map_err(|e| ShellError::GenericError {
|
||||
error: "Error opening file".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(file.span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
let reader = AvroReader::new(r);
|
||||
|
||||
let reader = match columns {
|
||||
None => reader,
|
||||
Some(columns) => reader.with_columns(Some(columns)),
|
||||
};
|
||||
|
||||
let df: NuDataFrame = reader
|
||||
.finish()
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Avro reader error".into(),
|
||||
msg: format!("{e:?}"),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?
|
||||
.into();
|
||||
|
||||
cache_and_to_value(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
fn from_ipc(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &nu_plugin::EngineInterface,
|
||||
call: &nu_plugin::EvaluatedCall,
|
||||
) -> Result<Value, ShellError> {
|
||||
if call.has_flag("lazy")? {
|
||||
let file: String = call.req(0)?;
|
||||
let args = ScanArgsIpc {
|
||||
n_rows: None,
|
||||
cache: true,
|
||||
rechunk: false,
|
||||
row_index: None,
|
||||
memmap: true,
|
||||
};
|
||||
|
||||
let df: NuLazyFrame = LazyFrame::scan_ipc(file, args)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "IPC reader error".into(),
|
||||
msg: format!("{e:?}"),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?
|
||||
.into();
|
||||
|
||||
cache_and_to_value(plugin, engine, call.head, df)
|
||||
} else {
|
||||
let file: Spanned<PathBuf> = call.req(0)?;
|
||||
let columns: Option<Vec<String>> = call.get_flag("columns")?;
|
||||
|
||||
let r = File::open(&file.item).map_err(|e| ShellError::GenericError {
|
||||
error: "Error opening file".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(file.span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
let reader = IpcReader::new(r);
|
||||
|
||||
let reader = match columns {
|
||||
None => reader,
|
||||
Some(columns) => reader.with_columns(Some(columns)),
|
||||
};
|
||||
|
||||
let df: NuDataFrame = reader
|
||||
.finish()
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "IPC reader error".into(),
|
||||
msg: format!("{e:?}"),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?
|
||||
.into();
|
||||
|
||||
cache_and_to_value(plugin, engine, call.head, df)
|
||||
}
|
||||
}
|
||||
|
||||
fn from_json(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &nu_plugin::EngineInterface,
|
||||
call: &nu_plugin::EvaluatedCall,
|
||||
) -> Result<Value, ShellError> {
|
||||
let file: Spanned<PathBuf> = call.req(0)?;
|
||||
let file = File::open(&file.item).map_err(|e| ShellError::GenericError {
|
||||
error: "Error opening file".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(file.span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
let maybe_schema = call
|
||||
.get_flag("schema")?
|
||||
.map(|schema| NuSchema::try_from(&schema))
|
||||
.transpose()?;
|
||||
|
||||
let buf_reader = BufReader::new(file);
|
||||
let reader = JsonReader::new(buf_reader);
|
||||
|
||||
let reader = match maybe_schema {
|
||||
Some(schema) => reader.with_schema(schema.into()),
|
||||
None => reader,
|
||||
};
|
||||
|
||||
let df: NuDataFrame = reader
|
||||
.finish()
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Json reader error".into(),
|
||||
msg: format!("{e:?}"),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?
|
||||
.into();
|
||||
|
||||
cache_and_to_value(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
fn from_jsonl(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &nu_plugin::EngineInterface,
|
||||
call: &nu_plugin::EvaluatedCall,
|
||||
) -> Result<Value, ShellError> {
|
||||
let infer_schema: Option<usize> = call.get_flag("infer-schema")?;
|
||||
let maybe_schema = call
|
||||
.get_flag("schema")?
|
||||
.map(|schema| NuSchema::try_from(&schema))
|
||||
.transpose()?;
|
||||
let file: Spanned<PathBuf> = call.req(0)?;
|
||||
let file = File::open(&file.item).map_err(|e| ShellError::GenericError {
|
||||
error: "Error opening file".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(file.span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let buf_reader = BufReader::new(file);
|
||||
let reader = JsonReader::new(buf_reader)
|
||||
.with_json_format(JsonFormat::JsonLines)
|
||||
.infer_schema_len(infer_schema);
|
||||
|
||||
let reader = match maybe_schema {
|
||||
Some(schema) => reader.with_schema(schema.into()),
|
||||
None => reader,
|
||||
};
|
||||
|
||||
let df: NuDataFrame = reader
|
||||
.finish()
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Json lines reader error".into(),
|
||||
msg: format!("{e:?}"),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?
|
||||
.into();
|
||||
|
||||
cache_and_to_value(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
fn from_csv(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &nu_plugin::EngineInterface,
|
||||
call: &nu_plugin::EvaluatedCall,
|
||||
) -> Result<Value, ShellError> {
|
||||
let delimiter: Option<Spanned<String>> = call.get_flag("delimiter")?;
|
||||
let no_header: bool = call.has_flag("no-header")?;
|
||||
let infer_schema: Option<usize> = call.get_flag("infer-schema")?;
|
||||
let skip_rows: Option<usize> = call.get_flag("skip-rows")?;
|
||||
let columns: Option<Vec<String>> = call.get_flag("columns")?;
|
||||
|
||||
let maybe_schema = call
|
||||
.get_flag("schema")?
|
||||
.map(|schema| NuSchema::try_from(&schema))
|
||||
.transpose()?;
|
||||
|
||||
if call.has_flag("lazy")? {
|
||||
let file: String = call.req(0)?;
|
||||
let csv_reader = LazyCsvReader::new(file);
|
||||
|
||||
let csv_reader = match delimiter {
|
||||
None => csv_reader,
|
||||
Some(d) => {
|
||||
if d.item.len() != 1 {
|
||||
return Err(ShellError::GenericError {
|
||||
error: "Incorrect delimiter".into(),
|
||||
msg: "Delimiter has to be one character".into(),
|
||||
span: Some(d.span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
});
|
||||
} else {
|
||||
let delimiter = match d.item.chars().next() {
|
||||
Some(d) => d as u8,
|
||||
None => unreachable!(),
|
||||
};
|
||||
csv_reader.with_separator(delimiter)
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let csv_reader = csv_reader.has_header(!no_header);
|
||||
|
||||
let csv_reader = match maybe_schema {
|
||||
Some(schema) => csv_reader.with_schema(Some(schema.into())),
|
||||
None => csv_reader,
|
||||
};
|
||||
|
||||
let csv_reader = match infer_schema {
|
||||
None => csv_reader,
|
||||
Some(r) => csv_reader.with_infer_schema_length(Some(r)),
|
||||
};
|
||||
|
||||
let csv_reader = match skip_rows {
|
||||
None => csv_reader,
|
||||
Some(r) => csv_reader.with_skip_rows(r),
|
||||
};
|
||||
|
||||
let df: NuLazyFrame = csv_reader
|
||||
.finish()
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Parquet reader error".into(),
|
||||
msg: format!("{e:?}"),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?
|
||||
.into();
|
||||
|
||||
cache_and_to_value(plugin, engine, call.head, df)
|
||||
} else {
|
||||
let file: Spanned<PathBuf> = call.req(0)?;
|
||||
let csv_reader = CsvReader::from_path(&file.item)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error creating CSV reader".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(file.span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?
|
||||
.with_encoding(CsvEncoding::LossyUtf8);
|
||||
|
||||
let csv_reader = match delimiter {
|
||||
None => csv_reader,
|
||||
Some(d) => {
|
||||
if d.item.len() != 1 {
|
||||
return Err(ShellError::GenericError {
|
||||
error: "Incorrect delimiter".into(),
|
||||
msg: "Delimiter has to be one character".into(),
|
||||
span: Some(d.span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
});
|
||||
} else {
|
||||
let delimiter = match d.item.chars().next() {
|
||||
Some(d) => d as u8,
|
||||
None => unreachable!(),
|
||||
};
|
||||
csv_reader.with_separator(delimiter)
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let csv_reader = csv_reader.has_header(!no_header);
|
||||
|
||||
let csv_reader = match maybe_schema {
|
||||
Some(schema) => csv_reader.with_schema(Some(schema.into())),
|
||||
None => csv_reader,
|
||||
};
|
||||
|
||||
let csv_reader = match infer_schema {
|
||||
None => csv_reader,
|
||||
Some(r) => csv_reader.infer_schema(Some(r)),
|
||||
};
|
||||
|
||||
let csv_reader = match skip_rows {
|
||||
None => csv_reader,
|
||||
Some(r) => csv_reader.with_skip_rows(r),
|
||||
};
|
||||
|
||||
let csv_reader = match columns {
|
||||
None => csv_reader,
|
||||
Some(columns) => csv_reader.with_columns(Some(columns)),
|
||||
};
|
||||
|
||||
let df: NuDataFrame = csv_reader
|
||||
.finish()
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Parquet reader error".into(),
|
||||
msg: format!("{e:?}"),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?
|
||||
.into();
|
||||
|
||||
cache_and_to_value(plugin, engine, call.head, df)
|
||||
}
|
||||
}
|
108
crates/nu_plugin_polars/src/dataframe/eager/query_df.rs
Normal file
108
crates/nu_plugin_polars/src/dataframe/eager/query_df.rs
Normal file
@ -0,0 +1,108 @@
|
||||
use super::super::values::NuDataFrame;
|
||||
use crate::dataframe::values::Column;
|
||||
use crate::dataframe::{eager::SQLContext, values::NuLazyFrame};
|
||||
use crate::values::{to_pipeline_data, CustomValueSupport};
|
||||
use crate::PolarsPlugin;
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
|
||||
// attribution:
|
||||
// sql_context.rs, and sql_expr.rs were copied from polars-sql. thank you.
|
||||
// maybe we should just use the crate at some point but it's not published yet.
|
||||
// https://github.com/pola-rs/polars/tree/master/polars-sql
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct QueryDf;
|
||||
|
||||
impl PluginCommand for QueryDf {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars query"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Query dataframe using SQL. Note: The dataframe is always named 'df' in your query's from clause."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required("sql", SyntaxShape::String, "sql query")
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn search_terms(&self) -> Vec<&str> {
|
||||
vec!["dataframe", "sql", "search"]
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Query dataframe using SQL",
|
||||
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars query 'select a from df'",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let sql_query: String = call.req(0)?;
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
|
||||
let mut ctx = SQLContext::new();
|
||||
ctx.register("df", &df.df);
|
||||
let df_sql = ctx
|
||||
.execute(&sql_query)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Dataframe Error".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
let lazy = NuLazyFrame::new(!df.from_lazy, df_sql);
|
||||
to_pipeline_data(plugin, engine, call.head, lazy)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&QueryDf)
|
||||
}
|
||||
}
|
203
crates/nu_plugin_polars/src/dataframe/eager/rename.rs
Normal file
203
crates/nu_plugin_polars/src/dataframe/eager/rename.rs
Normal file
@ -0,0 +1,203 @@
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
|
||||
use crate::{
|
||||
dataframe::{utils::extract_strings, values::NuLazyFrame},
|
||||
values::{to_pipeline_data, CustomValueSupport, PolarsPluginObject},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct RenameDF;
|
||||
|
||||
impl PluginCommand for RenameDF {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars rename"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Rename a dataframe column."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"columns",
|
||||
SyntaxShape::Any,
|
||||
"Column(s) to be renamed. A string or list of strings",
|
||||
)
|
||||
.required(
|
||||
"new names",
|
||||
SyntaxShape::Any,
|
||||
"New names for the selected column(s). A string or list of strings",
|
||||
)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe or lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Renames a series",
|
||||
example: "[5 6 7 8] | polars into-df | polars rename '0' new_name",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"new_name".to_string(),
|
||||
vec![
|
||||
Value::test_int(5),
|
||||
Value::test_int(6),
|
||||
Value::test_int(7),
|
||||
Value::test_int(8),
|
||||
],
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Renames a dataframe column",
|
||||
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars rename a a_new",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a_new".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Renames two dataframe columns",
|
||||
example:
|
||||
"[[a b]; [1 2] [3 4]] | polars into-df | polars rename [a b] [a_new b_new]",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a_new".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
),
|
||||
Column::new(
|
||||
"b_new".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let value = input.into_value(call.head);
|
||||
match PolarsPluginObject::try_from_value(plugin, &value).map_err(LabeledError::from)? {
|
||||
PolarsPluginObject::NuDataFrame(df) => {
|
||||
command_eager(plugin, engine, call, df).map_err(LabeledError::from)
|
||||
}
|
||||
PolarsPluginObject::NuLazyFrame(lazy) => {
|
||||
command_lazy(plugin, engine, call, lazy).map_err(LabeledError::from)
|
||||
}
|
||||
_ => Err(LabeledError::new(format!("Unsupported type: {value:?}"))
|
||||
.with_label("Unsupported Type", call.head)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn command_eager(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
df: NuDataFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let columns: Value = call.req(0)?;
|
||||
let columns = extract_strings(columns)?;
|
||||
|
||||
let new_names: Value = call.req(1)?;
|
||||
let new_names = extract_strings(new_names)?;
|
||||
|
||||
let mut polars_df = df.to_polars();
|
||||
|
||||
for (from, to) in columns.iter().zip(new_names.iter()) {
|
||||
polars_df
|
||||
.rename(from, to)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error renaming".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
}
|
||||
|
||||
let df = NuDataFrame::new(false, polars_df);
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
fn command_lazy(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
lazy: NuLazyFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let columns: Value = call.req(0)?;
|
||||
let columns = extract_strings(columns)?;
|
||||
|
||||
let new_names: Value = call.req(1)?;
|
||||
let new_names = extract_strings(new_names)?;
|
||||
|
||||
if columns.len() != new_names.len() {
|
||||
let value: Value = call.req(1)?;
|
||||
return Err(ShellError::IncompatibleParametersSingle {
|
||||
msg: "New name list has different size to column list".into(),
|
||||
span: value.span(),
|
||||
});
|
||||
}
|
||||
|
||||
let lazy = lazy.to_polars();
|
||||
let lazy: NuLazyFrame = lazy.rename(&columns, &new_names).into();
|
||||
|
||||
to_pipeline_data(plugin, engine, call.head, lazy)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&RenameDF)
|
||||
}
|
||||
}
|
138
crates/nu_plugin_polars/src/dataframe/eager/sample.rs
Normal file
138
crates/nu_plugin_polars/src/dataframe/eager/sample.rs
Normal file
@ -0,0 +1,138 @@
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Spanned, SyntaxShape,
|
||||
Type,
|
||||
};
|
||||
use polars::prelude::NamedFrom;
|
||||
use polars::series::Series;
|
||||
|
||||
use crate::{
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::values::NuDataFrame;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct SampleDF;
|
||||
|
||||
impl PluginCommand for SampleDF {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars sample"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Create sample dataframe."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.named(
|
||||
"n-rows",
|
||||
SyntaxShape::Int,
|
||||
"number of rows to be taken from dataframe",
|
||||
Some('n'),
|
||||
)
|
||||
.named(
|
||||
"fraction",
|
||||
SyntaxShape::Number,
|
||||
"fraction of dataframe to be taken",
|
||||
Some('f'),
|
||||
)
|
||||
.named(
|
||||
"seed",
|
||||
SyntaxShape::Number,
|
||||
"seed for the selection",
|
||||
Some('s'),
|
||||
)
|
||||
.switch("replace", "sample with replace", Some('e'))
|
||||
.switch("shuffle", "shuffle sample", Some('u'))
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Sample rows from dataframe",
|
||||
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars sample --n-rows 1",
|
||||
result: None, // No expected value because sampling is random
|
||||
},
|
||||
Example {
|
||||
description: "Shows sample row using fraction and replace",
|
||||
example:
|
||||
"[[a b]; [1 2] [3 4] [5 6]] | polars into-df | polars sample --fraction 0.5 --replace",
|
||||
result: None, // No expected value because sampling is random
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let rows: Option<Spanned<i64>> = call.get_flag("n-rows")?;
|
||||
let fraction: Option<Spanned<f64>> = call.get_flag("fraction")?;
|
||||
let seed: Option<u64> = call.get_flag::<i64>("seed")?.map(|val| val as u64);
|
||||
let replace: bool = call.has_flag("replace")?;
|
||||
let shuffle: bool = call.has_flag("shuffle")?;
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline(plugin, input, call.head)?;
|
||||
|
||||
let df = match (rows, fraction) {
|
||||
(Some(rows), None) => df
|
||||
.as_ref()
|
||||
.sample_n(&Series::new("s", &[rows.item]), replace, shuffle, seed)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error creating sample".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(rows.span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
}),
|
||||
(None, Some(frac)) => df
|
||||
.as_ref()
|
||||
.sample_frac(&Series::new("frac", &[frac.item]), replace, shuffle, seed)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error creating sample".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(frac.span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
}),
|
||||
(Some(_), Some(_)) => Err(ShellError::GenericError {
|
||||
error: "Incompatible flags".into(),
|
||||
msg: "Only one selection criterion allowed".into(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
}),
|
||||
(None, None) => Err(ShellError::GenericError {
|
||||
error: "No selection".into(),
|
||||
msg: "No selection criterion was found".into(),
|
||||
span: Some(call.head),
|
||||
help: Some("Perhaps you want to use the flag -n or -f".into()),
|
||||
inner: vec![],
|
||||
}),
|
||||
};
|
||||
let df = NuDataFrame::new(false, df?);
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
133
crates/nu_plugin_polars/src/dataframe/eager/schema.rs
Normal file
133
crates/nu_plugin_polars/src/dataframe/eager/schema.rs
Normal file
@ -0,0 +1,133 @@
|
||||
use crate::{values::PolarsPluginObject, PolarsPlugin};
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
record, Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct SchemaCmd;
|
||||
|
||||
impl PluginCommand for SchemaCmd {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars schema"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Show schema for a dataframe."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.switch("datatype-list", "creates a lazy dataframe", Some('l'))
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Dataframe schema",
|
||||
example: r#"[[a b]; [1 "foo"] [3 "bar"]] | polars into-df | polars schema"#,
|
||||
result: Some(Value::record(
|
||||
record! {
|
||||
"a" => Value::string("i64", Span::test_data()),
|
||||
"b" => Value::string("str", Span::test_data()),
|
||||
},
|
||||
Span::test_data(),
|
||||
)),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
if call.has_flag("datatype-list")? {
|
||||
Ok(PipelineData::Value(datatype_list(Span::unknown()), None))
|
||||
} else {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
_engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
match PolarsPluginObject::try_from_pipeline(plugin, input, call.head)? {
|
||||
PolarsPluginObject::NuDataFrame(df) => {
|
||||
let schema = df.schema();
|
||||
let value: Value = schema.into();
|
||||
Ok(PipelineData::Value(value, None))
|
||||
}
|
||||
PolarsPluginObject::NuLazyFrame(lazy) => {
|
||||
let schema = lazy.schema()?;
|
||||
let value: Value = schema.into();
|
||||
Ok(PipelineData::Value(value, None))
|
||||
}
|
||||
_ => Err(ShellError::GenericError {
|
||||
error: "Must be a dataframe or lazy dataframe".into(),
|
||||
msg: "".into(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
fn datatype_list(span: Span) -> Value {
|
||||
let types: Vec<Value> = [
|
||||
("null", ""),
|
||||
("bool", ""),
|
||||
("u8", ""),
|
||||
("u16", ""),
|
||||
("u32", ""),
|
||||
("u64", ""),
|
||||
("i8", ""),
|
||||
("i16", ""),
|
||||
("i32", ""),
|
||||
("i64", ""),
|
||||
("f32", ""),
|
||||
("f64", ""),
|
||||
("str", ""),
|
||||
("binary", ""),
|
||||
("date", ""),
|
||||
("datetime<time_unit: (ms, us, ns) timezone (optional)>", "Time Unit can be: milliseconds: ms, microseconds: us, nanoseconds: ns. Timezone wildcard is *. Other Timezone examples: UTC, America/Los_Angeles."),
|
||||
("duration<time_unit: (ms, us, ns)>", "Time Unit can be: milliseconds: ms, microseconds: us, nanoseconds: ns."),
|
||||
("time", ""),
|
||||
("object", ""),
|
||||
("unknown", ""),
|
||||
("list<dtype>", ""),
|
||||
]
|
||||
.iter()
|
||||
.map(|(dtype, note)| {
|
||||
Value::record(record! {
|
||||
"dtype" => Value::string(*dtype, span),
|
||||
"note" => Value::string(*note, span),
|
||||
},
|
||||
span)
|
||||
})
|
||||
.collect();
|
||||
Value::list(types, span)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&SchemaCmd)
|
||||
}
|
||||
}
|
94
crates/nu_plugin_polars/src/dataframe/eager/shape.rs
Normal file
94
crates/nu_plugin_polars/src/dataframe/eager/shape.rs
Normal file
@ -0,0 +1,94 @@
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
|
||||
};
|
||||
|
||||
use crate::{
|
||||
dataframe::values::Column,
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::values::NuDataFrame;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ShapeDF;
|
||||
|
||||
impl PluginCommand for ShapeDF {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars shape"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Shows column and row size for a dataframe."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Shows row and column shape",
|
||||
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars shape",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("rows".to_string(), vec![Value::test_int(2)]),
|
||||
Column::new("columns".to_string(), vec![Value::test_int(2)]),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
|
||||
let rows = Value::int(df.as_ref().height() as i64, call.head);
|
||||
|
||||
let cols = Value::int(df.as_ref().width() as i64, call.head);
|
||||
|
||||
let rows_col = Column::new("rows".to_string(), vec![rows]);
|
||||
let cols_col = Column::new("columns".to_string(), vec![cols]);
|
||||
|
||||
let df = NuDataFrame::try_from_columns(vec![rows_col, cols_col], None)?;
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&ShapeDF)
|
||||
}
|
||||
}
|
95
crates/nu_plugin_polars/src/dataframe/eager/slice.rs
Normal file
95
crates/nu_plugin_polars/src/dataframe/eager/slice.rs
Normal file
@ -0,0 +1,95 @@
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
|
||||
use crate::{
|
||||
dataframe::values::Column,
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::values::NuDataFrame;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct SliceDF;
|
||||
|
||||
impl PluginCommand for SliceDF {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars slice"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates new dataframe from a slice of rows."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required("offset", SyntaxShape::Int, "start of slice")
|
||||
.required("size", SyntaxShape::Int, "size of slice")
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Create new dataframe from a slice of the rows",
|
||||
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars slice 0 1",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("a".to_string(), vec![Value::test_int(1)]),
|
||||
Column::new("b".to_string(), vec![Value::test_int(2)]),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let offset: i64 = call.req(0)?;
|
||||
let size: usize = call.req(1)?;
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
|
||||
let res = df.as_ref().slice(offset, size);
|
||||
let res = NuDataFrame::new(false, res);
|
||||
|
||||
to_pipeline_data(plugin, engine, call.head, res)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&SliceDF)
|
||||
}
|
||||
}
|
228
crates/nu_plugin_polars/src/dataframe/eager/sql_context.rs
Normal file
228
crates/nu_plugin_polars/src/dataframe/eager/sql_context.rs
Normal file
@ -0,0 +1,228 @@
|
||||
use crate::dataframe::eager::sql_expr::parse_sql_expr;
|
||||
use polars::error::{ErrString, PolarsError};
|
||||
use polars::prelude::{col, DataFrame, DataType, IntoLazy, LazyFrame};
|
||||
use sqlparser::ast::{
|
||||
Expr as SqlExpr, GroupByExpr, Select, SelectItem, SetExpr, Statement, TableFactor,
|
||||
Value as SQLValue,
|
||||
};
|
||||
use sqlparser::dialect::GenericDialect;
|
||||
use sqlparser::parser::Parser;
|
||||
use std::collections::HashMap;
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct SQLContext {
|
||||
table_map: HashMap<String, LazyFrame>,
|
||||
dialect: GenericDialect,
|
||||
}
|
||||
|
||||
impl SQLContext {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
table_map: HashMap::new(),
|
||||
dialect: GenericDialect,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn register(&mut self, name: &str, df: &DataFrame) {
|
||||
self.table_map.insert(name.to_owned(), df.clone().lazy());
|
||||
}
|
||||
|
||||
fn execute_select(&self, select_stmt: &Select) -> Result<LazyFrame, PolarsError> {
|
||||
// Determine involved dataframe
|
||||
// Implicit join require some more work in query parsers, Explicit join are preferred for now.
|
||||
let tbl = select_stmt.from.first().ok_or_else(|| {
|
||||
PolarsError::ComputeError(ErrString::from("No table found in select statement"))
|
||||
})?;
|
||||
let mut alias_map = HashMap::new();
|
||||
let tbl_name = match &tbl.relation {
|
||||
TableFactor::Table { name, alias, .. } => {
|
||||
let tbl_name = name
|
||||
.0
|
||||
.first()
|
||||
.ok_or_else(|| {
|
||||
PolarsError::ComputeError(ErrString::from(
|
||||
"No table found in select statement",
|
||||
))
|
||||
})?
|
||||
.value
|
||||
.to_string();
|
||||
if self.table_map.contains_key(&tbl_name) {
|
||||
if let Some(alias) = alias {
|
||||
alias_map.insert(alias.name.value.clone(), tbl_name.to_owned());
|
||||
};
|
||||
tbl_name
|
||||
} else {
|
||||
return Err(PolarsError::ComputeError(
|
||||
format!("Table name {tbl_name} was not found").into(),
|
||||
));
|
||||
}
|
||||
}
|
||||
// Support bare table, optional with alias for now
|
||||
_ => return Err(PolarsError::ComputeError("Not implemented".into())),
|
||||
};
|
||||
let df = &self.table_map[&tbl_name];
|
||||
let mut raw_projection_before_alias: HashMap<String, usize> = HashMap::new();
|
||||
let mut contain_wildcard = false;
|
||||
// Filter Expression
|
||||
let df = match select_stmt.selection.as_ref() {
|
||||
Some(expr) => {
|
||||
let filter_expression = parse_sql_expr(expr)?;
|
||||
df.clone().filter(filter_expression)
|
||||
}
|
||||
None => df.clone(),
|
||||
};
|
||||
// Column Projections
|
||||
let projection = select_stmt
|
||||
.projection
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, select_item)| {
|
||||
Ok(match select_item {
|
||||
SelectItem::UnnamedExpr(expr) => {
|
||||
let expr = parse_sql_expr(expr)?;
|
||||
raw_projection_before_alias.insert(format!("{expr:?}"), i);
|
||||
expr
|
||||
}
|
||||
SelectItem::ExprWithAlias { expr, alias } => {
|
||||
let expr = parse_sql_expr(expr)?;
|
||||
raw_projection_before_alias.insert(format!("{expr:?}"), i);
|
||||
expr.alias(&alias.value)
|
||||
}
|
||||
SelectItem::QualifiedWildcard(_, _) | SelectItem::Wildcard(_) => {
|
||||
contain_wildcard = true;
|
||||
col("*")
|
||||
}
|
||||
})
|
||||
})
|
||||
.collect::<Result<Vec<_>, PolarsError>>()?;
|
||||
// Check for group by
|
||||
// After projection since there might be number.
|
||||
let group_by = match &select_stmt.group_by {
|
||||
GroupByExpr::All =>
|
||||
Err(
|
||||
PolarsError::ComputeError("Group-By Error: Only positive number or expression are supported, not all".into())
|
||||
)?,
|
||||
GroupByExpr::Expressions(expressions) => expressions
|
||||
}
|
||||
.iter()
|
||||
.map(
|
||||
|e|match e {
|
||||
SqlExpr::Value(SQLValue::Number(idx, _)) => {
|
||||
let idx = match idx.parse::<usize>() {
|
||||
Ok(0)| Err(_) => Err(
|
||||
PolarsError::ComputeError(
|
||||
format!("Group-By Error: Only positive number or expression are supported, got {idx}").into()
|
||||
)),
|
||||
Ok(idx) => Ok(idx)
|
||||
}?;
|
||||
Ok(projection[idx].clone())
|
||||
}
|
||||
SqlExpr::Value(_) => Err(
|
||||
PolarsError::ComputeError("Group-By Error: Only positive number or expression are supported".into())
|
||||
),
|
||||
_ => parse_sql_expr(e)
|
||||
}
|
||||
)
|
||||
.collect::<Result<Vec<_>, PolarsError>>()?;
|
||||
|
||||
let df = if group_by.is_empty() {
|
||||
df.select(projection)
|
||||
} else {
|
||||
// check groupby and projection due to difference between SQL and polars
|
||||
// Return error on wild card, shouldn't process this
|
||||
if contain_wildcard {
|
||||
return Err(PolarsError::ComputeError(
|
||||
"Group-By Error: Can't process wildcard in group-by".into(),
|
||||
));
|
||||
}
|
||||
// Default polars group by will have group by columns at the front
|
||||
// need some container to contain position of group by columns and its position
|
||||
// at the final agg projection, check the schema for the existence of group by column
|
||||
// and its projections columns, keeping the original index
|
||||
let (exclude_expr, groupby_pos): (Vec<_>, Vec<_>) = group_by
|
||||
.iter()
|
||||
.map(|expr| raw_projection_before_alias.get(&format!("{expr:?}")))
|
||||
.enumerate()
|
||||
.filter(|(_, proj_p)| proj_p.is_some())
|
||||
.map(|(gb_p, proj_p)| (*proj_p.unwrap_or(&0), (*proj_p.unwrap_or(&0), gb_p)))
|
||||
.unzip();
|
||||
let (agg_projection, agg_proj_pos): (Vec<_>, Vec<_>) = projection
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(i, _)| !exclude_expr.contains(i))
|
||||
.enumerate()
|
||||
.map(|(agg_pj, (proj_p, expr))| (expr.clone(), (proj_p, agg_pj + group_by.len())))
|
||||
.unzip();
|
||||
let agg_df = df.group_by(group_by).agg(agg_projection);
|
||||
let mut final_proj_pos = groupby_pos
|
||||
.into_iter()
|
||||
.chain(agg_proj_pos)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
final_proj_pos.sort_by(|(proj_pa, _), (proj_pb, _)| proj_pa.cmp(proj_pb));
|
||||
let final_proj = final_proj_pos
|
||||
.into_iter()
|
||||
.map(|(_, shm_p)| {
|
||||
col(agg_df
|
||||
.clone()
|
||||
// FIXME: had to do this mess to get get_index to work, not sure why. need help
|
||||
.collect()
|
||||
.unwrap_or_default()
|
||||
.schema()
|
||||
.get_at_index(shm_p)
|
||||
.unwrap_or((&"".into(), &DataType::Null))
|
||||
.0)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
agg_df.select(final_proj)
|
||||
};
|
||||
Ok(df)
|
||||
}
|
||||
|
||||
pub fn execute(&self, query: &str) -> Result<LazyFrame, PolarsError> {
|
||||
let ast = Parser::parse_sql(&self.dialect, query)
|
||||
.map_err(|e| PolarsError::ComputeError(format!("{e:?}").into()))?;
|
||||
if ast.len() != 1 {
|
||||
Err(PolarsError::ComputeError(
|
||||
"One and only one statement at a time please".into(),
|
||||
))
|
||||
} else {
|
||||
let ast = ast
|
||||
.first()
|
||||
.ok_or_else(|| PolarsError::ComputeError(ErrString::from("No statement found")))?;
|
||||
Ok(match ast {
|
||||
Statement::Query(query) => {
|
||||
let rs = match &*query.body {
|
||||
SetExpr::Select(select_stmt) => self.execute_select(select_stmt)?,
|
||||
_ => {
|
||||
return Err(PolarsError::ComputeError(
|
||||
"INSERT, UPDATE is not supported for polars".into(),
|
||||
))
|
||||
}
|
||||
};
|
||||
match &query.limit {
|
||||
Some(SqlExpr::Value(SQLValue::Number(nrow, _))) => {
|
||||
let nrow = nrow.parse().map_err(|err| {
|
||||
PolarsError::ComputeError(
|
||||
format!("Conversion Error: {err:?}").into(),
|
||||
)
|
||||
})?;
|
||||
rs.limit(nrow)
|
||||
}
|
||||
None => rs,
|
||||
_ => {
|
||||
return Err(PolarsError::ComputeError(
|
||||
"Only support number argument to LIMIT clause".into(),
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
return Err(PolarsError::ComputeError(
|
||||
format!("Statement type {ast:?} is not supported").into(),
|
||||
))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
200
crates/nu_plugin_polars/src/dataframe/eager/sql_expr.rs
Normal file
200
crates/nu_plugin_polars/src/dataframe/eager/sql_expr.rs
Normal file
@ -0,0 +1,200 @@
|
||||
use polars::error::PolarsError;
|
||||
use polars::prelude::{col, lit, DataType, Expr, LiteralValue, PolarsResult as Result, TimeUnit};
|
||||
|
||||
use sqlparser::ast::{
|
||||
ArrayElemTypeDef, BinaryOperator as SQLBinaryOperator, DataType as SQLDataType,
|
||||
Expr as SqlExpr, Function as SQLFunction, Value as SqlValue, WindowType,
|
||||
};
|
||||
|
||||
fn map_sql_polars_datatype(data_type: &SQLDataType) -> Result<DataType> {
|
||||
Ok(match data_type {
|
||||
SQLDataType::Char(_)
|
||||
| SQLDataType::Varchar(_)
|
||||
| SQLDataType::Uuid
|
||||
| SQLDataType::Clob(_)
|
||||
| SQLDataType::Text
|
||||
| SQLDataType::String(_) => DataType::String,
|
||||
SQLDataType::Float(_) => DataType::Float32,
|
||||
SQLDataType::Real => DataType::Float32,
|
||||
SQLDataType::Double => DataType::Float64,
|
||||
SQLDataType::TinyInt(_) => DataType::Int8,
|
||||
SQLDataType::UnsignedTinyInt(_) => DataType::UInt8,
|
||||
SQLDataType::SmallInt(_) => DataType::Int16,
|
||||
SQLDataType::UnsignedSmallInt(_) => DataType::UInt16,
|
||||
SQLDataType::Int(_) => DataType::Int32,
|
||||
SQLDataType::UnsignedInt(_) => DataType::UInt32,
|
||||
SQLDataType::BigInt(_) => DataType::Int64,
|
||||
SQLDataType::UnsignedBigInt(_) => DataType::UInt64,
|
||||
|
||||
SQLDataType::Boolean => DataType::Boolean,
|
||||
SQLDataType::Date => DataType::Date,
|
||||
SQLDataType::Time(_, _) => DataType::Time,
|
||||
SQLDataType::Timestamp(_, _) => DataType::Datetime(TimeUnit::Microseconds, None),
|
||||
SQLDataType::Interval => DataType::Duration(TimeUnit::Microseconds),
|
||||
SQLDataType::Array(array_type_def) => match array_type_def {
|
||||
ArrayElemTypeDef::AngleBracket(inner_type)
|
||||
| ArrayElemTypeDef::SquareBracket(inner_type) => {
|
||||
DataType::List(Box::new(map_sql_polars_datatype(inner_type)?))
|
||||
}
|
||||
_ => {
|
||||
return Err(PolarsError::ComputeError(
|
||||
"SQL Datatype Array(None) was not supported in polars-sql yet!".into(),
|
||||
))
|
||||
}
|
||||
},
|
||||
_ => {
|
||||
return Err(PolarsError::ComputeError(
|
||||
format!("SQL Datatype {data_type:?} was not supported in polars-sql yet!").into(),
|
||||
))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn cast_(expr: Expr, data_type: &SQLDataType) -> Result<Expr> {
|
||||
let polars_type = map_sql_polars_datatype(data_type)?;
|
||||
Ok(expr.cast(polars_type))
|
||||
}
|
||||
|
||||
fn binary_op_(left: Expr, right: Expr, op: &SQLBinaryOperator) -> Result<Expr> {
|
||||
Ok(match op {
|
||||
SQLBinaryOperator::Plus => left + right,
|
||||
SQLBinaryOperator::Minus => left - right,
|
||||
SQLBinaryOperator::Multiply => left * right,
|
||||
SQLBinaryOperator::Divide => left / right,
|
||||
SQLBinaryOperator::Modulo => left % right,
|
||||
SQLBinaryOperator::StringConcat => {
|
||||
left.cast(DataType::String) + right.cast(DataType::String)
|
||||
}
|
||||
SQLBinaryOperator::Gt => left.gt(right),
|
||||
SQLBinaryOperator::Lt => left.lt(right),
|
||||
SQLBinaryOperator::GtEq => left.gt_eq(right),
|
||||
SQLBinaryOperator::LtEq => left.lt_eq(right),
|
||||
SQLBinaryOperator::Eq => left.eq(right),
|
||||
SQLBinaryOperator::NotEq => left.eq(right).not(),
|
||||
SQLBinaryOperator::And => left.and(right),
|
||||
SQLBinaryOperator::Or => left.or(right),
|
||||
SQLBinaryOperator::Xor => left.xor(right),
|
||||
_ => {
|
||||
return Err(PolarsError::ComputeError(
|
||||
format!("SQL Operator {op:?} was not supported in polars-sql yet!").into(),
|
||||
))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn literal_expr(value: &SqlValue) -> Result<Expr> {
|
||||
Ok(match value {
|
||||
SqlValue::Number(s, _) => {
|
||||
// Check for existence of decimal separator dot
|
||||
if s.contains('.') {
|
||||
s.parse::<f64>().map(lit).map_err(|_| {
|
||||
PolarsError::ComputeError(format!("Can't parse literal {s:?}").into())
|
||||
})
|
||||
} else {
|
||||
s.parse::<i64>().map(lit).map_err(|_| {
|
||||
PolarsError::ComputeError(format!("Can't parse literal {s:?}").into())
|
||||
})
|
||||
}?
|
||||
}
|
||||
SqlValue::SingleQuotedString(s) => lit(s.clone()),
|
||||
SqlValue::NationalStringLiteral(s) => lit(s.clone()),
|
||||
SqlValue::HexStringLiteral(s) => lit(s.clone()),
|
||||
SqlValue::DoubleQuotedString(s) => lit(s.clone()),
|
||||
SqlValue::Boolean(b) => lit(*b),
|
||||
SqlValue::Null => Expr::Literal(LiteralValue::Null),
|
||||
_ => {
|
||||
return Err(PolarsError::ComputeError(
|
||||
format!("Parsing SQL Value {value:?} was not supported in polars-sql yet!").into(),
|
||||
))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub fn parse_sql_expr(expr: &SqlExpr) -> Result<Expr> {
|
||||
Ok(match expr {
|
||||
SqlExpr::Identifier(e) => col(&e.value),
|
||||
SqlExpr::BinaryOp { left, op, right } => {
|
||||
let left = parse_sql_expr(left)?;
|
||||
let right = parse_sql_expr(right)?;
|
||||
binary_op_(left, right, op)?
|
||||
}
|
||||
SqlExpr::Function(sql_function) => parse_sql_function(sql_function)?,
|
||||
SqlExpr::Cast {
|
||||
expr,
|
||||
data_type,
|
||||
format: _,
|
||||
} => cast_(parse_sql_expr(expr)?, data_type)?,
|
||||
SqlExpr::Nested(expr) => parse_sql_expr(expr)?,
|
||||
SqlExpr::Value(value) => literal_expr(value)?,
|
||||
_ => {
|
||||
return Err(PolarsError::ComputeError(
|
||||
format!("Expression: {expr:?} was not supported in polars-sql yet!").into(),
|
||||
))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn apply_window_spec(expr: Expr, window_type: Option<&WindowType>) -> Result<Expr> {
|
||||
Ok(match &window_type {
|
||||
Some(wtype) => match wtype {
|
||||
WindowType::WindowSpec(window_spec) => {
|
||||
// Process for simple window specification, partition by first
|
||||
let partition_by = window_spec
|
||||
.partition_by
|
||||
.iter()
|
||||
.map(parse_sql_expr)
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
expr.over(partition_by)
|
||||
// Order by and Row range may not be supported at the moment
|
||||
}
|
||||
// TODO: make NamedWindow work
|
||||
WindowType::NamedWindow(_named) => {
|
||||
return Err(PolarsError::ComputeError(
|
||||
format!("Expression: {expr:?} was not supported in polars-sql yet!").into(),
|
||||
))
|
||||
}
|
||||
},
|
||||
None => expr,
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_sql_function(sql_function: &SQLFunction) -> Result<Expr> {
|
||||
use sqlparser::ast::{FunctionArg, FunctionArgExpr};
|
||||
// Function name mostly do not have name space, so it mostly take the first args
|
||||
let function_name = sql_function.name.0[0].value.to_ascii_lowercase();
|
||||
let args = sql_function
|
||||
.args
|
||||
.iter()
|
||||
.map(|arg| match arg {
|
||||
FunctionArg::Named { arg, .. } => arg,
|
||||
FunctionArg::Unnamed(arg) => arg,
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
Ok(
|
||||
match (
|
||||
function_name.as_str(),
|
||||
args.as_slice(),
|
||||
sql_function.distinct,
|
||||
) {
|
||||
("sum", [FunctionArgExpr::Expr(expr)], false) => {
|
||||
apply_window_spec(parse_sql_expr(expr)?, sql_function.over.as_ref())?.sum()
|
||||
}
|
||||
("count", [FunctionArgExpr::Expr(expr)], false) => {
|
||||
apply_window_spec(parse_sql_expr(expr)?, sql_function.over.as_ref())?.count()
|
||||
}
|
||||
("count", [FunctionArgExpr::Expr(expr)], true) => {
|
||||
apply_window_spec(parse_sql_expr(expr)?, sql_function.over.as_ref())?.n_unique()
|
||||
}
|
||||
// Special case for wildcard args to count function.
|
||||
("count", [FunctionArgExpr::Wildcard], false) => lit(1i32).count(),
|
||||
_ => {
|
||||
return Err(PolarsError::ComputeError(
|
||||
format!(
|
||||
"Function {function_name:?} with args {args:?} was not supported in polars-sql yet!"
|
||||
)
|
||||
.into(),
|
||||
))
|
||||
}
|
||||
},
|
||||
)
|
||||
}
|
293
crates/nu_plugin_polars/src/dataframe/eager/summary.rs
Normal file
293
crates/nu_plugin_polars/src/dataframe/eager/summary.rs
Normal file
@ -0,0 +1,293 @@
|
||||
use crate::{
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
use polars::{
|
||||
chunked_array::ChunkedArray,
|
||||
prelude::{
|
||||
AnyValue, DataFrame, DataType, Float64Type, IntoSeries, NewChunkedArray,
|
||||
QuantileInterpolOptions, Series, StringType,
|
||||
},
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Summary;
|
||||
|
||||
impl PluginCommand for Summary {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars summary"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"For a dataframe, produces descriptive statistics (summary statistics) for its numeric columns."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.named(
|
||||
"quantiles",
|
||||
SyntaxShape::Table(vec![]),
|
||||
"provide optional quantiles",
|
||||
Some('q'),
|
||||
)
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "list dataframe descriptives",
|
||||
example: "[[a b]; [1 1] [1 1]] | polars into-df | polars summary",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"descriptor".to_string(),
|
||||
vec![
|
||||
Value::test_string("count"),
|
||||
Value::test_string("sum"),
|
||||
Value::test_string("mean"),
|
||||
Value::test_string("median"),
|
||||
Value::test_string("std"),
|
||||
Value::test_string("min"),
|
||||
Value::test_string("25%"),
|
||||
Value::test_string("50%"),
|
||||
Value::test_string("75%"),
|
||||
Value::test_string("max"),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"a (i64)".to_string(),
|
||||
vec![
|
||||
Value::test_float(2.0),
|
||||
Value::test_float(2.0),
|
||||
Value::test_float(1.0),
|
||||
Value::test_float(1.0),
|
||||
Value::test_float(0.0),
|
||||
Value::test_float(1.0),
|
||||
Value::test_float(1.0),
|
||||
Value::test_float(1.0),
|
||||
Value::test_float(1.0),
|
||||
Value::test_float(1.0),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"b (i64)".to_string(),
|
||||
vec![
|
||||
Value::test_float(2.0),
|
||||
Value::test_float(2.0),
|
||||
Value::test_float(1.0),
|
||||
Value::test_float(1.0),
|
||||
Value::test_float(0.0),
|
||||
Value::test_float(1.0),
|
||||
Value::test_float(1.0),
|
||||
Value::test_float(1.0),
|
||||
Value::test_float(1.0),
|
||||
Value::test_float(1.0),
|
||||
],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let quantiles: Option<Vec<Value>> = call.get_flag("quantiles")?;
|
||||
let quantiles = quantiles.map(|values| {
|
||||
values
|
||||
.iter()
|
||||
.map(|value| {
|
||||
let span = value.span();
|
||||
match value {
|
||||
Value::Float { val, .. } => {
|
||||
if (&0.0..=&1.0).contains(&val) {
|
||||
Ok(*val)
|
||||
} else {
|
||||
Err(ShellError::GenericError {
|
||||
error: "Incorrect value for quantile".into(),
|
||||
msg: "value should be between 0 and 1".into(),
|
||||
span: Some(span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})
|
||||
}
|
||||
}
|
||||
Value::Error { error, .. } => Err(*error.clone()),
|
||||
_ => Err(ShellError::GenericError {
|
||||
error: "Incorrect value for quantile".into(),
|
||||
msg: "value should be a float".into(),
|
||||
span: Some(span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
}),
|
||||
}
|
||||
})
|
||||
.collect::<Result<Vec<f64>, ShellError>>()
|
||||
});
|
||||
|
||||
let quantiles = match quantiles {
|
||||
Some(quantiles) => quantiles?,
|
||||
None => vec![0.25, 0.50, 0.75],
|
||||
};
|
||||
|
||||
let mut quantiles_labels = quantiles
|
||||
.iter()
|
||||
.map(|q| Some(format!("{}%", q * 100.0)))
|
||||
.collect::<Vec<Option<String>>>();
|
||||
let mut labels = vec![
|
||||
Some("count".to_string()),
|
||||
Some("sum".to_string()),
|
||||
Some("mean".to_string()),
|
||||
Some("median".to_string()),
|
||||
Some("std".to_string()),
|
||||
Some("min".to_string()),
|
||||
];
|
||||
labels.append(&mut quantiles_labels);
|
||||
labels.push(Some("max".to_string()));
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
|
||||
let names = ChunkedArray::<StringType>::from_slice_options("descriptor", &labels).into_series();
|
||||
|
||||
let head = std::iter::once(names);
|
||||
|
||||
let tail = df
|
||||
.as_ref()
|
||||
.get_columns()
|
||||
.iter()
|
||||
.filter(|col| !matches!(col.dtype(), &DataType::Object("object", _)))
|
||||
.map(|col| {
|
||||
let count = col.len() as f64;
|
||||
|
||||
let sum = col.sum_as_series().ok().and_then(|series| {
|
||||
series
|
||||
.cast(&DataType::Float64)
|
||||
.ok()
|
||||
.and_then(|ca| match ca.get(0) {
|
||||
Ok(AnyValue::Float64(v)) => Some(v),
|
||||
_ => None,
|
||||
})
|
||||
});
|
||||
|
||||
let mean = match col.mean_as_series().get(0) {
|
||||
Ok(AnyValue::Float64(v)) => Some(v),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let median = match col.median_as_series() {
|
||||
Ok(v) => match v.get(0) {
|
||||
Ok(AnyValue::Float64(v)) => Some(v),
|
||||
_ => None,
|
||||
},
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let std = match col.std_as_series(0) {
|
||||
Ok(v) => match v.get(0) {
|
||||
Ok(AnyValue::Float64(v)) => Some(v),
|
||||
_ => None,
|
||||
},
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let min = col.min_as_series().ok().and_then(|series| {
|
||||
series
|
||||
.cast(&DataType::Float64)
|
||||
.ok()
|
||||
.and_then(|ca| match ca.get(0) {
|
||||
Ok(AnyValue::Float64(v)) => Some(v),
|
||||
_ => None,
|
||||
})
|
||||
});
|
||||
|
||||
let mut quantiles = quantiles
|
||||
.clone()
|
||||
.into_iter()
|
||||
.map(|q| {
|
||||
col.quantile_as_series(q, QuantileInterpolOptions::default())
|
||||
.ok()
|
||||
.and_then(|ca| ca.cast(&DataType::Float64).ok())
|
||||
.and_then(|ca| match ca.get(0) {
|
||||
Ok(AnyValue::Float64(v)) => Some(v),
|
||||
_ => None,
|
||||
})
|
||||
})
|
||||
.collect::<Vec<Option<f64>>>();
|
||||
|
||||
let max = col.max_as_series().ok().and_then(|series| {
|
||||
series
|
||||
.cast(&DataType::Float64)
|
||||
.ok()
|
||||
.and_then(|ca| match ca.get(0) {
|
||||
Ok(AnyValue::Float64(v)) => Some(v),
|
||||
_ => None,
|
||||
})
|
||||
});
|
||||
|
||||
let mut descriptors = vec![Some(count), sum, mean, median, std, min];
|
||||
descriptors.append(&mut quantiles);
|
||||
descriptors.push(max);
|
||||
|
||||
let name = format!("{} ({})", col.name(), col.dtype());
|
||||
ChunkedArray::<Float64Type>::from_slice_options(&name, &descriptors).into_series()
|
||||
});
|
||||
|
||||
let res = head.chain(tail).collect::<Vec<Series>>();
|
||||
|
||||
let polars_df = DataFrame::new(res).map_err(|e| ShellError::GenericError {
|
||||
error: "Dataframe Error".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let df = NuDataFrame::new(df.from_lazy, polars_df);
|
||||
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&Summary)
|
||||
}
|
||||
}
|
162
crates/nu_plugin_polars/src/dataframe/eager/take.rs
Normal file
162
crates/nu_plugin_polars/src/dataframe/eager/take.rs
Normal file
@ -0,0 +1,162 @@
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
use polars::prelude::DataType;
|
||||
|
||||
use crate::{
|
||||
dataframe::values::Column,
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::values::NuDataFrame;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct TakeDF;
|
||||
|
||||
impl PluginCommand for TakeDF {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars take"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates new dataframe using the given indices."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"indices",
|
||||
SyntaxShape::Any,
|
||||
"list of indices used to take data",
|
||||
)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Takes selected rows from dataframe",
|
||||
example: r#"let df = ([[a b]; [4 1] [5 2] [4 3]] | polars into-df);
|
||||
let indices = ([0 2] | polars into-df);
|
||||
$df | polars take $indices"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(4), Value::test_int(4)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Takes selected rows from series",
|
||||
example: r#"let series = ([4 1 5 2 4 3] | polars into-df);
|
||||
let indices = ([0 2] | polars into-df);
|
||||
$series | polars take $indices"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![Value::test_int(4), Value::test_int(5)],
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let index_value: Value = call.req(0)?;
|
||||
let index_span = index_value.span();
|
||||
let index = NuDataFrame::try_from_value(plugin, &index_value)?.as_series(index_span)?;
|
||||
|
||||
let casted = match index.dtype() {
|
||||
DataType::UInt32 | DataType::UInt64 | DataType::Int32 | DataType::Int64 => index
|
||||
.cast(&DataType::UInt32)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error casting index list".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(index_span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
}),
|
||||
_ => Err(ShellError::GenericError {
|
||||
error: "Incorrect type".into(),
|
||||
msg: "Series with incorrect type".into(),
|
||||
span: Some(call.head),
|
||||
help: Some("Consider using a Series with type int type".into()),
|
||||
inner: vec![],
|
||||
}),
|
||||
}?;
|
||||
|
||||
let indices = casted.u32().map_err(|e| ShellError::GenericError {
|
||||
error: "Error casting index list".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(index_span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
let polars_df = df
|
||||
.to_polars()
|
||||
.take(indices)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error taking values".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let df = NuDataFrame::new(df.from_lazy, polars_df);
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&TakeDF)
|
||||
}
|
||||
}
|
87
crates/nu_plugin_polars/src/dataframe/eager/to_arrow.rs
Normal file
87
crates/nu_plugin_polars/src/dataframe/eager/to_arrow.rs
Normal file
@ -0,0 +1,87 @@
|
||||
use std::{fs::File, path::PathBuf};
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Spanned, SyntaxShape,
|
||||
Type, Value,
|
||||
};
|
||||
use polars::prelude::{IpcWriter, SerWriter};
|
||||
|
||||
use crate::PolarsPlugin;
|
||||
|
||||
use super::super::values::NuDataFrame;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ToArrow;
|
||||
|
||||
impl PluginCommand for ToArrow {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars to-arrow"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Saves dataframe to arrow file."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required("file", SyntaxShape::Filepath, "file path to save dataframe")
|
||||
.input_output_type(Type::Custom("dataframe".into()), Type::Any)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Saves dataframe to arrow file",
|
||||
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars to-arrow test.arrow",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
_engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, call, input).map_err(|e| e.into())
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let file_name: Spanned<PathBuf> = call.req(0)?;
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
|
||||
let mut file = File::create(&file_name.item).map_err(|e| ShellError::GenericError {
|
||||
error: "Error with file name".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(file_name.span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
IpcWriter::new(&mut file)
|
||||
.finish(&mut df.to_polars())
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error saving file".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(file_name.span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let file_value = Value::string(format!("saved {:?}", &file_name.item), file_name.span);
|
||||
|
||||
Ok(PipelineData::Value(
|
||||
Value::list(vec![file_value], call.head),
|
||||
None,
|
||||
))
|
||||
}
|
117
crates/nu_plugin_polars/src/dataframe/eager/to_avro.rs
Normal file
117
crates/nu_plugin_polars/src/dataframe/eager/to_avro.rs
Normal file
@ -0,0 +1,117 @@
|
||||
use std::{fs::File, path::PathBuf};
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Spanned, SyntaxShape,
|
||||
Type, Value,
|
||||
};
|
||||
use polars_io::avro::{AvroCompression, AvroWriter};
|
||||
use polars_io::SerWriter;
|
||||
|
||||
use crate::PolarsPlugin;
|
||||
|
||||
use super::super::values::NuDataFrame;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ToAvro;
|
||||
|
||||
impl PluginCommand for ToAvro {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars to-avro"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Saves dataframe to avro file."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.named(
|
||||
"compression",
|
||||
SyntaxShape::String,
|
||||
"use compression, supports deflate or snappy",
|
||||
Some('c'),
|
||||
)
|
||||
.required("file", SyntaxShape::Filepath, "file path to save dataframe")
|
||||
.input_output_type(Type::Custom("dataframe".into()), Type::Any)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Saves dataframe to avro file",
|
||||
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars to-avro test.avro",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn get_compression(call: &EvaluatedCall) -> Result<Option<AvroCompression>, ShellError> {
|
||||
if let Some((compression, span)) = call
|
||||
.get_flag_value("compression")
|
||||
.map(|e| e.as_str().map(|s| (s.to_owned(), e.span())))
|
||||
.transpose()?
|
||||
{
|
||||
match compression.as_ref() {
|
||||
"snappy" => Ok(Some(AvroCompression::Snappy)),
|
||||
"deflate" => Ok(Some(AvroCompression::Deflate)),
|
||||
_ => Err(ShellError::IncorrectValue {
|
||||
msg: "compression must be one of deflate or snappy".to_string(),
|
||||
val_span: span,
|
||||
call_span: span,
|
||||
}),
|
||||
}
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
_engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let file_name: Spanned<PathBuf> = call.req(0)?;
|
||||
let compression = get_compression(call)?;
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
|
||||
let file = File::create(&file_name.item).map_err(|e| ShellError::GenericError {
|
||||
error: "Error with file name".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(file_name.span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
AvroWriter::new(file)
|
||||
.with_compression(compression)
|
||||
.finish(&mut df.to_polars())
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error saving file".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(file_name.span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let file_value = Value::string(format!("saved {:?}", &file_name.item), file_name.span);
|
||||
|
||||
Ok(PipelineData::Value(
|
||||
Value::list(vec![file_value], call.head),
|
||||
None,
|
||||
))
|
||||
}
|
133
crates/nu_plugin_polars/src/dataframe/eager/to_csv.rs
Normal file
133
crates/nu_plugin_polars/src/dataframe/eager/to_csv.rs
Normal file
@ -0,0 +1,133 @@
|
||||
use std::{fs::File, path::PathBuf};
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Spanned, SyntaxShape,
|
||||
Type, Value,
|
||||
};
|
||||
use polars::prelude::{CsvWriter, SerWriter};
|
||||
|
||||
use crate::PolarsPlugin;
|
||||
|
||||
use super::super::values::NuDataFrame;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ToCSV;
|
||||
|
||||
impl PluginCommand for ToCSV {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars to-csv"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Saves dataframe to CSV file."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required("file", SyntaxShape::Filepath, "file path to save dataframe")
|
||||
.named(
|
||||
"delimiter",
|
||||
SyntaxShape::String,
|
||||
"file delimiter character",
|
||||
Some('d'),
|
||||
)
|
||||
.switch("no-header", "Indicates if file doesn't have header", None)
|
||||
.input_output_type(Type::Custom("dataframe".into()), Type::Any)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Saves dataframe to CSV file",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr to-csv test.csv",
|
||||
result: None,
|
||||
},
|
||||
Example {
|
||||
description: "Saves dataframe to CSV file using other delimiter",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr to-csv test.csv --delimiter '|'",
|
||||
result: None,
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
_engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, call, input).map_err(|e| e.into())
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let file_name: Spanned<PathBuf> = call.req(0)?;
|
||||
let delimiter: Option<Spanned<String>> = call.get_flag("delimiter")?;
|
||||
let no_header: bool = call.has_flag("no-header")?;
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
|
||||
let mut file = File::create(&file_name.item).map_err(|e| ShellError::GenericError {
|
||||
error: "Error with file name".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(file_name.span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let writer = CsvWriter::new(&mut file);
|
||||
|
||||
let writer = if no_header {
|
||||
writer.include_header(false)
|
||||
} else {
|
||||
writer.include_header(true)
|
||||
};
|
||||
|
||||
let mut writer = match delimiter {
|
||||
None => writer,
|
||||
Some(d) => {
|
||||
if d.item.len() != 1 {
|
||||
return Err(ShellError::GenericError {
|
||||
error: "Incorrect delimiter".into(),
|
||||
msg: "Delimiter has to be one char".into(),
|
||||
span: Some(d.span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
});
|
||||
} else {
|
||||
let delimiter = match d.item.chars().next() {
|
||||
Some(d) => d as u8,
|
||||
None => unreachable!(),
|
||||
};
|
||||
|
||||
writer.with_separator(delimiter)
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
writer
|
||||
.finish(&mut df.to_polars())
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error writing to file".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(file_name.span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let file_value = Value::string(format!("saved {:?}", &file_name.item), file_name.span);
|
||||
|
||||
Ok(PipelineData::Value(
|
||||
Value::list(vec![file_value], call.head),
|
||||
None,
|
||||
))
|
||||
}
|
201
crates/nu_plugin_polars/src/dataframe/eager/to_df.rs
Normal file
201
crates/nu_plugin_polars/src/dataframe/eager/to_df.rs
Normal file
@ -0,0 +1,201 @@
|
||||
use crate::{
|
||||
dataframe::values::NuSchema,
|
||||
values::{to_pipeline_data, Column, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::values::NuDataFrame;
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value,
|
||||
};
|
||||
use polars::{
|
||||
prelude::{AnyValue, DataType, Field, NamedFrom},
|
||||
series::Series,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ToDataFrame;
|
||||
|
||||
impl PluginCommand for ToDataFrame {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars into-df"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Converts a list, table or record into a dataframe."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.named(
|
||||
"schema",
|
||||
SyntaxShape::Record(vec![]),
|
||||
r#"Polars Schema in format [{name: str}]. CSV, JSON, and JSONL files"#,
|
||||
Some('s'),
|
||||
)
|
||||
.input_output_type(Type::Any, Type::Custom("dataframe".into()))
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Takes a dictionary and creates a dataframe",
|
||||
example: "[[a b];[1 2] [3 4]] | polars into-df",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Takes a list of tables and creates a dataframe",
|
||||
example: "[[1 2 a] [3 4 b] [5 6 c]] | polars into-df",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"0".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3), Value::test_int(5)],
|
||||
),
|
||||
Column::new(
|
||||
"1".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4), Value::test_int(6)],
|
||||
),
|
||||
Column::new(
|
||||
"2".to_string(),
|
||||
vec![
|
||||
Value::test_string("a"),
|
||||
Value::test_string("b"),
|
||||
Value::test_string("c"),
|
||||
],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Takes a list and creates a dataframe",
|
||||
example: "[a b c] | polars into-df",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
Value::test_string("a"),
|
||||
Value::test_string("b"),
|
||||
Value::test_string("c"),
|
||||
],
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Takes a list of booleans and creates a dataframe",
|
||||
example: "[true true false] | polars into-df",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(false),
|
||||
],
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Convert to a dataframe and provide a schema",
|
||||
example: "{a: 1, b: {a: [1 2 3]}, c: [a b c]}| polars into-df -s {a: u8, b: {a: list<u64>}, c: list<str>}",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_series_vec(vec![
|
||||
Series::new("a", &[1u8]),
|
||||
{
|
||||
let dtype = DataType::Struct(vec![Field::new("a", DataType::List(Box::new(DataType::UInt64)))]);
|
||||
let vals = vec![AnyValue::StructOwned(
|
||||
Box::new((vec![AnyValue::List(Series::new("a", &[1u64, 2, 3]))], vec![Field::new("a", DataType::String)]))); 1];
|
||||
Series::from_any_values_and_dtype("b", &vals, &dtype, false)
|
||||
.expect("Struct series should not fail")
|
||||
},
|
||||
{
|
||||
let dtype = DataType::List(Box::new(DataType::String));
|
||||
let vals = vec![AnyValue::List(Series::new("c", &["a", "b", "c"]))];
|
||||
Series::from_any_values_and_dtype("c", &vals, &dtype, false)
|
||||
.expect("List series should not fail")
|
||||
}
|
||||
], Span::test_data())
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Convert to a dataframe and provide a schema that adds a new column",
|
||||
example: r#"[[a b]; [1 "foo"] [2 "bar"]] | polars into-df -s {a: u8, b:str, c:i64} | polars fill-null 3"#,
|
||||
result: Some(NuDataFrame::try_from_series_vec(vec![
|
||||
Series::new("a", [1u8, 2]),
|
||||
Series::new("b", ["foo", "bar"]),
|
||||
Series::new("c", [3i64, 3]),
|
||||
], Span::test_data())
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let maybe_schema = call
|
||||
.get_flag("schema")?
|
||||
.map(|schema| NuSchema::try_from(&schema))
|
||||
.transpose()?;
|
||||
|
||||
let df = NuDataFrame::try_from_iter(plugin, input.into_iter(), maybe_schema.clone())?;
|
||||
to_pipeline_data(plugin, engine, call.head, df).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
use super::*;
|
||||
use nu_protocol::ShellError;
|
||||
|
||||
#[test]
|
||||
fn test_into_df() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&ToDataFrame)
|
||||
}
|
||||
}
|
89
crates/nu_plugin_polars/src/dataframe/eager/to_json_lines.rs
Normal file
89
crates/nu_plugin_polars/src/dataframe/eager/to_json_lines.rs
Normal file
@ -0,0 +1,89 @@
|
||||
use std::{fs::File, io::BufWriter, path::PathBuf};
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Spanned, SyntaxShape,
|
||||
Type, Value,
|
||||
};
|
||||
use polars::prelude::{JsonWriter, SerWriter};
|
||||
|
||||
use crate::PolarsPlugin;
|
||||
|
||||
use super::super::values::NuDataFrame;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ToJsonLines;
|
||||
|
||||
impl PluginCommand for ToJsonLines {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars to-jsonl"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Saves dataframe to a JSON lines file."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required("file", SyntaxShape::Filepath, "file path to save dataframe")
|
||||
.input_output_type(Type::Custom("dataframe".into()), Type::Any)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Saves dataframe to JSON lines file",
|
||||
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars to-jsonl test.jsonl",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
_engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let file_name: Spanned<PathBuf> = call.req(0)?;
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
|
||||
let file = File::create(&file_name.item).map_err(|e| ShellError::GenericError {
|
||||
error: "Error with file name".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(file_name.span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
let buf_writer = BufWriter::new(file);
|
||||
|
||||
JsonWriter::new(buf_writer)
|
||||
.finish(&mut df.to_polars())
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error saving file".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(file_name.span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let file_value = Value::string(format!("saved {:?}", &file_name.item), file_name.span);
|
||||
|
||||
Ok(PipelineData::Value(
|
||||
Value::list(vec![file_value], call.head),
|
||||
None,
|
||||
))
|
||||
}
|
144
crates/nu_plugin_polars/src/dataframe/eager/to_nu.rs
Normal file
144
crates/nu_plugin_polars/src/dataframe/eager/to_nu.rs
Normal file
@ -0,0 +1,144 @@
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
record, Category, Example, LabeledError, PipelineData, ShellError, Signature, Span,
|
||||
SyntaxShape, Type, Value,
|
||||
};
|
||||
|
||||
use crate::{dataframe::values::NuExpression, values::CustomValueSupport, PolarsPlugin};
|
||||
|
||||
use super::super::values::NuDataFrame;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ToNu;
|
||||
|
||||
impl PluginCommand for ToNu {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars into-nu"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Converts a dataframe or an expression into into nushell value for access and exploration."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.named(
|
||||
"rows",
|
||||
SyntaxShape::Number,
|
||||
"number of rows to be shown",
|
||||
Some('n'),
|
||||
)
|
||||
.switch("tail", "shows tail rows", Some('t'))
|
||||
.input_output_types(vec![
|
||||
(Type::Custom("expression".into()), Type::Any),
|
||||
(Type::Custom("dataframe".into()), Type::Table(vec![])),
|
||||
])
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
let rec_1 = Value::test_record(record! {
|
||||
"index" => Value::test_int(0),
|
||||
"a" => Value::test_int(1),
|
||||
"b" => Value::test_int(2),
|
||||
});
|
||||
let rec_2 = Value::test_record(record! {
|
||||
"index" => Value::test_int(1),
|
||||
"a" => Value::test_int(3),
|
||||
"b" => Value::test_int(4),
|
||||
});
|
||||
let rec_3 = Value::test_record(record! {
|
||||
"index" => Value::test_int(2),
|
||||
"a" => Value::test_int(3),
|
||||
"b" => Value::test_int(4),
|
||||
});
|
||||
|
||||
vec![
|
||||
Example {
|
||||
description: "Shows head rows from dataframe",
|
||||
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars into-nu",
|
||||
result: Some(Value::list(vec![rec_1, rec_2], Span::test_data())),
|
||||
},
|
||||
Example {
|
||||
description: "Shows tail rows from dataframe",
|
||||
example:
|
||||
"[[a b]; [1 2] [5 6] [3 4]] | polars into-df | polars into-nu --tail --rows 1",
|
||||
result: Some(Value::list(vec![rec_3], Span::test_data())),
|
||||
},
|
||||
Example {
|
||||
description: "Convert a col expression into a nushell value",
|
||||
example: "polars col a | polars into-nu",
|
||||
result: Some(Value::test_record(record! {
|
||||
"expr" => Value::test_string("column"),
|
||||
"value" => Value::test_string("a"),
|
||||
})),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
_engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let value = input.into_value(call.head);
|
||||
if NuDataFrame::can_downcast(&value) {
|
||||
dataframe_command(plugin, call, value)
|
||||
} else {
|
||||
expression_command(plugin, call, value)
|
||||
}
|
||||
.map_err(|e| e.into())
|
||||
}
|
||||
}
|
||||
|
||||
fn dataframe_command(
|
||||
plugin: &PolarsPlugin,
|
||||
call: &EvaluatedCall,
|
||||
input: Value,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let rows: Option<usize> = call.get_flag("rows")?;
|
||||
let tail: bool = call.has_flag("tail")?;
|
||||
|
||||
let df = NuDataFrame::try_from_value(plugin, &input)?;
|
||||
|
||||
let values = if tail {
|
||||
df.tail(rows, call.head)?
|
||||
} else {
|
||||
// if rows is specified, return those rows, otherwise return everything
|
||||
if rows.is_some() {
|
||||
df.head(rows, call.head)?
|
||||
} else {
|
||||
df.head(Some(df.height()), call.head)?
|
||||
}
|
||||
};
|
||||
|
||||
let value = Value::list(values, call.head);
|
||||
|
||||
Ok(PipelineData::Value(value, None))
|
||||
}
|
||||
|
||||
fn expression_command(
|
||||
plugin: &PolarsPlugin,
|
||||
call: &EvaluatedCall,
|
||||
input: Value,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let expr = NuExpression::try_from_value(plugin, &input)?;
|
||||
let value = expr.to_value(call.head)?;
|
||||
|
||||
Ok(PipelineData::Value(value, None))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&ToNu)
|
||||
}
|
||||
}
|
87
crates/nu_plugin_polars/src/dataframe/eager/to_parquet.rs
Normal file
87
crates/nu_plugin_polars/src/dataframe/eager/to_parquet.rs
Normal file
@ -0,0 +1,87 @@
|
||||
use std::{fs::File, path::PathBuf};
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Spanned, SyntaxShape,
|
||||
Type, Value,
|
||||
};
|
||||
use polars::prelude::ParquetWriter;
|
||||
|
||||
use crate::PolarsPlugin;
|
||||
|
||||
use super::super::values::NuDataFrame;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ToParquet;
|
||||
|
||||
impl PluginCommand for ToParquet {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars to-parquet"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Saves dataframe to parquet file."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required("file", SyntaxShape::Filepath, "file path to save dataframe")
|
||||
.input_output_type(Type::Custom("dataframe".into()), Type::Any)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Saves dataframe to parquet file",
|
||||
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars to-parquet test.parquet",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
_engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let file_name: Spanned<PathBuf> = call.req(0)?;
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
|
||||
let file = File::create(&file_name.item).map_err(|e| ShellError::GenericError {
|
||||
error: "Error with file name".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(file_name.span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
let mut polars_df = df.to_polars();
|
||||
ParquetWriter::new(file)
|
||||
.finish(&mut polars_df)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error saving file".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(file_name.span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let file_value = Value::string(format!("saved {:?}", &file_name.item), file_name.span);
|
||||
|
||||
Ok(PipelineData::Value(
|
||||
Value::list(vec![file_value], call.head),
|
||||
None,
|
||||
))
|
||||
}
|
195
crates/nu_plugin_polars/src/dataframe/eager/with_column.rs
Normal file
195
crates/nu_plugin_polars/src/dataframe/eager/with_column.rs
Normal file
@ -0,0 +1,195 @@
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
use crate::{
|
||||
dataframe::values::{NuExpression, NuLazyFrame},
|
||||
values::{to_pipeline_data, CustomValueSupport, PolarsPluginObject},
|
||||
PolarsPlugin,
|
||||
};
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct WithColumn;
|
||||
|
||||
impl PluginCommand for WithColumn {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars with-column"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Adds a series to the dataframe."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.named("name", SyntaxShape::String, "new column name", Some('n'))
|
||||
.rest(
|
||||
"series or expressions",
|
||||
SyntaxShape::Any,
|
||||
"series to be added or expressions used to define the new columns",
|
||||
)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe or lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Adds a series to the dataframe",
|
||||
example: r#"[[a b]; [1 2] [3 4]]
|
||||
| polars into-df
|
||||
| polars with-column ([5 6] | polars into-df) --name c"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
Column::new(
|
||||
"c".to_string(),
|
||||
vec![Value::test_int(5), Value::test_int(6)],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Adds a series to the dataframe",
|
||||
example: r#"[[a b]; [1 2] [3 4]]
|
||||
| polars into-lazy
|
||||
| polars with-column [
|
||||
((polars col a) * 2 | polars as "c")
|
||||
((polars col a) * 3 | polars as "d")
|
||||
]
|
||||
| polars collect"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
Column::new(
|
||||
"c".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(6)],
|
||||
),
|
||||
Column::new(
|
||||
"d".to_string(),
|
||||
vec![Value::test_int(3), Value::test_int(9)],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let value = input.into_value(call.head);
|
||||
match PolarsPluginObject::try_from_value(plugin, &value)? {
|
||||
PolarsPluginObject::NuDataFrame(df) => command_eager(plugin, engine, call, df),
|
||||
PolarsPluginObject::NuLazyFrame(lazy) => command_lazy(plugin, engine, call, lazy),
|
||||
_ => Err(ShellError::CantConvert {
|
||||
to_type: "lazy or eager dataframe".into(),
|
||||
from_type: value.get_type().to_string(),
|
||||
span: value.span(),
|
||||
help: None,
|
||||
}),
|
||||
}
|
||||
.map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command_eager(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
df: NuDataFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let new_column: Value = call.req(0)?;
|
||||
let column_span = new_column.span();
|
||||
|
||||
if NuExpression::can_downcast(&new_column) {
|
||||
let vals: Vec<Value> = call.rest(0)?;
|
||||
let value = Value::list(vals, call.head);
|
||||
let expressions = NuExpression::extract_exprs(plugin, value)?;
|
||||
let lazy = NuLazyFrame::new(true, df.lazy().to_polars().with_columns(&expressions));
|
||||
let df = lazy.collect(call.head)?;
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
} else {
|
||||
let mut other = NuDataFrame::try_from_value(plugin, &new_column)?.as_series(column_span)?;
|
||||
|
||||
let name = match call.get_flag::<String>("name")? {
|
||||
Some(name) => name,
|
||||
None => other.name().to_string(),
|
||||
};
|
||||
|
||||
let series = other.rename(&name).clone();
|
||||
|
||||
let mut polars_df = df.to_polars();
|
||||
polars_df
|
||||
.with_column(series)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error adding column to dataframe".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(column_span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let df = NuDataFrame::new(df.from_lazy, polars_df);
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
}
|
||||
|
||||
fn command_lazy(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
lazy: NuLazyFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let vals: Vec<Value> = call.rest(0)?;
|
||||
let value = Value::list(vals, call.head);
|
||||
let expressions = NuExpression::extract_exprs(plugin, value)?;
|
||||
let lazy: NuLazyFrame = lazy.to_polars().with_columns(&expressions).into();
|
||||
to_pipeline_data(plugin, engine, call.head, lazy)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&WithColumn)
|
||||
}
|
||||
}
|
88
crates/nu_plugin_polars/src/dataframe/expressions/alias.rs
Normal file
88
crates/nu_plugin_polars/src/dataframe/expressions/alias.rs
Normal file
@ -0,0 +1,88 @@
|
||||
use crate::{
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::values::NuExpression;
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
record, Category, Example, LabeledError, PipelineData, Signature, SyntaxShape, Type, Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ExprAlias;
|
||||
|
||||
impl PluginCommand for ExprAlias {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars as"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates an alias expression."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"Alias name",
|
||||
SyntaxShape::String,
|
||||
"Alias name for the expression",
|
||||
)
|
||||
.input_output_type(
|
||||
Type::Custom("expression".into()),
|
||||
Type::Custom("expression".into()),
|
||||
)
|
||||
.category(Category::Custom("expression".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Creates and alias expression",
|
||||
example: "polars col a | polars as new_a | polars into-nu",
|
||||
result: {
|
||||
let record = Value::test_record(record! {
|
||||
"expr" => Value::test_record(record! {
|
||||
"expr" => Value::test_string("column"),
|
||||
"value" => Value::test_string("a"),
|
||||
}),
|
||||
"alias" => Value::test_string("new_a"),
|
||||
});
|
||||
|
||||
Some(record)
|
||||
},
|
||||
}]
|
||||
}
|
||||
|
||||
fn search_terms(&self) -> Vec<&str> {
|
||||
vec!["aka", "abbr", "otherwise"]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let alias: String = call.req(0)?;
|
||||
|
||||
let expr = NuExpression::try_from_pipeline(plugin, input, call.head)?;
|
||||
let expr: NuExpression = expr.to_polars().alias(alias.as_str()).into();
|
||||
|
||||
to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), nu_protocol::ShellError> {
|
||||
test_polars_plugin_command(&ExprAlias)
|
||||
}
|
||||
}
|
@ -0,0 +1,79 @@
|
||||
use crate::{
|
||||
dataframe::values::{Column, NuDataFrame, NuExpression},
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value,
|
||||
};
|
||||
use polars::prelude::arg_where;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ExprArgWhere;
|
||||
|
||||
impl PluginCommand for ExprArgWhere {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars arg-where"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates an expression that returns the arguments where expression is true."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required("column name", SyntaxShape::Any, "Expression to evaluate")
|
||||
.input_output_type(Type::Any, Type::Custom("expression".into()))
|
||||
.category(Category::Custom("expression".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Return a dataframe where the value match the expression",
|
||||
example: "let df = ([[a b]; [one 1] [two 2] [three 3]] | polars into-df);
|
||||
$df | polars select (polars arg-where ((polars col b) >= 2) | polars as b_arg)",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"b_arg".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(2)],
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn search_terms(&self) -> Vec<&str> {
|
||||
vec!["condition", "match", "if"]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
_input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let value: Value = call.req(0)?;
|
||||
let expr = NuExpression::try_from_value(plugin, &value)?;
|
||||
let expr: NuExpression = arg_where(expr.to_polars()).into();
|
||||
to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), nu_protocol::ShellError> {
|
||||
test_polars_plugin_command(&ExprArgWhere)
|
||||
}
|
||||
}
|
70
crates/nu_plugin_polars/src/dataframe/expressions/col.rs
Normal file
70
crates/nu_plugin_polars/src/dataframe/expressions/col.rs
Normal file
@ -0,0 +1,70 @@
|
||||
use crate::{dataframe::values::NuExpression, values::to_pipeline_data, PolarsPlugin};
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
record, Category, Example, LabeledError, PipelineData, Signature, SyntaxShape, Type, Value,
|
||||
};
|
||||
use polars::prelude::col;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ExprCol;
|
||||
|
||||
impl PluginCommand for ExprCol {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars col"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates a named column expression."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"column name",
|
||||
SyntaxShape::String,
|
||||
"Name of column to be used",
|
||||
)
|
||||
.input_output_type(Type::Any, Type::Custom("expression".into()))
|
||||
.category(Category::Custom("expression".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Creates a named column expression and converts it to a nu object",
|
||||
example: "polars col a | polars into-nu",
|
||||
result: Some(Value::test_record(record! {
|
||||
"expr" => Value::test_string("column"),
|
||||
"value" => Value::test_string("a"),
|
||||
})),
|
||||
}]
|
||||
}
|
||||
|
||||
fn search_terms(&self) -> Vec<&str> {
|
||||
vec!["create"]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
_input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let name: String = call.req(0)?;
|
||||
let expr: NuExpression = col(name.as_str()).into();
|
||||
to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), nu_protocol::ShellError> {
|
||||
test_polars_plugin_command(&ExprCol)
|
||||
}
|
||||
}
|
109
crates/nu_plugin_polars/src/dataframe/expressions/concat_str.rs
Normal file
109
crates/nu_plugin_polars/src/dataframe/expressions/concat_str.rs
Normal file
@ -0,0 +1,109 @@
|
||||
use crate::{
|
||||
dataframe::values::{Column, NuDataFrame, NuExpression},
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value,
|
||||
};
|
||||
use polars::prelude::concat_str;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ExprConcatStr;
|
||||
|
||||
impl PluginCommand for ExprConcatStr {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars concat-str"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates a concat string expression."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"separator",
|
||||
SyntaxShape::String,
|
||||
"Separator used during the concatenation",
|
||||
)
|
||||
.required(
|
||||
"concat expressions",
|
||||
SyntaxShape::List(Box::new(SyntaxShape::Any)),
|
||||
"Expression(s) that define the string concatenation",
|
||||
)
|
||||
.input_output_type(Type::Any, Type::Custom("expression".into()))
|
||||
.category(Category::Custom("expression".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Creates a concat string expression",
|
||||
example: r#"let df = ([[a b c]; [one two 1] [three four 2]] | polars into-df);
|
||||
$df | polars with-column ((polars concat-str "-" [(polars col a) (polars col b) ((polars col c) * 2)]) | polars as concat)"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_string("one"), Value::test_string("three")],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_string("two"), Value::test_string("four")],
|
||||
),
|
||||
Column::new(
|
||||
"c".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(2)],
|
||||
),
|
||||
Column::new(
|
||||
"concat".to_string(),
|
||||
vec![
|
||||
Value::test_string("one-two-2"),
|
||||
Value::test_string("three-four-4"),
|
||||
],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn search_terms(&self) -> Vec<&str> {
|
||||
vec!["join", "connect", "update"]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
_input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let separator: String = call.req(0)?;
|
||||
let value: Value = call.req(1)?;
|
||||
|
||||
let expressions = NuExpression::extract_exprs(plugin, value)?;
|
||||
let expr: NuExpression = concat_str(expressions, &separator, false).into();
|
||||
|
||||
to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), nu_protocol::ShellError> {
|
||||
test_polars_plugin_command(&ExprConcatStr)
|
||||
}
|
||||
}
|
165
crates/nu_plugin_polars/src/dataframe/expressions/datepart.rs
Normal file
165
crates/nu_plugin_polars/src/dataframe/expressions/datepart.rs
Normal file
@ -0,0 +1,165 @@
|
||||
use super::super::values::NuExpression;
|
||||
|
||||
use crate::{
|
||||
dataframe::values::{Column, NuDataFrame},
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
use chrono::{DateTime, FixedOffset};
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Spanned,
|
||||
SyntaxShape, Type, Value,
|
||||
};
|
||||
use polars::{
|
||||
datatypes::{DataType, TimeUnit},
|
||||
prelude::NamedFrom,
|
||||
series::Series,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ExprDatePart;
|
||||
|
||||
impl PluginCommand for ExprDatePart {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars datepart"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates an expression for capturing the specified datepart in a column."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"Datepart name",
|
||||
SyntaxShape::String,
|
||||
"Part of the date to capture. Possible values are year, quarter, month, week, weekday, day, hour, minute, second, millisecond, microsecond, nanosecond",
|
||||
)
|
||||
.input_output_type(
|
||||
Type::Custom("expression".into()),
|
||||
Type::Custom("expression".into()),
|
||||
)
|
||||
.category(Category::Custom("expression".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
let dt = DateTime::<FixedOffset>::parse_from_str(
|
||||
"2021-12-30T01:02:03.123456789 +0000",
|
||||
"%Y-%m-%dT%H:%M:%S.%9f %z",
|
||||
)
|
||||
.expect("date calculation should not fail in test");
|
||||
vec![
|
||||
Example {
|
||||
description: "Creates an expression to capture the year date part",
|
||||
example: r#"[["2021-12-30T01:02:03.123456789"]] | polars into-df | polars as-datetime "%Y-%m-%dT%H:%M:%S.%9f" | polars with-column [(polars col datetime | polars datepart year | polars as datetime_year )]"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("datetime".to_string(), vec![Value::test_date(dt)]),
|
||||
Column::new("datetime_year".to_string(), vec![Value::test_int(2021)]),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Creates an expression to capture multiple date parts",
|
||||
example: r#"[["2021-12-30T01:02:03.123456789"]] | polars into-df | polars as-datetime "%Y-%m-%dT%H:%M:%S.%9f" |
|
||||
polars with-column [ (polars col datetime | polars datepart year | polars as datetime_year ),
|
||||
(polars col datetime | polars datepart month | polars as datetime_month ),
|
||||
(polars col datetime | polars datepart day | polars as datetime_day ),
|
||||
(polars col datetime | polars datepart hour | polars as datetime_hour ),
|
||||
(polars col datetime | polars datepart minute | polars as datetime_minute ),
|
||||
(polars col datetime | polars datepart second | polars as datetime_second ),
|
||||
(polars col datetime | polars datepart nanosecond | polars as datetime_ns ) ]"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_series_vec(
|
||||
vec![
|
||||
Series::new("datetime", &[dt.timestamp_nanos_opt()])
|
||||
.cast(&DataType::Datetime(TimeUnit::Nanoseconds, None))
|
||||
.expect("Error casting to datetime type"),
|
||||
Series::new("datetime_year", &[2021_i64]), // i32 was coerced to i64
|
||||
Series::new("datetime_month", &[12_i8]),
|
||||
Series::new("datetime_day", &[30_i8]),
|
||||
Series::new("datetime_hour", &[1_i8]),
|
||||
Series::new("datetime_minute", &[2_i8]),
|
||||
Series::new("datetime_second", &[3_i8]),
|
||||
Series::new("datetime_ns", &[123456789_i64]), // i32 was coerced to i64
|
||||
],
|
||||
Span::test_data(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn search_terms(&self) -> Vec<&str> {
|
||||
vec![
|
||||
"year",
|
||||
"month",
|
||||
"week",
|
||||
"weekday",
|
||||
"quarter",
|
||||
"day",
|
||||
"hour",
|
||||
"minute",
|
||||
"second",
|
||||
"millisecond",
|
||||
"microsecond",
|
||||
"nanosecond",
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let part: Spanned<String> = call.req(0)?;
|
||||
|
||||
let expr = NuExpression::try_from_pipeline(plugin, input, call.head)?;
|
||||
let expr_dt = expr.to_polars().dt();
|
||||
let expr: NuExpression = match part.item.as_str() {
|
||||
"year" => expr_dt.year(),
|
||||
"quarter" => expr_dt.quarter(),
|
||||
"month" => expr_dt.month(),
|
||||
"week" => expr_dt.week(),
|
||||
"day" => expr_dt.day(),
|
||||
"hour" => expr_dt.hour(),
|
||||
"minute" => expr_dt.minute(),
|
||||
"second" => expr_dt.second(),
|
||||
"millisecond" => expr_dt.millisecond(),
|
||||
"microsecond" => expr_dt.microsecond(),
|
||||
"nanosecond" => expr_dt.nanosecond(),
|
||||
_ => {
|
||||
return Err(LabeledError::from(ShellError::UnsupportedInput {
|
||||
msg: format!("{} is not a valid datepart, expected one of year, month, day, hour, minute, second, millisecond, microsecond, nanosecond", part.item),
|
||||
input: "value originates from here".to_string(),
|
||||
msg_span: call.head,
|
||||
input_span: part.span,
|
||||
}))
|
||||
}
|
||||
}.into();
|
||||
to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&ExprDatePart)
|
||||
}
|
||||
}
|
@ -0,0 +1,645 @@
|
||||
/// Definition of multiple Expression commands using a macro rule
|
||||
/// All of these expressions have an identical body and only require
|
||||
/// to have a change in the name, description and expression function
|
||||
use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame};
|
||||
use crate::values::{to_pipeline_data, CustomValueSupport};
|
||||
use crate::PolarsPlugin;
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
|
||||
};
|
||||
|
||||
// The structs defined in this file are structs that form part of other commands
|
||||
// since they share a similar name
|
||||
macro_rules! expr_command {
|
||||
($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident) => {
|
||||
#[derive(Clone)]
|
||||
pub struct $command;
|
||||
|
||||
impl PluginCommand for $command {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
$name
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
$desc
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.usage($desc)
|
||||
.input_output_type(
|
||||
Type::Custom("expression".into()),
|
||||
Type::Custom("expression".into()),
|
||||
)
|
||||
.category(Category::Custom("expression".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
$examples
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let expr = NuExpression::try_from_pipeline(plugin, input, call.head)
|
||||
.map_err(LabeledError::from)?;
|
||||
let expr: NuExpression = expr.to_polars().$func().into();
|
||||
to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod $test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&$command)
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident, $ddof: expr) => {
|
||||
#[derive(Clone)]
|
||||
pub struct $command;
|
||||
|
||||
impl PluginCommand for $command {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.usage($desc)
|
||||
.input_output_type(
|
||||
Type::Custom("expression".into()),
|
||||
Type::Custom("expression".into()),
|
||||
)
|
||||
.category(Category::Custom("expression".into()))
|
||||
.plugin_examples($examples)
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
_plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let expr = NuExpression::try_from_pipeline(input, call.head)
|
||||
.map_err(LabeledError::from)?;
|
||||
let expr: NuExpression = expr.into_polars().$func($ddof).into();
|
||||
to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod $test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&$command)
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// The structs defined in this file are structs that form part of other commands
|
||||
// since they share a similar name
|
||||
macro_rules! lazy_expr_command {
|
||||
($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident) => {
|
||||
#[derive(Clone)]
|
||||
pub struct $command;
|
||||
|
||||
impl PluginCommand for $command {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
$name
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
$desc
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.usage($desc)
|
||||
.input_output_types(vec![
|
||||
(
|
||||
Type::Custom("expression".into()),
|
||||
Type::Custom("expression".into()),
|
||||
),
|
||||
(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
),
|
||||
])
|
||||
.category(Category::Custom("expression".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
$examples
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let value = input.into_value(call.head);
|
||||
if NuDataFrame::can_downcast(&value) || NuLazyFrame::can_downcast(&value) {
|
||||
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)
|
||||
.map_err(LabeledError::from)?;
|
||||
let lazy = NuLazyFrame::new(
|
||||
lazy.from_eager,
|
||||
lazy.to_polars()
|
||||
.$func()
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Dataframe Error".into(),
|
||||
msg: e.to_string(),
|
||||
help: None,
|
||||
span: None,
|
||||
inner: vec![],
|
||||
})
|
||||
.map_err(LabeledError::from)?,
|
||||
);
|
||||
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
|
||||
} else {
|
||||
let expr =
|
||||
NuExpression::try_from_value(plugin, &value).map_err(LabeledError::from)?;
|
||||
let expr: NuExpression = expr.to_polars().$func().into();
|
||||
to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod $test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&$command)
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident, $ddof: expr) => {
|
||||
#[derive(Clone)]
|
||||
pub struct $command;
|
||||
|
||||
impl PluginCommand for $command {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
$name
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
$desc
|
||||
}
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_output_types(vec![
|
||||
(
|
||||
Type::Custom("expression".into()),
|
||||
Type::Custom("expression".into()),
|
||||
),
|
||||
(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
),
|
||||
])
|
||||
.category(Category::Custom("expression".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
$examples
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let value = input.into_value(call.head);
|
||||
if NuDataFrame::can_downcast(&value) || NuLazyFrame::can_downcast(&value) {
|
||||
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)
|
||||
.map_err(LabeledError::from)?;
|
||||
let lazy = NuLazyFrame::new(
|
||||
lazy.from_eager,
|
||||
lazy.to_polars()
|
||||
.$func($ddof)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Dataframe Error".into(),
|
||||
msg: e.to_string(),
|
||||
help: None,
|
||||
span: None,
|
||||
inner: vec![],
|
||||
})
|
||||
.map_err(LabeledError::from)?,
|
||||
);
|
||||
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
|
||||
} else {
|
||||
let expr = NuExpression::try_from_value(plugin, &value)?;
|
||||
let expr: NuExpression = expr.to_polars().$func($ddof).into();
|
||||
to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod $test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&$command)
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// ExprList command
|
||||
// Expands to a command definition for a list expression
|
||||
expr_command!(
|
||||
ExprList,
|
||||
"polars implode",
|
||||
"Aggregates a group to a Series.",
|
||||
vec![Example {
|
||||
description: "",
|
||||
example: "",
|
||||
result: None,
|
||||
}],
|
||||
implode,
|
||||
test_implode
|
||||
);
|
||||
|
||||
// ExprAggGroups command
|
||||
// Expands to a command definition for a agg groups expression
|
||||
expr_command!(
|
||||
ExprAggGroups,
|
||||
"polars agg-groups",
|
||||
"Creates an agg_groups expression.",
|
||||
vec![Example {
|
||||
description: "",
|
||||
example: "",
|
||||
result: None,
|
||||
}],
|
||||
agg_groups,
|
||||
test_groups
|
||||
);
|
||||
|
||||
// ExprCount command
|
||||
// Expands to a command definition for a count expression
|
||||
expr_command!(
|
||||
ExprCount,
|
||||
"polars count",
|
||||
"Creates a count expression.",
|
||||
vec![Example {
|
||||
description: "",
|
||||
example: "",
|
||||
result: None,
|
||||
}],
|
||||
count,
|
||||
test_count
|
||||
);
|
||||
|
||||
// ExprNot command
|
||||
// Expands to a command definition for a not expression
|
||||
expr_command!(
|
||||
ExprNot,
|
||||
"polars expr-not",
|
||||
"Creates a not expression.",
|
||||
vec![Example {
|
||||
description: "Creates a not expression",
|
||||
example: "(polars col a) > 2) | polars expr-not",
|
||||
result: None,
|
||||
},],
|
||||
not,
|
||||
test_not
|
||||
);
|
||||
|
||||
// ExprMax command
|
||||
// Expands to a command definition for max aggregation
|
||||
lazy_expr_command!(
|
||||
ExprMax,
|
||||
"polars max",
|
||||
"Creates a max expression or aggregates columns to their max value.",
|
||||
vec![
|
||||
Example {
|
||||
description: "Max value from columns in a dataframe",
|
||||
example: "[[a b]; [6 2] [1 4] [4 1]] | polars into-df | polars max",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("a".to_string(), vec![Value::test_int(6)],),
|
||||
Column::new("b".to_string(), vec![Value::test_int(4)],),
|
||||
],
|
||||
None
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Max aggregation for a group-by",
|
||||
example: r#"[[a b]; [one 2] [one 4] [two 1]]
|
||||
| polars into-df
|
||||
| polars group-by a
|
||||
| polars agg (polars col b | polars max)"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_string("one"), Value::test_string("two")],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(4), Value::test_int(1)],
|
||||
),
|
||||
],
|
||||
None
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
],
|
||||
max,
|
||||
test_max
|
||||
);
|
||||
|
||||
// ExprMin command
|
||||
// Expands to a command definition for min aggregation
|
||||
lazy_expr_command!(
|
||||
ExprMin,
|
||||
"polars min",
|
||||
"Creates a min expression or aggregates columns to their min value.",
|
||||
vec![
|
||||
Example {
|
||||
description: "Min value from columns in a dataframe",
|
||||
example: "[[a b]; [6 2] [1 4] [4 1]] | polars into-df | polars min",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("a".to_string(), vec![Value::test_int(1)],),
|
||||
Column::new("b".to_string(), vec![Value::test_int(1)],),
|
||||
],
|
||||
None
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Min aggregation for a group-by",
|
||||
example: r#"[[a b]; [one 2] [one 4] [two 1]]
|
||||
| polars into-df
|
||||
| polars group-by a
|
||||
| polars agg (polars col b | polars min)"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_string("one"), Value::test_string("two")],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(1)],
|
||||
),
|
||||
],
|
||||
None
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
],
|
||||
min,
|
||||
test_min
|
||||
);
|
||||
|
||||
// ExprSum command
|
||||
// Expands to a command definition for sum aggregation
|
||||
lazy_expr_command!(
|
||||
ExprSum,
|
||||
"polars sum",
|
||||
"Creates a sum expression for an aggregation or aggregates columns to their sum value.",
|
||||
vec![
|
||||
Example {
|
||||
description: "Sums all columns in a dataframe",
|
||||
example: "[[a b]; [6 2] [1 4] [4 1]] | polars into-df | polars sum",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("a".to_string(), vec![Value::test_int(11)],),
|
||||
Column::new("b".to_string(), vec![Value::test_int(7)],),
|
||||
],
|
||||
None
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Sum aggregation for a group-by",
|
||||
example: r#"[[a b]; [one 2] [one 4] [two 1]]
|
||||
| polars into-df
|
||||
| polars group-by a
|
||||
| polars agg (polars col b | polars sum)"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_string("one"), Value::test_string("two")],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(6), Value::test_int(1)],
|
||||
),
|
||||
],
|
||||
None
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
],
|
||||
sum,
|
||||
test_sum
|
||||
);
|
||||
|
||||
// ExprMean command
|
||||
// Expands to a command definition for mean aggregation
|
||||
lazy_expr_command!(
|
||||
ExprMean,
|
||||
"polars mean",
|
||||
"Creates a mean expression for an aggregation or aggregates columns to their mean value.",
|
||||
vec![
|
||||
Example {
|
||||
description: "Mean value from columns in a dataframe",
|
||||
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars mean",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("a".to_string(), vec![Value::test_float(4.0)],),
|
||||
Column::new("b".to_string(), vec![Value::test_float(2.0)],),
|
||||
],
|
||||
None
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Mean aggregation for a group-by",
|
||||
example: r#"[[a b]; [one 2] [one 4] [two 1]]
|
||||
| polars into-df
|
||||
| polars group-by a
|
||||
| polars agg (polars col b | polars mean)"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_string("one"), Value::test_string("two")],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_float(3.0), Value::test_float(1.0)],
|
||||
),
|
||||
],
|
||||
None
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
],
|
||||
mean,
|
||||
test_mean
|
||||
);
|
||||
|
||||
// ExprStd command
|
||||
// Expands to a command definition for std aggregation
|
||||
lazy_expr_command!(
|
||||
ExprStd,
|
||||
"polars std",
|
||||
"Creates a std expression for an aggregation of std value from columns in a dataframe.",
|
||||
vec![
|
||||
Example {
|
||||
description: "Std value from columns in a dataframe",
|
||||
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars std",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("a".to_string(), vec![Value::test_float(2.0)],),
|
||||
Column::new("b".to_string(), vec![Value::test_float(0.0)],),
|
||||
],
|
||||
None
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Std aggregation for a group-by",
|
||||
example: r#"[[a b]; [one 2] [one 2] [two 1] [two 1]]
|
||||
| polars into-df
|
||||
| polars group-by a
|
||||
| polars agg (polars col b | polars std)"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_string("one"), Value::test_string("two")],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_float(0.0), Value::test_float(0.0)],
|
||||
),
|
||||
],
|
||||
None
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
],
|
||||
std,
|
||||
test_std,
|
||||
1
|
||||
);
|
||||
|
||||
// ExprVar command
|
||||
// Expands to a command definition for var aggregation
|
||||
lazy_expr_command!(
|
||||
ExprVar,
|
||||
"polars var",
|
||||
"Create a var expression for an aggregation.",
|
||||
vec![
|
||||
Example {
|
||||
description:
|
||||
"Var value from columns in a dataframe or aggregates columns to their var value",
|
||||
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars var",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("a".to_string(), vec![Value::test_float(4.0)],),
|
||||
Column::new("b".to_string(), vec![Value::test_float(0.0)],),
|
||||
],
|
||||
None
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Var aggregation for a group-by",
|
||||
example: r#"[[a b]; [one 2] [one 2] [two 1] [two 1]]
|
||||
| polars into-df
|
||||
| polars group-by a
|
||||
| polars agg (polars col b | polars var)"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_string("one"), Value::test_string("two")],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_float(0.0), Value::test_float(0.0)],
|
||||
),
|
||||
],
|
||||
None
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
],
|
||||
var,
|
||||
test_var,
|
||||
1
|
||||
);
|
200
crates/nu_plugin_polars/src/dataframe/expressions/is_in.rs
Normal file
200
crates/nu_plugin_polars/src/dataframe/expressions/is_in.rs
Normal file
@ -0,0 +1,200 @@
|
||||
use crate::{
|
||||
dataframe::values::{Column, NuDataFrame, NuExpression},
|
||||
values::{
|
||||
cant_convert_err, to_pipeline_data, CustomValueSupport, PolarsPluginObject,
|
||||
PolarsPluginType,
|
||||
},
|
||||
PolarsPlugin,
|
||||
};
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
use polars::prelude::{is_in, lit, DataType, IntoSeries};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ExprIsIn;
|
||||
|
||||
impl PluginCommand for ExprIsIn {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars is-in"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates an is-in expression or checks to see if the elements are contained in the right series"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required("list", SyntaxShape::Any, "List to check if values are in")
|
||||
.input_output_types(vec![
|
||||
(
|
||||
Type::Custom("expression".into()),
|
||||
Type::Custom("expression".into()),
|
||||
),
|
||||
(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
),
|
||||
])
|
||||
.category(Category::Custom("expression".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Creates a is-in expression",
|
||||
example: r#"let df = ([[a b]; [one 1] [two 2] [three 3]] | polars into-df);
|
||||
$df | polars with-column (polars col a | polars is-in [one two] | polars as a_in)"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![
|
||||
Value::test_string("one"),
|
||||
Value::test_string("two"),
|
||||
Value::test_string("three"),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(2), Value::test_int(3)],
|
||||
),
|
||||
Column::new(
|
||||
"a_in".to_string(),
|
||||
vec![
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(false),
|
||||
],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Checks if elements from a series are contained in right series",
|
||||
example: r#"let other = ([1 3 6] | polars into-df);
|
||||
[5 6 6 6 8 8 8] | polars into-df | polars is-in $other"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"is_in".to_string(),
|
||||
vec![
|
||||
Value::test_bool(false),
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(false),
|
||||
Value::test_bool(false),
|
||||
Value::test_bool(false),
|
||||
],
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn search_terms(&self) -> Vec<&str> {
|
||||
vec!["check", "contained", "is-contain", "match"]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let value = input.into_value(call.head);
|
||||
|
||||
match PolarsPluginObject::try_from_value(plugin, &value)? {
|
||||
PolarsPluginObject::NuDataFrame(df) => command_df(plugin, engine, call, df),
|
||||
PolarsPluginObject::NuLazyFrame(lazy) => {
|
||||
command_df(plugin, engine, call, lazy.collect(call.head)?)
|
||||
}
|
||||
PolarsPluginObject::NuExpression(expr) => command_expr(plugin, engine, call, expr),
|
||||
_ => Err(cant_convert_err(
|
||||
&value,
|
||||
&[
|
||||
PolarsPluginType::NuDataFrame,
|
||||
PolarsPluginType::NuLazyFrame,
|
||||
PolarsPluginType::NuExpression,
|
||||
],
|
||||
)),
|
||||
}
|
||||
.map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command_expr(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
expr: NuExpression,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let list: Vec<Value> = call.req(0)?;
|
||||
|
||||
let values = NuDataFrame::try_from_columns(vec![Column::new("list".to_string(), list)], None)?;
|
||||
let list = values.as_series(call.head)?;
|
||||
|
||||
if matches!(list.dtype(), DataType::Object(..)) {
|
||||
return Err(ShellError::IncompatibleParametersSingle {
|
||||
msg: "Cannot use a mixed list as argument".into(),
|
||||
span: call.head,
|
||||
});
|
||||
}
|
||||
|
||||
let expr: NuExpression = expr.to_polars().is_in(lit(list)).into();
|
||||
to_pipeline_data(plugin, engine, call.head, expr)
|
||||
}
|
||||
|
||||
fn command_df(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
df: NuDataFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let other_value: Value = call.req(0)?;
|
||||
let other_span = other_value.span();
|
||||
let other_df = NuDataFrame::try_from_value(plugin, &other_value)?;
|
||||
let other = other_df.as_series(other_span)?;
|
||||
let series = df.as_series(call.head)?;
|
||||
|
||||
let mut res = is_in(&series, &other)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error finding in other".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?
|
||||
.into_series();
|
||||
|
||||
res.rename("is_in");
|
||||
|
||||
let df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?;
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&ExprIsIn)
|
||||
}
|
||||
}
|
73
crates/nu_plugin_polars/src/dataframe/expressions/lit.rs
Normal file
73
crates/nu_plugin_polars/src/dataframe/expressions/lit.rs
Normal file
@ -0,0 +1,73 @@
|
||||
use crate::{
|
||||
dataframe::values::NuExpression,
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
record, Category, Example, LabeledError, PipelineData, Signature, SyntaxShape, Type, Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ExprLit;
|
||||
|
||||
impl PluginCommand for ExprLit {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars lit"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates a literal expression."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"literal",
|
||||
SyntaxShape::Any,
|
||||
"literal to construct the expression",
|
||||
)
|
||||
.input_output_type(Type::Any, Type::Custom("expression".into()))
|
||||
.category(Category::Custom("expression".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Created a literal expression and converts it to a nu object",
|
||||
example: "polars lit 2 | polars into-nu",
|
||||
result: Some(Value::test_record(record! {
|
||||
"expr" => Value::test_string("literal"),
|
||||
"value" => Value::test_string("2"),
|
||||
})),
|
||||
}]
|
||||
}
|
||||
|
||||
fn search_terms(&self) -> Vec<&str> {
|
||||
vec!["string", "literal", "expression"]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
_input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let literal: Value = call.req(0)?;
|
||||
let expr = NuExpression::try_from_value(plugin, &literal)?;
|
||||
to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), nu_protocol::ShellError> {
|
||||
test_polars_plugin_command(&ExprLit)
|
||||
}
|
||||
}
|
48
crates/nu_plugin_polars/src/dataframe/expressions/mod.rs
Normal file
48
crates/nu_plugin_polars/src/dataframe/expressions/mod.rs
Normal file
@ -0,0 +1,48 @@
|
||||
mod alias;
|
||||
mod arg_where;
|
||||
mod col;
|
||||
mod concat_str;
|
||||
mod datepart;
|
||||
mod expressions_macro;
|
||||
mod is_in;
|
||||
mod lit;
|
||||
mod otherwise;
|
||||
mod when;
|
||||
|
||||
use nu_plugin::PluginCommand;
|
||||
|
||||
pub use crate::dataframe::expressions::alias::ExprAlias;
|
||||
pub use crate::dataframe::expressions::arg_where::ExprArgWhere;
|
||||
pub use crate::dataframe::expressions::col::ExprCol;
|
||||
pub use crate::dataframe::expressions::concat_str::ExprConcatStr;
|
||||
pub use crate::dataframe::expressions::datepart::ExprDatePart;
|
||||
pub use crate::dataframe::expressions::expressions_macro::*;
|
||||
pub use crate::dataframe::expressions::is_in::ExprIsIn;
|
||||
pub use crate::dataframe::expressions::lit::ExprLit;
|
||||
pub use crate::dataframe::expressions::otherwise::ExprOtherwise;
|
||||
pub use crate::dataframe::expressions::when::ExprWhen;
|
||||
use crate::PolarsPlugin;
|
||||
|
||||
pub(crate) fn expr_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlugin>>> {
|
||||
vec![
|
||||
Box::new(ExprAlias),
|
||||
Box::new(ExprArgWhere),
|
||||
Box::new(ExprAggGroups),
|
||||
Box::new(ExprCol),
|
||||
Box::new(ExprConcatStr),
|
||||
Box::new(ExprCount),
|
||||
Box::new(ExprDatePart),
|
||||
Box::new(ExprIsIn),
|
||||
Box::new(ExprList),
|
||||
Box::new(ExprLit),
|
||||
Box::new(ExprNot),
|
||||
Box::new(ExprMax),
|
||||
Box::new(ExprMin),
|
||||
Box::new(ExprOtherwise),
|
||||
Box::new(ExprSum),
|
||||
Box::new(ExprMean),
|
||||
Box::new(ExprStd),
|
||||
Box::new(ExprVar),
|
||||
Box::new(ExprWhen),
|
||||
]
|
||||
}
|
122
crates/nu_plugin_polars/src/dataframe/expressions/otherwise.rs
Normal file
122
crates/nu_plugin_polars/src/dataframe/expressions/otherwise.rs
Normal file
@ -0,0 +1,122 @@
|
||||
use crate::{
|
||||
dataframe::values::{Column, NuDataFrame, NuExpression, NuWhen, NuWhenType},
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ExprOtherwise;
|
||||
|
||||
impl PluginCommand for ExprOtherwise {
|
||||
type Plugin = PolarsPlugin;
|
||||
fn name(&self) -> &str {
|
||||
"polars otherwise"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Completes a when expression."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"otherwise expression",
|
||||
SyntaxShape::Any,
|
||||
"expression to apply when no when predicate matches",
|
||||
)
|
||||
.input_output_type(Type::Any, Type::Custom("expression".into()))
|
||||
.category(Category::Custom("expression".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Create a when conditions",
|
||||
example: "polars when ((polars col a) > 2) 4 | polars otherwise 5",
|
||||
result: None,
|
||||
},
|
||||
Example {
|
||||
description: "Create a when conditions",
|
||||
example:
|
||||
"polars when ((polars col a) > 2) 4 | polars when ((polars col a) < 0) 6 | polars otherwise 0",
|
||||
result: None,
|
||||
},
|
||||
Example {
|
||||
description: "Create a new column for the dataframe",
|
||||
example: r#"[[a b]; [6 2] [1 4] [4 1]]
|
||||
| polars into-lazy
|
||||
| polars with-column (
|
||||
polars when ((polars col a) > 2) 4 | polars otherwise 5 | polars as c
|
||||
)
|
||||
| polars with-column (
|
||||
polars when ((polars col a) > 5) 10 | polars when ((polars col a) < 2) 6 | polars otherwise 0 | polars as d
|
||||
)
|
||||
| polars collect"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(6), Value::test_int(1), Value::test_int(4)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4), Value::test_int(1)],
|
||||
),
|
||||
Column::new(
|
||||
"c".to_string(),
|
||||
vec![Value::test_int(4), Value::test_int(5), Value::test_int(4)],
|
||||
),
|
||||
Column::new(
|
||||
"d".to_string(),
|
||||
vec![Value::test_int(10), Value::test_int(6), Value::test_int(0)],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn search_terms(&self) -> Vec<&str> {
|
||||
vec!["condition", "else"]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let otherwise_predicate: Value = call.req(0)?;
|
||||
let otherwise_predicate = NuExpression::try_from_value(plugin, &otherwise_predicate)?;
|
||||
|
||||
let value = input.into_value(call.head);
|
||||
let complete: NuExpression = match NuWhen::try_from_value(plugin, &value)?.when_type {
|
||||
NuWhenType::Then(then) => then.otherwise(otherwise_predicate.to_polars()).into(),
|
||||
NuWhenType::ChainedThen(chained_when) => chained_when
|
||||
.otherwise(otherwise_predicate.to_polars())
|
||||
.into(),
|
||||
};
|
||||
to_pipeline_data(plugin, engine, call.head, complete).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), nu_protocol::ShellError> {
|
||||
test_polars_plugin_command(&ExprOtherwise)
|
||||
}
|
||||
}
|
144
crates/nu_plugin_polars/src/dataframe/expressions/when.rs
Normal file
144
crates/nu_plugin_polars/src/dataframe/expressions/when.rs
Normal file
@ -0,0 +1,144 @@
|
||||
use crate::{
|
||||
dataframe::values::{Column, NuDataFrame, NuExpression, NuWhen},
|
||||
values::{to_pipeline_data, CustomValueSupport, NuWhenType},
|
||||
PolarsPlugin,
|
||||
};
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value,
|
||||
};
|
||||
use polars::prelude::when;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ExprWhen;
|
||||
|
||||
impl PluginCommand for ExprWhen {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars when"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates and modifies a when expression."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"when expression",
|
||||
SyntaxShape::Any,
|
||||
"when expression used for matching",
|
||||
)
|
||||
.required(
|
||||
"then expression",
|
||||
SyntaxShape::Any,
|
||||
"expression that will be applied when predicate is true",
|
||||
)
|
||||
.input_output_type(
|
||||
Type::Custom("expression".into()),
|
||||
Type::Custom("expression".into()),
|
||||
)
|
||||
.category(Category::Custom("expression".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Create a when conditions",
|
||||
example: "polars when ((polars col a) > 2) 4",
|
||||
result: None,
|
||||
},
|
||||
Example {
|
||||
description: "Create a when conditions",
|
||||
example: "polars when ((polars col a) > 2) 4 | polars when ((polars col a) < 0) 6",
|
||||
result: None,
|
||||
},
|
||||
Example {
|
||||
description: "Create a new column for the dataframe",
|
||||
example: r#"[[a b]; [6 2] [1 4] [4 1]]
|
||||
| polars into-lazy
|
||||
| polars with-column (
|
||||
polars when ((polars col a) > 2) 4 | polars otherwise 5 | polars as c
|
||||
)
|
||||
| polars with-column (
|
||||
polars when ((polars col a) > 5) 10 | polars when ((polars col a) < 2) 6 | polars otherwise 0 | polars as d
|
||||
)
|
||||
| polars collect"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(6), Value::test_int(1), Value::test_int(4)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4), Value::test_int(1)],
|
||||
),
|
||||
Column::new(
|
||||
"c".to_string(),
|
||||
vec![Value::test_int(4), Value::test_int(5), Value::test_int(4)],
|
||||
),
|
||||
Column::new(
|
||||
"d".to_string(),
|
||||
vec![Value::test_int(10), Value::test_int(6), Value::test_int(0)],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn search_terms(&self) -> Vec<&str> {
|
||||
vec!["condition", "match", "if", "else"]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let when_predicate: Value = call.req(0)?;
|
||||
let when_predicate = NuExpression::try_from_value(plugin, &when_predicate)?;
|
||||
|
||||
let then_predicate: Value = call.req(1)?;
|
||||
let then_predicate = NuExpression::try_from_value(plugin, &then_predicate)?;
|
||||
|
||||
let value = input.into_value(call.head);
|
||||
let when_then: NuWhen = match value {
|
||||
Value::Nothing { .. } => when(when_predicate.to_polars())
|
||||
.then(then_predicate.to_polars())
|
||||
.into(),
|
||||
v => match NuWhen::try_from_value(plugin, &v)?.when_type {
|
||||
NuWhenType::Then(when_then) => when_then
|
||||
.when(when_predicate.to_polars())
|
||||
.then(then_predicate.to_polars())
|
||||
.into(),
|
||||
NuWhenType::ChainedThen(when_then_then) => when_then_then
|
||||
.when(when_predicate.to_polars())
|
||||
.then(then_predicate.to_polars())
|
||||
.into(),
|
||||
},
|
||||
};
|
||||
|
||||
to_pipeline_data(plugin, engine, call.head, when_then).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), nu_protocol::ShellError> {
|
||||
test_polars_plugin_command(&ExprWhen)
|
||||
}
|
||||
}
|
210
crates/nu_plugin_polars/src/dataframe/lazy/aggregate.rs
Normal file
210
crates/nu_plugin_polars/src/dataframe/lazy/aggregate.rs
Normal file
@ -0,0 +1,210 @@
|
||||
use crate::{
|
||||
dataframe::values::{NuExpression, NuLazyFrame, NuLazyGroupBy},
|
||||
values::{to_pipeline_data, Column, CustomValueSupport, NuDataFrame},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
use polars::{datatypes::DataType, prelude::Expr};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct LazyAggregate;
|
||||
|
||||
impl PluginCommand for LazyAggregate {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars agg"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Performs a series of aggregations from a group-by."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.rest(
|
||||
"Group-by expressions",
|
||||
SyntaxShape::Any,
|
||||
"Expression(s) that define the aggregations to be applied",
|
||||
)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Group by and perform an aggregation",
|
||||
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
|
||||
| polars into-df
|
||||
| polars group-by a
|
||||
| polars agg [
|
||||
(polars col b | polars min | polars as "b_min")
|
||||
(polars col b | polars max | polars as "b_max")
|
||||
(polars col b | polars sum | polars as "b_sum")
|
||||
]"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(2)],
|
||||
),
|
||||
Column::new(
|
||||
"b_min".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
Column::new(
|
||||
"b_max".to_string(),
|
||||
vec![Value::test_int(4), Value::test_int(6)],
|
||||
),
|
||||
Column::new(
|
||||
"b_sum".to_string(),
|
||||
vec![Value::test_int(6), Value::test_int(10)],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Group by and perform an aggregation",
|
||||
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
|
||||
| polars into-lazy
|
||||
| polars group-by a
|
||||
| polars agg [
|
||||
(polars col b | polars min | polars as "b_min")
|
||||
(polars col b | polars max | polars as "b_max")
|
||||
(polars col b | polars sum | polars as "b_sum")
|
||||
]
|
||||
| polars collect"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(2)],
|
||||
),
|
||||
Column::new(
|
||||
"b_min".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
Column::new(
|
||||
"b_max".to_string(),
|
||||
vec![Value::test_int(4), Value::test_int(6)],
|
||||
),
|
||||
Column::new(
|
||||
"b_sum".to_string(),
|
||||
vec![Value::test_int(6), Value::test_int(10)],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let vals: Vec<Value> = call.rest(0)?;
|
||||
let value = Value::list(vals, call.head);
|
||||
let expressions = NuExpression::extract_exprs(plugin, value)?;
|
||||
|
||||
let group_by = NuLazyGroupBy::try_from_pipeline(plugin, input, call.head)?;
|
||||
|
||||
for expr in expressions.iter() {
|
||||
if let Some(name) = get_col_name(expr) {
|
||||
let dtype = group_by.schema.schema.get(name.as_str());
|
||||
|
||||
if matches!(dtype, Some(DataType::Object(..))) {
|
||||
return Err(ShellError::GenericError {
|
||||
error: "Object type column not supported for aggregation".into(),
|
||||
msg: format!("Column '{name}' is type Object"),
|
||||
span: Some(call.head),
|
||||
help: Some("Aggregations cannot be performed on Object type columns. Use dtype command to check column types".into()),
|
||||
inner: vec![],
|
||||
}).map_err(|e| e.into());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let polars = group_by.to_polars();
|
||||
let lazy = NuLazyFrame::new(false, polars.agg(&expressions));
|
||||
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn get_col_name(expr: &Expr) -> Option<String> {
|
||||
match expr {
|
||||
Expr::Column(column) => Some(column.to_string()),
|
||||
Expr::Agg(agg) => match agg {
|
||||
polars::prelude::AggExpr::Min { input: e, .. }
|
||||
| polars::prelude::AggExpr::Max { input: e, .. }
|
||||
| polars::prelude::AggExpr::Median(e)
|
||||
| polars::prelude::AggExpr::NUnique(e)
|
||||
| polars::prelude::AggExpr::First(e)
|
||||
| polars::prelude::AggExpr::Last(e)
|
||||
| polars::prelude::AggExpr::Mean(e)
|
||||
| polars::prelude::AggExpr::Implode(e)
|
||||
| polars::prelude::AggExpr::Count(e, _)
|
||||
| polars::prelude::AggExpr::Sum(e)
|
||||
| polars::prelude::AggExpr::AggGroups(e)
|
||||
| polars::prelude::AggExpr::Std(e, _)
|
||||
| polars::prelude::AggExpr::Var(e, _) => get_col_name(e.as_ref()),
|
||||
polars::prelude::AggExpr::Quantile { expr, .. } => get_col_name(expr.as_ref()),
|
||||
},
|
||||
Expr::Filter { input: expr, .. }
|
||||
| Expr::Slice { input: expr, .. }
|
||||
| Expr::Cast { expr, .. }
|
||||
| Expr::Sort { expr, .. }
|
||||
| Expr::Gather { expr, .. }
|
||||
| Expr::SortBy { expr, .. }
|
||||
| Expr::Exclude(expr, _)
|
||||
| Expr::Alias(expr, _)
|
||||
| Expr::KeepName(expr)
|
||||
| Expr::Explode(expr) => get_col_name(expr.as_ref()),
|
||||
Expr::Ternary { .. }
|
||||
| Expr::AnonymousFunction { .. }
|
||||
| Expr::Function { .. }
|
||||
| Expr::Columns(_)
|
||||
| Expr::DtypeColumn(_)
|
||||
| Expr::Literal(_)
|
||||
| Expr::BinaryExpr { .. }
|
||||
| Expr::Window { .. }
|
||||
| Expr::Wildcard
|
||||
| Expr::RenameAlias { .. }
|
||||
| Expr::Len
|
||||
| Expr::Nth(_)
|
||||
| Expr::SubPlan(_, _)
|
||||
| Expr::Selector(_) => None,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&LazyAggregate)
|
||||
}
|
||||
}
|
98
crates/nu_plugin_polars/src/dataframe/lazy/collect.rs
Normal file
98
crates/nu_plugin_polars/src/dataframe/lazy/collect.rs
Normal file
@ -0,0 +1,98 @@
|
||||
use crate::{
|
||||
dataframe::values::{Column, NuDataFrame},
|
||||
values::{cant_convert_err, CustomValueSupport, PolarsPluginObject, PolarsPluginType},
|
||||
Cacheable, PolarsPlugin,
|
||||
};
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{Category, Example, LabeledError, PipelineData, Signature, Span, Type, Value};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct LazyCollect;
|
||||
|
||||
impl PluginCommand for LazyCollect {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars collect"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Collect lazy dataframe into eager dataframe."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "drop duplicates",
|
||||
example: "[[a b]; [1 2] [3 4]] | polars into-lazy | polars collect",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let value = input.into_value(call.head);
|
||||
match PolarsPluginObject::try_from_value(plugin, &value)? {
|
||||
PolarsPluginObject::NuLazyFrame(lazy) => {
|
||||
let eager = lazy.collect(call.head)?;
|
||||
Ok(PipelineData::Value(
|
||||
eager.cache(plugin, engine)?.into_value(call.head),
|
||||
None,
|
||||
))
|
||||
}
|
||||
PolarsPluginObject::NuDataFrame(df) => {
|
||||
// just return the dataframe, add to cache again to be safe
|
||||
Ok(PipelineData::Value(
|
||||
df.cache(plugin, engine)?.into_value(call.head),
|
||||
None,
|
||||
))
|
||||
}
|
||||
_ => Err(cant_convert_err(
|
||||
&value,
|
||||
&[PolarsPluginType::NuLazyFrame, PolarsPluginType::NuDataFrame],
|
||||
)),
|
||||
}
|
||||
.map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), nu_protocol::ShellError> {
|
||||
test_polars_plugin_command(&LazyCollect)
|
||||
}
|
||||
}
|
175
crates/nu_plugin_polars/src/dataframe/lazy/explode.rs
Normal file
175
crates/nu_plugin_polars/src/dataframe/lazy/explode.rs
Normal file
@ -0,0 +1,175 @@
|
||||
use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame};
|
||||
use crate::values::{to_pipeline_data, CustomValueSupport, PolarsPluginObject};
|
||||
use crate::PolarsPlugin;
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct LazyExplode;
|
||||
|
||||
impl PluginCommand for LazyExplode {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars explode"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Explodes a dataframe or creates a explode expression."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.rest(
|
||||
"columns",
|
||||
SyntaxShape::String,
|
||||
"columns to explode, only applicable for dataframes",
|
||||
)
|
||||
.input_output_types(vec![
|
||||
(
|
||||
Type::Custom("expression".into()),
|
||||
Type::Custom("expression".into()),
|
||||
),
|
||||
(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
),
|
||||
])
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Explode the specified dataframe",
|
||||
example: "[[id name hobbies]; [1 Mercy [Cycling Knitting]] [2 Bob [Skiing Football]]] | polars into-df | polars explode hobbies | polars collect",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"id".to_string(),
|
||||
vec![
|
||||
Value::test_int(1),
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(2),
|
||||
]),
|
||||
Column::new(
|
||||
"name".to_string(),
|
||||
vec![
|
||||
Value::test_string("Mercy"),
|
||||
Value::test_string("Mercy"),
|
||||
Value::test_string("Bob"),
|
||||
Value::test_string("Bob"),
|
||||
]),
|
||||
Column::new(
|
||||
"hobbies".to_string(),
|
||||
vec![
|
||||
Value::test_string("Cycling"),
|
||||
Value::test_string("Knitting"),
|
||||
Value::test_string("Skiing"),
|
||||
Value::test_string("Football"),
|
||||
]),
|
||||
], None)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
)
|
||||
},
|
||||
Example {
|
||||
description: "Select a column and explode the values",
|
||||
example: "[[id name hobbies]; [1 Mercy [Cycling Knitting]] [2 Bob [Skiing Football]]] | polars into-df | polars select (polars col hobbies | polars explode)",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"hobbies".to_string(),
|
||||
vec![
|
||||
Value::test_string("Cycling"),
|
||||
Value::test_string("Knitting"),
|
||||
Value::test_string("Skiing"),
|
||||
Value::test_string("Football"),
|
||||
]),
|
||||
], None)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
explode(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn explode(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let value = input.into_value(call.head);
|
||||
|
||||
match PolarsPluginObject::try_from_value(plugin, &value)? {
|
||||
PolarsPluginObject::NuDataFrame(df) => {
|
||||
let lazy = df.lazy();
|
||||
explode_lazy(plugin, engine, call, lazy)
|
||||
}
|
||||
PolarsPluginObject::NuLazyFrame(lazy) => explode_lazy(plugin, engine, call, lazy),
|
||||
PolarsPluginObject::NuExpression(expr) => explode_expr(plugin, engine, call, expr),
|
||||
_ => Err(ShellError::CantConvert {
|
||||
to_type: "dataframe or expression".into(),
|
||||
from_type: value.get_type().to_string(),
|
||||
span: call.head,
|
||||
help: None,
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn explode_lazy(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
lazy: NuLazyFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let columns = call
|
||||
.positional
|
||||
.iter()
|
||||
.map(|e| e.as_str().map(|s| s.to_string()))
|
||||
.collect::<Result<Vec<String>, ShellError>>()?;
|
||||
|
||||
let exploded = lazy
|
||||
.to_polars()
|
||||
.explode(columns.iter().map(AsRef::as_ref).collect::<Vec<&str>>());
|
||||
let lazy = NuLazyFrame::from(exploded);
|
||||
|
||||
to_pipeline_data(plugin, engine, call.head, lazy)
|
||||
}
|
||||
|
||||
pub(crate) fn explode_expr(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
expr: NuExpression,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let expr: NuExpression = expr.to_polars().explode().into();
|
||||
to_pipeline_data(plugin, engine, call.head, expr)
|
||||
}
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&LazyExplode)
|
||||
}
|
||||
}
|
100
crates/nu_plugin_polars/src/dataframe/lazy/fetch.rs
Normal file
100
crates/nu_plugin_polars/src/dataframe/lazy/fetch.rs
Normal file
@ -0,0 +1,100 @@
|
||||
use crate::dataframe::values::{Column, NuDataFrame};
|
||||
use crate::values::{to_pipeline_data, CustomValueSupport, NuLazyFrame};
|
||||
use crate::PolarsPlugin;
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct LazyFetch;
|
||||
|
||||
impl PluginCommand for LazyFetch {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars fetch"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Collects the lazyframe to the selected rows."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"rows",
|
||||
SyntaxShape::Int,
|
||||
"number of rows to be fetched from lazyframe",
|
||||
)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Fetch a rows from the dataframe",
|
||||
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars fetch 2",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(6), Value::test_int(4)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(2)],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let rows: i64 = call.req(0)?;
|
||||
let value = input.into_value(call.head);
|
||||
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?;
|
||||
|
||||
let mut eager: NuDataFrame = lazy
|
||||
.to_polars()
|
||||
.fetch(rows as usize)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error fetching rows".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?
|
||||
.into();
|
||||
|
||||
// mark this as not from lazy so it doesn't get converted back to a lazy frame
|
||||
eager.from_lazy = false;
|
||||
to_pipeline_data(plugin, engine, call.head, eager).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&LazyFetch)
|
||||
}
|
||||
}
|
189
crates/nu_plugin_polars/src/dataframe/lazy/fill_nan.rs
Normal file
189
crates/nu_plugin_polars/src/dataframe/lazy/fill_nan.rs
Normal file
@ -0,0 +1,189 @@
|
||||
use crate::{
|
||||
dataframe::values::{Column, NuDataFrame, NuExpression},
|
||||
values::{
|
||||
cant_convert_err, to_pipeline_data, CustomValueSupport, PolarsPluginObject,
|
||||
PolarsPluginType,
|
||||
},
|
||||
PolarsPlugin,
|
||||
};
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct LazyFillNA;
|
||||
|
||||
impl PluginCommand for LazyFillNA {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars fill-nan"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Replaces NaN values with the given expression."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"fill",
|
||||
SyntaxShape::Any,
|
||||
"Expression to use to fill the NAN values",
|
||||
)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Fills the NaN values with 0",
|
||||
example: "[1 2 NaN 3 NaN] | polars into-df | polars fill-nan 0",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(0),
|
||||
Value::test_int(3),
|
||||
Value::test_int(0),
|
||||
],
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("Df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Fills the NaN values of a whole dataframe",
|
||||
example: "[[a b]; [0.2 1] [0.1 NaN]] | polars into-df | polars fill-nan 0",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_float(0.2), Value::test_float(0.1)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(0)],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("Df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let fill: Value = call.req(0)?;
|
||||
let value = input.into_value(call.head);
|
||||
|
||||
match PolarsPluginObject::try_from_value(plugin, &value)? {
|
||||
PolarsPluginObject::NuDataFrame(df) => {
|
||||
cmd_df(plugin, engine, call, df, fill, value.span())
|
||||
}
|
||||
PolarsPluginObject::NuLazyFrame(lazy) => cmd_df(
|
||||
plugin,
|
||||
engine,
|
||||
call,
|
||||
lazy.collect(value.span())?,
|
||||
fill,
|
||||
value.span(),
|
||||
),
|
||||
PolarsPluginObject::NuExpression(expr) => {
|
||||
Ok(cmd_expr(plugin, engine, call, expr, fill)?)
|
||||
}
|
||||
_ => Err(cant_convert_err(
|
||||
&value,
|
||||
&[
|
||||
PolarsPluginType::NuDataFrame,
|
||||
PolarsPluginType::NuLazyFrame,
|
||||
PolarsPluginType::NuExpression,
|
||||
],
|
||||
)),
|
||||
}
|
||||
.map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn cmd_df(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
frame: NuDataFrame,
|
||||
fill: Value,
|
||||
val_span: Span,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let columns = frame.columns(val_span)?;
|
||||
let dataframe = columns
|
||||
.into_iter()
|
||||
.map(|column| {
|
||||
let column_name = column.name().to_string();
|
||||
let values = column
|
||||
.into_iter()
|
||||
.map(|value| {
|
||||
let span = value.span();
|
||||
match value {
|
||||
Value::Float { val, .. } => {
|
||||
if val.is_nan() {
|
||||
fill.clone()
|
||||
} else {
|
||||
value
|
||||
}
|
||||
}
|
||||
Value::List { vals, .. } => {
|
||||
NuDataFrame::fill_list_nan(vals, span, fill.clone())
|
||||
}
|
||||
_ => value,
|
||||
}
|
||||
})
|
||||
.collect::<Vec<Value>>();
|
||||
Column::new(column_name, values)
|
||||
})
|
||||
.collect::<Vec<Column>>();
|
||||
let df = NuDataFrame::try_from_columns(dataframe, None)?;
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
fn cmd_expr(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
expr: NuExpression,
|
||||
fill: Value,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let fill = NuExpression::try_from_value(plugin, &fill)?.to_polars();
|
||||
let expr: NuExpression = expr.to_polars().fill_nan(fill).into();
|
||||
|
||||
to_pipeline_data(plugin, engine, call.head, expr)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&LazyFillNA)
|
||||
}
|
||||
}
|
127
crates/nu_plugin_polars/src/dataframe/lazy/fill_null.rs
Normal file
127
crates/nu_plugin_polars/src/dataframe/lazy/fill_null.rs
Normal file
@ -0,0 +1,127 @@
|
||||
use crate::{
|
||||
dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame},
|
||||
values::{
|
||||
cant_convert_err, to_pipeline_data, CustomValueSupport, PolarsPluginObject,
|
||||
PolarsPluginType,
|
||||
},
|
||||
PolarsPlugin,
|
||||
};
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct LazyFillNull;
|
||||
|
||||
impl PluginCommand for LazyFillNull {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars fill-null"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Replaces NULL values with the given expression."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"fill",
|
||||
SyntaxShape::Any,
|
||||
"Expression to use to fill the null values",
|
||||
)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Fills the null values by 0",
|
||||
example: "[1 2 2 3 3] | polars into-df | polars shift 2 | polars fill-null 0",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
Value::test_int(0),
|
||||
Value::test_int(0),
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(2),
|
||||
],
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let fill: Value = call.req(0)?;
|
||||
let value = input.into_value(call.head);
|
||||
|
||||
match PolarsPluginObject::try_from_value(plugin, &value)? {
|
||||
PolarsPluginObject::NuDataFrame(df) => cmd_lazy(plugin, engine, call, df.lazy(), fill),
|
||||
PolarsPluginObject::NuLazyFrame(lazy) => cmd_lazy(plugin, engine, call, lazy, fill),
|
||||
PolarsPluginObject::NuExpression(expr) => cmd_expr(plugin, engine, call, expr, fill),
|
||||
_ => Err(cant_convert_err(
|
||||
&value,
|
||||
&[
|
||||
PolarsPluginType::NuDataFrame,
|
||||
PolarsPluginType::NuLazyFrame,
|
||||
PolarsPluginType::NuExpression,
|
||||
],
|
||||
)),
|
||||
}
|
||||
.map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn cmd_lazy(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
lazy: NuLazyFrame,
|
||||
fill: Value,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let expr = NuExpression::try_from_value(plugin, &fill)?.to_polars();
|
||||
let lazy = NuLazyFrame::new(lazy.from_eager, lazy.to_polars().fill_null(expr));
|
||||
to_pipeline_data(plugin, engine, call.head, lazy)
|
||||
}
|
||||
|
||||
fn cmd_expr(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
expr: NuExpression,
|
||||
fill: Value,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let fill = NuExpression::try_from_value(plugin, &fill)?.to_polars();
|
||||
let expr: NuExpression = expr.to_polars().fill_null(fill).into();
|
||||
to_pipeline_data(plugin, engine, call.head, expr)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&LazyFillNull)
|
||||
}
|
||||
}
|
104
crates/nu_plugin_polars/src/dataframe/lazy/filter.rs
Normal file
104
crates/nu_plugin_polars/src/dataframe/lazy/filter.rs
Normal file
@ -0,0 +1,104 @@
|
||||
use crate::{
|
||||
dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame},
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct LazyFilter;
|
||||
|
||||
impl PluginCommand for LazyFilter {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars filter"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Filter dataframe based in expression."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"filter expression",
|
||||
SyntaxShape::Any,
|
||||
"Expression that define the column selection",
|
||||
)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Filter dataframe using an expression",
|
||||
example:
|
||||
"[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars filter ((polars col a) >= 4)",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(6), Value::test_int(4)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(2)],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let expr_value: Value = call.req(0)?;
|
||||
let filter_expr = NuExpression::try_from_value(plugin, &expr_value)?;
|
||||
let pipeline_value = input.into_value(call.head);
|
||||
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?;
|
||||
command(plugin, engine, call, lazy, filter_expr).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
lazy: NuLazyFrame,
|
||||
filter_expr: NuExpression,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let lazy = NuLazyFrame::new(
|
||||
lazy.from_eager,
|
||||
lazy.to_polars().filter(filter_expr.to_polars()),
|
||||
);
|
||||
to_pipeline_data(plugin, engine, call.head, lazy)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&LazyFilter)
|
||||
}
|
||||
}
|
125
crates/nu_plugin_polars/src/dataframe/lazy/flatten.rs
Normal file
125
crates/nu_plugin_polars/src/dataframe/lazy/flatten.rs
Normal file
@ -0,0 +1,125 @@
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value,
|
||||
};
|
||||
|
||||
use crate::{
|
||||
dataframe::values::{Column, NuDataFrame},
|
||||
values::CustomValueSupport,
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::explode::explode;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct LazyFlatten;
|
||||
|
||||
impl PluginCommand for LazyFlatten {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars flatten"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"An alias for polars explode."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.rest(
|
||||
"columns",
|
||||
SyntaxShape::String,
|
||||
"columns to flatten, only applicable for dataframes",
|
||||
)
|
||||
.input_output_types(vec![
|
||||
(
|
||||
Type::Custom("expression".into()),
|
||||
Type::Custom("expression".into()),
|
||||
),
|
||||
(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
),
|
||||
])
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Flatten the specified dataframe",
|
||||
example: "[[id name hobbies]; [1 Mercy [Cycling Knitting]] [2 Bob [Skiing Football]]] | polars into-df | polars flatten hobbies | polars collect",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"id".to_string(),
|
||||
vec![
|
||||
Value::test_int(1),
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(2),
|
||||
]),
|
||||
Column::new(
|
||||
"name".to_string(),
|
||||
vec![
|
||||
Value::test_string("Mercy"),
|
||||
Value::test_string("Mercy"),
|
||||
Value::test_string("Bob"),
|
||||
Value::test_string("Bob"),
|
||||
]),
|
||||
Column::new(
|
||||
"hobbies".to_string(),
|
||||
vec![
|
||||
Value::test_string("Cycling"),
|
||||
Value::test_string("Knitting"),
|
||||
Value::test_string("Skiing"),
|
||||
Value::test_string("Football"),
|
||||
]),
|
||||
], None)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
)
|
||||
},
|
||||
Example {
|
||||
description: "Select a column and flatten the values",
|
||||
example: "[[id name hobbies]; [1 Mercy [Cycling Knitting]] [2 Bob [Skiing Football]]] | polars into-df | polars select (polars col hobbies | polars flatten)",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"hobbies".to_string(),
|
||||
vec![
|
||||
Value::test_string("Cycling"),
|
||||
Value::test_string("Knitting"),
|
||||
Value::test_string("Skiing"),
|
||||
Value::test_string("Football"),
|
||||
]),
|
||||
], None)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
explode(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), nu_protocol::ShellError> {
|
||||
test_polars_plugin_command(&LazyFlatten)
|
||||
}
|
||||
}
|
168
crates/nu_plugin_polars/src/dataframe/lazy/groupby.rs
Normal file
168
crates/nu_plugin_polars/src/dataframe/lazy/groupby.rs
Normal file
@ -0,0 +1,168 @@
|
||||
use crate::{
|
||||
dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame, NuLazyGroupBy},
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
use polars::prelude::Expr;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ToLazyGroupBy;
|
||||
|
||||
impl PluginCommand for ToLazyGroupBy {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars group-by"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates a group-by object that can be used for other aggregations."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.rest(
|
||||
"Group-by expressions",
|
||||
SyntaxShape::Any,
|
||||
"Expression(s) that define the lazy group-by",
|
||||
)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Group by and perform an aggregation",
|
||||
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
|
||||
| polars into-df
|
||||
| polars group-by a
|
||||
| polars agg [
|
||||
(polars col b | polars min | polars as "b_min")
|
||||
(polars col b | polars max | polars as "b_max")
|
||||
(polars col b | polars sum | polars as "b_sum")
|
||||
]"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(2)],
|
||||
),
|
||||
Column::new(
|
||||
"b_min".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
Column::new(
|
||||
"b_max".to_string(),
|
||||
vec![Value::test_int(4), Value::test_int(6)],
|
||||
),
|
||||
Column::new(
|
||||
"b_sum".to_string(),
|
||||
vec![Value::test_int(6), Value::test_int(10)],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Group by and perform an aggregation",
|
||||
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
|
||||
| polars into-lazy
|
||||
| polars group-by a
|
||||
| polars agg [
|
||||
(polars col b | polars min | polars as "b_min")
|
||||
(polars col b | polars max | polars as "b_max")
|
||||
(polars col b | polars sum | polars as "b_sum")
|
||||
]
|
||||
| polars collect"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(2)],
|
||||
),
|
||||
Column::new(
|
||||
"b_min".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
Column::new(
|
||||
"b_max".to_string(),
|
||||
vec![Value::test_int(4), Value::test_int(6)],
|
||||
),
|
||||
Column::new(
|
||||
"b_sum".to_string(),
|
||||
vec![Value::test_int(6), Value::test_int(10)],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let vals: Vec<Value> = call.rest(0)?;
|
||||
let expr_value = Value::list(vals, call.head);
|
||||
let expressions = NuExpression::extract_exprs(plugin, expr_value)?;
|
||||
|
||||
if expressions
|
||||
.iter()
|
||||
.any(|expr| !matches!(expr, Expr::Column(..)))
|
||||
{
|
||||
let value: Value = call.req(0)?;
|
||||
Err(ShellError::IncompatibleParametersSingle {
|
||||
msg: "Expected only Col expressions".into(),
|
||||
span: value.span(),
|
||||
})?;
|
||||
}
|
||||
|
||||
let pipeline_value = input.into_value(call.head);
|
||||
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?;
|
||||
command(plugin, engine, call, lazy, expressions).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
lazy: NuLazyFrame,
|
||||
expressions: Vec<Expr>,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let group_by = lazy.to_polars().group_by(expressions);
|
||||
let group_by = NuLazyGroupBy::new(group_by, lazy.from_eager, lazy.schema()?);
|
||||
to_pipeline_data(plugin, engine, call.head, group_by)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&ToLazyGroupBy)
|
||||
}
|
||||
}
|
260
crates/nu_plugin_polars/src/dataframe/lazy/join.rs
Normal file
260
crates/nu_plugin_polars/src/dataframe/lazy/join.rs
Normal file
@ -0,0 +1,260 @@
|
||||
use crate::{
|
||||
dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame},
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
use polars::prelude::{Expr, JoinType};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct LazyJoin;
|
||||
|
||||
impl PluginCommand for LazyJoin {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars join"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Joins a lazy frame with other lazy frame."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required("other", SyntaxShape::Any, "LazyFrame to join with")
|
||||
.required("left_on", SyntaxShape::Any, "Left column(s) to join on")
|
||||
.required("right_on", SyntaxShape::Any, "Right column(s) to join on")
|
||||
.switch(
|
||||
"inner",
|
||||
"inner joining between lazyframes (default)",
|
||||
Some('i'),
|
||||
)
|
||||
.switch("left", "left join between lazyframes", Some('l'))
|
||||
.switch("outer", "outer join between lazyframes", Some('o'))
|
||||
.switch("cross", "cross join between lazyframes", Some('c'))
|
||||
.named(
|
||||
"suffix",
|
||||
SyntaxShape::String,
|
||||
"Suffix to use on columns with same name",
|
||||
Some('s'),
|
||||
)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Join two lazy dataframes",
|
||||
example: r#"let df_a = ([[a b c];[1 "a" 0] [2 "b" 1] [1 "c" 2] [1 "c" 3]] | polars into-lazy);
|
||||
let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | polars into-lazy);
|
||||
$df_a | polars join $df_b a foo | polars collect"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(1),
|
||||
Value::test_int(1),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![
|
||||
Value::test_string("a"),
|
||||
Value::test_string("b"),
|
||||
Value::test_string("c"),
|
||||
Value::test_string("c"),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"c".to_string(),
|
||||
vec![
|
||||
Value::test_int(0),
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(3),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"bar".to_string(),
|
||||
vec![
|
||||
Value::test_string("a"),
|
||||
Value::test_string("c"),
|
||||
Value::test_string("a"),
|
||||
Value::test_string("a"),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"ham".to_string(),
|
||||
vec![
|
||||
Value::test_string("let"),
|
||||
Value::test_string("var"),
|
||||
Value::test_string("let"),
|
||||
Value::test_string("let"),
|
||||
],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Join one eager dataframe with a lazy dataframe",
|
||||
example: r#"let df_a = ([[a b c];[1 "a" 0] [2 "b" 1] [1 "c" 2] [1 "c" 3]] | polars into-df);
|
||||
let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | polars into-lazy);
|
||||
$df_a | polars join $df_b a foo"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(1),
|
||||
Value::test_int(1),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![
|
||||
Value::test_string("a"),
|
||||
Value::test_string("b"),
|
||||
Value::test_string("c"),
|
||||
Value::test_string("c"),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"c".to_string(),
|
||||
vec![
|
||||
Value::test_int(0),
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(3),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"bar".to_string(),
|
||||
vec![
|
||||
Value::test_string("a"),
|
||||
Value::test_string("c"),
|
||||
Value::test_string("a"),
|
||||
Value::test_string("a"),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"ham".to_string(),
|
||||
vec![
|
||||
Value::test_string("let"),
|
||||
Value::test_string("var"),
|
||||
Value::test_string("let"),
|
||||
Value::test_string("let"),
|
||||
],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let left = call.has_flag("left")?;
|
||||
let outer = call.has_flag("outer")?;
|
||||
let cross = call.has_flag("cross")?;
|
||||
|
||||
let how = if left {
|
||||
JoinType::Left
|
||||
} else if outer {
|
||||
JoinType::Outer { coalesce: true }
|
||||
} else if cross {
|
||||
JoinType::Cross
|
||||
} else {
|
||||
JoinType::Inner
|
||||
};
|
||||
|
||||
let other: Value = call.req(0)?;
|
||||
let other = NuLazyFrame::try_from_value_coerce(plugin, &other)?;
|
||||
let other = other.to_polars();
|
||||
|
||||
let left_on: Value = call.req(1)?;
|
||||
let left_on = NuExpression::extract_exprs(plugin, left_on)?;
|
||||
|
||||
let right_on: Value = call.req(2)?;
|
||||
let right_on = NuExpression::extract_exprs(plugin, right_on)?;
|
||||
|
||||
if left_on.len() != right_on.len() {
|
||||
let right_on: Value = call.req(2)?;
|
||||
Err(ShellError::IncompatibleParametersSingle {
|
||||
msg: "The right column list has a different size to the left column list".into(),
|
||||
span: right_on.span(),
|
||||
})?;
|
||||
}
|
||||
|
||||
// Checking that both list of expressions are made out of col expressions or strings
|
||||
for (index, list) in &[(1usize, &left_on), (2, &left_on)] {
|
||||
if list.iter().any(|expr| !matches!(expr, Expr::Column(..))) {
|
||||
let value: Value = call.req(*index)?;
|
||||
Err(ShellError::IncompatibleParametersSingle {
|
||||
msg: "Expected only a string, col expressions or list of strings".into(),
|
||||
span: value.span(),
|
||||
})?;
|
||||
}
|
||||
}
|
||||
|
||||
let suffix: Option<String> = call.get_flag("suffix")?;
|
||||
let suffix = suffix.unwrap_or_else(|| "_x".into());
|
||||
|
||||
let value = input.into_value(call.head);
|
||||
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?;
|
||||
let from_eager = lazy.from_eager;
|
||||
let lazy = lazy.to_polars();
|
||||
|
||||
let lazy = lazy
|
||||
.join_builder()
|
||||
.with(other)
|
||||
.left_on(left_on)
|
||||
.right_on(right_on)
|
||||
.how(how)
|
||||
.force_parallel(true)
|
||||
.suffix(suffix)
|
||||
.finish();
|
||||
|
||||
let lazy = NuLazyFrame::new(from_eager, lazy);
|
||||
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&LazyJoin)
|
||||
}
|
||||
}
|
225
crates/nu_plugin_polars/src/dataframe/lazy/macro_commands.rs
Normal file
225
crates/nu_plugin_polars/src/dataframe/lazy/macro_commands.rs
Normal file
@ -0,0 +1,225 @@
|
||||
/// Definition of multiple lazyframe commands using a macro rule
|
||||
/// All of these commands have an identical body and only require
|
||||
/// to have a change in the name, description and function
|
||||
use crate::dataframe::values::{Column, NuDataFrame, NuLazyFrame};
|
||||
use crate::values::{to_pipeline_data, CustomValueSupport};
|
||||
use crate::PolarsPlugin;
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{Category, Example, LabeledError, PipelineData, Signature, Span, Type, Value};
|
||||
|
||||
macro_rules! lazy_command {
|
||||
($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident) => {
|
||||
#[derive(Clone)]
|
||||
pub struct $command;
|
||||
|
||||
impl PluginCommand for $command {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
$name
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
$desc
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.usage($desc)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
$examples
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let lazy = NuLazyFrame::try_from_pipeline_coerce(plugin, input, call.head)
|
||||
.map_err(LabeledError::from)?;
|
||||
let lazy = NuLazyFrame::new(lazy.from_eager, lazy.to_polars().$func());
|
||||
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod $test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
use nu_protocol::ShellError;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&$command)
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident, $ddot: expr) => {
|
||||
#[derive(Clone)]
|
||||
pub struct $command;
|
||||
|
||||
impl PluginCommand for $command {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build($name)
|
||||
.usage($desc)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
.plugin_examples($examples)
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
_plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let lazy = NuLazyFrame::try_from_pipeline_coerce(plugin, input, call.head)
|
||||
.map_err(LabeledError::from)?;
|
||||
let lazy = NuLazyFrame::new(lazy.from_eager, lazy.into_polars().$func($ddot));
|
||||
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod $test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
use nu_protocol::ShellError;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&$command)
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident?, $test: ident) => {
|
||||
#[derive(Clone)]
|
||||
pub struct $command;
|
||||
|
||||
impl PluginCommand for $command {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
$name
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
$desc
|
||||
}
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
$examples
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let lazy = NuLazyFrame::try_from_pipeline_coerce(plugin, input, call.head)
|
||||
.map_err(LabeledError::from)?;
|
||||
|
||||
let lazy = NuLazyFrame::new(
|
||||
lazy.from_eager,
|
||||
lazy.to_polars()
|
||||
.$func()
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Dataframe Error".into(),
|
||||
msg: e.to_string(),
|
||||
help: None,
|
||||
span: None,
|
||||
inner: vec![],
|
||||
})
|
||||
.map_err(LabeledError::from)?,
|
||||
);
|
||||
|
||||
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod $test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
use nu_protocol::ShellError;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&$command)
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// LazyReverse command
|
||||
// Expands to a command definition for reverse
|
||||
lazy_command!(
|
||||
LazyReverse,
|
||||
"polars reverse",
|
||||
"Reverses the LazyFrame",
|
||||
vec![Example {
|
||||
description: "Reverses the dataframe.",
|
||||
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars reverse",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4), Value::test_int(6),],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(2), Value::test_int(2),],
|
||||
),
|
||||
],
|
||||
None
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},],
|
||||
reverse,
|
||||
test_reverse
|
||||
);
|
||||
|
||||
// LazyCache command
|
||||
// Expands to a command definition for cache
|
||||
lazy_command!(
|
||||
LazyCache,
|
||||
"polars cache",
|
||||
"Caches operations in a new LazyFrame.",
|
||||
vec![Example {
|
||||
description: "Caches the result into a new LazyFrame",
|
||||
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars reverse | polars cache",
|
||||
result: None,
|
||||
}],
|
||||
cache,
|
||||
test_cache
|
||||
);
|
143
crates/nu_plugin_polars/src/dataframe/lazy/median.rs
Normal file
143
crates/nu_plugin_polars/src/dataframe/lazy/median.rs
Normal file
@ -0,0 +1,143 @@
|
||||
use crate::{
|
||||
dataframe::values::{Column, NuDataFrame, NuLazyFrame},
|
||||
values::{
|
||||
cant_convert_err, to_pipeline_data, CustomValueSupport, NuExpression, PolarsPluginObject,
|
||||
PolarsPluginType,
|
||||
},
|
||||
PolarsPlugin,
|
||||
};
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
|
||||
};
|
||||
#[derive(Clone)]
|
||||
pub struct LazyMedian;
|
||||
|
||||
impl PluginCommand for LazyMedian {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars median"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Median value from columns in a dataframe or creates expression for an aggregation"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_output_types(vec![
|
||||
(
|
||||
Type::Custom("expression".into()),
|
||||
Type::Custom("expression".into()),
|
||||
),
|
||||
(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
),
|
||||
])
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Median aggregation for a group-by",
|
||||
example: r#"[[a b]; [one 2] [one 4] [two 1]]
|
||||
| polars into-df
|
||||
| polars group-by a
|
||||
| polars agg (polars col b | polars median)"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_string("one"), Value::test_string("two")],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_float(3.0), Value::test_float(1.0)],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Median value from columns in a dataframe",
|
||||
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars median",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("a".to_string(), vec![Value::test_float(4.0)]),
|
||||
Column::new("b".to_string(), vec![Value::test_float(2.0)]),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let value = input.into_value(call.head);
|
||||
match PolarsPluginObject::try_from_value(plugin, &value)? {
|
||||
PolarsPluginObject::NuDataFrame(df) => command(plugin, engine, call, df.lazy()),
|
||||
PolarsPluginObject::NuLazyFrame(lazy) => command(plugin, engine, call, lazy),
|
||||
PolarsPluginObject::NuExpression(expr) => {
|
||||
let expr: NuExpression = expr.to_polars().median().into();
|
||||
to_pipeline_data(plugin, engine, call.head, expr)
|
||||
}
|
||||
_ => Err(cant_convert_err(
|
||||
&value,
|
||||
&[
|
||||
PolarsPluginType::NuDataFrame,
|
||||
PolarsPluginType::NuLazyFrame,
|
||||
PolarsPluginType::NuExpression,
|
||||
],
|
||||
)),
|
||||
}
|
||||
.map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
lazy: NuLazyFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let polars_lazy = lazy
|
||||
.to_polars()
|
||||
.median()
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: format!("Error in median operation: {e}"),
|
||||
msg: "".into(),
|
||||
help: None,
|
||||
span: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
let lazy = NuLazyFrame::new(lazy.from_eager, polars_lazy);
|
||||
to_pipeline_data(plugin, engine, call.head, lazy)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&LazyMedian)
|
||||
}
|
||||
}
|
57
crates/nu_plugin_polars/src/dataframe/lazy/mod.rs
Normal file
57
crates/nu_plugin_polars/src/dataframe/lazy/mod.rs
Normal file
@ -0,0 +1,57 @@
|
||||
mod aggregate;
|
||||
mod collect;
|
||||
mod explode;
|
||||
mod fetch;
|
||||
mod fill_nan;
|
||||
mod fill_null;
|
||||
mod filter;
|
||||
mod flatten;
|
||||
pub mod groupby;
|
||||
mod join;
|
||||
mod macro_commands;
|
||||
mod median;
|
||||
mod quantile;
|
||||
mod select;
|
||||
mod sort_by_expr;
|
||||
mod to_lazy;
|
||||
|
||||
use nu_plugin::PluginCommand;
|
||||
|
||||
pub use crate::dataframe::lazy::aggregate::LazyAggregate;
|
||||
pub use crate::dataframe::lazy::collect::LazyCollect;
|
||||
use crate::dataframe::lazy::fetch::LazyFetch;
|
||||
use crate::dataframe::lazy::fill_nan::LazyFillNA;
|
||||
pub use crate::dataframe::lazy::fill_null::LazyFillNull;
|
||||
use crate::dataframe::lazy::filter::LazyFilter;
|
||||
use crate::dataframe::lazy::groupby::ToLazyGroupBy;
|
||||
use crate::dataframe::lazy::join::LazyJoin;
|
||||
pub(crate) use crate::dataframe::lazy::macro_commands::*;
|
||||
use crate::dataframe::lazy::quantile::LazyQuantile;
|
||||
pub(crate) use crate::dataframe::lazy::select::LazySelect;
|
||||
use crate::dataframe::lazy::sort_by_expr::LazySortBy;
|
||||
pub use crate::dataframe::lazy::to_lazy::ToLazyFrame;
|
||||
use crate::PolarsPlugin;
|
||||
pub use explode::LazyExplode;
|
||||
pub use flatten::LazyFlatten;
|
||||
|
||||
pub(crate) fn lazy_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlugin>>> {
|
||||
vec![
|
||||
Box::new(LazyAggregate),
|
||||
Box::new(LazyCache),
|
||||
Box::new(LazyCollect),
|
||||
Box::new(LazyExplode),
|
||||
Box::new(LazyFetch),
|
||||
Box::new(LazyFillNA),
|
||||
Box::new(LazyFillNull),
|
||||
Box::new(LazyFilter),
|
||||
Box::new(LazyFlatten),
|
||||
Box::new(LazyJoin),
|
||||
Box::new(median::LazyMedian),
|
||||
Box::new(LazyReverse),
|
||||
Box::new(LazySelect),
|
||||
Box::new(LazySortBy),
|
||||
Box::new(LazyQuantile),
|
||||
Box::new(ToLazyFrame),
|
||||
Box::new(ToLazyGroupBy),
|
||||
]
|
||||
}
|
160
crates/nu_plugin_polars/src/dataframe/lazy/quantile.rs
Normal file
160
crates/nu_plugin_polars/src/dataframe/lazy/quantile.rs
Normal file
@ -0,0 +1,160 @@
|
||||
use crate::{
|
||||
dataframe::values::{Column, NuDataFrame, NuLazyFrame},
|
||||
values::{
|
||||
cant_convert_err, to_pipeline_data, CustomValueSupport, NuExpression, PolarsPluginObject,
|
||||
PolarsPluginType,
|
||||
},
|
||||
PolarsPlugin,
|
||||
};
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
use polars::prelude::{lit, QuantileInterpolOptions};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct LazyQuantile;
|
||||
|
||||
impl PluginCommand for LazyQuantile {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars quantile"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Aggregates the columns to the selected quantile."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"quantile",
|
||||
SyntaxShape::Number,
|
||||
"quantile value for quantile operation",
|
||||
)
|
||||
.input_output_types(vec![
|
||||
(
|
||||
Type::Custom("expression".into()),
|
||||
Type::Custom("expression".into()),
|
||||
),
|
||||
(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
),
|
||||
])
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "quantile value from columns in a dataframe",
|
||||
example: "[[a b]; [6 2] [1 4] [4 1]] | polars into-df | polars quantile 0.5",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("a".to_string(), vec![Value::test_float(4.0)]),
|
||||
Column::new("b".to_string(), vec![Value::test_float(2.0)]),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Quantile aggregation for a group-by",
|
||||
example: r#"[[a b]; [one 2] [one 4] [two 1]]
|
||||
| polars into-df
|
||||
| polars group-by a
|
||||
| polars agg (polars col b | polars quantile 0.5)"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_string("one"), Value::test_string("two")],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_float(4.0), Value::test_float(1.0)],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let value = input.into_value(call.head);
|
||||
let quantile: f64 = call.req(0)?;
|
||||
match PolarsPluginObject::try_from_value(plugin, &value)? {
|
||||
PolarsPluginObject::NuDataFrame(df) => {
|
||||
command(plugin, engine, call, df.lazy(), quantile)
|
||||
}
|
||||
PolarsPluginObject::NuLazyFrame(lazy) => command(plugin, engine, call, lazy, quantile),
|
||||
PolarsPluginObject::NuExpression(expr) => {
|
||||
let expr: NuExpression = expr
|
||||
.to_polars()
|
||||
.quantile(lit(quantile), QuantileInterpolOptions::default())
|
||||
.into();
|
||||
to_pipeline_data(plugin, engine, call.head, expr)
|
||||
}
|
||||
_ => Err(cant_convert_err(
|
||||
&value,
|
||||
&[
|
||||
PolarsPluginType::NuDataFrame,
|
||||
PolarsPluginType::NuLazyFrame,
|
||||
PolarsPluginType::NuExpression,
|
||||
],
|
||||
)),
|
||||
}
|
||||
.map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
lazy: NuLazyFrame,
|
||||
quantile: f64,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let lazy = NuLazyFrame::new(
|
||||
lazy.from_eager,
|
||||
lazy.to_polars()
|
||||
.quantile(lit(quantile), QuantileInterpolOptions::default())
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Dataframe Error".into(),
|
||||
msg: e.to_string(),
|
||||
help: None,
|
||||
span: None,
|
||||
inner: vec![],
|
||||
})?,
|
||||
);
|
||||
|
||||
to_pipeline_data(plugin, engine, call.head, lazy)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&LazyQuantile)
|
||||
}
|
||||
}
|
85
crates/nu_plugin_polars/src/dataframe/lazy/select.rs
Normal file
85
crates/nu_plugin_polars/src/dataframe/lazy/select.rs
Normal file
@ -0,0 +1,85 @@
|
||||
use crate::{
|
||||
dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame},
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value,
|
||||
};
|
||||
#[derive(Clone)]
|
||||
pub struct LazySelect;
|
||||
|
||||
impl PluginCommand for LazySelect {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars select"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Selects columns from lazyframe."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.rest(
|
||||
"select expressions",
|
||||
SyntaxShape::Any,
|
||||
"Expression(s) that define the column selection",
|
||||
)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Select a column from the dataframe",
|
||||
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars select a",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(6), Value::test_int(4), Value::test_int(2)],
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let vals: Vec<Value> = call.rest(0)?;
|
||||
let expr_value = Value::list(vals, call.head);
|
||||
let expressions = NuExpression::extract_exprs(plugin, expr_value)?;
|
||||
|
||||
let pipeline_value = input.into_value(call.head);
|
||||
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?;
|
||||
let lazy = NuLazyFrame::new(lazy.from_eager, lazy.to_polars().select(&expressions));
|
||||
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), nu_protocol::ShellError> {
|
||||
test_polars_plugin_command(&LazySelect)
|
||||
}
|
||||
}
|
160
crates/nu_plugin_polars/src/dataframe/lazy/sort_by_expr.rs
Normal file
160
crates/nu_plugin_polars/src/dataframe/lazy/sort_by_expr.rs
Normal file
@ -0,0 +1,160 @@
|
||||
use super::super::values::NuLazyFrame;
|
||||
use crate::{
|
||||
dataframe::values::{Column, NuDataFrame, NuExpression},
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct LazySortBy;
|
||||
|
||||
impl PluginCommand for LazySortBy {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars sort-by"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Sorts a lazy dataframe based on expression(s)."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.rest(
|
||||
"sort expression",
|
||||
SyntaxShape::Any,
|
||||
"sort expression for the dataframe",
|
||||
)
|
||||
.named(
|
||||
"reverse",
|
||||
SyntaxShape::List(Box::new(SyntaxShape::Boolean)),
|
||||
"Reverse sorting. Default is false",
|
||||
Some('r'),
|
||||
)
|
||||
.switch(
|
||||
"nulls-last",
|
||||
"nulls are shown last in the dataframe",
|
||||
Some('n'),
|
||||
)
|
||||
.switch("maintain-order", "Maintains order during sort", Some('m'))
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Sort dataframe by one column",
|
||||
example: "[[a b]; [6 2] [1 4] [4 1]] | polars into-df | polars sort-by a",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(4), Value::test_int(6)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(4), Value::test_int(1), Value::test_int(2)],
|
||||
),
|
||||
], None)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Sort column using two columns",
|
||||
example:
|
||||
"[[a b]; [6 2] [1 1] [1 4] [2 4]] | polars into-df | polars sort-by [a b] -r [false true]",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![
|
||||
Value::test_int(1),
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(6),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![
|
||||
Value::test_int(4),
|
||||
Value::test_int(1),
|
||||
Value::test_int(4),
|
||||
Value::test_int(2),
|
||||
],
|
||||
),
|
||||
], None)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let vals: Vec<Value> = call.rest(0)?;
|
||||
let expr_value = Value::list(vals, call.head);
|
||||
let expressions = NuExpression::extract_exprs(plugin, expr_value)?;
|
||||
let nulls_last = call.has_flag("nulls-last")?;
|
||||
let maintain_order = call.has_flag("maintain-order")?;
|
||||
|
||||
let reverse: Option<Vec<bool>> = call.get_flag("reverse")?;
|
||||
let reverse = match reverse {
|
||||
Some(list) => {
|
||||
if expressions.len() != list.len() {
|
||||
let span = call
|
||||
.get_flag::<Value>("reverse")?
|
||||
.expect("already checked and it exists")
|
||||
.span();
|
||||
Err(ShellError::GenericError {
|
||||
error: "Incorrect list size".into(),
|
||||
msg: "Size doesn't match expression list".into(),
|
||||
span: Some(span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?
|
||||
} else {
|
||||
list
|
||||
}
|
||||
}
|
||||
None => expressions.iter().map(|_| false).collect::<Vec<bool>>(),
|
||||
};
|
||||
|
||||
let pipeline_value = input.into_value(call.head);
|
||||
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?;
|
||||
let lazy = NuLazyFrame::new(
|
||||
lazy.from_eager,
|
||||
lazy.to_polars()
|
||||
.sort_by_exprs(&expressions, reverse, nulls_last, maintain_order),
|
||||
);
|
||||
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&LazySortBy)
|
||||
}
|
||||
}
|
61
crates/nu_plugin_polars/src/dataframe/lazy/to_lazy.rs
Normal file
61
crates/nu_plugin_polars/src/dataframe/lazy/to_lazy.rs
Normal file
@ -0,0 +1,61 @@
|
||||
use crate::{dataframe::values::NuSchema, values::CustomValueSupport, Cacheable, PolarsPlugin};
|
||||
|
||||
use super::super::values::{NuDataFrame, NuLazyFrame};
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{Category, Example, LabeledError, PipelineData, Signature, SyntaxShape, Type};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ToLazyFrame;
|
||||
|
||||
impl PluginCommand for ToLazyFrame {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars into-lazy"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Converts a dataframe into a lazy dataframe."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.named(
|
||||
"schema",
|
||||
SyntaxShape::Record(vec![]),
|
||||
r#"Polars Schema in format [{name: str}]. CSV, JSON, and JSONL files"#,
|
||||
Some('s'),
|
||||
)
|
||||
.input_output_type(Type::Any, Type::Custom("dataframe".into()))
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Takes a dictionary and creates a lazy dataframe",
|
||||
example: "[[a b];[1 2] [3 4]] | polars into-lazy",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let maybe_schema = call
|
||||
.get_flag("schema")?
|
||||
.map(|schema| NuSchema::try_from(&schema))
|
||||
.transpose()?;
|
||||
|
||||
let df = NuDataFrame::try_from_iter(plugin, input.into_iter(), maybe_schema)?;
|
||||
let lazy = NuLazyFrame::from_dataframe(df);
|
||||
Ok(PipelineData::Value(
|
||||
lazy.cache(plugin, engine)?.into_value(call.head),
|
||||
None,
|
||||
))
|
||||
}
|
||||
}
|
19
crates/nu_plugin_polars/src/dataframe/mod.rs
Normal file
19
crates/nu_plugin_polars/src/dataframe/mod.rs
Normal file
@ -0,0 +1,19 @@
|
||||
use nu_protocol::{ShellError, Span};
|
||||
|
||||
pub mod eager;
|
||||
pub mod expressions;
|
||||
pub mod lazy;
|
||||
pub mod series;
|
||||
pub mod stub;
|
||||
mod utils;
|
||||
pub mod values;
|
||||
|
||||
pub fn missing_flag_error(flag: &str, span: Span) -> ShellError {
|
||||
ShellError::GenericError {
|
||||
error: format!("Missing flag: {flag}"),
|
||||
msg: "".into(),
|
||||
span: Some(span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
}
|
||||
}
|
119
crates/nu_plugin_polars/src/dataframe/series/all_false.rs
Normal file
119
crates/nu_plugin_polars/src/dataframe/series/all_false.rs
Normal file
@ -0,0 +1,119 @@
|
||||
use crate::{
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct AllFalse;
|
||||
|
||||
impl PluginCommand for AllFalse {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars all-false"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Returns true if all values are false."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Returns true if all values are false",
|
||||
example: "[false false false] | polars into-df | polars all-false",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"all_false".to_string(),
|
||||
vec![Value::test_bool(true)],
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Checks the result from a comparison",
|
||||
example: r#"let s = ([5 6 2 10] | polars into-df);
|
||||
let res = ($s > 9);
|
||||
$res | polars all-false"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"all_false".to_string(),
|
||||
vec![Value::test_bool(false)],
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
|
||||
let series = df.as_series(call.head)?;
|
||||
let bool = series.bool().map_err(|_| ShellError::GenericError {
|
||||
error: "Error converting to bool".into(),
|
||||
msg: "all-false only works with series of type bool".into(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let value = Value::bool(!bool.any(), call.head);
|
||||
|
||||
let df = NuDataFrame::try_from_columns(
|
||||
vec![Column::new("all_false".to_string(), vec![value])],
|
||||
None,
|
||||
)?;
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&AllFalse)
|
||||
}
|
||||
}
|
119
crates/nu_plugin_polars/src/dataframe/series/all_true.rs
Normal file
119
crates/nu_plugin_polars/src/dataframe/series/all_true.rs
Normal file
@ -0,0 +1,119 @@
|
||||
use crate::{
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct AllTrue;
|
||||
|
||||
impl PluginCommand for AllTrue {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars all-true"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Returns true if all values are true."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Returns true if all values are true",
|
||||
example: "[true true true] | polars into-df | polars all-true",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"all_true".to_string(),
|
||||
vec![Value::test_bool(true)],
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Checks the result from a comparison",
|
||||
example: r#"let s = ([5 6 2 8] | polars into-df);
|
||||
let res = ($s > 9);
|
||||
$res | polars all-true"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"all_true".to_string(),
|
||||
vec![Value::test_bool(false)],
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
|
||||
let series = df.as_series(call.head)?;
|
||||
let bool = series.bool().map_err(|_| ShellError::GenericError {
|
||||
error: "Error converting to bool".into(),
|
||||
msg: "all-false only works with series of type bool".into(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let value = Value::bool(bool.all(), call.head);
|
||||
|
||||
let df = NuDataFrame::try_from_columns(
|
||||
vec![Column::new("all_true".to_string(), vec![value])],
|
||||
None,
|
||||
)?;
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&AllTrue)
|
||||
}
|
||||
}
|
96
crates/nu_plugin_polars/src/dataframe/series/arg_max.rs
Normal file
96
crates/nu_plugin_polars/src/dataframe/series/arg_max.rs
Normal file
@ -0,0 +1,96 @@
|
||||
use crate::{
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
|
||||
};
|
||||
use polars::prelude::{ArgAgg, IntoSeries, NewChunkedArray, UInt32Chunked};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ArgMax;
|
||||
|
||||
impl PluginCommand for ArgMax {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars arg-max"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Return index for max value in series."
|
||||
}
|
||||
|
||||
fn search_terms(&self) -> Vec<&str> {
|
||||
vec!["argmax", "maximum", "most", "largest", "greatest"]
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Returns index for max value",
|
||||
example: "[1 3 2] | polars into-df | polars arg-max",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new("arg_max".to_string(), vec![Value::test_int(1)])],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
let series = df.as_series(call.head)?;
|
||||
|
||||
let res = series.arg_max();
|
||||
let chunked = match res {
|
||||
Some(index) => UInt32Chunked::from_slice("arg_max", &[index as u32]),
|
||||
None => UInt32Chunked::from_slice("arg_max", &[]),
|
||||
};
|
||||
|
||||
let res = chunked.into_series();
|
||||
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&ArgMax)
|
||||
}
|
||||
}
|
96
crates/nu_plugin_polars/src/dataframe/series/arg_min.rs
Normal file
96
crates/nu_plugin_polars/src/dataframe/series/arg_min.rs
Normal file
@ -0,0 +1,96 @@
|
||||
use crate::{
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
|
||||
};
|
||||
use polars::prelude::{ArgAgg, IntoSeries, NewChunkedArray, UInt32Chunked};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ArgMin;
|
||||
|
||||
impl PluginCommand for ArgMin {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars arg-min"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Return index for min value in series."
|
||||
}
|
||||
|
||||
fn search_terms(&self) -> Vec<&str> {
|
||||
vec!["argmin", "minimum", "least", "smallest", "lowest"]
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Returns index for min value",
|
||||
example: "[1 3 2] | polars into-df | polars arg-min",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new("arg_min".to_string(), vec![Value::test_int(0)])],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
let series = df.as_series(call.head)?;
|
||||
|
||||
let res = series.arg_min();
|
||||
let chunked = match res {
|
||||
Some(index) => UInt32Chunked::from_slice("arg_min", &[index as u32]),
|
||||
None => UInt32Chunked::from_slice("arg_min", &[]),
|
||||
};
|
||||
|
||||
let res = chunked.into_series();
|
||||
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&ArgMin)
|
||||
}
|
||||
}
|
159
crates/nu_plugin_polars/src/dataframe/series/cumulative.rs
Normal file
159
crates/nu_plugin_polars/src/dataframe/series/cumulative.rs
Normal file
@ -0,0 +1,159 @@
|
||||
use crate::{
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Spanned,
|
||||
SyntaxShape, Type, Value,
|
||||
};
|
||||
use polars::prelude::{DataType, IntoSeries};
|
||||
use polars_ops::prelude::{cum_max, cum_min, cum_sum};
|
||||
|
||||
enum CumulativeType {
|
||||
Min,
|
||||
Max,
|
||||
Sum,
|
||||
}
|
||||
|
||||
impl CumulativeType {
|
||||
fn from_str(roll_type: &str, span: Span) -> Result<Self, ShellError> {
|
||||
match roll_type {
|
||||
"min" => Ok(Self::Min),
|
||||
"max" => Ok(Self::Max),
|
||||
"sum" => Ok(Self::Sum),
|
||||
_ => Err(ShellError::GenericError {
|
||||
error: "Wrong operation".into(),
|
||||
msg: "Operation not valid for cumulative".into(),
|
||||
span: Some(span),
|
||||
help: Some("Allowed values: max, min, sum".into()),
|
||||
inner: vec![],
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
fn to_str(&self) -> &'static str {
|
||||
match self {
|
||||
CumulativeType::Min => "cumulative_min",
|
||||
CumulativeType::Max => "cumulative_max",
|
||||
CumulativeType::Sum => "cumulative_sum",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Cumulative;
|
||||
|
||||
impl PluginCommand for Cumulative {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars cumulative"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Cumulative calculation for a series."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required("type", SyntaxShape::String, "rolling operation")
|
||||
.switch("reverse", "Reverse cumulative calculation", Some('r'))
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Cumulative sum for a series",
|
||||
example: "[1 2 3 4 5] | polars into-df | polars cumulative sum",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0_cumulative_sum".to_string(),
|
||||
vec![
|
||||
Value::test_int(1),
|
||||
Value::test_int(3),
|
||||
Value::test_int(6),
|
||||
Value::test_int(10),
|
||||
Value::test_int(15),
|
||||
],
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let cum_type: Spanned<String> = call.req(0)?;
|
||||
let reverse = call.has_flag("reverse")?;
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
let series = df.as_series(call.head)?;
|
||||
|
||||
if let DataType::Object(..) = series.dtype() {
|
||||
return Err(ShellError::GenericError {
|
||||
error: "Found object series".into(),
|
||||
msg: "Series of type object cannot be used for cumulative operation".into(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
});
|
||||
}
|
||||
|
||||
let cum_type = CumulativeType::from_str(&cum_type.item, cum_type.span)?;
|
||||
let mut res = match cum_type {
|
||||
CumulativeType::Max => cum_max(&series, reverse),
|
||||
CumulativeType::Min => cum_min(&series, reverse),
|
||||
CumulativeType::Sum => cum_sum(&series, reverse),
|
||||
}
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error creating cumulative".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let name = format!("{}_{}", series.name(), cum_type.to_str());
|
||||
res.rename(&name);
|
||||
|
||||
let df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?;
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&Cumulative)
|
||||
}
|
||||
}
|
101
crates/nu_plugin_polars/src/dataframe/series/date/as_date.rs
Normal file
101
crates/nu_plugin_polars/src/dataframe/series/date/as_date.rs
Normal file
@ -0,0 +1,101 @@
|
||||
use crate::{values::to_pipeline_data, PolarsPlugin};
|
||||
|
||||
use super::super::super::values::NuDataFrame;
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, SyntaxShape, Type,
|
||||
};
|
||||
use polars::prelude::{IntoSeries, StringMethods};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct AsDate;
|
||||
|
||||
impl PluginCommand for AsDate {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars as-date"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
r#"Converts string to date."#
|
||||
}
|
||||
|
||||
fn extra_usage(&self) -> &str {
|
||||
r#"Format example:
|
||||
"%Y-%m-%d" => 2021-12-31
|
||||
"%d-%m-%Y" => 31-12-2021
|
||||
"%Y%m%d" => 2021319 (2021-03-19)"#
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required("format", SyntaxShape::String, "formatting date string")
|
||||
.switch("not-exact", "the format string may be contained in the date (e.g. foo-2021-01-01-bar could match 2021-01-01)", Some('n'))
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Converts string to date",
|
||||
example: r#"["2021-12-30" "2021-12-31"] | polars into-df | polars as-date "%Y-%m-%d""#,
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let format: String = call.req(0)?;
|
||||
let not_exact = call.has_flag("not-exact")?;
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
let series = df.as_series(call.head)?;
|
||||
let casted = series.str().map_err(|e| ShellError::GenericError {
|
||||
error: "Error casting to string".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let res = if not_exact {
|
||||
casted.as_date_not_exact(Some(format.as_str()))
|
||||
} else {
|
||||
casted.as_date(Some(format.as_str()), false)
|
||||
};
|
||||
|
||||
let mut res = res
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error creating datetime".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?
|
||||
.into_series();
|
||||
|
||||
res.rename("date");
|
||||
|
||||
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
198
crates/nu_plugin_polars/src/dataframe/series/date/as_datetime.rs
Normal file
198
crates/nu_plugin_polars/src/dataframe/series/date/as_datetime.rs
Normal file
@ -0,0 +1,198 @@
|
||||
use crate::{
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::super::values::{Column, NuDataFrame};
|
||||
|
||||
use chrono::DateTime;
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
use polars::prelude::{IntoSeries, StringMethods, TimeUnit};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct AsDateTime;
|
||||
|
||||
impl PluginCommand for AsDateTime {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars as-datetime"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
r#"Converts string to datetime."#
|
||||
}
|
||||
|
||||
fn extra_usage(&self) -> &str {
|
||||
r#"Format example:
|
||||
"%y/%m/%d %H:%M:%S" => 21/12/31 12:54:98
|
||||
"%y-%m-%d %H:%M:%S" => 2021-12-31 24:58:01
|
||||
"%y/%m/%d %H:%M:%S" => 21/12/31 24:58:01
|
||||
"%y%m%d %H:%M:%S" => 210319 23:58:50
|
||||
"%Y/%m/%d %H:%M:%S" => 2021/12/31 12:54:98
|
||||
"%Y-%m-%d %H:%M:%S" => 2021-12-31 24:58:01
|
||||
"%Y/%m/%d %H:%M:%S" => 2021/12/31 24:58:01
|
||||
"%Y%m%d %H:%M:%S" => 20210319 23:58:50
|
||||
"%FT%H:%M:%S" => 2019-04-18T02:45:55
|
||||
"%FT%H:%M:%S.%6f" => microseconds
|
||||
"%FT%H:%M:%S.%9f" => nanoseconds"#
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required("format", SyntaxShape::String, "formatting date time string")
|
||||
.switch("not-exact", "the format string may be contained in the date (e.g. foo-2021-01-01-bar could match 2021-01-01)", Some('n'))
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Converts string to datetime",
|
||||
example: r#"["2021-12-30 00:00:00" "2021-12-31 00:00:00"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S""#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"datetime".to_string(),
|
||||
vec![
|
||||
Value::date(
|
||||
DateTime::parse_from_str(
|
||||
"2021-12-30 00:00:00 +0000",
|
||||
"%Y-%m-%d %H:%M:%S %z",
|
||||
)
|
||||
.expect("date calculation should not fail in test"),
|
||||
Span::test_data(),
|
||||
),
|
||||
Value::date(
|
||||
DateTime::parse_from_str(
|
||||
"2021-12-31 00:00:00 +0000",
|
||||
"%Y-%m-%d %H:%M:%S %z",
|
||||
)
|
||||
.expect("date calculation should not fail in test"),
|
||||
Span::test_data(),
|
||||
),
|
||||
],
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Converts string to datetime with high resolutions",
|
||||
example: r#"["2021-12-30 00:00:00.123456789" "2021-12-31 00:00:00.123456789"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S.%9f""#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"datetime".to_string(),
|
||||
vec![
|
||||
Value::date(
|
||||
DateTime::parse_from_str(
|
||||
"2021-12-30 00:00:00.123456789 +0000",
|
||||
"%Y-%m-%d %H:%M:%S.%9f %z",
|
||||
)
|
||||
.expect("date calculation should not fail in test"),
|
||||
Span::test_data(),
|
||||
),
|
||||
Value::date(
|
||||
DateTime::parse_from_str(
|
||||
"2021-12-31 00:00:00.123456789 +0000",
|
||||
"%Y-%m-%d %H:%M:%S.%9f %z",
|
||||
)
|
||||
.expect("date calculation should not fail in test"),
|
||||
Span::test_data(),
|
||||
),
|
||||
],
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let format: String = call.req(0)?;
|
||||
let not_exact = call.has_flag("not-exact")?;
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
let series = df.as_series(call.head)?;
|
||||
let casted = series.str().map_err(|e| ShellError::GenericError {
|
||||
error: "Error casting to string".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let res = if not_exact {
|
||||
casted.as_datetime_not_exact(
|
||||
Some(format.as_str()),
|
||||
TimeUnit::Nanoseconds,
|
||||
false,
|
||||
None,
|
||||
&Default::default(),
|
||||
)
|
||||
} else {
|
||||
casted.as_datetime(
|
||||
Some(format.as_str()),
|
||||
TimeUnit::Nanoseconds,
|
||||
false,
|
||||
false,
|
||||
None,
|
||||
&Default::default(),
|
||||
)
|
||||
};
|
||||
|
||||
let mut res = res
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error creating datetime".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?
|
||||
.into_series();
|
||||
|
||||
res.rename("datetime");
|
||||
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&AsDateTime)
|
||||
}
|
||||
}
|
105
crates/nu_plugin_polars/src/dataframe/series/date/get_day.rs
Normal file
105
crates/nu_plugin_polars/src/dataframe/series/date/get_day.rs
Normal file
@ -0,0 +1,105 @@
|
||||
use crate::{
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::super::values::NuDataFrame;
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type,
|
||||
};
|
||||
use polars::{
|
||||
prelude::{DatetimeMethods, IntoSeries, NamedFrom},
|
||||
series::Series,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct GetDay;
|
||||
|
||||
impl PluginCommand for GetDay {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars get-day"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Gets day from date."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Returns day from a date",
|
||||
example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC');
|
||||
let df = ([$dt $dt] | polars into-df);
|
||||
$df | polars get-day"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_series(Series::new("0", &[4i8, 4]), Span::test_data())
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
|
||||
fn extra_usage(&self) -> &str {
|
||||
""
|
||||
}
|
||||
|
||||
fn search_terms(&self) -> Vec<&str> {
|
||||
vec![]
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
let series = df.as_series(call.head)?;
|
||||
|
||||
let casted = series.datetime().map_err(|e| ShellError::GenericError {
|
||||
error: "Error casting to datetime type".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let res = casted.day().into_series();
|
||||
|
||||
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&GetDay)
|
||||
}
|
||||
}
|
@ -0,0 +1,97 @@
|
||||
use crate::{
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::super::values::NuDataFrame;
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type,
|
||||
};
|
||||
use polars::{
|
||||
prelude::{DatetimeMethods, IntoSeries, NamedFrom},
|
||||
series::Series,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct GetHour;
|
||||
|
||||
impl PluginCommand for GetHour {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars get-hour"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Gets hour from date."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Returns hour from a date",
|
||||
example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC');
|
||||
let df = ([$dt $dt] | polars into-df);
|
||||
$df | polars get-hour"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_series(Series::new("0", &[16i8, 16]), Span::test_data())
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
let series = df.as_series(call.head)?;
|
||||
|
||||
let casted = series.datetime().map_err(|e| ShellError::GenericError {
|
||||
error: "Error casting to datetime type".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let res = casted.hour().into_series();
|
||||
|
||||
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&GetHour)
|
||||
}
|
||||
}
|
@ -0,0 +1,95 @@
|
||||
use crate::{
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
use polars::{prelude::NamedFrom, series::Series};
|
||||
|
||||
use super::super::super::values::NuDataFrame;
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type,
|
||||
};
|
||||
use polars::prelude::{DatetimeMethods, IntoSeries};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct GetMinute;
|
||||
|
||||
impl PluginCommand for GetMinute {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars get-minute"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Gets minute from date."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Returns minute from a date",
|
||||
example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC');
|
||||
let df = ([$dt $dt] | polars into-df);
|
||||
$df | polars get-minute"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_series(Series::new("0", &[39i8, 39]), Span::test_data())
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
let series = df.as_series(call.head)?;
|
||||
|
||||
let casted = series.datetime().map_err(|e| ShellError::GenericError {
|
||||
error: "Error casting to datetime type".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let res = casted.minute().into_series();
|
||||
|
||||
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&GetMinute)
|
||||
}
|
||||
}
|
@ -0,0 +1,97 @@
|
||||
use crate::{
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::super::values::NuDataFrame;
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type,
|
||||
};
|
||||
use polars::{
|
||||
prelude::{DatetimeMethods, IntoSeries, NamedFrom},
|
||||
series::Series,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct GetMonth;
|
||||
|
||||
impl PluginCommand for GetMonth {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars get-month"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Gets month from date."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Returns month from a date",
|
||||
example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC');
|
||||
let df = ([$dt $dt] | polars into-df);
|
||||
$df | polars get-month"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_series(Series::new("0", &[8i8, 8]), Span::test_data())
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
let series = df.as_series(call.head)?;
|
||||
|
||||
let casted = series.datetime().map_err(|e| ShellError::GenericError {
|
||||
error: "Error casting to datetime type".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let res = casted.month().into_series();
|
||||
|
||||
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&GetMonth)
|
||||
}
|
||||
}
|
@ -0,0 +1,97 @@
|
||||
use crate::{
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::super::values::NuDataFrame;
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type,
|
||||
};
|
||||
use polars::{
|
||||
prelude::{DatetimeMethods, IntoSeries, NamedFrom},
|
||||
series::Series,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct GetNanosecond;
|
||||
|
||||
impl PluginCommand for GetNanosecond {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars get-nanosecond"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Gets nanosecond from date."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Returns nanosecond from a date",
|
||||
example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC');
|
||||
let df = ([$dt $dt] | polars into-df);
|
||||
$df | polars get-nanosecond"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_series(Series::new("0", &[0i32, 0]), Span::test_data())
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
let series = df.as_series(call.head)?;
|
||||
|
||||
let casted = series.datetime().map_err(|e| ShellError::GenericError {
|
||||
error: "Error casting to datetime type".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let res = casted.nanosecond().into_series();
|
||||
|
||||
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&GetNanosecond)
|
||||
}
|
||||
}
|
@ -0,0 +1,97 @@
|
||||
use crate::{
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::super::values::NuDataFrame;
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type,
|
||||
};
|
||||
use polars::{
|
||||
prelude::{DatetimeMethods, IntoSeries, NamedFrom},
|
||||
series::Series,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct GetOrdinal;
|
||||
|
||||
impl PluginCommand for GetOrdinal {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars get-ordinal"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Gets ordinal from date."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Returns ordinal from a date",
|
||||
example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC');
|
||||
let df = ([$dt $dt] | polars into-df);
|
||||
$df | polars get-ordinal"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_series(Series::new("0", &[217i16, 217]), Span::test_data())
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
let series = df.as_series(call.head)?;
|
||||
|
||||
let casted = series.datetime().map_err(|e| ShellError::GenericError {
|
||||
error: "Error casting to datetime type".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let res = casted.ordinal().into_series();
|
||||
|
||||
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&GetOrdinal)
|
||||
}
|
||||
}
|
@ -0,0 +1,97 @@
|
||||
use crate::{
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::super::values::NuDataFrame;
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type,
|
||||
};
|
||||
use polars::{
|
||||
prelude::{DatetimeMethods, IntoSeries, NamedFrom},
|
||||
series::Series,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct GetSecond;
|
||||
|
||||
impl PluginCommand for GetSecond {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars get-second"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Gets second from date."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Returns second from a date",
|
||||
example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC');
|
||||
let df = ([$dt $dt] | polars into-df);
|
||||
$df | polars get-second"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_series(Series::new("0", &[18i8, 18]), Span::test_data())
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
let series = df.as_series(call.head)?;
|
||||
|
||||
let casted = series.datetime().map_err(|e| ShellError::GenericError {
|
||||
error: "Error casting to datetime type".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let res = casted.second().into_series();
|
||||
|
||||
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&GetSecond)
|
||||
}
|
||||
}
|
@ -0,0 +1,97 @@
|
||||
use crate::{
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::super::values::NuDataFrame;
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type,
|
||||
};
|
||||
use polars::{
|
||||
prelude::{DatetimeMethods, IntoSeries, NamedFrom},
|
||||
series::Series,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct GetWeek;
|
||||
|
||||
impl PluginCommand for GetWeek {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars get-week"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Gets week from date."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Returns week from a date",
|
||||
example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC');
|
||||
let df = ([$dt $dt] | polars into-df);
|
||||
$df | polars get-week"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_series(Series::new("0", &[32i8, 32]), Span::test_data())
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
let series = df.as_series(call.head)?;
|
||||
|
||||
let casted = series.datetime().map_err(|e| ShellError::GenericError {
|
||||
error: "Error casting to datetime type".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let res = casted.week().into_series();
|
||||
|
||||
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&GetWeek)
|
||||
}
|
||||
}
|
@ -0,0 +1,97 @@
|
||||
use crate::{
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::super::values::NuDataFrame;
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type,
|
||||
};
|
||||
use polars::{
|
||||
prelude::{DatetimeMethods, IntoSeries, NamedFrom},
|
||||
series::Series,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct GetWeekDay;
|
||||
|
||||
impl PluginCommand for GetWeekDay {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars get-weekday"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Gets weekday from date."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Returns weekday from a date",
|
||||
example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC');
|
||||
let df = ([$dt $dt] | polars into-df);
|
||||
$df | polars get-weekday"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_series(Series::new("0", &[2i8, 2]), Span::test_data())
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
let series = df.as_series(call.head)?;
|
||||
|
||||
let casted = series.datetime().map_err(|e| ShellError::GenericError {
|
||||
error: "Error casting to datetime type".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let res = casted.weekday().into_series();
|
||||
|
||||
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&GetWeekDay)
|
||||
}
|
||||
}
|
@ -0,0 +1,97 @@
|
||||
use crate::{
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::super::values::NuDataFrame;
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type,
|
||||
};
|
||||
use polars::{
|
||||
prelude::{DatetimeMethods, IntoSeries, NamedFrom},
|
||||
series::Series,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct GetYear;
|
||||
|
||||
impl PluginCommand for GetYear {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars get-year"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Gets year from date."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Returns year from a date",
|
||||
example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC');
|
||||
let df = ([$dt $dt] | polars into-df);
|
||||
$df | polars get-year"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_series(Series::new("0", &[2020i32, 2020]), Span::test_data())
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
let series = df.as_series(call.head)?;
|
||||
|
||||
let casted = series.datetime().map_err(|e| ShellError::GenericError {
|
||||
error: "Error casting to datetime type".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let res = casted.year().into_series();
|
||||
|
||||
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&GetYear)
|
||||
}
|
||||
}
|
25
crates/nu_plugin_polars/src/dataframe/series/date/mod.rs
Normal file
25
crates/nu_plugin_polars/src/dataframe/series/date/mod.rs
Normal file
@ -0,0 +1,25 @@
|
||||
mod as_date;
|
||||
mod as_datetime;
|
||||
mod get_day;
|
||||
mod get_hour;
|
||||
mod get_minute;
|
||||
mod get_month;
|
||||
mod get_nanosecond;
|
||||
mod get_ordinal;
|
||||
mod get_second;
|
||||
mod get_week;
|
||||
mod get_weekday;
|
||||
mod get_year;
|
||||
|
||||
pub use as_date::AsDate;
|
||||
pub use as_datetime::AsDateTime;
|
||||
pub use get_day::GetDay;
|
||||
pub use get_hour::GetHour;
|
||||
pub use get_minute::GetMinute;
|
||||
pub use get_month::GetMonth;
|
||||
pub use get_nanosecond::GetNanosecond;
|
||||
pub use get_ordinal::GetOrdinal;
|
||||
pub use get_second::GetSecond;
|
||||
pub use get_week::GetWeek;
|
||||
pub use get_weekday::GetWeekDay;
|
||||
pub use get_year::GetYear;
|
140
crates/nu_plugin_polars/src/dataframe/series/indexes/arg_sort.rs
Normal file
140
crates/nu_plugin_polars/src/dataframe/series/indexes/arg_sort.rs
Normal file
@ -0,0 +1,140 @@
|
||||
use crate::{
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::super::values::{Column, NuDataFrame};
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
|
||||
};
|
||||
use polars::prelude::{IntoSeries, SortOptions};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ArgSort;
|
||||
|
||||
impl PluginCommand for ArgSort {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars arg-sort"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Returns indexes for a sorted series."
|
||||
}
|
||||
|
||||
fn search_terms(&self) -> Vec<&str> {
|
||||
vec!["argsort", "order", "arrange"]
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.switch("reverse", "reverse order", Some('r'))
|
||||
.switch("nulls-last", "nulls ordered last", Some('n'))
|
||||
.switch(
|
||||
"maintain-order",
|
||||
"maintain order on sorted items",
|
||||
Some('m'),
|
||||
)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Returns indexes for a sorted series",
|
||||
example: "[1 2 2 3 3] | polars into-df | polars arg-sort",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"arg_sort".to_string(),
|
||||
vec![
|
||||
Value::test_int(0),
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(3),
|
||||
Value::test_int(4),
|
||||
],
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Returns indexes for a sorted series",
|
||||
example: "[1 2 2 3 3] | polars into-df | polars arg-sort --reverse",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"arg_sort".to_string(),
|
||||
vec![
|
||||
Value::test_int(3),
|
||||
Value::test_int(4),
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(0),
|
||||
],
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
|
||||
let sort_options = SortOptions {
|
||||
descending: call.has_flag("reverse")?,
|
||||
nulls_last: call.has_flag("nulls-last")?,
|
||||
multithreaded: true,
|
||||
maintain_order: call.has_flag("maintain-order")?,
|
||||
};
|
||||
|
||||
let mut res = df
|
||||
.as_series(call.head)?
|
||||
.arg_sort(sort_options)
|
||||
.into_series();
|
||||
res.rename("arg_sort");
|
||||
|
||||
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&ArgSort)
|
||||
}
|
||||
}
|
126
crates/nu_plugin_polars/src/dataframe/series/indexes/arg_true.rs
Normal file
126
crates/nu_plugin_polars/src/dataframe/series/indexes/arg_true.rs
Normal file
@ -0,0 +1,126 @@
|
||||
use crate::{
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::super::values::{Column, NuDataFrame};
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
|
||||
};
|
||||
use polars::prelude::{arg_where, col, IntoLazy};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ArgTrue;
|
||||
|
||||
impl PluginCommand for ArgTrue {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars arg-true"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Returns indexes where values are true."
|
||||
}
|
||||
|
||||
fn search_terms(&self) -> Vec<&str> {
|
||||
vec!["argtrue", "truth", "boolean-true"]
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Returns indexes where values are true",
|
||||
example: "[false true false] | polars into-df | polars arg-true",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"arg_true".to_string(),
|
||||
vec![Value::test_int(1)],
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
let columns = df.as_ref().get_column_names();
|
||||
if columns.len() > 1 {
|
||||
return Err(ShellError::GenericError {
|
||||
error: "Error using as series".into(),
|
||||
msg: "dataframe has more than one column".into(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
});
|
||||
}
|
||||
|
||||
match columns.first() {
|
||||
Some(column) => {
|
||||
let expression = arg_where(col(column).eq(true)).alias("arg_true");
|
||||
let res: NuDataFrame = df
|
||||
.as_ref()
|
||||
.clone()
|
||||
.lazy()
|
||||
.select(&[expression])
|
||||
.collect()
|
||||
.map_err(|err| ShellError::GenericError {
|
||||
error: "Error creating index column".into(),
|
||||
msg: err.to_string(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?
|
||||
.into();
|
||||
|
||||
to_pipeline_data(plugin, engine, call.head, res)
|
||||
}
|
||||
_ => Err(ShellError::UnsupportedInput {
|
||||
msg: "Expected the dataframe to have a column".to_string(),
|
||||
input: "".to_string(),
|
||||
msg_span: call.head,
|
||||
input_span: call.head,
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&ArgTrue)
|
||||
}
|
||||
}
|
@ -0,0 +1,104 @@
|
||||
use crate::{
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::super::values::{Column, NuDataFrame};
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
|
||||
};
|
||||
use polars::prelude::IntoSeries;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ArgUnique;
|
||||
|
||||
impl PluginCommand for ArgUnique {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars arg-unique"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Returns indexes for unique values."
|
||||
}
|
||||
|
||||
fn search_terms(&self) -> Vec<&str> {
|
||||
vec!["argunique", "distinct", "noduplicate", "unrepeated"]
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Returns indexes for unique values",
|
||||
example: "[1 2 2 3 3] | polars into-df | polars arg-unique",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"arg_unique".to_string(),
|
||||
vec![Value::test_int(0), Value::test_int(1), Value::test_int(3)],
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
|
||||
let mut res = df
|
||||
.as_series(call.head)?
|
||||
.arg_unique()
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error extracting unique values".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?
|
||||
.into_series();
|
||||
res.rename("arg_unique");
|
||||
|
||||
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&ArgUnique)
|
||||
}
|
||||
}
|
@ -0,0 +1,9 @@
|
||||
mod arg_sort;
|
||||
mod arg_true;
|
||||
mod arg_unique;
|
||||
mod set_with_idx;
|
||||
|
||||
pub use arg_sort::ArgSort;
|
||||
pub use arg_true::ArgTrue;
|
||||
pub use arg_unique::ArgUnique;
|
||||
pub use set_with_idx::SetWithIndex;
|
@ -0,0 +1,227 @@
|
||||
use crate::{
|
||||
missing_flag_error,
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::super::values::{Column, NuDataFrame};
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
use polars::prelude::{ChunkSet, DataType, IntoSeries};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct SetWithIndex;
|
||||
|
||||
impl PluginCommand for SetWithIndex {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars set-with-idx"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Sets value in the given index."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required("value", SyntaxShape::Any, "value to be inserted in series")
|
||||
.required_named(
|
||||
"indices",
|
||||
SyntaxShape::Any,
|
||||
"list of indices indicating where to set the value",
|
||||
Some('i'),
|
||||
)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Set value in selected rows from series",
|
||||
example: r#"let series = ([4 1 5 2 4 3] | polars into-df);
|
||||
let indices = ([0 2] | polars into-df);
|
||||
$series | polars set-with-idx 6 --indices $indices"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
Value::test_int(6),
|
||||
Value::test_int(1),
|
||||
Value::test_int(6),
|
||||
Value::test_int(2),
|
||||
Value::test_int(4),
|
||||
Value::test_int(3),
|
||||
],
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let value: Value = call.req(0)?;
|
||||
|
||||
let indices_value: Value = call
|
||||
.get_flag("indices")?
|
||||
.ok_or_else(|| missing_flag_error("indices", call.head))?;
|
||||
|
||||
let indices_span = indices_value.span();
|
||||
let indices = NuDataFrame::try_from_value_coerce(plugin, &indices_value, call.head)?
|
||||
.as_series(indices_span)?;
|
||||
|
||||
let casted = match indices.dtype() {
|
||||
DataType::UInt32 | DataType::UInt64 | DataType::Int32 | DataType::Int64 => indices
|
||||
.as_ref()
|
||||
.cast(&DataType::UInt32)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error casting indices".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(indices_span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
}),
|
||||
_ => Err(ShellError::GenericError {
|
||||
error: "Incorrect type".into(),
|
||||
msg: "Series with incorrect type".into(),
|
||||
span: Some(indices_span),
|
||||
help: Some("Consider using a Series with type int type".into()),
|
||||
inner: vec![],
|
||||
}),
|
||||
}?;
|
||||
|
||||
let indices = casted
|
||||
.u32()
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error casting indices".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(indices_span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?
|
||||
.into_iter()
|
||||
.flatten();
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
let series = df.as_series(call.head)?;
|
||||
|
||||
let span = value.span();
|
||||
let res = match value {
|
||||
Value::Int { val, .. } => {
|
||||
let chunked = series.i64().map_err(|e| ShellError::GenericError {
|
||||
error: "Error casting to i64".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let res = chunked.scatter_single(indices, Some(val)).map_err(|e| {
|
||||
ShellError::GenericError {
|
||||
error: "Error setting value".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
}
|
||||
})?;
|
||||
|
||||
NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)
|
||||
}
|
||||
Value::Float { val, .. } => {
|
||||
let chunked = series.f64().map_err(|e| ShellError::GenericError {
|
||||
error: "Error casting to f64".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let res = chunked.scatter_single(indices, Some(val)).map_err(|e| {
|
||||
ShellError::GenericError {
|
||||
error: "Error setting value".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
}
|
||||
})?;
|
||||
|
||||
NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)
|
||||
}
|
||||
Value::String { val, .. } => {
|
||||
let chunked = series.str().map_err(|e| ShellError::GenericError {
|
||||
error: "Error casting to string".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let res = chunked
|
||||
.scatter_single(indices, Some(val.as_ref()))
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error setting value".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let mut res = res.into_series();
|
||||
res.rename("string");
|
||||
|
||||
NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)
|
||||
}
|
||||
_ => Err(ShellError::GenericError {
|
||||
error: "Incorrect value type".into(),
|
||||
msg: format!(
|
||||
"this value cannot be set in a series of type '{}'",
|
||||
series.dtype()
|
||||
),
|
||||
span: Some(span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
}),
|
||||
}?;
|
||||
|
||||
to_pipeline_data(plugin, engine, call.head, res)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&SetWithIndex)
|
||||
}
|
||||
}
|
@ -0,0 +1,133 @@
|
||||
use crate::{
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::super::values::{Column, NuDataFrame};
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
|
||||
};
|
||||
use polars::prelude::IntoSeries;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct IsDuplicated;
|
||||
|
||||
impl PluginCommand for IsDuplicated {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars is-duplicated"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates mask indicating duplicated values."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Create mask indicating duplicated values",
|
||||
example: "[5 6 6 6 8 8 8] | polars into-df | polars is-duplicated",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"is_duplicated".to_string(),
|
||||
vec![
|
||||
Value::test_bool(false),
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(true),
|
||||
],
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Create mask indicating duplicated rows in a dataframe",
|
||||
example:
|
||||
"[[a, b]; [1 2] [1 2] [3 3] [3 3] [1 1]] | polars into-df | polars is-duplicated",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"is_duplicated".to_string(),
|
||||
vec![
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(false),
|
||||
],
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
|
||||
let mut res = df
|
||||
.as_ref()
|
||||
.is_duplicated()
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error finding duplicates".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?
|
||||
.into_series();
|
||||
|
||||
res.rename("is_duplicated");
|
||||
|
||||
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&IsDuplicated)
|
||||
}
|
||||
}
|
@ -0,0 +1,130 @@
|
||||
use crate::{
|
||||
values::{
|
||||
cant_convert_err, to_pipeline_data, CustomValueSupport, PolarsPluginObject,
|
||||
PolarsPluginType,
|
||||
},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::super::values::{Column, NuDataFrame, NuExpression};
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
|
||||
};
|
||||
use polars::prelude::IntoSeries;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct IsNotNull;
|
||||
|
||||
impl PluginCommand for IsNotNull {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars is-not-null"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates mask where value is not null."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_output_types(vec![
|
||||
(
|
||||
Type::Custom("expression".into()),
|
||||
Type::Custom("expression".into()),
|
||||
),
|
||||
(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
),
|
||||
])
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Create mask where values are not null",
|
||||
example: r#"let s = ([5 6 0 8] | polars into-df);
|
||||
let res = ($s / $s);
|
||||
$res | polars is-not-null"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"is_not_null".to_string(),
|
||||
vec![
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(false),
|
||||
Value::test_bool(true),
|
||||
],
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Creates a is not null expression from a column",
|
||||
example: "polars col a | polars is-not-null",
|
||||
result: None,
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let value = input.into_value(call.head);
|
||||
|
||||
match PolarsPluginObject::try_from_value(plugin, &value)? {
|
||||
PolarsPluginObject::NuDataFrame(df) => command(plugin, engine, call, df),
|
||||
PolarsPluginObject::NuLazyFrame(lazy) => {
|
||||
command(plugin, engine, call, lazy.collect(call.head)?)
|
||||
}
|
||||
PolarsPluginObject::NuExpression(expr) => {
|
||||
let expr: NuExpression = expr.to_polars().is_not_null().into();
|
||||
to_pipeline_data(plugin, engine, call.head, expr)
|
||||
}
|
||||
_ => Err(cant_convert_err(
|
||||
&value,
|
||||
&[
|
||||
PolarsPluginType::NuDataFrame,
|
||||
PolarsPluginType::NuLazyFrame,
|
||||
PolarsPluginType::NuExpression,
|
||||
],
|
||||
)),
|
||||
}
|
||||
.map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
df: NuDataFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let mut res = df.as_series(call.head)?.is_not_null();
|
||||
res.rename("is_not_null");
|
||||
|
||||
let df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?;
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&IsNotNull)
|
||||
}
|
||||
}
|
130
crates/nu_plugin_polars/src/dataframe/series/masks/is_null.rs
Normal file
130
crates/nu_plugin_polars/src/dataframe/series/masks/is_null.rs
Normal file
@ -0,0 +1,130 @@
|
||||
use crate::{
|
||||
values::{
|
||||
cant_convert_err, to_pipeline_data, CustomValueSupport, NuExpression, PolarsPluginObject,
|
||||
PolarsPluginType,
|
||||
},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::super::values::{Column, NuDataFrame};
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
|
||||
};
|
||||
use polars::prelude::IntoSeries;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct IsNull;
|
||||
|
||||
impl PluginCommand for IsNull {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars is-null"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates mask where value is null."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_output_types(vec![
|
||||
(
|
||||
Type::Custom("expression".into()),
|
||||
Type::Custom("expression".into()),
|
||||
),
|
||||
(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
),
|
||||
])
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Create mask where values are null",
|
||||
example: r#"let s = ([5 6 0 8] | polars into-df);
|
||||
let res = ($s / $s);
|
||||
$res | polars is-null"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"is_null".to_string(),
|
||||
vec![
|
||||
Value::test_bool(false),
|
||||
Value::test_bool(false),
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(false),
|
||||
],
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Creates a is null expression from a column",
|
||||
example: "polars col a | polars is-null",
|
||||
result: None,
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let value = input.into_value(call.head);
|
||||
|
||||
match PolarsPluginObject::try_from_value(plugin, &value)? {
|
||||
PolarsPluginObject::NuDataFrame(df) => command(plugin, engine, call, df),
|
||||
PolarsPluginObject::NuLazyFrame(lazy) => {
|
||||
command(plugin, engine, call, lazy.collect(call.head)?)
|
||||
}
|
||||
PolarsPluginObject::NuExpression(expr) => {
|
||||
let expr: NuExpression = expr.to_polars().is_null().into();
|
||||
to_pipeline_data(plugin, engine, call.head, expr)
|
||||
}
|
||||
_ => Err(cant_convert_err(
|
||||
&value,
|
||||
&[
|
||||
PolarsPluginType::NuDataFrame,
|
||||
PolarsPluginType::NuLazyFrame,
|
||||
PolarsPluginType::NuExpression,
|
||||
],
|
||||
)),
|
||||
}
|
||||
.map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
df: NuDataFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let mut res = df.as_series(call.head)?.is_null();
|
||||
res.rename("is_null");
|
||||
|
||||
let df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?;
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&IsNull)
|
||||
}
|
||||
}
|
133
crates/nu_plugin_polars/src/dataframe/series/masks/is_unique.rs
Normal file
133
crates/nu_plugin_polars/src/dataframe/series/masks/is_unique.rs
Normal file
@ -0,0 +1,133 @@
|
||||
use crate::{
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::super::values::{Column, NuDataFrame};
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
|
||||
};
|
||||
use polars::prelude::IntoSeries;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct IsUnique;
|
||||
|
||||
impl PluginCommand for IsUnique {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars is-unique"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates mask indicating unique values."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Create mask indicating unique values",
|
||||
example: "[5 6 6 6 8 8 8] | polars into-df | polars is-unique",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"is_unique".to_string(),
|
||||
vec![
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(false),
|
||||
Value::test_bool(false),
|
||||
Value::test_bool(false),
|
||||
Value::test_bool(false),
|
||||
Value::test_bool(false),
|
||||
Value::test_bool(false),
|
||||
],
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Create mask indicating duplicated rows in a dataframe",
|
||||
example:
|
||||
"[[a, b]; [1 2] [1 2] [3 3] [3 3] [1 1]] | polars into-df | polars is-unique",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"is_unique".to_string(),
|
||||
vec![
|
||||
Value::test_bool(false),
|
||||
Value::test_bool(false),
|
||||
Value::test_bool(false),
|
||||
Value::test_bool(false),
|
||||
Value::test_bool(true),
|
||||
],
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
|
||||
let mut res = df
|
||||
.as_ref()
|
||||
.is_unique()
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error finding unique values".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?
|
||||
.into_series();
|
||||
|
||||
res.rename("is_unique");
|
||||
|
||||
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&IsUnique)
|
||||
}
|
||||
}
|
13
crates/nu_plugin_polars/src/dataframe/series/masks/mod.rs
Normal file
13
crates/nu_plugin_polars/src/dataframe/series/masks/mod.rs
Normal file
@ -0,0 +1,13 @@
|
||||
mod is_duplicated;
|
||||
mod is_not_null;
|
||||
mod is_null;
|
||||
mod is_unique;
|
||||
mod not;
|
||||
mod set;
|
||||
|
||||
pub use is_duplicated::IsDuplicated;
|
||||
pub use is_not_null::IsNotNull;
|
||||
pub use is_null::IsNull;
|
||||
pub use is_unique::IsUnique;
|
||||
pub use not::NotSeries;
|
||||
pub use set::SetSeries;
|
103
crates/nu_plugin_polars/src/dataframe/series/masks/not.rs
Normal file
103
crates/nu_plugin_polars/src/dataframe/series/masks/not.rs
Normal file
@ -0,0 +1,103 @@
|
||||
use crate::{
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::super::values::{Column, NuDataFrame};
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
|
||||
};
|
||||
use polars::prelude::IntoSeries;
|
||||
|
||||
use std::ops::Not;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct NotSeries;
|
||||
|
||||
impl PluginCommand for NotSeries {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars not"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Inverts boolean mask."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Inverts boolean mask",
|
||||
example: "[true false true] | polars into-df | polars not",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
Value::test_bool(false),
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(false),
|
||||
],
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
command(plugin, engine, call, df).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
df: NuDataFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let series = df.as_series(call.head)?;
|
||||
|
||||
let bool = series.bool().map_err(|e| ShellError::GenericError {
|
||||
error: "Error inverting mask".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let res = bool.not();
|
||||
|
||||
let df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?;
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&NotSeries)
|
||||
}
|
||||
}
|
210
crates/nu_plugin_polars/src/dataframe/series/masks/set.rs
Normal file
210
crates/nu_plugin_polars/src/dataframe/series/masks/set.rs
Normal file
@ -0,0 +1,210 @@
|
||||
use crate::{
|
||||
missing_flag_error,
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::super::values::{Column, NuDataFrame};
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
use polars::prelude::{ChunkSet, DataType, IntoSeries};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct SetSeries;
|
||||
|
||||
impl PluginCommand for SetSeries {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars set"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Sets value where given mask is true."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required("value", SyntaxShape::Any, "value to be inserted in series")
|
||||
.required_named(
|
||||
"mask",
|
||||
SyntaxShape::Any,
|
||||
"mask indicating insertions",
|
||||
Some('m'),
|
||||
)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Shifts the values by a given period",
|
||||
example: r#"let s = ([1 2 2 3 3] | polars into-df | polars shift 2);
|
||||
let mask = ($s | polars is-null);
|
||||
$s | polars set 0 --mask $mask"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
Value::test_int(0),
|
||||
Value::test_int(0),
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(2),
|
||||
],
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let value: Value = call.req(0)?;
|
||||
|
||||
let mask_value: Value = call
|
||||
.get_flag("mask")?
|
||||
.ok_or_else(|| missing_flag_error("mask", call.head))?;
|
||||
|
||||
let mask_span = mask_value.span();
|
||||
let mask =
|
||||
NuDataFrame::try_from_value_coerce(plugin, &mask_value, call.head)?.as_series(mask_span)?;
|
||||
|
||||
let bool_mask = match mask.dtype() {
|
||||
DataType::Boolean => mask.bool().map_err(|e| ShellError::GenericError {
|
||||
error: "Error casting to bool".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(mask_span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
}),
|
||||
_ => Err(ShellError::GenericError {
|
||||
error: "Incorrect type".into(),
|
||||
msg: "can only use bool series as mask".into(),
|
||||
span: Some(mask_span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
}),
|
||||
}?;
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
let series = df.as_series(call.head)?;
|
||||
let span = value.span();
|
||||
let res = match value {
|
||||
Value::Int { val, .. } => {
|
||||
let chunked = series.i64().map_err(|e| ShellError::GenericError {
|
||||
error: "Error casting to i64".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let res = chunked
|
||||
.set(bool_mask, Some(val))
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error setting value".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)
|
||||
}
|
||||
Value::Float { val, .. } => {
|
||||
let chunked = series.f64().map_err(|e| ShellError::GenericError {
|
||||
error: "Error casting to f64".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let res = chunked
|
||||
.set(bool_mask, Some(val))
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error setting value".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)
|
||||
}
|
||||
Value::String { val, .. } => {
|
||||
let chunked = series.str().map_err(|e| ShellError::GenericError {
|
||||
error: "Error casting to string".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let res = chunked.set(bool_mask, Some(val.as_ref())).map_err(|e| {
|
||||
ShellError::GenericError {
|
||||
error: "Error setting value".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
}
|
||||
})?;
|
||||
|
||||
let mut res = res.into_series();
|
||||
res.rename("string");
|
||||
|
||||
NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)
|
||||
}
|
||||
_ => Err(ShellError::GenericError {
|
||||
error: "Incorrect value type".into(),
|
||||
msg: format!(
|
||||
"this value cannot be set in a series of type '{}'",
|
||||
series.dtype()
|
||||
),
|
||||
span: Some(span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
}),
|
||||
}?;
|
||||
|
||||
to_pipeline_data(plugin, engine, call.head, res)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&SetSeries)
|
||||
}
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user