diff --git a/Cargo.lock b/Cargo.lock index 1a8bc134b3..c8d09c7b3e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -25,9 +25,9 @@ checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" [[package]] name = "ahash" -version = "0.7.7" +version = "0.7.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a824f2aa7e75a0c98c5a504fceb80649e9c35265d44525b5f94de4771a395cd" +checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" dependencies = [ "getrandom", "once_cell", @@ -36,9 +36,9 @@ dependencies = [ [[package]] name = "ahash" -version = "0.8.7" +version = "0.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77c3a9648d43b9cd48db467b3f87fdd6e146bcc88ab0180006cef2179fe11d01" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" dependencies = [ "cfg-if", "getrandom", @@ -49,9 +49,9 @@ dependencies = [ [[package]] name = "aho-corasick" -version = "1.1.2" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" dependencies = [ "memchr", ] @@ -119,9 +119,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.11" +version = "0.6.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e2e1ebcb11de5c03c67de28a7df593d32191b44939c482e97702baaaa6ab6a5" +checksum = "d96bd03f33fe50a863e394ee9718a706f988b9079b20c3784fb726e7678b62fb" dependencies = [ "anstyle", "anstyle-parse", @@ -133,9 +133,9 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.4" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87" +checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" [[package]] name = "anstyle-parse" @@ 
-184,9 +184,9 @@ dependencies = [ [[package]] name = "argminmax" -version = "0.6.1" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "202108b46429b765ef483f8a24d5c46f48c14acfdacc086dd4ab6dddf6bcdbd2" +checksum = "52424b59d69d69d5056d508b260553afd91c57e21849579cd1f50ee8b8b88eaa" dependencies = [ "num-traits", ] @@ -221,9 +221,9 @@ dependencies = [ [[package]] name = "assert_cmd" -version = "2.0.13" +version = "2.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00ad3f3a942eee60335ab4342358c161ee296829e0d16ff42fc1d6cb07815467" +checksum = "ed72493ac66d5804837f480ab3766c72bdfab91a65e565fc54fa9e42db0073a8" dependencies = [ "anstyle", "bstr", @@ -253,18 +253,18 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.58", ] [[package]] name = "async-trait" -version = "0.1.77" +version = "0.1.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9" +checksum = "a507401cad91ec6a857ed5513a2073c82a9b9048762b885bb98655b306964681" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.58", ] [[package]] @@ -284,9 +284,9 @@ checksum = "9ae037714f313c1353189ead58ef9eec30a8e8dc101b2622d461418fd59e28a9" [[package]] name = "autocfg" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +checksum = "f1fdabc7756949593fe60f30ec81974b613357de856987752631dea1e3394c80" [[package]] name = "avro-schema" @@ -304,9 +304,9 @@ dependencies = [ [[package]] name = "backtrace" -version = "0.3.69" +version = "0.3.71" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837" +checksum = 
"26b05800d2e817c8b3b4b54abd461726265fa9789ae34330622f2db9ee696f9d" dependencies = [ "addr2line", "cc", @@ -355,22 +355,22 @@ dependencies = [ [[package]] name = "bindgen" -version = "0.68.1" +version = "0.69.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "726e4313eb6ec35d2730258ad4e15b547ee75d6afaa1361a922e78e59b7d8078" +checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0" dependencies = [ - "bitflags 2.4.2", + "bitflags 2.5.0", "cexpr", "clang-sys", + "itertools 0.12.1", "lazy_static", "lazycell", - "peeking_take_while", "proc-macro2", "quote", "regex", "rustc-hash", "shlex", - "syn 2.0.48", + "syn 2.0.58", ] [[package]] @@ -396,9 +396,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.4.2" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" +checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" dependencies = [ "serde", ] @@ -432,9 +432,9 @@ dependencies = [ [[package]] name = "borsh" -version = "1.3.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f58b559fd6448c6e2fd0adb5720cd98a2506594cafa4737ff98c396f3e82f667" +checksum = "0901fc8eb0aca4c83be0106d6f2db17d86a08dfc2c25f0e84464bf381158add6" dependencies = [ "borsh-derive", "cfg_aliases", @@ -442,15 +442,15 @@ dependencies = [ [[package]] name = "borsh-derive" -version = "1.3.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7aadb5b6ccbd078890f6d7003694e33816e6b784358f18e15e7e6d9f065a57cd" +checksum = "51670c3aa053938b0ee3bd67c3817e471e626151131b934038e83c5bf8de48f5" dependencies = [ "once_cell", "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.58", "syn_derive", ] @@ -462,9 +462,9 @@ checksum = 
"ada7f35ca622a86a4d6c27be2633fc6c243ecc834859628fcce0681d8e76e1c8" [[package]] name = "brotli" -version = "3.4.0" +version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "516074a47ef4bce09577a3b379392300159ce5b1ba2e501ff1c819950066100f" +checksum = "d640d25bc63c50fb1f0b545ffd80207d2e10a4c965530809b40ba3386825c391" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -483,9 +483,9 @@ dependencies = [ [[package]] name = "bstr" -version = "1.9.0" +version = "1.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c48f0051a4b4c5e0b6d365cd04af53aeaa209e3cc15ec2cdb69e73cc87fbd0dc" +checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706" dependencies = [ "memchr", "regex-automata", @@ -494,9 +494,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.14.0" +version = "3.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] name = "byte-unit" @@ -511,9 +511,9 @@ dependencies = [ [[package]] name = "bytecheck" -version = "0.6.11" +version = "0.6.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b6372023ac861f6e6dc89c8344a8f398fb42aaba2b5dbc649ca0c0e9dbcb627" +checksum = "23cdc57ce23ac53c931e88a43d06d070a6fd142f2617be5855eb75efc9beb1c2" dependencies = [ "bytecheck_derive", "ptr_meta", @@ -522,9 +522,9 @@ dependencies = [ [[package]] name = "bytecheck_derive" -version = "0.6.11" +version = "0.6.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7ec4c6f261935ad534c0c22dbef2201b45918860eb1c574b972bd213a76af61" +checksum = "3db406d29fbcd95542e92559bed4d8ad92636d1ca8b3b72ede10b4bcc010e659" dependencies = [ "proc-macro2", "quote", @@ -539,22 +539,22 @@ checksum = "e1e5f035d16fc623ae5f74981db80a439803888314e3a555fd6f04acd51a3205" 
[[package]] name = "bytemuck" -version = "1.14.1" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed2490600f404f2b94c167e31d3ed1d5f3c225a0f3b80230053b3e0b7b962bd9" +checksum = "5d6d68c57235a3a081186990eca2867354726650f42f7516ca50c28d6281fd15" dependencies = [ "bytemuck_derive", ] [[package]] name = "bytemuck_derive" -version = "1.5.0" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "965ab7eb5f8f97d2a083c799f3a1b994fc397b2fe2da5d1da1626ce15a39f2b1" +checksum = "4da9a32f3fed317401fa3c862968128267c3106685286e15d5aaa3d7389c2f60" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.58", ] [[package]] @@ -565,9 +565,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.5.0" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" +checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" [[package]] name = "bytesize" @@ -609,9 +609,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.83" +version = "1.0.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +checksum = "2678b2e3449475e95b0aa6f9b506a28e61b3dc8996592b983695e8ebb58a8b41" dependencies = [ "jobserver", "libc", @@ -661,9 +661,9 @@ dependencies = [ [[package]] name = "chrono" -version = "0.4.35" +version = "0.4.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8eaf5903dcbc0a39312feb77df2ff4c76387d591b9fc7b04a238dcf8bb62639a" +checksum = "8a0d04d43504c61aa6c7531f1871dd0d418d91130162063b789da00fd7057a5e" dependencies = [ "android-tzdata", "iana-time-zone", @@ -686,9 +686,9 @@ dependencies = [ [[package]] name = "chrono-tz" -version = "0.8.5" +version = "0.8.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "91d7b79e99bfaa0d47da0687c43aa3b7381938a62ad3a6498599039321f660b7" +checksum = "d59ae0466b83e838b81a54256c39d5d7c20b9d7daa10510a242d9b75abd5936e" dependencies = [ "chrono", "chrono-tz-build", @@ -729,18 +729,18 @@ dependencies = [ [[package]] name = "clap" -version = "4.4.18" +version = "4.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e578d6ec4194633722ccf9544794b71b1385c3c027efe0c55db226fc880865c" +checksum = "90bc066a67923782aa8515dbaea16946c5bcc5addbd668bb80af688e53e548a0" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.4.18" +version = "4.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4df4df40ec50c46000231c914968278b1eb05098cf8f1b3a518a95030e71d1c7" +checksum = "ae129e2e766ae0ec03484e609954119f123cc1fe650337e155d03b022f24f7b4" dependencies = [ "anstream", "anstyle", @@ -751,15 +751,15 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1" +checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" [[package]] name = "clipboard-win" -version = "5.2.0" +version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12f9a0700e0127ba15d1d52dd742097f821cd9c65939303a44d970465040a297" +checksum = "d517d4b86184dbb111d3556a10f1c8a04da7428d2987bf1081602bf11c3aa9ee" dependencies = [ "error-code", ] @@ -779,6 +779,18 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +[[package]] +name = "comfy-table" +version = "7.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7" 
+dependencies = [ + "crossterm", + "strum", + "strum_macros 0.26.2", + "unicode-width", +] + [[package]] name = "compact_str" version = "0.7.1" @@ -813,9 +825,9 @@ dependencies = [ [[package]] name = "const-random" -version = "0.1.17" +version = "0.1.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aaf16c9c2c612020bcfd042e170f6e32de9b9d75adb5277cdbbd2e2c8c8299a" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" dependencies = [ "const-random-macro", ] @@ -893,18 +905,18 @@ checksum = "ccaeedb56da03b09f598226e25e80088cb4cd25f316e6e4df7d695f0feeb1403" [[package]] name = "crc32fast" -version = "1.3.2" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" dependencies = [ "cfg-if", ] [[package]] name = "crossbeam-channel" -version = "0.5.11" +version = "0.5.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "176dc175b78f56c0f321911d9c8eb2b77a78a4860b9c19db83835fea1a46649b" +checksum = "ab3db02a9c5b5121e1e42fbdb1aeb65f5e02624cc58c43f2884c6ccac0b82f95" dependencies = [ "crossbeam-utils", ] @@ -949,7 +961,7 @@ version = "0.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df" dependencies = [ - "bitflags 2.4.2", + "bitflags 2.5.0", "crossterm_winapi", "libc", "mio", @@ -1005,7 +1017,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" dependencies = [ "quote", - "syn 2.0.48", + "syn 2.0.58", ] [[package]] @@ -1069,7 +1081,7 @@ checksum = "d150dea618e920167e5973d70ae6ece4385b7164e0d799fe7c122dd0a5d912ad" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.58", ] [[package]] @@ -1160,7 +1172,7 @@ 
checksum = "27540baf49be0d484d8f0130d7d8da3011c32a44d4fc873368154f1510e574a2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.58", ] [[package]] @@ -1189,9 +1201,9 @@ checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" [[package]] name = "downcast-rs" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ea835d29036a4087793836fa931b08837ad5e957da9e23886b29586fb9b6650" +checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2" [[package]] name = "dtoa" @@ -1228,9 +1240,9 @@ checksum = "56ce8c6da7551ec6c462cbaf3bfbc75131ebbfa1c944aeaa9dab51ca1c5f0c3b" [[package]] name = "dyn-clone" -version = "1.0.16" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "545b22097d44f8a9581187cdf93de7a71e4722bf51200cfaba810865b49a495d" +checksum = "0d6ef0072f8a535281e4876be788938b528e9a1d43900b82c2569af7da799125" [[package]] name = "ego-tree" @@ -1240,9 +1252,9 @@ checksum = "3a68a4904193147e0a8dec3314640e6db742afd5f6e634f428a6af230d9b3591" [[package]] name = "either" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" +checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" [[package]] name = "eml-parser" @@ -1271,14 +1283,14 @@ dependencies = [ [[package]] name = "enum_dispatch" -version = "0.3.12" +version = "0.3.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f33313078bb8d4d05a2733a94ac4c2d8a0df9a2b84424ebf4f33bfc224a890e" +checksum = "aa18ce2bc66555b3218614519ac839ddb759a7d6720732f979ef8d13be147ecd" dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.58", ] [[package]] @@ -1299,9 +1311,9 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = 
"erased-serde" -version = "0.4.2" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55d05712b2d8d88102bc9868020c9e5c7a1f5527c452b9b97450a1d006140ba7" +checksum = "2b73807008a3c7f171cc40312f37d95ef0396e048b5848d775f54b1a4dd4a0d3" dependencies = [ "serde", ] @@ -1359,9 +1371,9 @@ checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c" [[package]] name = "fastrand" -version = "2.0.1" +version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" +checksum = "658bd65b1cf4c852a3cc96f18a8ce7b5640f6b703f905c7d74532294c2a63984" [[package]] name = "fd-lock" @@ -1552,7 +1564,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.58", ] [[package]] @@ -1625,9 +1637,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.12" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" +checksum = "94b22e06ecb0110981051723910cbf0b5f5e09a2062dd7663334ee79a9d1286c" dependencies = [ "cfg-if", "js-sys", @@ -1644,11 +1656,11 @@ checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" [[package]] name = "git2" -version = "0.18.1" +version = "0.18.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbf97ba92db08df386e10c8ede66a2a0369bd277090afd8710e19e38de9ec0cd" +checksum = "232e6a7bfe35766bf715e55a88b39a700596c0ccfd88cd3680b4cdb40d66ef70" dependencies = [ - "bitflags 2.4.2", + "bitflags 2.5.0", "libc", "libgit2-sys", "log", @@ -1690,11 +1702,11 @@ dependencies = [ [[package]] name = "halfbrown" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"5681137554ddff44396e5f149892c769d45301dd9aa19c51602a89ee214cb0ec" +checksum = "8588661a8607108a5ca69cab034063441a0413a0b041c13618a7dd348021ef6f" dependencies = [ - "hashbrown 0.13.2", + "hashbrown 0.14.3", "serde", ] @@ -1713,16 +1725,7 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" dependencies = [ - "ahash 0.7.7", -] - -[[package]] -name = "hashbrown" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" -dependencies = [ - "ahash 0.8.7", + "ahash 0.7.8", ] [[package]] @@ -1731,7 +1734,7 @@ version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" dependencies = [ - "ahash 0.8.7", + "ahash 0.8.11", "allocator-api2", "rayon", ] @@ -1769,9 +1772,9 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "hermit-abi" -version = "0.3.4" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d3d0e0f38255e7fa3cf31335b3a56f05febd18025f4db5ef7a0cfb4f8da651f" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" [[package]] name = "hex" @@ -1804,9 +1807,9 @@ dependencies = [ [[package]] name = "http" -version = "0.2.11" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8947b1a6fad4393052c7ba1f4cd97bed3e953a95c79c92ad9b051a04611d9fbb" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" dependencies = [ "bytes", "fnv", @@ -1838,9 +1841,9 @@ checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" [[package]] name = "human-date-parser" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "92d65b3ad1fdc03306397b6004b4f8f765cf7467194a1080b4530eeed5a2f0bc" +checksum = "c5cbf96a7157cc349eeafe4595e4f283c3fcab73b5a656d8b2cc00a870a74e1a" dependencies = [ "chrono", "pest", @@ -1915,9 +1918,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.2.2" +version = "2.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "824b2ae422412366ba479e8111fd301f7b5faece8149317bb81925979a53f520" +checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" dependencies = [ "equivalent", "hashbrown 0.14.3", @@ -1926,9 +1929,9 @@ dependencies = [ [[package]] name = "indicatif" -version = "0.17.7" +version = "0.17.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb28741c9db9a713d93deb3bb9515c20788cef5815265bee4980e87bde7e0f25" +checksum = "763a5a8f45087d6bcea4222e7b72c291a054edf80e4ef6efd2a4979878c7bea3" dependencies = [ "console", "instant", @@ -1939,9 +1942,9 @@ dependencies = [ [[package]] name = "indoc" -version = "2.0.4" +version = "2.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e186cfbae8084e513daff4240b4797e342f988cecda4fb6c939150f96315fd8" +checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" [[package]] name = "inotify" @@ -2029,18 +2032,18 @@ dependencies = [ [[package]] name = "itertools" -version = "0.12.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25db6b064527c5d482d0423354fcd07a89a2dfe07b67892e62411946db7f07b0" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" dependencies = [ "either", ] [[package]] name = "itoa" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] name = "itoap" @@ -2050,9 
+2053,9 @@ checksum = "9028f49264629065d057f340a86acb84867925865f73bbf8d47b4d149a7e88b8" [[package]] name = "jobserver" -version = "0.1.27" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c37f63953c4c63420ed5fd3d6d398c719489b9f872b9fa683262f8edd363c7d" +checksum = "ab46a6e9526ddef3ae7f787c06f0f2600639ba80ea3eade3d8e670a2230f51d6" dependencies = [ "libc", ] @@ -2065,9 +2068,9 @@ checksum = "72167d68f5fce3b8655487b8038691a3c9984ee769590f93f2a631f4ad64e4f5" [[package]] name = "js-sys" -version = "0.3.67" +version = "0.3.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a1d36f1235bc969acba30b7f5990b864423a6068a10f7c90ae8f0112e3a59d1" +checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" dependencies = [ "wasm-bindgen", ] @@ -2210,12 +2213,12 @@ dependencies = [ [[package]] name = "libloading" -version = "0.8.1" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c571b676ddfc9a8c12f1f3d3085a7b163966a8fd8098a90640953ce5f6170161" +checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" dependencies = [ "cfg-if", - "windows-sys 0.48.0", + "windows-targets 0.52.4", ] [[package]] @@ -2236,9 +2239,9 @@ dependencies = [ [[package]] name = "libproc" -version = "0.14.2" +version = "0.14.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "229004ebba9d1d5caf41623f1523b6d52abb47d9f6ab87f7e6fc992e3b854aef" +checksum = "8eb6497078a4c9c2aca63df56d8dce6eb4381d53a960f781a3a748f7ea97436d" dependencies = [ "bindgen", "errno", @@ -2247,13 +2250,12 @@ dependencies = [ [[package]] name = "libredox" -version = "0.0.1" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85c833ca1e66078851dba29046874e38f08b2c883700aa29a03ddd3b23814ee8" +checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" dependencies = [ - "bitflags 2.4.2", 
+ "bitflags 2.5.0", "libc", - "redox_syscall", ] [[package]] @@ -2283,9 +2285,9 @@ dependencies = [ [[package]] name = "libz-sys" -version = "1.1.15" +version = "1.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "037731f5d3aaa87a5675e895b63ddff1a87624bc29f77004ea829809654e48f6" +checksum = "5e143b5e666b2695d28f6bca6497720813f699c9602dd7f5cac91008b8ada7f9" dependencies = [ "cc", "libc", @@ -2326,9 +2328,9 @@ checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" [[package]] name = "lru" -version = "0.12.1" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2994eeba8ed550fd9b47a0b38f0242bc3344e496483c6180b69139cc2fa5d1d7" +checksum = "d3262e75e648fce39813cb56ac41f3c3e3f65217ebf3844d818d1f9398cfb0dc" dependencies = [ "hashbrown 0.14.3", ] @@ -2356,9 +2358,9 @@ dependencies = [ [[package]] name = "lsp-types" -version = "0.95.0" +version = "0.95.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "158c1911354ef73e8fe42da6b10c0484cb65c7f1007f28022e847706c1ab6984" +checksum = "8e34d33a8e9b006cd3fc4fe69a921affa097bae4bb65f76271f4644f9a334365" dependencies = [ "bitflags 1.3.2", "serde", @@ -2437,9 +2439,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.1" +version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" +checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" [[package]] name = "memmap2" @@ -2478,7 +2480,7 @@ checksum = "dcf09caffaac8068c346b6df2a7fc27a177fd20b39421a39ce0a211bde679a6c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.58", ] [[package]] @@ -2514,9 +2516,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.7.1" +version = "0.7.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" dependencies = [ "adler", ] @@ -2553,9 +2555,9 @@ dependencies = [ [[package]] name = "multiversion" -version = "0.7.3" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2c7b9d7fe61760ce5ea19532ead98541f6b4c495d87247aff9826445cf6872a" +checksum = "c4851161a11d3ad0bf9402d90ffc3967bf231768bfd7aeb61755ad06dbf1a142" dependencies = [ "multiversion-macros", "target-features", @@ -2563,9 +2565,9 @@ dependencies = [ [[package]] name = "multiversion-macros" -version = "0.7.3" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26a83d8500ed06d68877e9de1dde76c1dbb83885dcdbda4ef44ccbc3fbda2ac8" +checksum = "79a74ddee9e0c27d2578323c13905793e91622148f138ba29738f9dddb835e90" dependencies = [ "proc-macro2", "quote", @@ -2593,9 +2595,9 @@ dependencies = [ [[package]] name = "new_debug_unreachable" -version = "1.0.4" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54" +checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" [[package]] name = "nix" @@ -2603,7 +2605,7 @@ version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4" dependencies = [ - "bitflags 2.4.2", + "bitflags 2.5.0", "cfg-if", "cfg_aliases", "libc", @@ -2625,7 +2627,7 @@ version = "6.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6205bd8bb1e454ad2e27422015fb5e4f2bcc7e08fa8f27058670d208324a4d2d" dependencies = [ - "bitflags 2.4.2", + "bitflags 2.5.0", "crossbeam-channel", "filetime", "fsevent-sys", @@ -2786,7 +2788,7 @@ dependencies = [ 
"polars-plan", "polars-utils", "serde", - "sqlparser", + "sqlparser 0.39.0", ] [[package]] @@ -2795,7 +2797,7 @@ version = "0.92.2" dependencies = [ "fancy-regex", "heck 0.5.0", - "itertools 0.12.0", + "itertools 0.12.1", "nu-ansi-term", "nu-cmd-base", "nu-cmd-lang", @@ -2818,7 +2820,7 @@ dependencies = [ name = "nu-cmd-lang" version = "0.92.2" dependencies = [ - "itertools 0.12.0", + "itertools 0.12.1", "nu-engine", "nu-parser", "nu-protocol", @@ -2866,7 +2868,7 @@ dependencies = [ "human-date-parser", "indexmap", "indicatif", - "itertools 0.12.0", + "itertools 0.12.1", "log", "lscolors", "md-5", @@ -2920,7 +2922,7 @@ dependencies = [ "tabled", "terminal_size", "titlecase", - "toml 0.8.8", + "toml 0.8.12", "trash", "umask", "unicode-segmentation", @@ -3018,7 +3020,7 @@ version = "0.92.2" dependencies = [ "bytesize", "chrono", - "itertools 0.12.0", + "itertools 0.12.1", "log", "nu-engine", "nu-path", @@ -3099,7 +3101,7 @@ dependencies = [ "serde", "serde_json", "strum", - "strum_macros 0.26.1", + "strum_macros 0.26.2", "thiserror", "typetag", ] @@ -3232,6 +3234,34 @@ dependencies = [ "semver", ] +[[package]] +name = "nu_plugin_polars" +version = "0.92.2" +dependencies = [ + "chrono", + "chrono-tz", + "fancy-regex", + "indexmap", + "nu-cmd-lang", + "nu-command", + "nu-engine", + "nu-parser", + "nu-plugin", + "nu-plugin-test-support", + "nu-protocol", + "num", + "polars", + "polars-arrow", + "polars-io", + "polars-ops", + "polars-plan", + "polars-utils", + "serde", + "sqlparser 0.43.1", + "typetag", + "uuid", +] + [[package]] name = "nu_plugin_query" version = "0.92.2" @@ -3271,13 +3301,19 @@ dependencies = [ [[package]] name = "num-complex" -version = "0.4.4" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ba157ca0885411de85d6ca030ba7e2a83a28636056c7c699b07c8b6f7383214" +checksum = "23c6602fda94a57c990fe0df199a035d83576b496aa29f4e634a8ac6004e68a6" dependencies = [ "num-traits", ] +[[package]] +name = "num-conv" 
+version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + [[package]] name = "num-format" version = "0.4.4" @@ -3290,19 +3326,18 @@ dependencies = [ [[package]] name = "num-integer" -version = "0.1.45" +version = "0.1.46" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" dependencies = [ - "autocfg", "num-traits", ] [[package]] name = "num-iter" -version = "0.1.43" +version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" +checksum = "d869c01cc0c455284163fd0092f1f93835385ccab5a98a0dcc497b2f8bf055a9" dependencies = [ "autocfg", "num-integer", @@ -3323,9 +3358,9 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.17" +version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" +checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" dependencies = [ "autocfg", "libm", @@ -3343,9 +3378,9 @@ dependencies = [ [[package]] name = "num_threads" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2819ce041d2ee131036f4fc9d6ae7ae125a3a40e97ba64d04fe799ad9dabbb44" +checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9" dependencies = [ "libc", ] @@ -3408,9 +3443,9 @@ checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "open" -version = "5.1.1" +version = "5.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68b3fbb0d52bf0cbb5225ba3d2c303aa136031d43abff98284332a9981ecddec" +checksum = 
"449f0ff855d85ddbf1edd5b646d65249ead3f5e422aaa86b7d2d0b049b103e32" dependencies = [ "is-wsl", "libc", @@ -3419,11 +3454,11 @@ dependencies = [ [[package]] name = "openssl" -version = "0.10.63" +version = "0.10.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15c9d69dd87a29568d4d017cfe8ec518706046a05184e5aea92d0af890b803c8" +checksum = "95a0481286a310808298130d22dd1fef0fa571e05a8f44ec801801e84b216b1f" dependencies = [ - "bitflags 2.4.2", + "bitflags 2.5.0", "cfg-if", "foreign-types", "libc", @@ -3440,7 +3475,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.58", ] [[package]] @@ -3460,9 +3495,9 @@ dependencies = [ [[package]] name = "openssl-sys" -version = "0.9.99" +version = "0.9.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22e1bf214306098e4832460f797824c05d25aacdf896f64a985fb0fd992454ae" +checksum = "c597637d56fbc83893a35eb0dd04b2b8e7a50c91e64e9493e398b5df4fb45fa2" dependencies = [ "cc", "libc", @@ -3473,9 +3508,9 @@ dependencies = [ [[package]] name = "ordered-multimap" -version = "0.7.1" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4d6a8c22fc714f0c2373e6091bf6f5e9b37b1bc0b1184874b7e0a4e303d318f" +checksum = "49203cdcae0030493bad186b28da2fa25645fa276a51b6fec8010d281e02ef79" dependencies = [ "dlv-list", "hashbrown 0.14.3", @@ -3573,12 +3608,6 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8835116a5c179084a830efb3adc117ab007512b535bc1a21c991d3b32a6b44dd" -[[package]] -name = "peeking_take_while" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" - [[package]] name = "percent-encoding" version = "2.3.1" @@ -3593,9 +3622,9 @@ checksum = 
"f658886ed52e196e850cfbbfddab9eaa7f6d90dd0929e264c31e5cec07e09e57" [[package]] name = "pest" -version = "2.7.6" +version = "2.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f200d8d83c44a45b21764d1916299752ca035d15ecd46faca3e9a2a2bf6ad06" +checksum = "311fb059dee1a7b802f036316d790138c613a4e8b180c822e3925a662e9f0c95" dependencies = [ "memchr", "thiserror", @@ -3604,9 +3633,9 @@ dependencies = [ [[package]] name = "pest_derive" -version = "2.7.6" +version = "2.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcd6ab1236bbdb3a49027e920e693192ebfe8913f6d60e294de57463a493cfde" +checksum = "f73541b156d32197eecda1a4014d7f868fd2bcb3c550d5386087cfba442bf69c" dependencies = [ "pest", "pest_generator", @@ -3614,22 +3643,22 @@ dependencies = [ [[package]] name = "pest_generator" -version = "2.7.6" +version = "2.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a31940305ffc96863a735bef7c7994a00b325a7138fdbc5bda0f1a0476d3275" +checksum = "c35eeed0a3fab112f75165fdc026b3913f4183133f19b49be773ac9ea966e8bd" dependencies = [ "pest", "pest_meta", "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.58", ] [[package]] name = "pest_meta" -version = "2.7.6" +version = "2.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7ff62f5259e53b78d1af898941cdcdccfae7385cf7d793a6e55de5d05bb4b7d" +checksum = "2adbf29bb9776f28caece835398781ab24435585fe0d4dc1374a61db5accedca" dependencies = [ "once_cell", "pest", @@ -3715,7 +3744,7 @@ dependencies = [ "phf_shared 0.11.2", "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.58", ] [[package]] @@ -3738,9 +3767,9 @@ dependencies = [ [[package]] name = "pin-project-lite" -version = "0.2.13" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" +checksum = 
"bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" [[package]] name = "pin-utils" @@ -3750,9 +3779,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2900ede94e305130c13ddd391e0ab7cbaeb783945ae07a279c268cb05109c6cb" +checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" [[package]] name = "planus" @@ -3795,7 +3824,7 @@ version = "0.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "faacd21a2548fa6d50c72d6b8d4649a8e029a0f3c6c5545b7f436f0610e49b0f" dependencies = [ - "ahash 0.8.7", + "ahash 0.8.11", "atoi", "atoi_simd", "avro-schema", @@ -3857,11 +3886,12 @@ version = "0.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "befd4d280a82219a01035c4f901319ceba65998c594d0c64f9a439cdee1d7777" dependencies = [ - "ahash 0.8.7", - "bitflags 2.4.2", + "ahash 0.8.11", + "bitflags 2.5.0", "bytemuck", "chrono", "chrono-tz", + "comfy-table", "either", "hashbrown 0.14.3", "indexmap", @@ -3903,7 +3933,7 @@ version = "0.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b51fba2cf014cb39c2b38353d601540fb9db643be65abb9ca8ff44b9c4c4a88e" dependencies = [ - "ahash 0.8.7", + "ahash 0.8.11", "async-trait", "atoi_simd", "bytes", @@ -3944,7 +3974,7 @@ version = "0.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "973d1f40ba964e70cf0038779056a7850f649538f72d8828c21bc1a7bce312ed" dependencies = [ - "ahash 0.8.7", + "ahash 0.8.11", "chrono", "fallible-streaming-iterator", "hashbrown 0.14.3", @@ -3965,8 +3995,8 @@ version = "0.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d83343e413346f048f3a5ad07c0ea4b5d0bada701a482878213142970b0ddff8" dependencies = [ - "ahash 0.8.7", - "bitflags 2.4.2", + "ahash 0.8.11", + 
"bitflags 2.5.0", "glob", "once_cell", "polars-arrow", @@ -3989,7 +4019,7 @@ version = "0.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6395f5fd5e1adf016fd6403c0a493181c1a349a7a145b2687cdf50a0d630310a" dependencies = [ - "ahash 0.8.7", + "ahash 0.8.11", "argminmax", "base64 0.21.7", "bytemuck", @@ -4022,7 +4052,7 @@ version = "0.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b664cac41636cc9f146fba584a8e7c2790d7335a278964529fa3e9b4eae96daf" dependencies = [ - "ahash 0.8.7", + "ahash 0.8.11", "async-stream", "base64 0.21.7", "brotli", @@ -4072,7 +4102,7 @@ version = "0.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fb7d7527be2aa33baace9000f6772eb9df7cd57ec010a4b273435d2dc1349e8" dependencies = [ - "ahash 0.8.7", + "ahash 0.8.11", "bytemuck", "chrono-tz", "once_cell", @@ -4119,7 +4149,7 @@ dependencies = [ "rand", "serde", "serde_json", - "sqlparser", + "sqlparser 0.39.0", ] [[package]] @@ -4149,7 +4179,7 @@ version = "0.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "38f9c955bb1e9b55d835aeb7fe4e4e8826e01abe5f0ada979ceb7d2b9af7b569" dependencies = [ - "ahash 0.8.7", + "ahash 0.8.11", "bytemuck", "hashbrown 0.14.3", "indexmap", @@ -4247,7 +4277,7 @@ version = "3.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d37c51ca738a55da99dc0c4a34860fd675453b8b36209178c2249bb13651284" dependencies = [ - "toml_edit 0.21.0", + "toml_edit 0.21.1", ] [[package]] @@ -4275,9 +4305,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.78" +version = "1.0.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" +checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" dependencies = [ "unicode-ident", ] @@ -4288,7 +4318,7 @@ version = "0.16.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "731e0d9356b0c25f16f33b5be79b1c57b562f141ebfcdb0ad8ac2c13a24293b4" dependencies = [ - "bitflags 2.4.2", + "bitflags 2.5.0", "chrono", "flate2", "hex", @@ -4303,7 +4333,7 @@ version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d3554923a69f4ce04c4a754260c338f505ce22642d3830e049a399fc2059a29" dependencies = [ - "bitflags 2.4.2", + "bitflags 2.5.0", "chrono", "hex", ] @@ -4460,16 +4490,16 @@ dependencies = [ [[package]] name = "ratatui" -version = "0.26.0" +version = "0.26.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "154b85ef15a5d1719bcaa193c3c81fe645cd120c156874cd660fe49fd21d1373" +checksum = "bcb12f8fbf6c62614b0d56eb352af54f6a22410c3b079eb53ee93c7b97dd31d8" dependencies = [ - "bitflags 2.4.2", + "bitflags 2.5.0", "cassowary", "compact_str", "crossterm", "indoc", - "itertools 0.12.0", + "itertools 0.12.1", "lru", "paste", "stability", @@ -4509,9 +4539,9 @@ dependencies = [ [[package]] name = "redox_users" -version = "0.4.4" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a18479200779601e498ada4e8c1e1f50e3ee19deb0259c25825a98b5603b2cb4" +checksum = "bd283d9651eeda4b2a83a43c1c91b266c40fd76ecd39a50a8c630ae69dc72891" dependencies = [ "getrandom", "libredox", @@ -4528,14 +4558,14 @@ dependencies = [ "chrono", "crossterm", "fd-lock", - "itertools 0.12.0", + "itertools 0.12.1", "nu-ansi-term", "rusqlite", "serde", "serde_json", "strip-ansi-escapes", "strum", - "strum_macros 0.26.1", + "strum_macros 0.26.2", "thiserror", "unicode-segmentation", "unicode-width", @@ -4558,14 +4588,14 @@ checksum = "5fddb4f8d99b0a2ebafc65a87a69a7b9875e4b1ae1f00db265d300ef7f28bccc" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.58", ] [[package]] name = "regex" -version = "1.10.3" +version = "1.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" +checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" dependencies = [ "aho-corasick", "memchr", @@ -4575,9 +4605,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd" +checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" dependencies = [ "aho-corasick", "memchr", @@ -4592,9 +4622,9 @@ checksum = "30b661b2f27137bdbc16f00eda72866a92bb28af1753ffbd56744fb6e2e9cd8e" [[package]] name = "regex-syntax" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" +checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" [[package]] name = "relative-path" @@ -4604,18 +4634,18 @@ checksum = "e898588f33fdd5b9420719948f9f2a32c922a246964576f71ba7f24f80610fbc" [[package]] name = "rend" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2571463863a6bd50c32f94402933f03457a3fbaf697a707c5be741e459f08fd" +checksum = "71fe3824f5629716b1589be05dacd749f6aa084c87e00e016714a8cdfccc997c" dependencies = [ "bytecheck", ] [[package]] name = "rfc2047-decoder" -version = "1.0.2" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e372613f15fc5171f9052b0c1fbafca5b1e5b0ba86aa13c9c39fd91ca1f7955" +checksum = "e90a668c463c412c3118ae1883e18b53d812c349f5af7a06de3ba4bb0c17cc73" dependencies = [ "base64 0.21.7", "charset", @@ -4627,9 +4657,9 @@ dependencies = [ [[package]] name = "rkyv" -version = "0.7.43" +version = "0.7.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"527a97cdfef66f65998b5f3b637c26f5a5ec09cc52a3f9932313ac645f4190f5" +checksum = "5cba464629b3394fc4dbc6f940ff8f5b4ff5c7aef40f29166fd4ad12acbc99c0" dependencies = [ "bitvec", "bytecheck", @@ -4645,9 +4675,9 @@ dependencies = [ [[package]] name = "rkyv_derive" -version = "0.7.43" +version = "0.7.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5c462a1328c8e67e4d6dbad1eb0355dd43e8ab432c6e227a43657f16ade5033" +checksum = "a7dddfff8de25e6f62b9d64e6e432bf1c6736c57d20323e15ee10435fbda7c65" dependencies = [ "proc-macro2", "quote", @@ -4721,7 +4751,7 @@ dependencies = [ "regex", "relative-path", "rustc_version", - "syn 2.0.48", + "syn 2.0.58", "unicode-ident", ] @@ -4731,7 +4761,7 @@ version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b838eba278d213a8beaf485bd313fd580ca4505a00d5871caeb1457c55322cae" dependencies = [ - "bitflags 2.4.2", + "bitflags 2.5.0", "chrono", "fallible-iterator", "fallible-streaming-iterator", @@ -4760,7 +4790,7 @@ dependencies = [ "proc-macro2", "quote", "rust-embed-utils", - "syn 2.0.48", + "syn 2.0.58", "walkdir", ] @@ -4786,9 +4816,9 @@ dependencies = [ [[package]] name = "rust_decimal" -version = "1.33.1" +version = "1.35.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06676aec5ccb8fc1da723cc8c0f9a46549f21ebb8753d3915c6c41db1e7f1dc4" +checksum = "1790d1c4c0ca81211399e0e0af16333276f375209e71a37b67698a373db5b47a" dependencies = [ "arrayvec 0.7.4", "borsh", @@ -4823,11 +4853,11 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.31" +version = "0.38.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ea3e1a662af26cd7a3ba09c0297a31af215563ecf42817c98df621387f4e949" +checksum = "65e04861e65f21776e67888bfbea442b3642beaa0138fdb1dd7a84a52dffdb89" dependencies = [ - "bitflags 2.4.2", + "bitflags 2.5.0", "errno", "libc", "linux-raw-sys", @@ -4836,15 +4866,15 @@ dependencies = [ [[package]] name = 
"rustversion" -version = "1.0.14" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" +checksum = "80af6f9131f277a45a3fba6ce8e2258037bb0477a67e610d3c1fe046ab31de47" [[package]] name = "ryu" -version = "1.0.16" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" +checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" [[package]] name = "same-file" @@ -4882,7 +4912,7 @@ version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b80b33679ff7a0ea53d37f3b39de77ea0c75b12c5805ac43ec0c33b3051af1b" dependencies = [ - "ahash 0.8.7", + "ahash 0.8.11", "cssparser", "ego-tree", "html5ever", @@ -4899,9 +4929,9 @@ checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" [[package]] name = "security-framework" -version = "2.9.2" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05b64fb303737d99b81884b2c63433e9ae28abebe5eb5045dcdd175dc2ecf4de" +checksum = "770452e37cad93e0a50d5abc3990d2bc351c36d0328f86cefec2f2fb206eaef6" dependencies = [ "bitflags 1.3.2", "core-foundation", @@ -4912,9 +4942,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.9.1" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e932934257d3b408ed8f30db49d85ea163bfe74961f017f405b025af298f0c7a" +checksum = "41f3cc463c0ef97e11c3461a9d3787412d30e8e7eb907c79180c4a57bf7c04ef" dependencies = [ "core-foundation-sys", "libc", @@ -4926,7 +4956,7 @@ version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4eb30575f3638fc8f6815f448d50cb1a2e255b0897985c8c59f4d37b72a07b06" dependencies = [ - "bitflags 2.4.2", + "bitflags 2.5.0", "cssparser", "derive_more", "fxhash", 
@@ -4941,9 +4971,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.21" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b97ed7a9823b74f99c7742f5336af7be5ecd3eeafcb1507d1fa93347b1d589b0" +checksum = "92d43fe69e652f3df9bdc2b85b2854a0825b86e4fb76bc44d945137d053639ca" [[package]] name = "seq-macro" @@ -4953,29 +4983,29 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.196" +version = "1.0.197" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "870026e60fa08c69f064aa766c10f10b1d62db9ccd4d0abb206472bee0ce3b32" +checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.196" +version = "1.0.197" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33c85360c95e7d137454dc81d9a4ed2b8efd8fbe19cee57357b32b9771fccb67" +checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.58", ] [[package]] name = "serde_json" -version = "1.0.114" +version = "1.0.115" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0" +checksum = "12dc5c46daa8e9fdf4f5e71b6cf9a53f2487da0e86e55808e2d35539666497dd" dependencies = [ "indexmap", "itoa", @@ -4985,13 +5015,13 @@ dependencies = [ [[package]] name = "serde_repr" -version = "0.1.18" +version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b2e6b945e9d3df726b65d6ee24060aff8e3533d431f677a9695db04eff9dfdb" +checksum = "6c64451ba24fc7a6a2d60fc75dd9c83c90903b19028d4eff35e88fc1e86564e9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.58", ] [[package]] @@ -5017,9 +5047,9 @@ dependencies = [ [[package]] name = "serde_yaml" 
-version = "0.9.30" +version = "0.9.34+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1bf28c79a99f70ee1f1d83d10c875d2e70618417fda01ad1785e027579d9d38" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" dependencies = [ "indexmap", "itoa", @@ -5050,7 +5080,7 @@ checksum = "b93fb4adc70021ac1b47f7d45e8cc4169baaa7ea58483bc5b721d19a26202212" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.58", ] [[package]] @@ -5128,11 +5158,11 @@ dependencies = [ [[package]] name = "simd-json" -version = "0.13.8" +version = "0.13.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2faf8f101b9bc484337a6a6b0409cf76c139f2fb70a9e3aee6b6774be7bfbf76" +checksum = "b0b84c23a1066e1d650ebc99aa8fb9f8ed0ab96fd36e2e836173c92fc9fb29bc" dependencies = [ - "ahash 0.8.7", + "ahash 0.8.11", "getrandom", "halfbrown", "lexical-core", @@ -5158,9 +5188,9 @@ checksum = "fa42c91313f1d05da9b26f267f931cf178d4aba455b4c4622dd7355eb80c6640" [[package]] name = "simplelog" -version = "0.12.1" +version = "0.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acee08041c5de3d5048c8b3f6f13fafb3026b24ba43c6a695a0c76179b844369" +checksum = "16257adbfaef1ee58b1363bdc0664c9b8e1e30aed86049635fb5f147d065a9c0" dependencies = [ "log", "termcolor", @@ -5184,9 +5214,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.13.1" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" [[package]] name = "smartstring" @@ -5214,12 +5244,12 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "socket2" -version = "0.5.5" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"7b5fac59a5cb5dd637972e5fca70daf0523c9067fcdc4842f053dae04a18f8e9" +checksum = "05ffd9c0a93b7543e062e759284fcf5f5e3b098501104bfbdde4d404db792871" dependencies = [ "libc", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -5231,6 +5261,15 @@ dependencies = [ "log", ] +[[package]] +name = "sqlparser" +version = "0.43.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f95c4bae5aba7cd30bd506f7140026ade63cff5afd778af8854026f9606bf5d4" +dependencies = [ + "log", +] + [[package]] name = "stability" version = "0.1.1" @@ -5330,17 +5369,17 @@ dependencies = [ [[package]] name = "strsim" -version = "0.10.0" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "strum" -version = "0.26.1" +version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "723b93e8addf9aa965ebe2d11da6d7540fa2283fcea14b3371ff055f7ba13f5f" +checksum = "5d8cec3501a5194c432b2b7976db6b7d10ec95c253208b45f83f7136aa985e29" dependencies = [ - "strum_macros 0.26.1", + "strum_macros 0.26.2", ] [[package]] @@ -5353,20 +5392,20 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.48", + "syn 2.0.58", ] [[package]] name = "strum_macros" -version = "0.26.1" +version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a3417fc93d76740d974a01654a09777cb500428cc874ca9f45edfe0c4d4cd18" +checksum = "c6cf59daf282c0a494ba14fd21610a0325f9f90ec9d1231dea26bcb1d696c946" dependencies = [ "heck 0.4.1", "proc-macro2", "quote", "rustversion", - "syn 2.0.48", + "syn 2.0.58", ] [[package]] @@ -5424,9 +5463,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.48" +version = "2.0.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" +checksum = "44cfb93f38070beee36b3fef7d4f5a16f27751d94b187b666a5cc5e9b0d30687" dependencies = [ "proc-macro2", "quote", @@ -5442,7 +5481,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.58", ] [[package]] @@ -5456,9 +5495,9 @@ dependencies = [ [[package]] name = "sysinfo" -version = "0.30.5" +version = "0.30.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fb4f3438c8f6389c864e61221cbc97e9bca98b4daf39a5beb7bea660f528bb2" +checksum = "e9a84fe4cfc513b41cb2596b624e561ec9e7e1c4b46328e496ed56a53514ef2a" dependencies = [ "cfg-if", "core-foundation-sys", @@ -5489,9 +5528,9 @@ checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" [[package]] name = "target-features" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfb5fa503293557c5158bd215fdc225695e567a77e453f5d4452a50a193969bd" +checksum = "c1bbb9f3c5c463a01705937a24fdabc5047929ac764b2d5b9cf681c1f5041ed5" [[package]] name = "tempfile" @@ -5518,9 +5557,9 @@ dependencies = [ [[package]] name = "termcolor" -version = "1.1.3" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" dependencies = [ "winapi-util", ] @@ -5543,9 +5582,9 @@ checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" [[package]] name = "textwrap" -version = "0.16.0" +version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" +checksum = "23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9" dependencies = [ "smawk", "unicode-linebreak", @@ -5554,29 +5593,29 @@ dependencies = [ [[package]] name = 
"thiserror" -version = "1.0.56" +version = "1.0.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d54378c645627613241d077a3a79db965db602882668f9136ac42af9ecb730ad" +checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.56" +version = "1.0.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa0faa943b50f3db30a20aa7e265dbc66076993efed8463e8de414e5d06d3471" +checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.58", ] [[package]] name = "thread_local" -version = "1.1.7" +version = "1.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152" +checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" dependencies = [ "cfg-if", "once_cell", @@ -5584,13 +5623,14 @@ dependencies = [ [[package]] name = "time" -version = "0.3.31" +version = "0.3.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f657ba42c3f86e7680e53c8cd3af8abbe56b5491790b46e22e19c0d57463583e" +checksum = "c8248b6521bb14bc45b4067159b9b6ad792e2d6d754d6c41fb50e29fefe38749" dependencies = [ "deranged", "itoa", "libc", + "num-conv", "num_threads", "powerfmt", "serde", @@ -5606,10 +5646,11 @@ checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" [[package]] name = "time-macros" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26197e33420244aeb70c3e8c78376ca46571bc4e701e4791c2cd9f57dcb3a43f" +checksum = "7ba3a3ef41e6672a2f0f001392bb5dcd3ff0a9992d618ca761a11c3121547774" dependencies = [ + "num-conv", "time-core", ] @@ -5650,9 +5691,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.35.1" +version = "1.37.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c89b4efa943be685f629b149f53829423f8f5531ea21249408e8e2f8671ec104" +checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787" dependencies = [ "backtrace", "bytes", @@ -5693,14 +5734,14 @@ dependencies = [ [[package]] name = "toml" -version = "0.8.8" +version = "0.8.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1a195ec8c9da26928f773888e0742ca3ca1040c6cd859c919c9f59c1954ab35" +checksum = "e9dd1545e8208b4a5af1aa9bbd0b4cf7e9ea08fabc5d0a5c67fcaafa17433aa3" dependencies = [ "serde", "serde_spanned", "toml_datetime", - "toml_edit 0.21.0", + "toml_edit 0.22.9", ] [[package]] @@ -5722,20 +5763,31 @@ dependencies = [ "serde", "serde_spanned", "toml_datetime", - "winnow", + "winnow 0.5.40", ] [[package]] name = "toml_edit" -version = "0.21.0" +version = "0.21.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d34d383cd00a163b4a5b85053df514d45bc330f6de7737edfe0a93311d1eaa03" +checksum = "6a8534fd7f78b5405e860340ad6575217ce99f38d4d5c8f2442cb5ecb50090e1" +dependencies = [ + "indexmap", + "toml_datetime", + "winnow 0.5.40", +] + +[[package]] +name = "toml_edit" +version = "0.22.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e40bb779c5187258fd7aad0eb68cb8706a0a81fa712fbea808ab43c4b8374c4" dependencies = [ "indexmap", "serde", "serde_spanned", "toml_datetime", - "winnow", + "winnow 0.6.5", ] [[package]] @@ -5781,13 +5833,13 @@ dependencies = [ [[package]] name = "tree_magic_mini" -version = "3.0.3" +version = "3.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91adfd0607cacf6e4babdb870e9bec4037c1c4b151cfd279ccefc5e0c7feaa6d" +checksum = "77ee137597cdb361b55a4746983e4ac1b35ab6024396a419944ad473bb915265" dependencies = [ - "bytecount", "fnv", - "lazy_static", + "home", + "memchr", "nom", "once_cell", "petgraph", @@ -5813,9 +5865,9 @@ checksum = 
"42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" [[package]] name = "typetag" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c43148481c7b66502c48f35b8eef38b6ccdc7a9f04bd4cc294226d901ccc9bc7" +checksum = "661d18414ec032a49ece2d56eee03636e43c4e8d577047ab334c0ba892e29aaf" dependencies = [ "erased-serde", "inventory", @@ -5826,13 +5878,13 @@ dependencies = [ [[package]] name = "typetag-impl" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "291db8a81af4840c10d636e047cac67664e343be44e24dfdbd1492df9a5d3390" +checksum = "ac73887f47b9312552aa90ef477927ff014d63d1920ca8037c6c1951eab64bb1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.58", ] [[package]] @@ -5879,18 +5931,18 @@ checksum = "3b09c83c3c29d37506a3e260c08c03743a6bb66a9cd432c6934ab501a190571f" [[package]] name = "unicode-normalization" -version = "0.1.22" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" +checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" dependencies = [ "tinyvec", ] [[package]] name = "unicode-reverse" -version = "1.0.8" +version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bea5dacebb0d2d0a69a6700a05b59b3908bf801bf563a49bd27a1b60122962c" +checksum = "4b6f4888ebc23094adfb574fdca9fdc891826287a6397d2cd28802ffd6f20c76" dependencies = [ "unicode-segmentation", ] @@ -5915,15 +5967,15 @@ checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" [[package]] name = "unsafe-libyaml" -version = "0.2.10" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab4c90930b95a82d00dc9e9ac071b4991924390d46cbd0dfe566148667605e4b" +checksum = 
"673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" [[package]] name = "ureq" -version = "2.9.1" +version = "2.9.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8cdd25c339e200129fe4de81451814e5228c9b771d57378817d6117cc2b3f97" +checksum = "11f214ce18d8b2cbe84ed3aa6486ed3f5b285cf8d8fbdbce9f3f767a724adc35" dependencies = [ "base64 0.21.7", "encoding_rs", @@ -6062,9 +6114,9 @@ dependencies = [ [[package]] name = "uucore_procs" -version = "0.0.24" +version = "0.0.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3eb9aeeb06d1f15c5b3b51acddddf3436e3e1480902b2a200618ca5dbb24e392" +checksum = "f7f51594832e53b11811446b1cd3567722e2906a589a5b19622c5c4720977309" dependencies = [ "proc-macro2", "quote", @@ -6073,9 +6125,9 @@ dependencies = [ [[package]] name = "uuhelp_parser" -version = "0.0.24" +version = "0.0.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d841f8408028085ca65896cdd60b9925d4e407cb69989a64889f2bebbb51147b" +checksum = "ac7a6832a5add86204d5a8d0ef41c5a11e3ddf61c0f1a508f69e7e3e1bf04e3f" [[package]] name = "uuid" @@ -6084,6 +6136,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a183cf7feeba97b4dd1c0d46788634f6221d87fa961b305bed08c851829efcc0" dependencies = [ "getrandom", + "serde", ] [[package]] @@ -6183,9 +6236,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.90" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1223296a201415c7fad14792dbefaace9bd52b62d33453ade1c5b5f07555406" +checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -6193,24 +6246,24 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.90" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "fcdc935b63408d58a32f8cc9738a0bffd8f05cc7c002086c6ef20b7312ad9dcd" +checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.58", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.90" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e4c238561b2d428924c49815533a8b9121c664599558a5d9ec51f8a1740a999" +checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -6218,22 +6271,22 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.90" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bae1abb6806dc1ad9e560ed242107c0f6c84335f1749dd4e8ddb012ebd5e25a7" +checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.58", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.90" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d91413b1c31d7539ba5ef2451af3f0b833a005eb27a631cec32bc0635a8602b" +checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" [[package]] name = "wax" @@ -6270,7 +6323,7 @@ version = "0.31.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "82fb96ee935c2cea6668ccb470fb7771f6215d1691746c2d896b447a00ad3f1f" dependencies = [ - "bitflags 2.4.2", + "bitflags 2.5.0", "rustix", "wayland-backend", "wayland-scanner", @@ -6282,7 +6335,7 @@ version = "0.31.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f81f365b8b4a97f422ac0e8737c438024b5951734506b0e1d775c73030561f4" dependencies = [ - "bitflags 2.4.2", + "bitflags 2.5.0", "wayland-backend", 
"wayland-client", "wayland-scanner", @@ -6294,7 +6347,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ad1f61b76b6c2d8742e10f9ba5c3737f6530b4c243132c2a2ccc8aa96fe25cd6" dependencies = [ - "bitflags 2.4.2", + "bitflags 2.5.0", "wayland-backend", "wayland-client", "wayland-protocols", @@ -6325,15 +6378,14 @@ dependencies = [ [[package]] name = "which" -version = "6.0.0" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fa5e0c10bf77f44aac573e498d1a82d5fbd5e91f6fc0a99e7be4b38e85e101c" +checksum = "8211e4f58a2b2805adfbefbc07bab82958fc91e3836339b1ab7ae32465dce0d7" dependencies = [ "either", "home", - "once_cell", "rustix", - "windows-sys 0.52.0", + "winsafe", ] [[package]] @@ -6624,9 +6676,18 @@ checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" [[package]] name = "winnow" -version = "0.5.35" +version = "0.5.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1931d78a9c73861da0134f453bb1f790ce49b2e30eba8410b4b79bac72b46a2d" +checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876" +dependencies = [ + "memchr", +] + +[[package]] +name = "winnow" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dffa400e67ed5a4dd237983829e66475f0a4a26938c4b04c21baede6262215b8" dependencies = [ "memchr", ] @@ -6651,6 +6712,12 @@ dependencies = [ "version_check", ] +[[package]] +name = "winsafe" +version = "0.0.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d135d17ab770252ad95e9a872d365cf3090e3be864a34ab46f48555993efc904" + [[package]] name = "wl-clipboard-rs" version = "0.8.1" @@ -6710,9 +6777,9 @@ dependencies = [ [[package]] name = "xxhash-rust" -version = "0.8.8" +version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53be06678ed9e83edb1745eb72efc0bbcd7b5c3c35711a860906aed827a13d61" 
+checksum = "927da81e25be1e1a2901d59b81b37dd2efd1fc9c9345a55007f09bf5a2d3ee03" [[package]] name = "yansi" @@ -6737,7 +6804,7 @@ checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.58", ] [[package]] @@ -6754,27 +6821,27 @@ dependencies = [ [[package]] name = "zstd" -version = "0.13.0" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bffb3309596d527cfcba7dfc6ed6052f1d39dfbd7c867aa2e865e4a449c10110" +checksum = "2d789b1514203a1120ad2429eae43a7bd32b90976a7bb8a05f7ec02fa88cc23a" dependencies = [ "zstd-safe", ] [[package]] name = "zstd-safe" -version = "7.0.0" +version = "7.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43747c7422e2924c11144d5229878b98180ef8b06cca4ab5af37afc8a8d8ea3e" +checksum = "1cd99b45c6bc03a018c8b8a86025678c87e55526064e38f9df301989dce7ec0a" dependencies = [ "zstd-sys", ] [[package]] name = "zstd-sys" -version = "2.0.9+zstd.1.5.5" +version = "2.0.10+zstd.1.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e16efa8a874a0481a574084d34cc26fdb3b99627480f785888deb6386506656" +checksum = "c253a4914af5bafc8fa8c86ee400827e83cf6ec01195ec1f1ed8441bf00d65aa" dependencies = [ "cc", "pkg-config", diff --git a/Cargo.toml b/Cargo.toml index 0b9be56a57..2708d1cf31 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -47,6 +47,7 @@ members = [ "crates/nu_plugin_query", "crates/nu_plugin_custom_values", "crates/nu_plugin_formats", + "crates/nu_plugin_polars", "crates/nu-std", "crates/nu-table", "crates/nu-term-grid", diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/to_lazy.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/to_lazy.rs index cad516b9eb..1c711cdd57 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/to_lazy.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/lazy/to_lazy.rs @@ -48,7 +48,6 @@ impl Command for ToLazyFrame { let df = 
NuDataFrame::try_from_iter(input.into_iter(), maybe_schema)?; let lazy = NuLazyFrame::from_dataframe(df); let value = Value::custom(Box::new(lazy), call.head); - Ok(PipelineData::Value(value, None)) } } diff --git a/crates/nu_plugin_polars/Cargo.toml b/crates/nu_plugin_polars/Cargo.toml new file mode 100644 index 0000000000..9a18a3650f --- /dev/null +++ b/crates/nu_plugin_polars/Cargo.toml @@ -0,0 +1,78 @@ +[package] +authors = ["The Nushell Project Developers"] +description = "Nushell dataframe plugin commands based on polars." +edition = "2021" +license = "MIT" +name = "nu_plugin_polars" +repository = "https://github.com/nushell/nushell/tree/main/crates/nu-cmd-dataframe" +version = "0.92.2" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[[bin]] +name = "nu_plugin_polars" +bench = false + +[lib] +bench = false + +[dependencies] +nu-protocol = { path = "../nu-protocol", version = "0.92.2" } +nu-plugin = { path = "../nu-plugin", version = "0.92.2" } + +# Potential dependencies for extras +chrono = { workspace = true, features = ["std", "unstable-locales"], default-features = false } +chrono-tz = "0.8" +fancy-regex = { workspace = true } +indexmap = { version = "2.2" } +num = {version = "0.4"} +serde = { version = "1.0", features = ["derive"] } +sqlparser = { version = "0.43"} +polars-io = { version = "0.37", features = ["avro"]} +polars-arrow = { version = "0.37"} +polars-ops = { version = "0.37"} +polars-plan = { version = "0.37", features = ["regex"]} +polars-utils = { version = "0.37"} +typetag = "0.2" +uuid = { version = "1.7", features = ["v4", "serde"] } + +[dependencies.polars] +features = [ + "arg_where", + "checked_arithmetic", + "concat_str", + "cross_join", + "csv", + "cum_agg", + "default", + "dtype-categorical", + "dtype-datetime", + "dtype-struct", + "dtype-i8", + "dtype-i16", + "dtype-u8", + "dtype-u16", + "dynamic_group_by", + "ipc", + "is_in", + "json", + "lazy", + "object", + "parquet", + 
"random", + "rolling_window", + "rows", + "serde", + "serde-lazy", + "strings", + "to_dummies", +] +optional = false +version = "0.37" + +[dev-dependencies] +nu-cmd-lang = { path = "../nu-cmd-lang", version = "0.92.2" } +nu-engine = { path = "../nu-engine", version = "0.92.2" } +nu-parser = { path = "../nu-parser", version = "0.92.2" } +nu-command = { path = "../nu-command", version = "0.92.2" } +nu-plugin-test-support = { path = "../nu-plugin-test-support", version = "0.92.2" } diff --git a/crates/nu_plugin_polars/LICENSE b/crates/nu_plugin_polars/LICENSE new file mode 100644 index 0000000000..ae174e8595 --- /dev/null +++ b/crates/nu_plugin_polars/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 - 2023 The Nushell Project Developers + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/crates/nu_plugin_polars/src/cache.rs b/crates/nu_plugin_polars/src/cache.rs new file mode 100644 index 0000000000..a1763fbba2 --- /dev/null +++ b/crates/nu_plugin_polars/src/cache.rs @@ -0,0 +1,122 @@ +use std::{ + collections::HashMap, + sync::{Mutex, MutexGuard}, +}; + +use nu_plugin::EngineInterface; +use nu_protocol::{LabeledError, ShellError}; +use uuid::Uuid; + +use crate::{plugin_debug, values::PolarsPluginObject, PolarsPlugin}; + +#[derive(Default)] +pub struct Cache { + cache: Mutex>, +} + +impl Cache { + fn lock(&self) -> Result>, ShellError> { + self.cache.lock().map_err(|e| ShellError::GenericError { + error: format!("error acquiring cache lock: {e}"), + msg: "".into(), + span: None, + help: None, + inner: vec![], + }) + } + + /// Removes an item from the plugin cache. + /// The maybe_engine parameter is required outside of testing + pub fn remove( + &self, + maybe_engine: Option<&EngineInterface>, + uuid: &Uuid, + ) -> Result, ShellError> { + let mut lock = self.lock()?; + let removed = lock.remove(uuid); + plugin_debug!("PolarsPlugin: removing {uuid} from cache: {removed:?}"); + // Once there are no more entries in the cache + // we can turn plugin gc back on + match maybe_engine { + Some(engine) if lock.is_empty() => { + plugin_debug!("PolarsPlugin: Cache is empty enabling GC"); + engine.set_gc_disabled(false).map_err(LabeledError::from)?; + } + _ => (), + }; + drop(lock); + Ok(removed) + } + + /// Inserts an item into the plugin cache. 
+ /// The maybe_engine parameter is required outside of testing + pub fn insert( + &self, + maybe_engine: Option<&EngineInterface>, + uuid: Uuid, + value: PolarsPluginObject, + ) -> Result, ShellError> { + let mut lock = self.lock()?; + plugin_debug!("PolarsPlugin: Inserting {uuid} into cache: {value:?}"); + // turn off plugin gc the first time an entry is added to the cache + // as we don't want the plugin to be garbage collected if there + // is any live data + match maybe_engine { + Some(engine) if lock.is_empty() => { + plugin_debug!("PolarsPlugin: Cache has values disabling GC"); + engine.set_gc_disabled(true).map_err(LabeledError::from)?; + } + _ => (), + }; + let result = lock.insert(uuid, value); + drop(lock); + Ok(result) + } + + pub fn get(&self, uuid: &Uuid) -> Result, ShellError> { + let lock = self.lock()?; + let result = lock.get(uuid).cloned(); + drop(lock); + Ok(result) + } + + pub fn process_entries(&self, mut func: F) -> Result, ShellError> + where + F: FnMut((&Uuid, &PolarsPluginObject)) -> Result, + { + let lock = self.lock()?; + let mut vals: Vec = Vec::new(); + for entry in lock.iter() { + eprintln!("entry: {:?}", entry); + let val = func(entry)?; + vals.push(val); + } + drop(lock); + Ok(vals) + } +} + +pub trait Cacheable: Sized + Clone { + fn cache_id(&self) -> &Uuid; + + fn to_cache_value(&self) -> Result; + + fn from_cache_value(cv: PolarsPluginObject) -> Result; + + fn cache(self, plugin: &PolarsPlugin, engine: &EngineInterface) -> Result { + plugin.cache.insert( + Some(engine), + self.cache_id().to_owned(), + self.to_cache_value()?, + )?; + Ok(self) + } + + fn get_cached(plugin: &PolarsPlugin, id: &Uuid) -> Result, ShellError> { + if let Some(cache_value) = plugin.cache.get(id)? 
{ + Ok(Some(Self::from_cache_value(cache_value)?)) + } else { + Ok(None) + } + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/README.md b/crates/nu_plugin_polars/src/dataframe/README.md new file mode 100644 index 0000000000..593217ede6 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/README.md @@ -0,0 +1,12 @@ +# Dataframe + +This dataframe directory holds all of the definitions of the dataframe data structures and commands. + +There are three sections of commands: + +* [eager](./eager) +* [series](./series) +* [values](./values) + +For more details see the +[Nushell book section on dataframes](https://www.nushell.sh/book/dataframes.html) diff --git a/crates/nu_plugin_polars/src/dataframe/eager/append.rs b/crates/nu_plugin_polars/src/dataframe/eager/append.rs new file mode 100644 index 0000000000..4b2ae3e0d1 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/eager/append.rs @@ -0,0 +1,144 @@ +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, + Value, +}; + +use crate::{ + values::{to_pipeline_data, Axis, Column, CustomValueSupport, NuDataFrame}, + PolarsPlugin, +}; + +#[derive(Clone)] +pub struct AppendDF; + +impl PluginCommand for AppendDF { + type Plugin = PolarsPlugin; + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required("other", SyntaxShape::Any, "other dataframe to append") + .switch("col", "append as new columns instead of rows", None) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } + + fn name(&self) -> &str { + "polars append" + } + + fn usage(&self) -> &str { + 
"Appends a new dataframe." + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Appends a dataframe as new columns", + example: r#"let a = ([[a b]; [1 2] [3 4]] | polars into-df); + $a | polars append $a"#, + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(1), Value::test_int(3)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + ), + Column::new( + "a_x".to_string(), + vec![Value::test_int(1), Value::test_int(3)], + ), + Column::new( + "b_x".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + ), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Appends a dataframe merging at the end of columns", + example: r#"let a = ([[a b]; [1 2] [3 4]] | polars into-df); $a | polars append $a --col"#, + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![ + Value::test_int(1), + Value::test_int(3), + Value::test_int(1), + Value::test_int(3), + ], + ), + Column::new( + "b".to_string(), + vec![ + Value::test_int(2), + Value::test_int(4), + Value::test_int(2), + Value::test_int(4), + ], + ), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ] + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let other: Value = call.req(0)?; + + let axis = if call.has_flag("col")? 
{ + Axis::Column + } else { + Axis::Row + }; + + let df_other = NuDataFrame::try_from_value_coerce(plugin, &other, call.head)?; + let df = NuDataFrame::try_from_pipeline(plugin, input, call.head)?; + let df = df.append_df(&df_other, axis, call.head)?; + + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&AppendDF) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/eager/cast.rs b/crates/nu_plugin_polars/src/dataframe/eager/cast.rs new file mode 100644 index 0000000000..20a56680fb --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/eager/cast.rs @@ -0,0 +1,202 @@ +use crate::{ + dataframe::values::{str_to_dtype, to_pipeline_data, NuExpression, NuLazyFrame}, + values::{cant_convert_err, PolarsPluginObject, PolarsPluginType}, + PolarsPlugin, +}; + +use super::super::values::NuDataFrame; +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + record, Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, + SyntaxShape, Type, Value, +}; +use polars::prelude::*; + +#[derive(Clone)] +pub struct CastDF; + +impl PluginCommand for CastDF { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars cast" + } + + fn usage(&self) -> &str { + "Cast a column to a different dtype." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_types(vec![ + ( + Type::Custom("expression".into()), + Type::Custom("expression".into()), + ), + ( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ), + ]) + .required( + "dtype", + SyntaxShape::String, + "The dtype to cast the column to", + ) + .optional( + "column", + SyntaxShape::String, + "The column to cast. 
Required when used with a dataframe.", + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Cast a column in a dataframe to a different dtype", + example: "[[a b]; [1 2] [3 4]] | polars into-df | polars cast u8 a | polars schema", + result: Some(Value::record( + record! { + "a" => Value::string("u8", Span::test_data()), + "b" => Value::string("i64", Span::test_data()), + }, + Span::test_data(), + )), + }, + Example { + description: "Cast a column in a lazy dataframe to a different dtype", + example: + "[[a b]; [1 2] [3 4]] | polars into-df | polars into-lazy | polars cast u8 a | polars schema", + result: Some(Value::record( + record! { + "a" => Value::string("u8", Span::test_data()), + "b" => Value::string("i64", Span::test_data()), + }, + Span::test_data(), + )), + }, + Example { + description: "Cast a column in a expression to a different dtype", + example: r#"[[a b]; [1 2] [1 4]] | polars into-df | polars group-by a | polars agg [ (polars col b | polars cast u8 | polars min | polars as "b_min") ] | polars schema"#, + result: None, + }, + ] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let value = input.into_value(call.head); + match PolarsPluginObject::try_from_value(plugin, &value)? 
{ + PolarsPluginObject::NuLazyFrame(lazy) => { + let (dtype, column_nm) = df_args(call)?; + command_lazy(plugin, engine, call, column_nm, dtype, lazy) + } + PolarsPluginObject::NuDataFrame(df) => { + let (dtype, column_nm) = df_args(call)?; + command_eager(plugin, engine, call, column_nm, dtype, df) + } + PolarsPluginObject::NuExpression(expr) => { + let dtype: String = call.req(0)?; + let dtype = str_to_dtype(&dtype, call.head)?; + let expr: NuExpression = expr.to_polars().cast(dtype).into(); + to_pipeline_data(plugin, engine, call.head, expr) + } + _ => Err(cant_convert_err( + &value, + &[ + PolarsPluginType::NuDataFrame, + PolarsPluginType::NuLazyFrame, + PolarsPluginType::NuExpression, + ], + )), + } + .map_err(LabeledError::from) + } +} + +fn df_args(call: &EvaluatedCall) -> Result<(DataType, String), ShellError> { + let dtype = dtype_arg(call)?; + let column_nm: String = call.opt(1)?.ok_or(ShellError::MissingParameter { + param_name: "column_name".into(), + span: call.head, + })?; + Ok((dtype, column_nm)) +} + +fn dtype_arg(call: &EvaluatedCall) -> Result { + let dtype: String = call.req(0)?; + str_to_dtype(&dtype, call.head) +} + +fn command_lazy( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + column_nm: String, + dtype: DataType, + lazy: NuLazyFrame, +) -> Result { + let column = col(&column_nm).cast(dtype); + let lazy = lazy.to_polars().with_columns(&[column]); + let lazy = NuLazyFrame::new(false, lazy); + to_pipeline_data(plugin, engine, call.head, lazy) +} + +fn command_eager( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + column_nm: String, + dtype: DataType, + nu_df: NuDataFrame, +) -> Result { + let mut df = (*nu_df.df).clone(); + let column = df + .column(&column_nm) + .map_err(|e| ShellError::GenericError { + error: format!("{e}"), + msg: "".into(), + span: Some(call.head), + help: None, + inner: vec![], + })?; + + let casted = column.cast(&dtype).map_err(|e| ShellError::GenericError { 
+ error: format!("{e}"), + msg: "".into(), + span: Some(call.head), + help: None, + inner: vec![], + })?; + + let _ = df + .with_column(casted) + .map_err(|e| ShellError::GenericError { + error: format!("{e}"), + msg: "".into(), + span: Some(call.head), + help: None, + inner: vec![], + })?; + + let df = NuDataFrame::new(false, df); + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&CastDF) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/eager/columns.rs b/crates/nu_plugin_polars/src/dataframe/eager/columns.rs new file mode 100644 index 0000000000..892d3a5175 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/eager/columns.rs @@ -0,0 +1,79 @@ +use crate::PolarsPlugin; + +use super::super::values::NuDataFrame; +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value, +}; + +#[derive(Clone)] +pub struct ColumnsDF; + +impl PluginCommand for ColumnsDF { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars columns" + } + + fn usage(&self) -> &str { + "Show dataframe columns." 
+ } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_type(Type::Custom("dataframe".into()), Type::Any) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Dataframe columns", + example: "[[a b]; [1 2] [3 4]] | polars into-df | polars columns", + result: Some(Value::list( + vec![Value::test_string("a"), Value::test_string("b")], + Span::test_data(), + )), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + _engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, call, input).map_err(|e| e.into()) + } +} + +fn command( + plugin: &PolarsPlugin, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + + let names: Vec = df + .as_ref() + .get_column_names() + .iter() + .map(|v| Value::string(*v, call.head)) + .collect(); + + let names = Value::list(names, call.head); + + Ok(PipelineData::Value(names, None)) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&ColumnsDF) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/eager/drop.rs b/crates/nu_plugin_polars/src/dataframe/eager/drop.rs new file mode 100644 index 0000000000..e8a24bb328 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/eager/drop.rs @@ -0,0 +1,127 @@ +use crate::values::to_pipeline_data; +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, + Value, +}; + +use crate::values::CustomValueSupport; +use crate::PolarsPlugin; + +use super::super::values::utils::convert_columns; +use super::super::values::{Column, NuDataFrame}; + +#[derive(Clone)] +pub struct DropDF; + +impl PluginCommand for DropDF { 
+ type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars drop" + } + + fn usage(&self) -> &str { + "Creates a new dataframe by dropping the selected columns." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .rest("rest", SyntaxShape::Any, "column names to be dropped") + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "drop column a", + example: "[[a b]; [1 2] [3 4]] | polars into-df | polars drop a", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let columns: Vec = call.rest(0)?; + let (col_string, col_span) = convert_columns(columns, call.head)?; + + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + + let new_df = col_string + .first() + .ok_or_else(|| ShellError::GenericError { + error: "Empty names list".into(), + msg: "No column names were found".into(), + span: Some(col_span), + help: None, + inner: vec![], + }) + .and_then(|col| { + df.as_ref() + .drop(&col.item) + .map_err(|e| ShellError::GenericError { + error: "Error dropping column".into(), + msg: e.to_string(), + span: Some(col.span), + help: None, + inner: vec![], + }) + })?; + + // If there are more columns in the drop selection list, these + // are added from the resulting dataframe + let polars_df = 
col_string.iter().skip(1).try_fold(new_df, |new_df, col| { + new_df + .drop(&col.item) + .map_err(|e| ShellError::GenericError { + error: "Error dropping column".into(), + msg: e.to_string(), + span: Some(col.span), + help: None, + inner: vec![], + }) + })?; + + let final_df = NuDataFrame::new(df.from_lazy, polars_df); + + to_pipeline_data(plugin, engine, call.head, final_df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&DropDF) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/eager/drop_duplicates.rs b/crates/nu_plugin_polars/src/dataframe/eager/drop_duplicates.rs new file mode 100644 index 0000000000..57cd0ca642 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/eager/drop_duplicates.rs @@ -0,0 +1,133 @@ +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, + Value, +}; +use polars::prelude::UniqueKeepStrategy; + +use crate::values::{to_pipeline_data, CustomValueSupport}; +use crate::PolarsPlugin; + +use super::super::values::utils::convert_columns_string; +use super::super::values::{Column, NuDataFrame}; + +#[derive(Clone)] +pub struct DropDuplicates; + +impl PluginCommand for DropDuplicates { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars drop-duplicates" + } + + fn usage(&self) -> &str { + "Drops duplicate values in dataframe." 
+ } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .optional( + "subset", + SyntaxShape::Table(vec![]), + "subset of columns to drop duplicates", + ) + .switch("maintain", "maintain order", Some('m')) + .switch( + "last", + "keeps last duplicate value (by default keeps first)", + Some('l'), + ) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "drop duplicates", + example: "[[a b]; [1 2] [3 4] [1 2]] | polars into-df | polars drop-duplicates", + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(3), Value::test_int(1)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(4), Value::test_int(2)], + ), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let columns: Option> = call.opt(0)?; + let (subset, col_span) = match columns { + Some(cols) => { + let (agg_string, col_span) = convert_columns_string(cols, call.head)?; + (Some(agg_string), col_span) + } + None => (None, call.head), + }; + + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + + let subset_slice = subset.as_ref().map(|cols| &cols[..]); + + let keep_strategy = if call.has_flag("last")? 
{ + UniqueKeepStrategy::Last + } else { + UniqueKeepStrategy::First + }; + + let polars_df = df + .as_ref() + .unique(subset_slice, keep_strategy, None) + .map_err(|e| ShellError::GenericError { + error: "Error dropping duplicates".into(), + msg: e.to_string(), + span: Some(col_span), + help: None, + inner: vec![], + })?; + + let df = NuDataFrame::new(df.from_lazy, polars_df); + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use crate::test::test_polars_plugin_command; + + use super::*; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&DropDuplicates) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/eager/drop_nulls.rs b/crates/nu_plugin_polars/src/dataframe/eager/drop_nulls.rs new file mode 100644 index 0000000000..fed4596338 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/eager/drop_nulls.rs @@ -0,0 +1,149 @@ +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, + Value, +}; + +use crate::values::{to_pipeline_data, CustomValueSupport}; +use crate::PolarsPlugin; + +use super::super::values::utils::convert_columns_string; +use super::super::values::{Column, NuDataFrame}; + +#[derive(Clone)] +pub struct DropNulls; + +impl PluginCommand for DropNulls { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars drop-nulls" + } + + fn usage(&self) -> &str { + "Drops null values in dataframe." 
+ } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .optional( + "subset", + SyntaxShape::Table(vec![]), + "subset of columns to drop nulls", + ) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "drop null values in dataframe", + example: r#"let df = ([[a b]; [1 2] [3 0] [1 2]] | polars into-df); + let res = ($df.b / $df.b); + let a = ($df | polars with-column $res --name res); + $a | polars drop-nulls"#, + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(1), Value::test_int(1)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(2)], + ), + Column::new( + "res".to_string(), + vec![Value::test_int(1), Value::test_int(1)], + ), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "drop null values in dataframe", + example: r#"let s = ([1 2 0 0 3 4] | polars into-df); + ($s / $s) | polars drop-nulls"#, + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "div_0_0".to_string(), + vec![ + Value::test_int(1), + Value::test_int(1), + Value::test_int(1), + Value::test_int(1), + ], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + + let columns: Option> = call.opt(0)?; + + let (subset, col_span) = match columns 
{ + Some(cols) => { + let (agg_string, col_span) = convert_columns_string(cols, call.head)?; + (Some(agg_string), col_span) + } + None => (None, call.head), + }; + + let subset_slice = subset.as_ref().map(|cols| &cols[..]); + + let polars_df = df + .as_ref() + .drop_nulls(subset_slice) + .map_err(|e| ShellError::GenericError { + error: "Error dropping nulls".into(), + msg: e.to_string(), + span: Some(col_span), + help: None, + inner: vec![], + })?; + let df = NuDataFrame::new(df.from_lazy, polars_df); + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&DropNulls) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/eager/dtypes.rs b/crates/nu_plugin_polars/src/dataframe/eager/dtypes.rs new file mode 100644 index 0000000000..106200baaa --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/eager/dtypes.rs @@ -0,0 +1,111 @@ +use crate::PolarsPlugin; + +use super::super::values::{to_pipeline_data, Column, CustomValueSupport, NuDataFrame}; +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value, +}; + +#[derive(Clone)] +pub struct DataTypes; + +impl PluginCommand for DataTypes { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars dtypes" + } + + fn usage(&self) -> &str { + "Show dataframe data types." 
+ } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Dataframe dtypes", + example: "[[a b]; [1 2] [3 4]] | polars into-df | polars dtypes", + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "column".to_string(), + vec![Value::test_string("a"), Value::test_string("b")], + ), + Column::new( + "dtype".to_string(), + vec![Value::test_string("i64"), Value::test_string("i64")], + ), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + + let mut dtypes: Vec = Vec::new(); + let names: Vec = df + .as_ref() + .get_column_names() + .iter() + .map(|v| { + let dtype = df + .as_ref() + .column(v) + .expect("using name from list of names from dataframe") + .dtype(); + + let dtype_str = dtype.to_string(); + + dtypes.push(Value::string(dtype_str, call.head)); + + Value::string(*v, call.head) + }) + .collect(); + + let names_col = Column::new("column".to_string(), names); + let dtypes_col = Column::new("dtype".to_string(), dtypes); + + let df = NuDataFrame::try_from_columns(vec![names_col, dtypes_col], None)?; + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&DataTypes) + 
} +} diff --git a/crates/nu_plugin_polars/src/dataframe/eager/dummies.rs b/crates/nu_plugin_polars/src/dataframe/eager/dummies.rs new file mode 100644 index 0000000000..4c71f92741 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/eager/dummies.rs @@ -0,0 +1,119 @@ +use super::super::values::NuDataFrame; +use crate::{ + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, +}; +use polars::{prelude::*, series::Series}; + +#[derive(Clone)] +pub struct Dummies; + +impl PluginCommand for Dummies { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars dummies" + } + + fn usage(&self) -> &str { + "Creates a new dataframe with dummy variables." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .switch("drop-first", "Drop first row", Some('d')) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Create new dataframe with dummy variables from a dataframe", + example: "[[a b]; [1 2] [3 4]] | polars into-df | polars dummies", + result: Some( + NuDataFrame::try_from_series_vec( + vec![ + Series::new("a_1", &[1_u8, 0]), + Series::new("a_3", &[0_u8, 1]), + Series::new("b_2", &[1_u8, 0]), + Series::new("b_4", &[0_u8, 1]), + ], + Span::test_data(), + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Create new dataframe with dummy variables from a series", + example: "[1 2 2 3 3] | polars into-df | polars dummies", + result: Some( + NuDataFrame::try_from_series_vec( + vec![ + Series::new("0_1", &[1_u8, 0, 0, 0, 0]), + Series::new("0_2", &[0_u8, 1, 1, 0, 0]), + Series::new("0_3", &[0_u8, 0, 0, 1, 1]), + ], + 
Span::test_data(), + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let drop_first: bool = call.has_flag("drop-first")?; + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + + let polars_df = + df.as_ref() + .to_dummies(None, drop_first) + .map_err(|e| ShellError::GenericError { + error: "Error calculating dummies".into(), + msg: e.to_string(), + span: Some(call.head), + help: Some("The only allowed column types for dummies are String or Int".into()), + inner: vec![], + })?; + + let df: NuDataFrame = polars_df.into(); + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use crate::test::test_polars_plugin_command; + + use super::*; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&Dummies) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/eager/filter_with.rs b/crates/nu_plugin_polars/src/dataframe/eager/filter_with.rs new file mode 100644 index 0000000000..1dcb84053f --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/eager/filter_with.rs @@ -0,0 +1,165 @@ +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, + Value, +}; +use polars::prelude::LazyFrame; + +use crate::{ + dataframe::values::{NuExpression, NuLazyFrame}, + values::{ + cant_convert_err, to_pipeline_data, CustomValueSupport, PolarsPluginObject, + PolarsPluginType, + }, + PolarsPlugin, +}; + +use super::super::values::{Column, NuDataFrame}; + +#[derive(Clone)] +pub struct 
FilterWith; + +impl PluginCommand for FilterWith { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars filter-with" + } + + fn usage(&self) -> &str { + "Filters dataframe using a mask or expression as reference." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required( + "mask or expression", + SyntaxShape::Any, + "boolean mask used to filter data", + ) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe or lazyframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Filter dataframe using a bool mask", + example: r#"let mask = ([true false] | polars into-df); + [[a b]; [1 2] [3 4]] | polars into-df | polars filter-with $mask"#, + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new("a".to_string(), vec![Value::test_int(1)]), + Column::new("b".to_string(), vec![Value::test_int(2)]), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Filter dataframe using an expression", + example: "[[a b]; [1 2] [3 4]] | polars into-df | polars filter-with ((polars col a) > 1)", + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new("a".to_string(), vec![Value::test_int(3)]), + Column::new("b".to_string(), vec![Value::test_int(4)]), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let value = input.into_value(call.head); + match PolarsPluginObject::try_from_value(plugin, &value)? 
{ + PolarsPluginObject::NuDataFrame(df) => command_eager(plugin, engine, call, df), + PolarsPluginObject::NuLazyFrame(lazy) => command_lazy(plugin, engine, call, lazy), + _ => Err(cant_convert_err( + &value, + &[PolarsPluginType::NuDataFrame, PolarsPluginType::NuLazyFrame], + )), + } + .map_err(LabeledError::from) + } +} + +fn command_eager( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + df: NuDataFrame, +) -> Result { + let mask_value: Value = call.req(0)?; + let mask_span = mask_value.span(); + + if NuExpression::can_downcast(&mask_value) { + let expression = NuExpression::try_from_value(plugin, &mask_value)?; + let lazy = df.lazy(); + let lazy = lazy.apply_with_expr(expression, LazyFrame::filter); + + to_pipeline_data(plugin, engine, call.head, lazy) + } else { + let mask = NuDataFrame::try_from_value_coerce(plugin, &mask_value, mask_span)? + .as_series(mask_span)?; + let mask = mask.bool().map_err(|e| ShellError::GenericError { + error: "Error casting to bool".into(), + msg: e.to_string(), + span: Some(mask_span), + help: Some("Perhaps you want to use a series with booleans as mask".into()), + inner: vec![], + })?; + + let polars_df = df + .as_ref() + .filter(mask) + .map_err(|e| ShellError::GenericError { + error: "Error filtering dataframe".into(), + msg: e.to_string(), + span: Some(call.head), + help: Some("The only allowed column types for dummies are String or Int".into()), + inner: vec![], + })?; + let df = NuDataFrame::new(df.from_lazy, polars_df); + to_pipeline_data(plugin, engine, call.head, df) + } +} + +fn command_lazy( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + lazy: NuLazyFrame, +) -> Result { + let expr: Value = call.req(0)?; + let expr = NuExpression::try_from_value(plugin, &expr)?; + let lazy = lazy.apply_with_expr(expr, LazyFrame::filter); + to_pipeline_data(plugin, engine, call.head, lazy) +} + +#[cfg(test)] +mod test { + use super::*; + use 
crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&FilterWith) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/eager/first.rs b/crates/nu_plugin_polars/src/dataframe/eager/first.rs new file mode 100644 index 0000000000..72a0a6cc8c --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/eager/first.rs @@ -0,0 +1,137 @@ +use crate::{ + values::{to_pipeline_data, Column, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::values::{NuDataFrame, NuExpression}; +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, + Value, +}; + +#[derive(Clone)] +pub struct FirstDF; + +impl PluginCommand for FirstDF { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars first" + } + + fn usage(&self) -> &str { + "Show only the first number of rows or create a first expression" + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .optional( + "rows", + SyntaxShape::Int, + "starting from the front, the number of rows to return", + ) + .input_output_types(vec![ + ( + Type::Custom("expression".into()), + Type::Custom("expression".into()), + ), + ( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ), + ]) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Return the first row of a dataframe", + example: "[[a b]; [1 2] [3 4]] | polars into-df | polars first", + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new("a".to_string(), vec![Value::test_int(1)]), + Column::new("b".to_string(), vec![Value::test_int(2)]), + ], + None, + ) + .expect("should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Return the first two rows of a dataframe", + example: "[[a b]; [1 2] [3 4]] | polars 
into-df | polars first 2", + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(1), Value::test_int(3)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + ), + ], + None, + ) + .expect("should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Creates a first expression from a column", + example: "polars col a | polars first", + result: None, + }, + ] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let value = input.into_value(call.head); + if NuDataFrame::can_downcast(&value) { + let df = NuDataFrame::try_from_value(plugin, &value)?; + command(plugin, engine, call, df).map_err(|e| e.into()) + } else { + let expr = NuExpression::try_from_value(plugin, &value)?; + let expr: NuExpression = expr.to_polars().first().into(); + + to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from) + } + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + df: NuDataFrame, +) -> Result { + let rows: Option = call.opt(0)?; + let rows = rows.unwrap_or(1); + + let res = df.as_ref().head(Some(rows)); + let res = NuDataFrame::new(false, res); + + to_pipeline_data(plugin, engine, call.head, res) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&FirstDF) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/eager/get.rs b/crates/nu_plugin_polars/src/dataframe/eager/get.rs new file mode 100644 index 0000000000..32b8c3143a --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/eager/get.rs @@ -0,0 +1,103 @@ +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, 
Span, SyntaxShape, Type, + Value, +}; + +use crate::{ + dataframe::values::utils::convert_columns_string, + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::values::{Column, NuDataFrame}; + +#[derive(Clone)] +pub struct GetDF; + +impl PluginCommand for GetDF { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars get" + } + + fn usage(&self) -> &str { + "Creates dataframe with the selected columns." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .rest("rest", SyntaxShape::Any, "column names to sort dataframe") + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Returns the selected column", + example: "[[a b]; [1 2] [3 4]] | polars into-df | polars get a", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "a".to_string(), + vec![Value::test_int(1), Value::test_int(3)], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let columns: Vec = call.rest(0)?; + let (col_string, col_span) = convert_columns_string(columns, call.head)?; + + let df = NuDataFrame::try_from_pipeline(plugin, input, call.head)?; + + let df = df + .as_ref() + .select(col_string) + .map_err(|e| ShellError::GenericError { + error: "Error selecting columns".into(), + msg: e.to_string(), + span: Some(col_span), + help: None, + inner: vec![], + })?; + let df = NuDataFrame::new(false, df); + to_pipeline_data(plugin, engine, call.head, df) 
+} + +#[cfg(test)] +mod test { + use crate::test::test_polars_plugin_command; + + use super::*; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&GetDF) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/eager/last.rs b/crates/nu_plugin_polars/src/dataframe/eager/last.rs new file mode 100644 index 0000000000..e9a019093b --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/eager/last.rs @@ -0,0 +1,112 @@ +use crate::{ + values::{to_pipeline_data, Column, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::values::{utils::DEFAULT_ROWS, NuDataFrame, NuExpression}; +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, + Value, +}; + +#[derive(Clone)] +pub struct LastDF; + +impl PluginCommand for LastDF { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars last" + } + + fn usage(&self) -> &str { + "Creates new dataframe with tail rows or creates a last expression." 
+ } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .optional("rows", SyntaxShape::Int, "Number of rows for tail") + .input_output_types(vec![ + ( + Type::Custom("expression".into()), + Type::Custom("expression".into()), + ), + ( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ), + ]) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Create new dataframe with last rows", + example: "[[a b]; [1 2] [3 4]] | polars into-df | polars last 1", + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new("a".to_string(), vec![Value::test_int(3)]), + Column::new("b".to_string(), vec![Value::test_int(4)]), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Creates a last expression from a column", + example: "polars col a | polars last", + result: None, + }, + ] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let value = input.into_value(call.head); + if NuDataFrame::can_downcast(&value) { + let df = NuDataFrame::try_from_value(plugin, &value)?; + command(plugin, engine, call, df).map_err(|e| e.into()) + } else { + let expr = NuExpression::try_from_value(plugin, &value)?; + let expr: NuExpression = expr.to_polars().last().into(); + + to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from) + } + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + df: NuDataFrame, +) -> Result { + let rows: Option = call.opt(0)?; + let rows = rows.unwrap_or(DEFAULT_ROWS); + + let res = df.as_ref().tail(Some(rows)); + let res = NuDataFrame::new(false, res); + to_pipeline_data(plugin, engine, call.head, res) +} + +#[cfg(test)] +mod test { + use crate::test::test_polars_plugin_command; + + use super::*; + + #[test] + fn 
test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&LastDF) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/eager/list.rs b/crates/nu_plugin_polars/src/dataframe/eager/list.rs new file mode 100644 index 0000000000..1548013415 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/eager/list.rs @@ -0,0 +1,96 @@ +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + record, Category, Example, IntoPipelineData, LabeledError, PipelineData, Signature, Value, +}; + +use crate::{values::PolarsPluginObject, PolarsPlugin}; + +#[derive(Clone)] +pub struct ListDF; + +impl PluginCommand for ListDF { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars ls" + } + + fn usage(&self) -> &str { + "Lists stored dataframes." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()).category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Creates a new dataframe and shows it in the dataframe list", + example: r#"let test = ([[a b];[1 2] [3 4]] | dfr into-df); + polars ls"#, + result: None, + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + _engine: &EngineInterface, + call: &EvaluatedCall, + _input: PipelineData, + ) -> Result { + let vals = plugin.cache.process_entries(|(key, value)| match value { + PolarsPluginObject::NuDataFrame(df) => Ok(Some(Value::record( + record! { + "key" => Value::string(key.to_string(), call.head), + "columns" => Value::int(df.as_ref().width() as i64, call.head), + "rows" => Value::int(df.as_ref().height() as i64, call.head), + "type" => Value::string("NuDataFrame", call.head), + }, + call.head, + ))), + PolarsPluginObject::NuLazyFrame(lf) => { + let lf = lf.clone().collect(call.head)?; + Ok(Some(Value::record( + record! 
{ + "key" => Value::string(key.to_string(), call.head), + "columns" => Value::int(lf.as_ref().width() as i64, call.head), + "rows" => Value::int(lf.as_ref().height() as i64, call.head), + "type" => Value::string("NuLazyFrame", call.head), + }, + call.head, + ))) + } + PolarsPluginObject::NuExpression(_) => Ok(Some(Value::record( + record! { + "key" => Value::string(key.to_string(), call.head), + "columns" => Value::nothing(call.head), + "rows" => Value::nothing(call.head), + "type" => Value::string("NuExpression", call.head), + }, + call.head, + ))), + PolarsPluginObject::NuLazyGroupBy(_) => Ok(Some(Value::record( + record! { + "key" => Value::string(key.to_string(), call.head), + "columns" => Value::nothing(call.head), + "rows" => Value::nothing(call.head), + "type" => Value::string("NuLazyGroupBy", call.head), + }, + call.head, + ))), + PolarsPluginObject::NuWhen(_) => Ok(Some(Value::record( + record! { + "key" => Value::string(key.to_string(), call.head), + "columns" => Value::nothing(call.head), + "rows" => Value::nothing(call.head), + "type" => Value::string("NuWhen", call.head), + }, + call.head, + ))), + })?; + let vals = vals.into_iter().flatten().collect(); + let list = Value::list(vals, call.head); + Ok(list.into_pipeline_data()) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/eager/melt.rs b/crates/nu_plugin_polars/src/dataframe/eager/melt.rs new file mode 100644 index 0000000000..c9e13fcfea --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/eager/melt.rs @@ -0,0 +1,255 @@ +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Spanned, + SyntaxShape, Type, Value, +}; + +use crate::{ + dataframe::values::utils::convert_columns_string, + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::values::{Column, NuDataFrame}; + +#[derive(Clone)] +pub struct MeltDF; + +impl PluginCommand for MeltDF { + 
type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars melt" + } + + fn usage(&self) -> &str { + "Unpivot a DataFrame from wide to long format." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required_named( + "columns", + SyntaxShape::Table(vec![]), + "column names for melting", + Some('c'), + ) + .required_named( + "values", + SyntaxShape::Table(vec![]), + "column names used as value columns", + Some('v'), + ) + .named( + "variable-name", + SyntaxShape::String, + "optional name for variable column", + Some('r'), + ) + .named( + "value-name", + SyntaxShape::String, + "optional name for value column", + Some('l'), + ) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "melt dataframe", + example: + "[[a b c d]; [x 1 4 a] [y 2 5 b] [z 3 6 c]] | polars into-df | polars melt -c [b c] -v [a d]", + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "b".to_string(), + vec![ + Value::test_int(1), + Value::test_int(2), + Value::test_int(3), + Value::test_int(1), + Value::test_int(2), + Value::test_int(3), + ], + ), + Column::new( + "c".to_string(), + vec![ + Value::test_int(4), + Value::test_int(5), + Value::test_int(6), + Value::test_int(4), + Value::test_int(5), + Value::test_int(6), + ], + ), + Column::new( + "variable".to_string(), + vec![ + Value::test_string("a"), + Value::test_string("a"), + Value::test_string("a"), + Value::test_string("d"), + Value::test_string("d"), + Value::test_string("d"), + ], + ), + Column::new( + "value".to_string(), + vec![ + Value::test_string("x"), + Value::test_string("y"), + Value::test_string("z"), + Value::test_string("a"), + Value::test_string("b"), + Value::test_string("c"), + ], + ), + ], None) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, 
+ plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let id_col: Vec = call.get_flag("columns")?.expect("required value"); + let val_col: Vec = call.get_flag("values")?.expect("required value"); + + let value_name: Option> = call.get_flag("value-name")?; + let variable_name: Option> = call.get_flag("variable-name")?; + + let (id_col_string, id_col_span) = convert_columns_string(id_col, call.head)?; + let (val_col_string, val_col_span) = convert_columns_string(val_col, call.head)?; + + let df = NuDataFrame::try_from_pipeline(plugin, input, call.head)?; + + check_column_datatypes(df.as_ref(), &id_col_string, id_col_span)?; + check_column_datatypes(df.as_ref(), &val_col_string, val_col_span)?; + + let mut res = df + .as_ref() + .melt(&id_col_string, &val_col_string) + .map_err(|e| ShellError::GenericError { + error: "Error calculating melt".into(), + msg: e.to_string(), + span: Some(call.head), + help: None, + inner: vec![], + })?; + + if let Some(name) = &variable_name { + res.rename("variable", &name.item) + .map_err(|e| ShellError::GenericError { + error: "Error renaming column".into(), + msg: e.to_string(), + span: Some(name.span), + help: None, + inner: vec![], + })?; + } + + if let Some(name) = &value_name { + res.rename("value", &name.item) + .map_err(|e| ShellError::GenericError { + error: "Error renaming column".into(), + msg: e.to_string(), + span: Some(name.span), + help: None, + inner: vec![], + })?; + } + + let res = NuDataFrame::new(false, res); + to_pipeline_data(plugin, engine, call.head, res) +} + +fn check_column_datatypes>( + df: &polars::prelude::DataFrame, + cols: &[T], + col_span: Span, +) -> Result<(), ShellError> { + if cols.is_empty() { + return Err(ShellError::GenericError { + 
error: "Merge error".into(), + msg: "empty column list".into(), + span: Some(col_span), + help: None, + inner: vec![], + }); + } + + // Checking if they are same type + if cols.len() > 1 { + for w in cols.windows(2) { + let l_series = df + .column(w[0].as_ref()) + .map_err(|e| ShellError::GenericError { + error: "Error selecting columns".into(), + msg: e.to_string(), + span: Some(col_span), + help: None, + inner: vec![], + })?; + + let r_series = df + .column(w[1].as_ref()) + .map_err(|e| ShellError::GenericError { + error: "Error selecting columns".into(), + msg: e.to_string(), + span: Some(col_span), + help: None, + inner: vec![], + })?; + + if l_series.dtype() != r_series.dtype() { + return Err(ShellError::GenericError { + error: "Merge error".into(), + msg: "found different column types in list".into(), + span: Some(col_span), + help: Some(format!( + "datatypes {} and {} are incompatible", + l_series.dtype(), + r_series.dtype() + )), + inner: vec![], + }); + } + } + } + + Ok(()) +} + +#[cfg(test)] +mod test { + use crate::test::test_polars_plugin_command; + + use super::*; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&MeltDF) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/eager/mod.rs b/crates/nu_plugin_polars/src/dataframe/eager/mod.rs new file mode 100644 index 0000000000..33df6e5281 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/eager/mod.rs @@ -0,0 +1,105 @@ +mod append; +mod cast; +mod columns; +mod drop; +mod drop_duplicates; +mod drop_nulls; +mod dtypes; +mod dummies; +mod filter_with; +mod first; +mod get; +mod last; +mod list; +mod melt; +mod open; +mod query_df; +mod rename; +mod sample; +mod schema; +mod shape; +mod slice; +mod sql_context; +mod sql_expr; +mod summary; +mod take; +mod to_arrow; +mod to_avro; +mod to_csv; +mod to_df; +mod to_json_lines; +mod to_nu; +mod to_parquet; +mod with_column; + +use crate::PolarsPlugin; + +pub use self::open::OpenDataFrame; +pub use 
append::AppendDF; +pub use cast::CastDF; +pub use columns::ColumnsDF; +pub use drop::DropDF; +pub use drop_duplicates::DropDuplicates; +pub use drop_nulls::DropNulls; +pub use dtypes::DataTypes; +pub use dummies::Dummies; +pub use filter_with::FilterWith; +pub use first::FirstDF; +pub use get::GetDF; +pub use last::LastDF; +pub use list::ListDF; +pub use melt::MeltDF; +use nu_plugin::PluginCommand; +pub use query_df::QueryDf; +pub use rename::RenameDF; +pub use sample::SampleDF; +pub use schema::SchemaCmd; +pub use shape::ShapeDF; +pub use slice::SliceDF; +pub use sql_context::SQLContext; +pub use summary::Summary; +pub use take::TakeDF; +pub use to_arrow::ToArrow; +pub use to_avro::ToAvro; +pub use to_csv::ToCSV; +pub use to_df::ToDataFrame; +pub use to_json_lines::ToJsonLines; +pub use to_nu::ToNu; +pub use to_parquet::ToParquet; +pub use with_column::WithColumn; + +pub(crate) fn eager_commands() -> Vec>> { + vec![ + Box::new(AppendDF), + Box::new(CastDF), + Box::new(ColumnsDF), + Box::new(DataTypes), + Box::new(DropDF), + Box::new(DropDuplicates), + Box::new(DropNulls), + Box::new(Dummies), + Box::new(FilterWith), + Box::new(GetDF), + Box::new(OpenDataFrame), + Box::new(MeltDF), + Box::new(Summary), + Box::new(FirstDF), + Box::new(LastDF), + Box::new(ListDF), + Box::new(RenameDF), + Box::new(SampleDF), + Box::new(ShapeDF), + Box::new(SliceDF), + Box::new(SchemaCmd), + Box::new(TakeDF), + Box::new(ToNu), + Box::new(ToArrow), + Box::new(ToAvro), + Box::new(ToDataFrame), + Box::new(ToCSV), + Box::new(ToJsonLines), + Box::new(ToParquet), + Box::new(QueryDf), + Box::new(WithColumn), + ] +} diff --git a/crates/nu_plugin_polars/src/dataframe/eager/open.rs b/crates/nu_plugin_polars/src/dataframe/eager/open.rs new file mode 100644 index 0000000000..ec8fe6b76e --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/eager/open.rs @@ -0,0 +1,531 @@ +use crate::{ + dataframe::values::NuSchema, + values::{cache_and_to_value, NuLazyFrame}, + PolarsPlugin, +}; + +use 
super::super::values::NuDataFrame; +use nu_plugin::PluginCommand; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Spanned, SyntaxShape, + Type, Value, +}; + +use std::{fs::File, io::BufReader, path::PathBuf}; + +use polars::prelude::{ + CsvEncoding, CsvReader, IpcReader, JsonFormat, JsonReader, LazyCsvReader, LazyFileListReader, + LazyFrame, ParquetReader, ScanArgsIpc, ScanArgsParquet, SerReader, +}; + +use polars_io::{avro::AvroReader, prelude::ParallelStrategy}; + +#[derive(Clone)] +pub struct OpenDataFrame; + +impl PluginCommand for OpenDataFrame { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars open" + } + + fn usage(&self) -> &str { + "Opens CSV, JSON, JSON lines, arrow, avro, or parquet file to create dataframe." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required( + "file", + SyntaxShape::Filepath, + "file path to load values from", + ) + .switch("lazy", "creates a lazy dataframe", Some('l')) + .named( + "type", + SyntaxShape::String, + "File type: csv, tsv, json, parquet, arrow, avro. If omitted, derive from file extension", + Some('t'), + ) + .named( + "delimiter", + SyntaxShape::String, + "file delimiter character. CSV file", + Some('d'), + ) + .switch( + "no-header", + "Indicates if file doesn't have header. CSV file", + None, + ) + .named( + "infer-schema", + SyntaxShape::Number, + "Number of rows to infer the schema of the file. CSV file", + None, + ) + .named( + "skip-rows", + SyntaxShape::Number, + "Number of rows to skip from file. CSV file", + None, + ) + .named( + "columns", + SyntaxShape::List(Box::new(SyntaxShape::String)), + "Columns to be selected from csv file. CSV and Parquet file", + None, + ) + .named( + "schema", + SyntaxShape::Record(vec![]), + r#"Polars Schema in format [{name: str}]. 
CSV, JSON, and JSONL files"#, + Some('s') + ) + .input_output_type(Type::Any, Type::Custom("dataframe".into())) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Takes a file name and creates a dataframe", + example: "polars open test.csv", + result: None, + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &nu_plugin::EngineInterface, + call: &nu_plugin::EvaluatedCall, + _input: nu_protocol::PipelineData, + ) -> Result { + command(plugin, engine, call).map_err(|e| e.into()) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &nu_plugin::EngineInterface, + call: &nu_plugin::EvaluatedCall, +) -> Result { + let file: Spanned = call.req(0)?; + + let type_option: Option> = call.get_flag("type")?; + + let type_id = match &type_option { + Some(ref t) => Some((t.item.to_owned(), "Invalid type", t.span)), + None => file.item.extension().map(|e| { + ( + e.to_string_lossy().into_owned(), + "Invalid extension", + file.span, + ) + }), + }; + + match type_id { + Some((e, msg, blamed)) => match e.as_str() { + "csv" | "tsv" => from_csv(plugin, engine, call), + "parquet" | "parq" => from_parquet(plugin, engine, call), + "ipc" | "arrow" => from_ipc(plugin, engine, call), + "json" => from_json(plugin, engine, call), + "jsonl" => from_jsonl(plugin, engine, call), + "avro" => from_avro(plugin, engine, call), + _ => Err(ShellError::FileNotFoundCustom { + msg: format!( + "{msg}. Supported values: csv, tsv, parquet, ipc, arrow, json, jsonl, avro" + ), + span: blamed, + }), + }, + None => Err(ShellError::FileNotFoundCustom { + msg: "File without extension".into(), + span: file.span, + }), + } + .map(|value| PipelineData::Value(value, None)) +} + +fn from_parquet( + plugin: &PolarsPlugin, + engine: &nu_plugin::EngineInterface, + call: &nu_plugin::EvaluatedCall, +) -> Result { + if call.has_flag("lazy")? 
{ + let file: String = call.req(0)?; + let args = ScanArgsParquet { + n_rows: None, + cache: true, + parallel: ParallelStrategy::Auto, + rechunk: false, + row_index: None, + low_memory: false, + cloud_options: None, + use_statistics: false, + hive_partitioning: false, + }; + + let df: NuLazyFrame = LazyFrame::scan_parquet(file, args) + .map_err(|e| ShellError::GenericError { + error: "Parquet reader error".into(), + msg: format!("{e:?}"), + span: Some(call.head), + help: None, + inner: vec![], + })? + .into(); + + cache_and_to_value(plugin, engine, call.head, df) + } else { + let file: Spanned = call.req(0)?; + let columns: Option> = call.get_flag("columns")?; + + let r = File::open(&file.item).map_err(|e| ShellError::GenericError { + error: "Error opening file".into(), + msg: e.to_string(), + span: Some(file.span), + help: None, + inner: vec![], + })?; + let reader = ParquetReader::new(r); + + let reader = match columns { + None => reader, + Some(columns) => reader.with_columns(Some(columns)), + }; + + let df: NuDataFrame = reader + .finish() + .map_err(|e| ShellError::GenericError { + error: "Parquet reader error".into(), + msg: format!("{e:?}"), + span: Some(call.head), + help: None, + inner: vec![], + })? 
+ .into(); + + cache_and_to_value(plugin, engine, call.head, df) + } +} + +fn from_avro( + plugin: &PolarsPlugin, + engine: &nu_plugin::EngineInterface, + call: &nu_plugin::EvaluatedCall, +) -> Result { + let file: Spanned = call.req(0)?; + let columns: Option> = call.get_flag("columns")?; + + let r = File::open(&file.item).map_err(|e| ShellError::GenericError { + error: "Error opening file".into(), + msg: e.to_string(), + span: Some(file.span), + help: None, + inner: vec![], + })?; + let reader = AvroReader::new(r); + + let reader = match columns { + None => reader, + Some(columns) => reader.with_columns(Some(columns)), + }; + + let df: NuDataFrame = reader + .finish() + .map_err(|e| ShellError::GenericError { + error: "Avro reader error".into(), + msg: format!("{e:?}"), + span: Some(call.head), + help: None, + inner: vec![], + })? + .into(); + + cache_and_to_value(plugin, engine, call.head, df) +} + +fn from_ipc( + plugin: &PolarsPlugin, + engine: &nu_plugin::EngineInterface, + call: &nu_plugin::EvaluatedCall, +) -> Result { + if call.has_flag("lazy")? { + let file: String = call.req(0)?; + let args = ScanArgsIpc { + n_rows: None, + cache: true, + rechunk: false, + row_index: None, + memmap: true, + }; + + let df: NuLazyFrame = LazyFrame::scan_ipc(file, args) + .map_err(|e| ShellError::GenericError { + error: "IPC reader error".into(), + msg: format!("{e:?}"), + span: Some(call.head), + help: None, + inner: vec![], + })? 
+ .into(); + + cache_and_to_value(plugin, engine, call.head, df) + } else { + let file: Spanned = call.req(0)?; + let columns: Option> = call.get_flag("columns")?; + + let r = File::open(&file.item).map_err(|e| ShellError::GenericError { + error: "Error opening file".into(), + msg: e.to_string(), + span: Some(file.span), + help: None, + inner: vec![], + })?; + let reader = IpcReader::new(r); + + let reader = match columns { + None => reader, + Some(columns) => reader.with_columns(Some(columns)), + }; + + let df: NuDataFrame = reader + .finish() + .map_err(|e| ShellError::GenericError { + error: "IPC reader error".into(), + msg: format!("{e:?}"), + span: Some(call.head), + help: None, + inner: vec![], + })? + .into(); + + cache_and_to_value(plugin, engine, call.head, df) + } +} + +fn from_json( + plugin: &PolarsPlugin, + engine: &nu_plugin::EngineInterface, + call: &nu_plugin::EvaluatedCall, +) -> Result { + let file: Spanned = call.req(0)?; + let file = File::open(&file.item).map_err(|e| ShellError::GenericError { + error: "Error opening file".into(), + msg: e.to_string(), + span: Some(file.span), + help: None, + inner: vec![], + })?; + let maybe_schema = call + .get_flag("schema")? + .map(|schema| NuSchema::try_from(&schema)) + .transpose()?; + + let buf_reader = BufReader::new(file); + let reader = JsonReader::new(buf_reader); + + let reader = match maybe_schema { + Some(schema) => reader.with_schema(schema.into()), + None => reader, + }; + + let df: NuDataFrame = reader + .finish() + .map_err(|e| ShellError::GenericError { + error: "Json reader error".into(), + msg: format!("{e:?}"), + span: Some(call.head), + help: None, + inner: vec![], + })? + .into(); + + cache_and_to_value(plugin, engine, call.head, df) +} + +fn from_jsonl( + plugin: &PolarsPlugin, + engine: &nu_plugin::EngineInterface, + call: &nu_plugin::EvaluatedCall, +) -> Result { + let infer_schema: Option = call.get_flag("infer-schema")?; + let maybe_schema = call + .get_flag("schema")? 
+ .map(|schema| NuSchema::try_from(&schema)) + .transpose()?; + let file: Spanned = call.req(0)?; + let file = File::open(&file.item).map_err(|e| ShellError::GenericError { + error: "Error opening file".into(), + msg: e.to_string(), + span: Some(file.span), + help: None, + inner: vec![], + })?; + + let buf_reader = BufReader::new(file); + let reader = JsonReader::new(buf_reader) + .with_json_format(JsonFormat::JsonLines) + .infer_schema_len(infer_schema); + + let reader = match maybe_schema { + Some(schema) => reader.with_schema(schema.into()), + None => reader, + }; + + let df: NuDataFrame = reader + .finish() + .map_err(|e| ShellError::GenericError { + error: "Json lines reader error".into(), + msg: format!("{e:?}"), + span: Some(call.head), + help: None, + inner: vec![], + })? + .into(); + + cache_and_to_value(plugin, engine, call.head, df) +} + +fn from_csv( + plugin: &PolarsPlugin, + engine: &nu_plugin::EngineInterface, + call: &nu_plugin::EvaluatedCall, +) -> Result { + let delimiter: Option> = call.get_flag("delimiter")?; + let no_header: bool = call.has_flag("no-header")?; + let infer_schema: Option = call.get_flag("infer-schema")?; + let skip_rows: Option = call.get_flag("skip-rows")?; + let columns: Option> = call.get_flag("columns")?; + + let maybe_schema = call + .get_flag("schema")? + .map(|schema| NuSchema::try_from(&schema)) + .transpose()?; + + if call.has_flag("lazy")? 
{ + let file: String = call.req(0)?; + let csv_reader = LazyCsvReader::new(file); + + let csv_reader = match delimiter { + None => csv_reader, + Some(d) => { + if d.item.len() != 1 { + return Err(ShellError::GenericError { + error: "Incorrect delimiter".into(), + msg: "Delimiter has to be one character".into(), + span: Some(d.span), + help: None, + inner: vec![], + }); + } else { + let delimiter = match d.item.chars().next() { + Some(d) => d as u8, + None => unreachable!(), + }; + csv_reader.with_separator(delimiter) + } + } + }; + + let csv_reader = csv_reader.has_header(!no_header); + + let csv_reader = match maybe_schema { + Some(schema) => csv_reader.with_schema(Some(schema.into())), + None => csv_reader, + }; + + let csv_reader = match infer_schema { + None => csv_reader, + Some(r) => csv_reader.with_infer_schema_length(Some(r)), + }; + + let csv_reader = match skip_rows { + None => csv_reader, + Some(r) => csv_reader.with_skip_rows(r), + }; + + let df: NuLazyFrame = csv_reader + .finish() + .map_err(|e| ShellError::GenericError { + error: "Parquet reader error".into(), + msg: format!("{e:?}"), + span: Some(call.head), + help: None, + inner: vec![], + })? + .into(); + + cache_and_to_value(plugin, engine, call.head, df) + } else { + let file: Spanned = call.req(0)?; + let csv_reader = CsvReader::from_path(&file.item) + .map_err(|e| ShellError::GenericError { + error: "Error creating CSV reader".into(), + msg: e.to_string(), + span: Some(file.span), + help: None, + inner: vec![], + })? 
+ .with_encoding(CsvEncoding::LossyUtf8); + + let csv_reader = match delimiter { + None => csv_reader, + Some(d) => { + if d.item.len() != 1 { + return Err(ShellError::GenericError { + error: "Incorrect delimiter".into(), + msg: "Delimiter has to be one character".into(), + span: Some(d.span), + help: None, + inner: vec![], + }); + } else { + let delimiter = match d.item.chars().next() { + Some(d) => d as u8, + None => unreachable!(), + }; + csv_reader.with_separator(delimiter) + } + } + }; + + let csv_reader = csv_reader.has_header(!no_header); + + let csv_reader = match maybe_schema { + Some(schema) => csv_reader.with_schema(Some(schema.into())), + None => csv_reader, + }; + + let csv_reader = match infer_schema { + None => csv_reader, + Some(r) => csv_reader.infer_schema(Some(r)), + }; + + let csv_reader = match skip_rows { + None => csv_reader, + Some(r) => csv_reader.with_skip_rows(r), + }; + + let csv_reader = match columns { + None => csv_reader, + Some(columns) => csv_reader.with_columns(Some(columns)), + }; + + let df: NuDataFrame = csv_reader + .finish() + .map_err(|e| ShellError::GenericError { + error: "Parquet reader error".into(), + msg: format!("{e:?}"), + span: Some(call.head), + help: None, + inner: vec![], + })? 
+ .into(); + + cache_and_to_value(plugin, engine, call.head, df) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/eager/query_df.rs b/crates/nu_plugin_polars/src/dataframe/eager/query_df.rs new file mode 100644 index 0000000000..a568a17b9b --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/eager/query_df.rs @@ -0,0 +1,108 @@ +use super::super::values::NuDataFrame; +use crate::dataframe::values::Column; +use crate::dataframe::{eager::SQLContext, values::NuLazyFrame}; +use crate::values::{to_pipeline_data, CustomValueSupport}; +use crate::PolarsPlugin; +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, + Value, +}; + +// attribution: +// sql_context.rs, and sql_expr.rs were copied from polars-sql. thank you. +// maybe we should just use the crate at some point but it's not published yet. +// https://github.com/pola-rs/polars/tree/master/polars-sql + +#[derive(Clone)] +pub struct QueryDf; + +impl PluginCommand for QueryDf { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars query" + } + + fn usage(&self) -> &str { + "Query dataframe using SQL. Note: The dataframe is always named 'df' in your query's from clause." 
+ } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required("sql", SyntaxShape::String, "sql query") + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn search_terms(&self) -> Vec<&str> { + vec!["dataframe", "sql", "search"] + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Query dataframe using SQL", + example: "[[a b]; [1 2] [3 4]] | polars into-df | polars query 'select a from df'", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "a".to_string(), + vec![Value::test_int(1), Value::test_int(3)], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let sql_query: String = call.req(0)?; + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + + let mut ctx = SQLContext::new(); + ctx.register("df", &df.df); + let df_sql = ctx + .execute(&sql_query) + .map_err(|e| ShellError::GenericError { + error: "Dataframe Error".into(), + msg: e.to_string(), + span: Some(call.head), + help: None, + inner: vec![], + })?; + let lazy = NuLazyFrame::new(!df.from_lazy, df_sql); + to_pipeline_data(plugin, engine, call.head, lazy) +} + +#[cfg(test)] +mod test { + use crate::test::test_polars_plugin_command; + + use super::*; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&QueryDf) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/eager/rename.rs b/crates/nu_plugin_polars/src/dataframe/eager/rename.rs new file mode 100644 index 
0000000000..a2ef981664 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/eager/rename.rs @@ -0,0 +1,203 @@ +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, + Value, +}; + +use crate::{ + dataframe::{utils::extract_strings, values::NuLazyFrame}, + values::{to_pipeline_data, CustomValueSupport, PolarsPluginObject}, + PolarsPlugin, +}; + +use super::super::values::{Column, NuDataFrame}; + +#[derive(Clone)] +pub struct RenameDF; + +impl PluginCommand for RenameDF { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars rename" + } + + fn usage(&self) -> &str { + "Rename a dataframe column." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required( + "columns", + SyntaxShape::Any, + "Column(s) to be renamed. A string or list of strings", + ) + .required( + "new names", + SyntaxShape::Any, + "New names for the selected column(s). 
A string or list of strings", + ) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe or lazyframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Renames a series", + example: "[5 6 7 8] | polars into-df | polars rename '0' new_name", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "new_name".to_string(), + vec![ + Value::test_int(5), + Value::test_int(6), + Value::test_int(7), + Value::test_int(8), + ], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Renames a dataframe column", + example: "[[a b]; [1 2] [3 4]] | polars into-df | polars rename a a_new", + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a_new".to_string(), + vec![Value::test_int(1), Value::test_int(3)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + ), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Renames two dataframe columns", + example: + "[[a b]; [1 2] [3 4]] | polars into-df | polars rename [a b] [a_new b_new]", + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a_new".to_string(), + vec![Value::test_int(1), Value::test_int(3)], + ), + Column::new( + "b_new".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + ), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let value = input.into_value(call.head); + match PolarsPluginObject::try_from_value(plugin, &value).map_err(LabeledError::from)? 
{ + PolarsPluginObject::NuDataFrame(df) => { + command_eager(plugin, engine, call, df).map_err(LabeledError::from) + } + PolarsPluginObject::NuLazyFrame(lazy) => { + command_lazy(plugin, engine, call, lazy).map_err(LabeledError::from) + } + _ => Err(LabeledError::new(format!("Unsupported type: {value:?}")) + .with_label("Unsupported Type", call.head)), + } + } +} + +fn command_eager( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + df: NuDataFrame, +) -> Result { + let columns: Value = call.req(0)?; + let columns = extract_strings(columns)?; + + let new_names: Value = call.req(1)?; + let new_names = extract_strings(new_names)?; + + let mut polars_df = df.to_polars(); + + for (from, to) in columns.iter().zip(new_names.iter()) { + polars_df + .rename(from, to) + .map_err(|e| ShellError::GenericError { + error: "Error renaming".into(), + msg: e.to_string(), + span: Some(call.head), + help: None, + inner: vec![], + })?; + } + + let df = NuDataFrame::new(false, polars_df); + to_pipeline_data(plugin, engine, call.head, df) +} + +fn command_lazy( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + lazy: NuLazyFrame, +) -> Result { + let columns: Value = call.req(0)?; + let columns = extract_strings(columns)?; + + let new_names: Value = call.req(1)?; + let new_names = extract_strings(new_names)?; + + if columns.len() != new_names.len() { + let value: Value = call.req(1)?; + return Err(ShellError::IncompatibleParametersSingle { + msg: "New name list has different size to column list".into(), + span: value.span(), + }); + } + + let lazy = lazy.to_polars(); + let lazy: NuLazyFrame = lazy.rename(&columns, &new_names).into(); + + to_pipeline_data(plugin, engine, call.head, lazy) +} + +#[cfg(test)] +mod test { + use crate::test::test_polars_plugin_command; + + use super::*; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&RenameDF) + } +} diff --git 
a/crates/nu_plugin_polars/src/dataframe/eager/sample.rs b/crates/nu_plugin_polars/src/dataframe/eager/sample.rs new file mode 100644 index 0000000000..3ad7514251 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/eager/sample.rs @@ -0,0 +1,138 @@ +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Spanned, SyntaxShape, + Type, +}; +use polars::prelude::NamedFrom; +use polars::series::Series; + +use crate::{ + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::values::NuDataFrame; + +#[derive(Clone)] +pub struct SampleDF; + +impl PluginCommand for SampleDF { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars sample" + } + + fn usage(&self) -> &str { + "Create sample dataframe." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .named( + "n-rows", + SyntaxShape::Int, + "number of rows to be taken from dataframe", + Some('n'), + ) + .named( + "fraction", + SyntaxShape::Number, + "fraction of dataframe to be taken", + Some('f'), + ) + .named( + "seed", + SyntaxShape::Number, + "seed for the selection", + Some('s'), + ) + .switch("replace", "sample with replace", Some('e')) + .switch("shuffle", "shuffle sample", Some('u')) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Sample rows from dataframe", + example: "[[a b]; [1 2] [3 4]] | polars into-df | polars sample --n-rows 1", + result: None, // No expected value because sampling is random + }, + Example { + description: "Shows sample row using fraction and replace", + example: + "[[a b]; [1 2] [3 4] [5 6]] | polars into-df | polars sample --fraction 0.5 --replace", + result: None, // No expected value because sampling is random + }, + ] + } + + fn run( + 
&self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let rows: Option> = call.get_flag("n-rows")?; + let fraction: Option> = call.get_flag("fraction")?; + let seed: Option = call.get_flag::("seed")?.map(|val| val as u64); + let replace: bool = call.has_flag("replace")?; + let shuffle: bool = call.has_flag("shuffle")?; + + let df = NuDataFrame::try_from_pipeline(plugin, input, call.head)?; + + let df = match (rows, fraction) { + (Some(rows), None) => df + .as_ref() + .sample_n(&Series::new("s", &[rows.item]), replace, shuffle, seed) + .map_err(|e| ShellError::GenericError { + error: "Error creating sample".into(), + msg: e.to_string(), + span: Some(rows.span), + help: None, + inner: vec![], + }), + (None, Some(frac)) => df + .as_ref() + .sample_frac(&Series::new("frac", &[frac.item]), replace, shuffle, seed) + .map_err(|e| ShellError::GenericError { + error: "Error creating sample".into(), + msg: e.to_string(), + span: Some(frac.span), + help: None, + inner: vec![], + }), + (Some(_), Some(_)) => Err(ShellError::GenericError { + error: "Incompatible flags".into(), + msg: "Only one selection criterion allowed".into(), + span: Some(call.head), + help: None, + inner: vec![], + }), + (None, None) => Err(ShellError::GenericError { + error: "No selection".into(), + msg: "No selection criterion was found".into(), + span: Some(call.head), + help: Some("Perhaps you want to use the flag -n or -f".into()), + inner: vec![], + }), + }; + let df = NuDataFrame::new(false, df?); + to_pipeline_data(plugin, engine, call.head, df) +} diff --git a/crates/nu_plugin_polars/src/dataframe/eager/schema.rs b/crates/nu_plugin_polars/src/dataframe/eager/schema.rs new file mode 100644 index 0000000000..b55d8ee5e2 --- 
/dev/null +++ b/crates/nu_plugin_polars/src/dataframe/eager/schema.rs @@ -0,0 +1,133 @@ +use crate::{values::PolarsPluginObject, PolarsPlugin}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + record, Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value, +}; + +#[derive(Clone)] +pub struct SchemaCmd; + +impl PluginCommand for SchemaCmd { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars schema" + } + + fn usage(&self) -> &str { + "Show schema for a dataframe." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .switch("datatype-list", "creates a lazy dataframe", Some('l')) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Dataframe schema", + example: r#"[[a b]; [1 "foo"] [3 "bar"]] | polars into-df | polars schema"#, + result: Some(Value::record( + record! { + "a" => Value::string("i64", Span::test_data()), + "b" => Value::string("str", Span::test_data()), + }, + Span::test_data(), + )), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + if call.has_flag("datatype-list")? { + Ok(PipelineData::Value(datatype_list(Span::unknown()), None)) + } else { + command(plugin, engine, call, input).map_err(LabeledError::from) + } + } +} + +fn command( + plugin: &PolarsPlugin, + _engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + match PolarsPluginObject::try_from_pipeline(plugin, input, call.head)? 
{ + PolarsPluginObject::NuDataFrame(df) => { + let schema = df.schema(); + let value: Value = schema.into(); + Ok(PipelineData::Value(value, None)) + } + PolarsPluginObject::NuLazyFrame(lazy) => { + let schema = lazy.schema()?; + let value: Value = schema.into(); + Ok(PipelineData::Value(value, None)) + } + _ => Err(ShellError::GenericError { + error: "Must be a dataframe or lazy dataframe".into(), + msg: "".into(), + span: Some(call.head), + help: None, + inner: vec![], + }), + } +} + +fn datatype_list(span: Span) -> Value { + let types: Vec = [ + ("null", ""), + ("bool", ""), + ("u8", ""), + ("u16", ""), + ("u32", ""), + ("u64", ""), + ("i8", ""), + ("i16", ""), + ("i32", ""), + ("i64", ""), + ("f32", ""), + ("f64", ""), + ("str", ""), + ("binary", ""), + ("date", ""), + ("datetime", "Time Unit can be: milliseconds: ms, microseconds: us, nanoseconds: ns. Timezone wildcard is *. Other Timezone examples: UTC, America/Los_Angeles."), + ("duration", "Time Unit can be: milliseconds: ms, microseconds: us, nanoseconds: ns."), + ("time", ""), + ("object", ""), + ("unknown", ""), + ("list", ""), + ] + .iter() + .map(|(dtype, note)| { + Value::record(record! 
{ + "dtype" => Value::string(*dtype, span), + "note" => Value::string(*note, span), + }, + span) + }) + .collect(); + Value::list(types, span) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&SchemaCmd) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/eager/shape.rs b/crates/nu_plugin_polars/src/dataframe/eager/shape.rs new file mode 100644 index 0000000000..7be6d2088c --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/eager/shape.rs @@ -0,0 +1,94 @@ +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value, +}; + +use crate::{ + dataframe::values::Column, + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::values::NuDataFrame; + +#[derive(Clone)] +pub struct ShapeDF; + +impl PluginCommand for ShapeDF { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars shape" + } + + fn usage(&self) -> &str { + "Shows column and row size for a dataframe." 
+ } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Shows row and column shape", + example: "[[a b]; [1 2] [3 4]] | polars into-df | polars shape", + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new("rows".to_string(), vec![Value::test_int(2)]), + Column::new("columns".to_string(), vec![Value::test_int(2)]), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + + let rows = Value::int(df.as_ref().height() as i64, call.head); + + let cols = Value::int(df.as_ref().width() as i64, call.head); + + let rows_col = Column::new("rows".to_string(), vec![rows]); + let cols_col = Column::new("columns".to_string(), vec![cols]); + + let df = NuDataFrame::try_from_columns(vec![rows_col, cols_col], None)?; + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&ShapeDF) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/eager/slice.rs b/crates/nu_plugin_polars/src/dataframe/eager/slice.rs new file mode 100644 index 0000000000..38370703c8 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/eager/slice.rs @@ -0,0 +1,95 @@ +use nu_plugin::{EngineInterface, 
EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, + Value, +}; + +use crate::{ + dataframe::values::Column, + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::values::NuDataFrame; + +#[derive(Clone)] +pub struct SliceDF; + +impl PluginCommand for SliceDF { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars slice" + } + + fn usage(&self) -> &str { + "Creates new dataframe from a slice of rows." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required("offset", SyntaxShape::Int, "start of slice") + .required("size", SyntaxShape::Int, "size of slice") + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Create new dataframe from a slice of the rows", + example: "[[a b]; [1 2] [3 4]] | polars into-df | polars slice 0 1", + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new("a".to_string(), vec![Value::test_int(1)]), + Column::new("b".to_string(), vec![Value::test_int(2)]), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let offset: i64 = call.req(0)?; + let size: usize = call.req(1)?; + + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + + let res = df.as_ref().slice(offset, size); + let res = NuDataFrame::new(false, res); + + to_pipeline_data(plugin, engine, call.head, res) +} + 
+#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&SliceDF) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/eager/sql_context.rs b/crates/nu_plugin_polars/src/dataframe/eager/sql_context.rs new file mode 100644 index 0000000000..f558904344 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/eager/sql_context.rs @@ -0,0 +1,228 @@ +use crate::dataframe::eager::sql_expr::parse_sql_expr; +use polars::error::{ErrString, PolarsError}; +use polars::prelude::{col, DataFrame, DataType, IntoLazy, LazyFrame}; +use sqlparser::ast::{ + Expr as SqlExpr, GroupByExpr, Select, SelectItem, SetExpr, Statement, TableFactor, + Value as SQLValue, +}; +use sqlparser::dialect::GenericDialect; +use sqlparser::parser::Parser; +use std::collections::HashMap; + +#[derive(Default)] +pub struct SQLContext { + table_map: HashMap, + dialect: GenericDialect, +} + +impl SQLContext { + pub fn new() -> Self { + Self { + table_map: HashMap::new(), + dialect: GenericDialect, + } + } + + pub fn register(&mut self, name: &str, df: &DataFrame) { + self.table_map.insert(name.to_owned(), df.clone().lazy()); + } + + fn execute_select(&self, select_stmt: &Select) -> Result { + // Determine involved dataframe + // Implicit join require some more work in query parsers, Explicit join are preferred for now. + let tbl = select_stmt.from.first().ok_or_else(|| { + PolarsError::ComputeError(ErrString::from("No table found in select statement")) + })?; + let mut alias_map = HashMap::new(); + let tbl_name = match &tbl.relation { + TableFactor::Table { name, alias, .. } => { + let tbl_name = name + .0 + .first() + .ok_or_else(|| { + PolarsError::ComputeError(ErrString::from( + "No table found in select statement", + )) + })? 
+ .value + .to_string(); + if self.table_map.contains_key(&tbl_name) { + if let Some(alias) = alias { + alias_map.insert(alias.name.value.clone(), tbl_name.to_owned()); + }; + tbl_name + } else { + return Err(PolarsError::ComputeError( + format!("Table name {tbl_name} was not found").into(), + )); + } + } + // Support bare table, optional with alias for now + _ => return Err(PolarsError::ComputeError("Not implemented".into())), + }; + let df = &self.table_map[&tbl_name]; + let mut raw_projection_before_alias: HashMap = HashMap::new(); + let mut contain_wildcard = false; + // Filter Expression + let df = match select_stmt.selection.as_ref() { + Some(expr) => { + let filter_expression = parse_sql_expr(expr)?; + df.clone().filter(filter_expression) + } + None => df.clone(), + }; + // Column Projections + let projection = select_stmt + .projection + .iter() + .enumerate() + .map(|(i, select_item)| { + Ok(match select_item { + SelectItem::UnnamedExpr(expr) => { + let expr = parse_sql_expr(expr)?; + raw_projection_before_alias.insert(format!("{expr:?}"), i); + expr + } + SelectItem::ExprWithAlias { expr, alias } => { + let expr = parse_sql_expr(expr)?; + raw_projection_before_alias.insert(format!("{expr:?}"), i); + expr.alias(&alias.value) + } + SelectItem::QualifiedWildcard(_, _) | SelectItem::Wildcard(_) => { + contain_wildcard = true; + col("*") + } + }) + }) + .collect::, PolarsError>>()?; + // Check for group by + // After projection since there might be number. 
+ let group_by = match &select_stmt.group_by { + GroupByExpr::All => + Err( + PolarsError::ComputeError("Group-By Error: Only positive number or expression are supported, not all".into()) + )?, + GroupByExpr::Expressions(expressions) => expressions + } + .iter() + .map( + |e|match e { + SqlExpr::Value(SQLValue::Number(idx, _)) => { + let idx = match idx.parse::() { + Ok(0)| Err(_) => Err( + PolarsError::ComputeError( + format!("Group-By Error: Only positive number or expression are supported, got {idx}").into() + )), + Ok(idx) => Ok(idx) + }?; + Ok(projection[idx].clone()) + } + SqlExpr::Value(_) => Err( + PolarsError::ComputeError("Group-By Error: Only positive number or expression are supported".into()) + ), + _ => parse_sql_expr(e) + } + ) + .collect::, PolarsError>>()?; + + let df = if group_by.is_empty() { + df.select(projection) + } else { + // check groupby and projection due to difference between SQL and polars + // Return error on wild card, shouldn't process this + if contain_wildcard { + return Err(PolarsError::ComputeError( + "Group-By Error: Can't process wildcard in group-by".into(), + )); + } + // Default polars group by will have group by columns at the front + // need some container to contain position of group by columns and its position + // at the final agg projection, check the schema for the existence of group by column + // and its projections columns, keeping the original index + let (exclude_expr, groupby_pos): (Vec<_>, Vec<_>) = group_by + .iter() + .map(|expr| raw_projection_before_alias.get(&format!("{expr:?}"))) + .enumerate() + .filter(|(_, proj_p)| proj_p.is_some()) + .map(|(gb_p, proj_p)| (*proj_p.unwrap_or(&0), (*proj_p.unwrap_or(&0), gb_p))) + .unzip(); + let (agg_projection, agg_proj_pos): (Vec<_>, Vec<_>) = projection + .iter() + .enumerate() + .filter(|(i, _)| !exclude_expr.contains(i)) + .enumerate() + .map(|(agg_pj, (proj_p, expr))| (expr.clone(), (proj_p, agg_pj + group_by.len()))) + .unzip(); + let agg_df = 
df.group_by(group_by).agg(agg_projection); + let mut final_proj_pos = groupby_pos + .into_iter() + .chain(agg_proj_pos) + .collect::>(); + + final_proj_pos.sort_by(|(proj_pa, _), (proj_pb, _)| proj_pa.cmp(proj_pb)); + let final_proj = final_proj_pos + .into_iter() + .map(|(_, shm_p)| { + col(agg_df + .clone() + // FIXME: had to do this mess to get get_index to work, not sure why. need help + .collect() + .unwrap_or_default() + .schema() + .get_at_index(shm_p) + .unwrap_or((&"".into(), &DataType::Null)) + .0) + }) + .collect::>(); + agg_df.select(final_proj) + }; + Ok(df) + } + + pub fn execute(&self, query: &str) -> Result { + let ast = Parser::parse_sql(&self.dialect, query) + .map_err(|e| PolarsError::ComputeError(format!("{e:?}").into()))?; + if ast.len() != 1 { + Err(PolarsError::ComputeError( + "One and only one statement at a time please".into(), + )) + } else { + let ast = ast + .first() + .ok_or_else(|| PolarsError::ComputeError(ErrString::from("No statement found")))?; + Ok(match ast { + Statement::Query(query) => { + let rs = match &*query.body { + SetExpr::Select(select_stmt) => self.execute_select(select_stmt)?, + _ => { + return Err(PolarsError::ComputeError( + "INSERT, UPDATE is not supported for polars".into(), + )) + } + }; + match &query.limit { + Some(SqlExpr::Value(SQLValue::Number(nrow, _))) => { + let nrow = nrow.parse().map_err(|err| { + PolarsError::ComputeError( + format!("Conversion Error: {err:?}").into(), + ) + })?; + rs.limit(nrow) + } + None => rs, + _ => { + return Err(PolarsError::ComputeError( + "Only support number argument to LIMIT clause".into(), + )) + } + } + } + _ => { + return Err(PolarsError::ComputeError( + format!("Statement type {ast:?} is not supported").into(), + )) + } + }) + } + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/eager/sql_expr.rs b/crates/nu_plugin_polars/src/dataframe/eager/sql_expr.rs new file mode 100644 index 0000000000..9c0728ea5f --- /dev/null +++ 
b/crates/nu_plugin_polars/src/dataframe/eager/sql_expr.rs @@ -0,0 +1,200 @@ +use polars::error::PolarsError; +use polars::prelude::{col, lit, DataType, Expr, LiteralValue, PolarsResult as Result, TimeUnit}; + +use sqlparser::ast::{ + ArrayElemTypeDef, BinaryOperator as SQLBinaryOperator, DataType as SQLDataType, + Expr as SqlExpr, Function as SQLFunction, Value as SqlValue, WindowType, +}; + +fn map_sql_polars_datatype(data_type: &SQLDataType) -> Result { + Ok(match data_type { + SQLDataType::Char(_) + | SQLDataType::Varchar(_) + | SQLDataType::Uuid + | SQLDataType::Clob(_) + | SQLDataType::Text + | SQLDataType::String(_) => DataType::String, + SQLDataType::Float(_) => DataType::Float32, + SQLDataType::Real => DataType::Float32, + SQLDataType::Double => DataType::Float64, + SQLDataType::TinyInt(_) => DataType::Int8, + SQLDataType::UnsignedTinyInt(_) => DataType::UInt8, + SQLDataType::SmallInt(_) => DataType::Int16, + SQLDataType::UnsignedSmallInt(_) => DataType::UInt16, + SQLDataType::Int(_) => DataType::Int32, + SQLDataType::UnsignedInt(_) => DataType::UInt32, + SQLDataType::BigInt(_) => DataType::Int64, + SQLDataType::UnsignedBigInt(_) => DataType::UInt64, + + SQLDataType::Boolean => DataType::Boolean, + SQLDataType::Date => DataType::Date, + SQLDataType::Time(_, _) => DataType::Time, + SQLDataType::Timestamp(_, _) => DataType::Datetime(TimeUnit::Microseconds, None), + SQLDataType::Interval => DataType::Duration(TimeUnit::Microseconds), + SQLDataType::Array(array_type_def) => match array_type_def { + ArrayElemTypeDef::AngleBracket(inner_type) + | ArrayElemTypeDef::SquareBracket(inner_type) => { + DataType::List(Box::new(map_sql_polars_datatype(inner_type)?)) + } + _ => { + return Err(PolarsError::ComputeError( + "SQL Datatype Array(None) was not supported in polars-sql yet!".into(), + )) + } + }, + _ => { + return Err(PolarsError::ComputeError( + format!("SQL Datatype {data_type:?} was not supported in polars-sql yet!").into(), + )) + } + }) +} + +fn cast_(expr: 
Expr, data_type: &SQLDataType) -> Result { + let polars_type = map_sql_polars_datatype(data_type)?; + Ok(expr.cast(polars_type)) +} + +fn binary_op_(left: Expr, right: Expr, op: &SQLBinaryOperator) -> Result { + Ok(match op { + SQLBinaryOperator::Plus => left + right, + SQLBinaryOperator::Minus => left - right, + SQLBinaryOperator::Multiply => left * right, + SQLBinaryOperator::Divide => left / right, + SQLBinaryOperator::Modulo => left % right, + SQLBinaryOperator::StringConcat => { + left.cast(DataType::String) + right.cast(DataType::String) + } + SQLBinaryOperator::Gt => left.gt(right), + SQLBinaryOperator::Lt => left.lt(right), + SQLBinaryOperator::GtEq => left.gt_eq(right), + SQLBinaryOperator::LtEq => left.lt_eq(right), + SQLBinaryOperator::Eq => left.eq(right), + SQLBinaryOperator::NotEq => left.eq(right).not(), + SQLBinaryOperator::And => left.and(right), + SQLBinaryOperator::Or => left.or(right), + SQLBinaryOperator::Xor => left.xor(right), + _ => { + return Err(PolarsError::ComputeError( + format!("SQL Operator {op:?} was not supported in polars-sql yet!").into(), + )) + } + }) +} + +fn literal_expr(value: &SqlValue) -> Result { + Ok(match value { + SqlValue::Number(s, _) => { + // Check for existence of decimal separator dot + if s.contains('.') { + s.parse::().map(lit).map_err(|_| { + PolarsError::ComputeError(format!("Can't parse literal {s:?}").into()) + }) + } else { + s.parse::().map(lit).map_err(|_| { + PolarsError::ComputeError(format!("Can't parse literal {s:?}").into()) + }) + }? 
+ } + SqlValue::SingleQuotedString(s) => lit(s.clone()), + SqlValue::NationalStringLiteral(s) => lit(s.clone()), + SqlValue::HexStringLiteral(s) => lit(s.clone()), + SqlValue::DoubleQuotedString(s) => lit(s.clone()), + SqlValue::Boolean(b) => lit(*b), + SqlValue::Null => Expr::Literal(LiteralValue::Null), + _ => { + return Err(PolarsError::ComputeError( + format!("Parsing SQL Value {value:?} was not supported in polars-sql yet!").into(), + )) + } + }) +} + +pub fn parse_sql_expr(expr: &SqlExpr) -> Result { + Ok(match expr { + SqlExpr::Identifier(e) => col(&e.value), + SqlExpr::BinaryOp { left, op, right } => { + let left = parse_sql_expr(left)?; + let right = parse_sql_expr(right)?; + binary_op_(left, right, op)? + } + SqlExpr::Function(sql_function) => parse_sql_function(sql_function)?, + SqlExpr::Cast { + expr, + data_type, + format: _, + } => cast_(parse_sql_expr(expr)?, data_type)?, + SqlExpr::Nested(expr) => parse_sql_expr(expr)?, + SqlExpr::Value(value) => literal_expr(value)?, + _ => { + return Err(PolarsError::ComputeError( + format!("Expression: {expr:?} was not supported in polars-sql yet!").into(), + )) + } + }) +} + +fn apply_window_spec(expr: Expr, window_type: Option<&WindowType>) -> Result { + Ok(match &window_type { + Some(wtype) => match wtype { + WindowType::WindowSpec(window_spec) => { + // Process for simple window specification, partition by first + let partition_by = window_spec + .partition_by + .iter() + .map(parse_sql_expr) + .collect::>>()?; + expr.over(partition_by) + // Order by and Row range may not be supported at the moment + } + // TODO: make NamedWindow work + WindowType::NamedWindow(_named) => { + return Err(PolarsError::ComputeError( + format!("Expression: {expr:?} was not supported in polars-sql yet!").into(), + )) + } + }, + None => expr, + }) +} + +fn parse_sql_function(sql_function: &SQLFunction) -> Result { + use sqlparser::ast::{FunctionArg, FunctionArgExpr}; + // Function name mostly do not have name space, so it mostly 
take the first args + let function_name = sql_function.name.0[0].value.to_ascii_lowercase(); + let args = sql_function + .args + .iter() + .map(|arg| match arg { + FunctionArg::Named { arg, .. } => arg, + FunctionArg::Unnamed(arg) => arg, + }) + .collect::>(); + Ok( + match ( + function_name.as_str(), + args.as_slice(), + sql_function.distinct, + ) { + ("sum", [FunctionArgExpr::Expr(expr)], false) => { + apply_window_spec(parse_sql_expr(expr)?, sql_function.over.as_ref())?.sum() + } + ("count", [FunctionArgExpr::Expr(expr)], false) => { + apply_window_spec(parse_sql_expr(expr)?, sql_function.over.as_ref())?.count() + } + ("count", [FunctionArgExpr::Expr(expr)], true) => { + apply_window_spec(parse_sql_expr(expr)?, sql_function.over.as_ref())?.n_unique() + } + // Special case for wildcard args to count function. + ("count", [FunctionArgExpr::Wildcard], false) => lit(1i32).count(), + _ => { + return Err(PolarsError::ComputeError( + format!( + "Function {function_name:?} with args {args:?} was not supported in polars-sql yet!" 
+ ) + .into(), + )) + } + }, + ) +} diff --git a/crates/nu_plugin_polars/src/dataframe/eager/summary.rs b/crates/nu_plugin_polars/src/dataframe/eager/summary.rs new file mode 100644 index 0000000000..dd1ad3df61 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/eager/summary.rs @@ -0,0 +1,293 @@ +use crate::{ + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::values::{Column, NuDataFrame}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, + Value, +}; +use polars::{ + chunked_array::ChunkedArray, + prelude::{ + AnyValue, DataFrame, DataType, Float64Type, IntoSeries, NewChunkedArray, + QuantileInterpolOptions, Series, StringType, + }, +}; + +#[derive(Clone)] +pub struct Summary; + +impl PluginCommand for Summary { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars summary" + } + + fn usage(&self) -> &str { + "For a dataframe, produces descriptive statistics (summary statistics) for its numeric columns." 
+ } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .category(Category::Custom("dataframe".into())) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .named( + "quantiles", + SyntaxShape::Table(vec![]), + "provide optional quantiles", + Some('q'), + ) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "list dataframe descriptives", + example: "[[a b]; [1 1] [1 1]] | polars into-df | polars summary", + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "descriptor".to_string(), + vec![ + Value::test_string("count"), + Value::test_string("sum"), + Value::test_string("mean"), + Value::test_string("median"), + Value::test_string("std"), + Value::test_string("min"), + Value::test_string("25%"), + Value::test_string("50%"), + Value::test_string("75%"), + Value::test_string("max"), + ], + ), + Column::new( + "a (i64)".to_string(), + vec![ + Value::test_float(2.0), + Value::test_float(2.0), + Value::test_float(1.0), + Value::test_float(1.0), + Value::test_float(0.0), + Value::test_float(1.0), + Value::test_float(1.0), + Value::test_float(1.0), + Value::test_float(1.0), + Value::test_float(1.0), + ], + ), + Column::new( + "b (i64)".to_string(), + vec![ + Value::test_float(2.0), + Value::test_float(2.0), + Value::test_float(1.0), + Value::test_float(1.0), + Value::test_float(0.0), + Value::test_float(1.0), + Value::test_float(1.0), + Value::test_float(1.0), + Value::test_float(1.0), + Value::test_float(1.0), + ], + ), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { 
+ let quantiles: Option> = call.get_flag("quantiles")?; + let quantiles = quantiles.map(|values| { + values + .iter() + .map(|value| { + let span = value.span(); + match value { + Value::Float { val, .. } => { + if (&0.0..=&1.0).contains(&val) { + Ok(*val) + } else { + Err(ShellError::GenericError { + error: "Incorrect value for quantile".into(), + msg: "value should be between 0 and 1".into(), + span: Some(span), + help: None, + inner: vec![], + }) + } + } + Value::Error { error, .. } => Err(*error.clone()), + _ => Err(ShellError::GenericError { + error: "Incorrect value for quantile".into(), + msg: "value should be a float".into(), + span: Some(span), + help: None, + inner: vec![], + }), + } + }) + .collect::, ShellError>>() + }); + + let quantiles = match quantiles { + Some(quantiles) => quantiles?, + None => vec![0.25, 0.50, 0.75], + }; + + let mut quantiles_labels = quantiles + .iter() + .map(|q| Some(format!("{}%", q * 100.0))) + .collect::>>(); + let mut labels = vec![ + Some("count".to_string()), + Some("sum".to_string()), + Some("mean".to_string()), + Some("median".to_string()), + Some("std".to_string()), + Some("min".to_string()), + ]; + labels.append(&mut quantiles_labels); + labels.push(Some("max".to_string())); + + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + + let names = ChunkedArray::::from_slice_options("descriptor", &labels).into_series(); + + let head = std::iter::once(names); + + let tail = df + .as_ref() + .get_columns() + .iter() + .filter(|col| !matches!(col.dtype(), &DataType::Object("object", _))) + .map(|col| { + let count = col.len() as f64; + + let sum = col.sum_as_series().ok().and_then(|series| { + series + .cast(&DataType::Float64) + .ok() + .and_then(|ca| match ca.get(0) { + Ok(AnyValue::Float64(v)) => Some(v), + _ => None, + }) + }); + + let mean = match col.mean_as_series().get(0) { + Ok(AnyValue::Float64(v)) => Some(v), + _ => None, + }; + + let median = match col.median_as_series() { + Ok(v) => 
match v.get(0) { + Ok(AnyValue::Float64(v)) => Some(v), + _ => None, + }, + _ => None, + }; + + let std = match col.std_as_series(0) { + Ok(v) => match v.get(0) { + Ok(AnyValue::Float64(v)) => Some(v), + _ => None, + }, + _ => None, + }; + + let min = col.min_as_series().ok().and_then(|series| { + series + .cast(&DataType::Float64) + .ok() + .and_then(|ca| match ca.get(0) { + Ok(AnyValue::Float64(v)) => Some(v), + _ => None, + }) + }); + + let mut quantiles = quantiles + .clone() + .into_iter() + .map(|q| { + col.quantile_as_series(q, QuantileInterpolOptions::default()) + .ok() + .and_then(|ca| ca.cast(&DataType::Float64).ok()) + .and_then(|ca| match ca.get(0) { + Ok(AnyValue::Float64(v)) => Some(v), + _ => None, + }) + }) + .collect::>>(); + + let max = col.max_as_series().ok().and_then(|series| { + series + .cast(&DataType::Float64) + .ok() + .and_then(|ca| match ca.get(0) { + Ok(AnyValue::Float64(v)) => Some(v), + _ => None, + }) + }); + + let mut descriptors = vec![Some(count), sum, mean, median, std, min]; + descriptors.append(&mut quantiles); + descriptors.push(max); + + let name = format!("{} ({})", col.name(), col.dtype()); + ChunkedArray::::from_slice_options(&name, &descriptors).into_series() + }); + + let res = head.chain(tail).collect::>(); + + let polars_df = DataFrame::new(res).map_err(|e| ShellError::GenericError { + error: "Dataframe Error".into(), + msg: e.to_string(), + span: Some(call.head), + help: None, + inner: vec![], + })?; + + let df = NuDataFrame::new(df.from_lazy, polars_df); + + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use crate::test::test_polars_plugin_command; + + use super::*; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&Summary) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/eager/take.rs b/crates/nu_plugin_polars/src/dataframe/eager/take.rs new file mode 100644 index 0000000000..87601080d5 --- /dev/null +++ 
b/crates/nu_plugin_polars/src/dataframe/eager/take.rs @@ -0,0 +1,162 @@ +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, + Value, +}; +use polars::prelude::DataType; + +use crate::{ + dataframe::values::Column, + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::values::NuDataFrame; + +#[derive(Clone)] +pub struct TakeDF; + +impl PluginCommand for TakeDF { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars take" + } + + fn usage(&self) -> &str { + "Creates new dataframe using the given indices." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required( + "indices", + SyntaxShape::Any, + "list of indices used to take data", + ) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Takes selected rows from dataframe", + example: r#"let df = ([[a b]; [4 1] [5 2] [4 3]] | polars into-df); + let indices = ([0 2] | polars into-df); + $df | polars take $indices"#, + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(4), Value::test_int(4)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(1), Value::test_int(3)], + ), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Takes selected rows from series", + example: r#"let series = ([4 1 5 2 4 3] | polars into-df); + let indices = ([0 2] | polars into-df); + $series | polars take $indices"#, + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![Value::test_int(4), Value::test_int(5)], + )], + None, + ) + .expect("simple df for test should not fail") 
+ .into_value(Span::test_data()), + ), + }, + ] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let index_value: Value = call.req(0)?; + let index_span = index_value.span(); + let index = NuDataFrame::try_from_value(plugin, &index_value)?.as_series(index_span)?; + + let casted = match index.dtype() { + DataType::UInt32 | DataType::UInt64 | DataType::Int32 | DataType::Int64 => index + .cast(&DataType::UInt32) + .map_err(|e| ShellError::GenericError { + error: "Error casting index list".into(), + msg: e.to_string(), + span: Some(index_span), + help: None, + inner: vec![], + }), + _ => Err(ShellError::GenericError { + error: "Incorrect type".into(), + msg: "Series with incorrect type".into(), + span: Some(call.head), + help: Some("Consider using a Series with type int type".into()), + inner: vec![], + }), + }?; + + let indices = casted.u32().map_err(|e| ShellError::GenericError { + error: "Error casting index list".into(), + msg: e.to_string(), + span: Some(index_span), + help: None, + inner: vec![], + })?; + + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + let polars_df = df + .to_polars() + .take(indices) + .map_err(|e| ShellError::GenericError { + error: "Error taking values".into(), + msg: e.to_string(), + span: Some(call.head), + help: None, + inner: vec![], + })?; + + let df = NuDataFrame::new(df.from_lazy, polars_df); + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&TakeDF) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/eager/to_arrow.rs 
b/crates/nu_plugin_polars/src/dataframe/eager/to_arrow.rs new file mode 100644 index 0000000000..fe09b27851 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/eager/to_arrow.rs @@ -0,0 +1,87 @@ +use std::{fs::File, path::PathBuf}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Spanned, SyntaxShape, + Type, Value, +}; +use polars::prelude::{IpcWriter, SerWriter}; + +use crate::PolarsPlugin; + +use super::super::values::NuDataFrame; + +#[derive(Clone)] +pub struct ToArrow; + +impl PluginCommand for ToArrow { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars to-arrow" + } + + fn usage(&self) -> &str { + "Saves dataframe to arrow file." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required("file", SyntaxShape::Filepath, "file path to save dataframe") + .input_output_type(Type::Custom("dataframe".into()), Type::Any) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Saves dataframe to arrow file", + example: "[[a b]; [1 2] [3 4]] | polars into-df | polars to-arrow test.arrow", + result: None, + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + _engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, call, input).map_err(|e| e.into()) + } +} + +fn command( + plugin: &PolarsPlugin, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let file_name: Spanned = call.req(0)?; + + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + + let mut file = File::create(&file_name.item).map_err(|e| ShellError::GenericError { + error: "Error with file name".into(), + msg: e.to_string(), + span: Some(file_name.span), + help: None, + inner: vec![], + })?; + + IpcWriter::new(&mut file) + .finish(&mut df.to_polars()) + .map_err(|e| ShellError::GenericError { + 
error: "Error saving file".into(), + msg: e.to_string(), + span: Some(file_name.span), + help: None, + inner: vec![], + })?; + + let file_value = Value::string(format!("saved {:?}", &file_name.item), file_name.span); + + Ok(PipelineData::Value( + Value::list(vec![file_value], call.head), + None, + )) +} diff --git a/crates/nu_plugin_polars/src/dataframe/eager/to_avro.rs b/crates/nu_plugin_polars/src/dataframe/eager/to_avro.rs new file mode 100644 index 0000000000..aec58893ad --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/eager/to_avro.rs @@ -0,0 +1,117 @@ +use std::{fs::File, path::PathBuf}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Spanned, SyntaxShape, + Type, Value, +}; +use polars_io::avro::{AvroCompression, AvroWriter}; +use polars_io::SerWriter; + +use crate::PolarsPlugin; + +use super::super::values::NuDataFrame; + +#[derive(Clone)] +pub struct ToAvro; + +impl PluginCommand for ToAvro { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars to-avro" + } + + fn usage(&self) -> &str { + "Saves dataframe to avro file." 
+ } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .named( + "compression", + SyntaxShape::String, + "use compression, supports deflate or snappy", + Some('c'), + ) + .required("file", SyntaxShape::Filepath, "file path to save dataframe") + .input_output_type(Type::Custom("dataframe".into()), Type::Any) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Saves dataframe to avro file", + example: "[[a b]; [1 2] [3 4]] | polars into-df | polars to-avro test.avro", + result: None, + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn get_compression(call: &EvaluatedCall) -> Result, ShellError> { + if let Some((compression, span)) = call + .get_flag_value("compression") + .map(|e| e.as_str().map(|s| (s.to_owned(), e.span()))) + .transpose()? 
+ { + match compression.as_ref() { + "snappy" => Ok(Some(AvroCompression::Snappy)), + "deflate" => Ok(Some(AvroCompression::Deflate)), + _ => Err(ShellError::IncorrectValue { + msg: "compression must be one of deflate or snappy".to_string(), + val_span: span, + call_span: span, + }), + } + } else { + Ok(None) + } +} + +fn command( + plugin: &PolarsPlugin, + _engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let file_name: Spanned = call.req(0)?; + let compression = get_compression(call)?; + + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + + let file = File::create(&file_name.item).map_err(|e| ShellError::GenericError { + error: "Error with file name".into(), + msg: e.to_string(), + span: Some(file_name.span), + help: None, + inner: vec![], + })?; + + AvroWriter::new(file) + .with_compression(compression) + .finish(&mut df.to_polars()) + .map_err(|e| ShellError::GenericError { + error: "Error saving file".into(), + msg: e.to_string(), + span: Some(file_name.span), + help: None, + inner: vec![], + })?; + + let file_value = Value::string(format!("saved {:?}", &file_name.item), file_name.span); + + Ok(PipelineData::Value( + Value::list(vec![file_value], call.head), + None, + )) +} diff --git a/crates/nu_plugin_polars/src/dataframe/eager/to_csv.rs b/crates/nu_plugin_polars/src/dataframe/eager/to_csv.rs new file mode 100644 index 0000000000..460bda79c8 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/eager/to_csv.rs @@ -0,0 +1,133 @@ +use std::{fs::File, path::PathBuf}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Spanned, SyntaxShape, + Type, Value, +}; +use polars::prelude::{CsvWriter, SerWriter}; + +use crate::PolarsPlugin; + +use super::super::values::NuDataFrame; + +#[derive(Clone)] +pub struct ToCSV; + +impl PluginCommand for ToCSV { + type Plugin = PolarsPlugin; + + fn 
name(&self) -> &str { + "polars to-csv" + } + + fn usage(&self) -> &str { + "Saves dataframe to CSV file." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required("file", SyntaxShape::Filepath, "file path to save dataframe") + .named( + "delimiter", + SyntaxShape::String, + "file delimiter character", + Some('d'), + ) + .switch("no-header", "Indicates if file doesn't have header", None) + .input_output_type(Type::Custom("dataframe".into()), Type::Any) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Saves dataframe to CSV file", + example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr to-csv test.csv", + result: None, + }, + Example { + description: "Saves dataframe to CSV file using other delimiter", + example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr to-csv test.csv --delimiter '|'", + result: None, + }, + ] + } + + fn run( + &self, + plugin: &Self::Plugin, + _engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, call, input).map_err(|e| e.into()) + } +} + +fn command( + plugin: &PolarsPlugin, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let file_name: Spanned = call.req(0)?; + let delimiter: Option> = call.get_flag("delimiter")?; + let no_header: bool = call.has_flag("no-header")?; + + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + + let mut file = File::create(&file_name.item).map_err(|e| ShellError::GenericError { + error: "Error with file name".into(), + msg: e.to_string(), + span: Some(file_name.span), + help: None, + inner: vec![], + })?; + + let writer = CsvWriter::new(&mut file); + + let writer = if no_header { + writer.include_header(false) + } else { + writer.include_header(true) + }; + + let mut writer = match delimiter { + None => writer, + Some(d) => { + if d.item.len() != 1 { + return Err(ShellError::GenericError { + error: "Incorrect 
delimiter".into(), + msg: "Delimiter has to be one char".into(), + span: Some(d.span), + help: None, + inner: vec![], + }); + } else { + let delimiter = match d.item.chars().next() { + Some(d) => d as u8, + None => unreachable!(), + }; + + writer.with_separator(delimiter) + } + } + }; + + writer + .finish(&mut df.to_polars()) + .map_err(|e| ShellError::GenericError { + error: "Error writing to file".into(), + msg: e.to_string(), + span: Some(file_name.span), + help: None, + inner: vec![], + })?; + + let file_value = Value::string(format!("saved {:?}", &file_name.item), file_name.span); + + Ok(PipelineData::Value( + Value::list(vec![file_value], call.head), + None, + )) +} diff --git a/crates/nu_plugin_polars/src/dataframe/eager/to_df.rs b/crates/nu_plugin_polars/src/dataframe/eager/to_df.rs new file mode 100644 index 0000000000..758feacf70 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/eager/to_df.rs @@ -0,0 +1,201 @@ +use crate::{ + dataframe::values::NuSchema, + values::{to_pipeline_data, Column, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::values::NuDataFrame; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value, +}; +use polars::{ + prelude::{AnyValue, DataType, Field, NamedFrom}, + series::Series, +}; + +#[derive(Clone)] +pub struct ToDataFrame; + +impl PluginCommand for ToDataFrame { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars into-df" + } + + fn usage(&self) -> &str { + "Converts a list, table or record into a dataframe." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .named( + "schema", + SyntaxShape::Record(vec![]), + r#"Polars Schema in format [{name: str}]. 
CSV, JSON, and JSONL files"#, + Some('s'), + ) + .input_output_type(Type::Any, Type::Custom("dataframe".into())) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Takes a dictionary and creates a dataframe", + example: "[[a b];[1 2] [3 4]] | polars into-df", + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(1), Value::test_int(3)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + ), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Takes a list of tables and creates a dataframe", + example: "[[1 2 a] [3 4 b] [5 6 c]] | polars into-df", + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "0".to_string(), + vec![Value::test_int(1), Value::test_int(3), Value::test_int(5)], + ), + Column::new( + "1".to_string(), + vec![Value::test_int(2), Value::test_int(4), Value::test_int(6)], + ), + Column::new( + "2".to_string(), + vec![ + Value::test_string("a"), + Value::test_string("b"), + Value::test_string("c"), + ], + ), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Takes a list and creates a dataframe", + example: "[a b c] | polars into-df", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + Value::test_string("a"), + Value::test_string("b"), + Value::test_string("c"), + ], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Takes a list of booleans and creates a dataframe", + example: "[true true false] | polars into-df", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + Value::test_bool(true), + Value::test_bool(true), + 
Value::test_bool(false), + ], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Convert to a dataframe and provide a schema", + example: "{a: 1, b: {a: [1 2 3]}, c: [a b c]}| polars into-df -s {a: u8, b: {a: list}, c: list}", + result: Some( + NuDataFrame::try_from_series_vec(vec![ + Series::new("a", &[1u8]), + { + let dtype = DataType::Struct(vec![Field::new("a", DataType::List(Box::new(DataType::UInt64)))]); + let vals = vec![AnyValue::StructOwned( + Box::new((vec![AnyValue::List(Series::new("a", &[1u64, 2, 3]))], vec![Field::new("a", DataType::String)]))); 1]; + Series::from_any_values_and_dtype("b", &vals, &dtype, false) + .expect("Struct series should not fail") + }, + { + let dtype = DataType::List(Box::new(DataType::String)); + let vals = vec![AnyValue::List(Series::new("c", &["a", "b", "c"]))]; + Series::from_any_values_and_dtype("c", &vals, &dtype, false) + .expect("List series should not fail") + } + ], Span::test_data()) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Convert to a dataframe and provide a schema that adds a new column", + example: r#"[[a b]; [1 "foo"] [2 "bar"]] | polars into-df -s {a: u8, b:str, c:i64} | polars fill-null 3"#, + result: Some(NuDataFrame::try_from_series_vec(vec![ + Series::new("a", [1u8, 2]), + Series::new("b", ["foo", "bar"]), + Series::new("c", [3i64, 3]), + ], Span::test_data()) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + } + ] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let maybe_schema = call + .get_flag("schema")? 
+ .map(|schema| NuSchema::try_from(&schema)) + .transpose()?; + + let df = NuDataFrame::try_from_iter(plugin, input.into_iter(), maybe_schema.clone())?; + to_pipeline_data(plugin, engine, call.head, df).map_err(LabeledError::from) + } +} + +#[cfg(test)] +mod test { + use crate::test::test_polars_plugin_command; + + use super::*; + use nu_protocol::ShellError; + + #[test] + fn test_into_df() -> Result<(), ShellError> { + test_polars_plugin_command(&ToDataFrame) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/eager/to_json_lines.rs b/crates/nu_plugin_polars/src/dataframe/eager/to_json_lines.rs new file mode 100644 index 0000000000..fa313db895 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/eager/to_json_lines.rs @@ -0,0 +1,89 @@ +use std::{fs::File, io::BufWriter, path::PathBuf}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Spanned, SyntaxShape, + Type, Value, +}; +use polars::prelude::{JsonWriter, SerWriter}; + +use crate::PolarsPlugin; + +use super::super::values::NuDataFrame; + +#[derive(Clone)] +pub struct ToJsonLines; + +impl PluginCommand for ToJsonLines { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars to-jsonl" + } + + fn usage(&self) -> &str { + "Saves dataframe to a JSON lines file." 
+ } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required("file", SyntaxShape::Filepath, "file path to save dataframe") + .input_output_type(Type::Custom("dataframe".into()), Type::Any) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Saves dataframe to JSON lines file", + example: "[[a b]; [1 2] [3 4]] | polars into-df | polars to-jsonl test.jsonl", + result: None, + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + _engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let file_name: Spanned = call.req(0)?; + + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + + let file = File::create(&file_name.item).map_err(|e| ShellError::GenericError { + error: "Error with file name".into(), + msg: e.to_string(), + span: Some(file_name.span), + help: None, + inner: vec![], + })?; + let buf_writer = BufWriter::new(file); + + JsonWriter::new(buf_writer) + .finish(&mut df.to_polars()) + .map_err(|e| ShellError::GenericError { + error: "Error saving file".into(), + msg: e.to_string(), + span: Some(file_name.span), + help: None, + inner: vec![], + })?; + + let file_value = Value::string(format!("saved {:?}", &file_name.item), file_name.span); + + Ok(PipelineData::Value( + Value::list(vec![file_value], call.head), + None, + )) +} diff --git a/crates/nu_plugin_polars/src/dataframe/eager/to_nu.rs b/crates/nu_plugin_polars/src/dataframe/eager/to_nu.rs new file mode 100644 index 0000000000..832396b139 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/eager/to_nu.rs @@ -0,0 +1,144 @@ +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + record, Category, Example, LabeledError, 
PipelineData, ShellError, Signature, Span, + SyntaxShape, Type, Value, +}; + +use crate::{dataframe::values::NuExpression, values::CustomValueSupport, PolarsPlugin}; + +use super::super::values::NuDataFrame; + +#[derive(Clone)] +pub struct ToNu; + +impl PluginCommand for ToNu { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars into-nu" + } + + fn usage(&self) -> &str { + "Converts a dataframe or an expression into into nushell value for access and exploration." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .named( + "rows", + SyntaxShape::Number, + "number of rows to be shown", + Some('n'), + ) + .switch("tail", "shows tail rows", Some('t')) + .input_output_types(vec![ + (Type::Custom("expression".into()), Type::Any), + (Type::Custom("dataframe".into()), Type::Table(vec![])), + ]) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + let rec_1 = Value::test_record(record! { + "index" => Value::test_int(0), + "a" => Value::test_int(1), + "b" => Value::test_int(2), + }); + let rec_2 = Value::test_record(record! { + "index" => Value::test_int(1), + "a" => Value::test_int(3), + "b" => Value::test_int(4), + }); + let rec_3 = Value::test_record(record! { + "index" => Value::test_int(2), + "a" => Value::test_int(3), + "b" => Value::test_int(4), + }); + + vec![ + Example { + description: "Shows head rows from dataframe", + example: "[[a b]; [1 2] [3 4]] | polars into-df | polars into-nu", + result: Some(Value::list(vec![rec_1, rec_2], Span::test_data())), + }, + Example { + description: "Shows tail rows from dataframe", + example: + "[[a b]; [1 2] [5 6] [3 4]] | polars into-df | polars into-nu --tail --rows 1", + result: Some(Value::list(vec![rec_3], Span::test_data())), + }, + Example { + description: "Convert a col expression into a nushell value", + example: "polars col a | polars into-nu", + result: Some(Value::test_record(record! 
{ + "expr" => Value::test_string("column"), + "value" => Value::test_string("a"), + })), + }, + ] + } + + fn run( + &self, + plugin: &Self::Plugin, + _engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let value = input.into_value(call.head); + if NuDataFrame::can_downcast(&value) { + dataframe_command(plugin, call, value) + } else { + expression_command(plugin, call, value) + } + .map_err(|e| e.into()) + } +} + +fn dataframe_command( + plugin: &PolarsPlugin, + call: &EvaluatedCall, + input: Value, +) -> Result { + let rows: Option = call.get_flag("rows")?; + let tail: bool = call.has_flag("tail")?; + + let df = NuDataFrame::try_from_value(plugin, &input)?; + + let values = if tail { + df.tail(rows, call.head)? + } else { + // if rows is specified, return those rows, otherwise return everything + if rows.is_some() { + df.head(rows, call.head)? + } else { + df.head(Some(df.height()), call.head)? + } + }; + + let value = Value::list(values, call.head); + + Ok(PipelineData::Value(value, None)) +} + +fn expression_command( + plugin: &PolarsPlugin, + call: &EvaluatedCall, + input: Value, +) -> Result { + let expr = NuExpression::try_from_value(plugin, &input)?; + let value = expr.to_value(call.head)?; + + Ok(PipelineData::Value(value, None)) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&ToNu) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/eager/to_parquet.rs b/crates/nu_plugin_polars/src/dataframe/eager/to_parquet.rs new file mode 100644 index 0000000000..571d5e040d --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/eager/to_parquet.rs @@ -0,0 +1,87 @@ +use std::{fs::File, path::PathBuf}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Spanned, SyntaxShape, + Type, 
Value, +}; +use polars::prelude::ParquetWriter; + +use crate::PolarsPlugin; + +use super::super::values::NuDataFrame; + +#[derive(Clone)] +pub struct ToParquet; + +impl PluginCommand for ToParquet { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars to-parquet" + } + + fn usage(&self) -> &str { + "Saves dataframe to parquet file." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required("file", SyntaxShape::Filepath, "file path to save dataframe") + .input_output_type(Type::Custom("dataframe".into()), Type::Any) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Saves dataframe to parquet file", + example: "[[a b]; [1 2] [3 4]] | polars into-df | polars to-parquet test.parquet", + result: None, + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + _engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let file_name: Spanned = call.req(0)?; + + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + + let file = File::create(&file_name.item).map_err(|e| ShellError::GenericError { + error: "Error with file name".into(), + msg: e.to_string(), + span: Some(file_name.span), + help: None, + inner: vec![], + })?; + let mut polars_df = df.to_polars(); + ParquetWriter::new(file) + .finish(&mut polars_df) + .map_err(|e| ShellError::GenericError { + error: "Error saving file".into(), + msg: e.to_string(), + span: Some(file_name.span), + help: None, + inner: vec![], + })?; + + let file_value = Value::string(format!("saved {:?}", &file_name.item), file_name.span); + + Ok(PipelineData::Value( + Value::list(vec![file_value], call.head), + None, + )) +} diff --git a/crates/nu_plugin_polars/src/dataframe/eager/with_column.rs 
b/crates/nu_plugin_polars/src/dataframe/eager/with_column.rs new file mode 100644 index 0000000000..049d0beeb7 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/eager/with_column.rs @@ -0,0 +1,195 @@ +use super::super::values::{Column, NuDataFrame}; +use crate::{ + dataframe::values::{NuExpression, NuLazyFrame}, + values::{to_pipeline_data, CustomValueSupport, PolarsPluginObject}, + PolarsPlugin, +}; +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, + Value, +}; + +#[derive(Clone)] +pub struct WithColumn; + +impl PluginCommand for WithColumn { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars with-column" + } + + fn usage(&self) -> &str { + "Adds a series to the dataframe." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .named("name", SyntaxShape::String, "new column name", Some('n')) + .rest( + "series or expressions", + SyntaxShape::Any, + "series to be added or expressions used to define the new columns", + ) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe or lazyframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Adds a series to the dataframe", + example: r#"[[a b]; [1 2] [3 4]] + | polars into-df + | polars with-column ([5 6] | polars into-df) --name c"#, + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(1), Value::test_int(3)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + ), + Column::new( + "c".to_string(), + vec![Value::test_int(5), Value::test_int(6)], + ), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Adds a series to the dataframe", + example: 
r#"[[a b]; [1 2] [3 4]] + | polars into-lazy + | polars with-column [ + ((polars col a) * 2 | polars as "c") + ((polars col a) * 3 | polars as "d") + ] + | polars collect"#, + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(1), Value::test_int(3)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + ), + Column::new( + "c".to_string(), + vec![Value::test_int(2), Value::test_int(6)], + ), + Column::new( + "d".to_string(), + vec![Value::test_int(3), Value::test_int(9)], + ), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let value = input.into_value(call.head); + match PolarsPluginObject::try_from_value(plugin, &value)? { + PolarsPluginObject::NuDataFrame(df) => command_eager(plugin, engine, call, df), + PolarsPluginObject::NuLazyFrame(lazy) => command_lazy(plugin, engine, call, lazy), + _ => Err(ShellError::CantConvert { + to_type: "lazy or eager dataframe".into(), + from_type: value.get_type().to_string(), + span: value.span(), + help: None, + }), + } + .map_err(LabeledError::from) + } +} + +fn command_eager( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + df: NuDataFrame, +) -> Result { + let new_column: Value = call.req(0)?; + let column_span = new_column.span(); + + if NuExpression::can_downcast(&new_column) { + let vals: Vec = call.rest(0)?; + let value = Value::list(vals, call.head); + let expressions = NuExpression::extract_exprs(plugin, value)?; + let lazy = NuLazyFrame::new(true, df.lazy().to_polars().with_columns(&expressions)); + let df = lazy.collect(call.head)?; + to_pipeline_data(plugin, engine, call.head, df) + } else { + let mut other = NuDataFrame::try_from_value(plugin, &new_column)?.as_series(column_span)?; + + 
let name = match call.get_flag::("name")? { + Some(name) => name, + None => other.name().to_string(), + }; + + let series = other.rename(&name).clone(); + + let mut polars_df = df.to_polars(); + polars_df + .with_column(series) + .map_err(|e| ShellError::GenericError { + error: "Error adding column to dataframe".into(), + msg: e.to_string(), + span: Some(column_span), + help: None, + inner: vec![], + })?; + + let df = NuDataFrame::new(df.from_lazy, polars_df); + to_pipeline_data(plugin, engine, call.head, df) + } +} + +fn command_lazy( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + lazy: NuLazyFrame, +) -> Result { + let vals: Vec = call.rest(0)?; + let value = Value::list(vals, call.head); + let expressions = NuExpression::extract_exprs(plugin, value)?; + let lazy: NuLazyFrame = lazy.to_polars().with_columns(&expressions).into(); + to_pipeline_data(plugin, engine, call.head, lazy) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&WithColumn) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/expressions/alias.rs b/crates/nu_plugin_polars/src/dataframe/expressions/alias.rs new file mode 100644 index 0000000000..7d9b851997 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/expressions/alias.rs @@ -0,0 +1,88 @@ +use crate::{ + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::values::NuExpression; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + record, Category, Example, LabeledError, PipelineData, Signature, SyntaxShape, Type, Value, +}; + +#[derive(Clone)] +pub struct ExprAlias; + +impl PluginCommand for ExprAlias { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars as" + } + + fn usage(&self) -> &str { + "Creates an alias expression." 
+ } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required( + "Alias name", + SyntaxShape::String, + "Alias name for the expression", + ) + .input_output_type( + Type::Custom("expression".into()), + Type::Custom("expression".into()), + ) + .category(Category::Custom("expression".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Creates and alias expression", + example: "polars col a | polars as new_a | polars into-nu", + result: { + let record = Value::test_record(record! { + "expr" => Value::test_record(record! { + "expr" => Value::test_string("column"), + "value" => Value::test_string("a"), + }), + "alias" => Value::test_string("new_a"), + }); + + Some(record) + }, + }] + } + + fn search_terms(&self) -> Vec<&str> { + vec!["aka", "abbr", "otherwise"] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let alias: String = call.req(0)?; + + let expr = NuExpression::try_from_pipeline(plugin, input, call.head)?; + let expr: NuExpression = expr.to_polars().alias(alias.as_str()).into(); + + to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), nu_protocol::ShellError> { + test_polars_plugin_command(&ExprAlias) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/expressions/arg_where.rs b/crates/nu_plugin_polars/src/dataframe/expressions/arg_where.rs new file mode 100644 index 0000000000..924c0472bc --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/expressions/arg_where.rs @@ -0,0 +1,79 @@ +use crate::{ + dataframe::values::{Column, NuDataFrame, NuExpression}, + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, 
PipelineData, Signature, Span, SyntaxShape, Type, Value, +}; +use polars::prelude::arg_where; + +#[derive(Clone)] +pub struct ExprArgWhere; + +impl PluginCommand for ExprArgWhere { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars arg-where" + } + + fn usage(&self) -> &str { + "Creates an expression that returns the arguments where expression is true." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required("column name", SyntaxShape::Any, "Expression to evaluate") + .input_output_type(Type::Any, Type::Custom("expression".into())) + .category(Category::Custom("expression".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Return a dataframe where the value match the expression", + example: "let df = ([[a b]; [one 1] [two 2] [three 3]] | polars into-df); + $df | polars select (polars arg-where ((polars col b) >= 2) | polars as b_arg)", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "b_arg".to_string(), + vec![Value::test_int(1), Value::test_int(2)], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn search_terms(&self) -> Vec<&str> { + vec!["condition", "match", "if"] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + _input: PipelineData, + ) -> Result { + let value: Value = call.req(0)?; + let expr = NuExpression::try_from_value(plugin, &value)?; + let expr: NuExpression = arg_where(expr.to_polars()).into(); + to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), nu_protocol::ShellError> { + test_polars_plugin_command(&ExprArgWhere) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/expressions/col.rs b/crates/nu_plugin_polars/src/dataframe/expressions/col.rs new file 
mode 100644 index 0000000000..536a38fce5 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/expressions/col.rs @@ -0,0 +1,70 @@ +use crate::{dataframe::values::NuExpression, values::to_pipeline_data, PolarsPlugin}; +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + record, Category, Example, LabeledError, PipelineData, Signature, SyntaxShape, Type, Value, +}; +use polars::prelude::col; + +#[derive(Clone)] +pub struct ExprCol; + +impl PluginCommand for ExprCol { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars col" + } + + fn usage(&self) -> &str { + "Creates a named column expression." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required( + "column name", + SyntaxShape::String, + "Name of column to be used", + ) + .input_output_type(Type::Any, Type::Custom("expression".into())) + .category(Category::Custom("expression".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Creates a named column expression and converts it to a nu object", + example: "polars col a | polars into-nu", + result: Some(Value::test_record(record! 
{ + "expr" => Value::test_string("column"), + "value" => Value::test_string("a"), + })), + }] + } + + fn search_terms(&self) -> Vec<&str> { + vec!["create"] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + _input: PipelineData, + ) -> Result { + let name: String = call.req(0)?; + let expr: NuExpression = col(name.as_str()).into(); + to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), nu_protocol::ShellError> { + test_polars_plugin_command(&ExprCol) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/expressions/concat_str.rs b/crates/nu_plugin_polars/src/dataframe/expressions/concat_str.rs new file mode 100644 index 0000000000..818e9f7e31 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/expressions/concat_str.rs @@ -0,0 +1,109 @@ +use crate::{ + dataframe::values::{Column, NuDataFrame, NuExpression}, + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value, +}; +use polars::prelude::concat_str; + +#[derive(Clone)] +pub struct ExprConcatStr; + +impl PluginCommand for ExprConcatStr { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars concat-str" + } + + fn usage(&self) -> &str { + "Creates a concat string expression." 
+ } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required( + "separator", + SyntaxShape::String, + "Separator used during the concatenation", + ) + .required( + "concat expressions", + SyntaxShape::List(Box::new(SyntaxShape::Any)), + "Expression(s) that define the string concatenation", + ) + .input_output_type(Type::Any, Type::Custom("expression".into())) + .category(Category::Custom("expression".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Creates a concat string expression", + example: r#"let df = ([[a b c]; [one two 1] [three four 2]] | polars into-df); + $df | polars with-column ((polars concat-str "-" [(polars col a) (polars col b) ((polars col c) * 2)]) | polars as concat)"#, + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_string("one"), Value::test_string("three")], + ), + Column::new( + "b".to_string(), + vec![Value::test_string("two"), Value::test_string("four")], + ), + Column::new( + "c".to_string(), + vec![Value::test_int(1), Value::test_int(2)], + ), + Column::new( + "concat".to_string(), + vec![ + Value::test_string("one-two-2"), + Value::test_string("three-four-4"), + ], + ), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn search_terms(&self) -> Vec<&str> { + vec!["join", "connect", "update"] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + _input: PipelineData, + ) -> Result { + let separator: String = call.req(0)?; + let value: Value = call.req(1)?; + + let expressions = NuExpression::extract_exprs(plugin, value)?; + let expr: NuExpression = concat_str(expressions, &separator, false).into(); + + to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from) + } +} + +#[cfg(test)] +mod test { + use crate::test::test_polars_plugin_command; + + use super::*; + + #[test] + fn 
test_examples() -> Result<(), nu_protocol::ShellError> { + test_polars_plugin_command(&ExprConcatStr) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/expressions/datepart.rs b/crates/nu_plugin_polars/src/dataframe/expressions/datepart.rs new file mode 100644 index 0000000000..bec32ebbfb --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/expressions/datepart.rs @@ -0,0 +1,165 @@ +use super::super::values::NuExpression; + +use crate::{ + dataframe::values::{Column, NuDataFrame}, + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; +use chrono::{DateTime, FixedOffset}; +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Spanned, + SyntaxShape, Type, Value, +}; +use polars::{ + datatypes::{DataType, TimeUnit}, + prelude::NamedFrom, + series::Series, +}; + +#[derive(Clone)] +pub struct ExprDatePart; + +impl PluginCommand for ExprDatePart { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars datepart" + } + + fn usage(&self) -> &str { + "Creates an expression for capturing the specified datepart in a column." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required( + "Datepart name", + SyntaxShape::String, + "Part of the date to capture. 
Possible values are year, quarter, month, week, weekday, day, hour, minute, second, millisecond, microsecond, nanosecond", + ) + .input_output_type( + Type::Custom("expression".into()), + Type::Custom("expression".into()), + ) + .category(Category::Custom("expression".into())) + } + + fn examples(&self) -> Vec { + let dt = DateTime::::parse_from_str( + "2021-12-30T01:02:03.123456789 +0000", + "%Y-%m-%dT%H:%M:%S.%9f %z", + ) + .expect("date calculation should not fail in test"); + vec![ + Example { + description: "Creates an expression to capture the year date part", + example: r#"[["2021-12-30T01:02:03.123456789"]] | polars into-df | polars as-datetime "%Y-%m-%dT%H:%M:%S.%9f" | polars with-column [(polars col datetime | polars datepart year | polars as datetime_year )]"#, + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new("datetime".to_string(), vec![Value::test_date(dt)]), + Column::new("datetime_year".to_string(), vec![Value::test_int(2021)]), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Creates an expression to capture multiple date parts", + example: r#"[["2021-12-30T01:02:03.123456789"]] | polars into-df | polars as-datetime "%Y-%m-%dT%H:%M:%S.%9f" | + polars with-column [ (polars col datetime | polars datepart year | polars as datetime_year ), + (polars col datetime | polars datepart month | polars as datetime_month ), + (polars col datetime | polars datepart day | polars as datetime_day ), + (polars col datetime | polars datepart hour | polars as datetime_hour ), + (polars col datetime | polars datepart minute | polars as datetime_minute ), + (polars col datetime | polars datepart second | polars as datetime_second ), + (polars col datetime | polars datepart nanosecond | polars as datetime_ns ) ]"#, + result: Some( + NuDataFrame::try_from_series_vec( + vec![ + Series::new("datetime", &[dt.timestamp_nanos_opt()]) + 
.cast(&DataType::Datetime(TimeUnit::Nanoseconds, None)) + .expect("Error casting to datetime type"), + Series::new("datetime_year", &[2021_i64]), // i32 was coerced to i64 + Series::new("datetime_month", &[12_i8]), + Series::new("datetime_day", &[30_i8]), + Series::new("datetime_hour", &[1_i8]), + Series::new("datetime_minute", &[2_i8]), + Series::new("datetime_second", &[3_i8]), + Series::new("datetime_ns", &[123456789_i64]), // i32 was coerced to i64 + ], + Span::test_data(), + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ] + } + + fn search_terms(&self) -> Vec<&str> { + vec![ + "year", + "month", + "week", + "weekday", + "quarter", + "day", + "hour", + "minute", + "second", + "millisecond", + "microsecond", + "nanosecond", + ] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let part: Spanned = call.req(0)?; + + let expr = NuExpression::try_from_pipeline(plugin, input, call.head)?; + let expr_dt = expr.to_polars().dt(); + let expr: NuExpression = match part.item.as_str() { + "year" => expr_dt.year(), + "quarter" => expr_dt.quarter(), + "month" => expr_dt.month(), + "week" => expr_dt.week(), + "day" => expr_dt.day(), + "hour" => expr_dt.hour(), + "minute" => expr_dt.minute(), + "second" => expr_dt.second(), + "millisecond" => expr_dt.millisecond(), + "microsecond" => expr_dt.microsecond(), + "nanosecond" => expr_dt.nanosecond(), + _ => { + return Err(LabeledError::from(ShellError::UnsupportedInput { + msg: format!("{} is not a valid datepart, expected one of year, month, day, hour, minute, second, millisecond, microsecond, nanosecond", part.item), + input: "value originates from here".to_string(), + msg_span: call.head, + input_span: part.span, + })) + } + }.into(); + to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from) + } +} + +#[cfg(test)] +mod test { + use super::*; + use 
crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&ExprDatePart) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/expressions/expressions_macro.rs b/crates/nu_plugin_polars/src/dataframe/expressions/expressions_macro.rs new file mode 100644 index 0000000000..7f53409450 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/expressions/expressions_macro.rs @@ -0,0 +1,645 @@ +/// Definition of multiple Expression commands using a macro rule +/// All of these expressions have an identical body and only require +/// to have a change in the name, description and expression function +use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame}; +use crate::values::{to_pipeline_data, CustomValueSupport}; +use crate::PolarsPlugin; +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value, +}; + +// The structs defined in this file are structs that form part of other commands +// since they share a similar name +macro_rules! 
/// Expands to a `PluginCommand` implementation whose `run` applies a single
/// polars `Expr` method (`$func`) to the piped-in expression.
///
/// Two arms are provided: one for niladic expression functions and one for
/// functions taking a single argument (`$ddof`, e.g. for `std`/`var`).
macro_rules! expr_command {
    ($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident) => {
        #[derive(Clone)]
        pub struct $command;

        impl PluginCommand for $command {
            type Plugin = PolarsPlugin;

            fn name(&self) -> &str {
                $name
            }

            fn usage(&self) -> &str {
                $desc
            }

            fn signature(&self) -> Signature {
                Signature::build(self.name())
                    .usage($desc)
                    .input_output_type(
                        Type::Custom("expression".into()),
                        Type::Custom("expression".into()),
                    )
                    .category(Category::Custom("expression".into()))
            }

            fn examples(&self) -> Vec<Example> {
                $examples
            }

            fn run(
                &self,
                plugin: &Self::Plugin,
                engine: &EngineInterface,
                call: &EvaluatedCall,
                input: PipelineData,
            ) -> Result<PipelineData, LabeledError> {
                let expr = NuExpression::try_from_pipeline(plugin, input, call.head)
                    .map_err(LabeledError::from)?;
                let expr: NuExpression = expr.to_polars().$func().into();
                to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
            }
        }

        #[cfg(test)]
        mod $test {
            use super::*;
            use crate::test::test_polars_plugin_command;

            #[test]
            fn test_examples() -> Result<(), ShellError> {
                test_polars_plugin_command(&$command)
            }
        }
    };

    // Variant for expression functions taking a single `$ddof`-style argument.
    //
    // NOTE(review): the original arm could never compile if instantiated — it
    // bound the plugin parameter as `_plugin` while using `plugin`, called
    // `try_from_pipeline` without the plugin argument, used the non-existent
    // `Signature::plugin_examples` and `into_polars`, and omitted the
    // mandatory `name`/`usage`/`examples` methods. It is rewritten here to
    // mirror the niladic arm exactly, plus `$ddof`.
    ($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident, $ddof: expr) => {
        #[derive(Clone)]
        pub struct $command;

        impl PluginCommand for $command {
            type Plugin = PolarsPlugin;

            fn name(&self) -> &str {
                $name
            }

            fn usage(&self) -> &str {
                $desc
            }

            fn signature(&self) -> Signature {
                Signature::build(self.name())
                    .usage($desc)
                    .input_output_type(
                        Type::Custom("expression".into()),
                        Type::Custom("expression".into()),
                    )
                    .category(Category::Custom("expression".into()))
            }

            fn examples(&self) -> Vec<Example> {
                $examples
            }

            fn run(
                &self,
                plugin: &Self::Plugin,
                engine: &EngineInterface,
                call: &EvaluatedCall,
                input: PipelineData,
            ) -> Result<PipelineData, LabeledError> {
                let expr = NuExpression::try_from_pipeline(plugin, input, call.head)
                    .map_err(LabeledError::from)?;
                let expr: NuExpression = expr.to_polars().$func($ddof).into();
                to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
            }
        }

        #[cfg(test)]
        mod $test {
            use super::*;
            use crate::test::test_polars_plugin_command;

            #[test]
            fn test_examples() -> Result<(), ShellError> {
                test_polars_plugin_command(&$command)
            }
        }
    };
}
to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from) + } + } + } + + #[cfg(test)] + mod $test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&$command) + } + } + }; + + ($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident, $ddof: expr) => { + #[derive(Clone)] + pub struct $command; + + impl PluginCommand for $command { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + $name + } + + fn usage(&self) -> &str { + $desc + } + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_types(vec![ + ( + Type::Custom("expression".into()), + Type::Custom("expression".into()), + ), + ( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ), + ]) + .category(Category::Custom("expression".into())) + } + + fn examples(&self) -> Vec { + $examples + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let value = input.into_value(call.head); + if NuDataFrame::can_downcast(&value) || NuLazyFrame::can_downcast(&value) { + let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value) + .map_err(LabeledError::from)?; + let lazy = NuLazyFrame::new( + lazy.from_eager, + lazy.to_polars() + .$func($ddof) + .map_err(|e| ShellError::GenericError { + error: "Dataframe Error".into(), + msg: e.to_string(), + help: None, + span: None, + inner: vec![], + }) + .map_err(LabeledError::from)?, + ); + to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from) + } else { + let expr = NuExpression::try_from_value(plugin, &value)?; + let expr: NuExpression = expr.to_polars().$func($ddof).into(); + to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from) + } + } + } + + #[cfg(test)] + mod $test { + use super::*; + use crate::test::test_polars_plugin_command; 
+ + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&$command) + } + } + }; +} + +// ExprList command +// Expands to a command definition for a list expression +expr_command!( + ExprList, + "polars implode", + "Aggregates a group to a Series.", + vec![Example { + description: "", + example: "", + result: None, + }], + implode, + test_implode +); + +// ExprAggGroups command +// Expands to a command definition for a agg groups expression +expr_command!( + ExprAggGroups, + "polars agg-groups", + "Creates an agg_groups expression.", + vec![Example { + description: "", + example: "", + result: None, + }], + agg_groups, + test_groups +); + +// ExprCount command +// Expands to a command definition for a count expression +expr_command!( + ExprCount, + "polars count", + "Creates a count expression.", + vec![Example { + description: "", + example: "", + result: None, + }], + count, + test_count +); + +// ExprNot command +// Expands to a command definition for a not expression +expr_command!( + ExprNot, + "polars expr-not", + "Creates a not expression.", + vec![Example { + description: "Creates a not expression", + example: "(polars col a) > 2) | polars expr-not", + result: None, + },], + not, + test_not +); + +// ExprMax command +// Expands to a command definition for max aggregation +lazy_expr_command!( + ExprMax, + "polars max", + "Creates a max expression or aggregates columns to their max value.", + vec![ + Example { + description: "Max value from columns in a dataframe", + example: "[[a b]; [6 2] [1 4] [4 1]] | polars into-df | polars max", + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new("a".to_string(), vec![Value::test_int(6)],), + Column::new("b".to_string(), vec![Value::test_int(4)],), + ], + None + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Max aggregation for a group-by", + example: r#"[[a b]; [one 2] [one 4] [two 1]] + | 
polars into-df + | polars group-by a + | polars agg (polars col b | polars max)"#, + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_string("one"), Value::test_string("two")], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(4), Value::test_int(1)], + ), + ], + None + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ], + max, + test_max +); + +// ExprMin command +// Expands to a command definition for min aggregation +lazy_expr_command!( + ExprMin, + "polars min", + "Creates a min expression or aggregates columns to their min value.", + vec![ + Example { + description: "Min value from columns in a dataframe", + example: "[[a b]; [6 2] [1 4] [4 1]] | polars into-df | polars min", + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new("a".to_string(), vec![Value::test_int(1)],), + Column::new("b".to_string(), vec![Value::test_int(1)],), + ], + None + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Min aggregation for a group-by", + example: r#"[[a b]; [one 2] [one 4] [two 1]] + | polars into-df + | polars group-by a + | polars agg (polars col b | polars min)"#, + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_string("one"), Value::test_string("two")], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(1)], + ), + ], + None + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ], + min, + test_min +); + +// ExprSum command +// Expands to a command definition for sum aggregation +lazy_expr_command!( + ExprSum, + "polars sum", + "Creates a sum expression for an aggregation or aggregates columns to their sum value.", + vec![ + Example { + description: "Sums all columns in a dataframe", + example: "[[a b]; [6 2] [1 4] [4 1]] | 
polars into-df | polars sum", + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new("a".to_string(), vec![Value::test_int(11)],), + Column::new("b".to_string(), vec![Value::test_int(7)],), + ], + None + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Sum aggregation for a group-by", + example: r#"[[a b]; [one 2] [one 4] [two 1]] + | polars into-df + | polars group-by a + | polars agg (polars col b | polars sum)"#, + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_string("one"), Value::test_string("two")], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(6), Value::test_int(1)], + ), + ], + None + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ], + sum, + test_sum +); + +// ExprMean command +// Expands to a command definition for mean aggregation +lazy_expr_command!( + ExprMean, + "polars mean", + "Creates a mean expression for an aggregation or aggregates columns to their mean value.", + vec![ + Example { + description: "Mean value from columns in a dataframe", + example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars mean", + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new("a".to_string(), vec![Value::test_float(4.0)],), + Column::new("b".to_string(), vec![Value::test_float(2.0)],), + ], + None + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Mean aggregation for a group-by", + example: r#"[[a b]; [one 2] [one 4] [two 1]] + | polars into-df + | polars group-by a + | polars agg (polars col b | polars mean)"#, + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_string("one"), Value::test_string("two")], + ), + Column::new( + "b".to_string(), + vec![Value::test_float(3.0), Value::test_float(1.0)], + 
), + ], + None + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ], + mean, + test_mean +); + +// ExprStd command +// Expands to a command definition for std aggregation +lazy_expr_command!( + ExprStd, + "polars std", + "Creates a std expression for an aggregation of std value from columns in a dataframe.", + vec![ + Example { + description: "Std value from columns in a dataframe", + example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars std", + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new("a".to_string(), vec![Value::test_float(2.0)],), + Column::new("b".to_string(), vec![Value::test_float(0.0)],), + ], + None + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Std aggregation for a group-by", + example: r#"[[a b]; [one 2] [one 2] [two 1] [two 1]] + | polars into-df + | polars group-by a + | polars agg (polars col b | polars std)"#, + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_string("one"), Value::test_string("two")], + ), + Column::new( + "b".to_string(), + vec![Value::test_float(0.0), Value::test_float(0.0)], + ), + ], + None + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ], + std, + test_std, + 1 +); + +// ExprVar command +// Expands to a command definition for var aggregation +lazy_expr_command!( + ExprVar, + "polars var", + "Create a var expression for an aggregation.", + vec![ + Example { + description: + "Var value from columns in a dataframe or aggregates columns to their var value", + example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars var", + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new("a".to_string(), vec![Value::test_float(4.0)],), + Column::new("b".to_string(), vec![Value::test_float(0.0)],), + ], + None + ) + .expect("simple df for test should not 
fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Var aggregation for a group-by", + example: r#"[[a b]; [one 2] [one 2] [two 1] [two 1]] + | polars into-df + | polars group-by a + | polars agg (polars col b | polars var)"#, + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_string("one"), Value::test_string("two")], + ), + Column::new( + "b".to_string(), + vec![Value::test_float(0.0), Value::test_float(0.0)], + ), + ], + None + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ], + var, + test_var, + 1 +); diff --git a/crates/nu_plugin_polars/src/dataframe/expressions/is_in.rs b/crates/nu_plugin_polars/src/dataframe/expressions/is_in.rs new file mode 100644 index 0000000000..4402b11adf --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/expressions/is_in.rs @@ -0,0 +1,200 @@ +use crate::{ + dataframe::values::{Column, NuDataFrame, NuExpression}, + values::{ + cant_convert_err, to_pipeline_data, CustomValueSupport, PolarsPluginObject, + PolarsPluginType, + }, + PolarsPlugin, +}; +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, + Value, +}; +use polars::prelude::{is_in, lit, DataType, IntoSeries}; + +#[derive(Clone)] +pub struct ExprIsIn; + +impl PluginCommand for ExprIsIn { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars is-in" + } + + fn usage(&self) -> &str { + "Creates an is-in expression or checks to see if the elements are contained in the right series" + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required("list", SyntaxShape::Any, "List to check if values are in") + .input_output_types(vec![ + ( + Type::Custom("expression".into()), + Type::Custom("expression".into()), + ), + ( + Type::Custom("dataframe".into()), + 
Type::Custom("dataframe".into()), + ), + ]) + .category(Category::Custom("expression".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Creates a is-in expression", + example: r#"let df = ([[a b]; [one 1] [two 2] [three 3]] | polars into-df); + $df | polars with-column (polars col a | polars is-in [one two] | polars as a_in)"#, + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![ + Value::test_string("one"), + Value::test_string("two"), + Value::test_string("three"), + ], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(1), Value::test_int(2), Value::test_int(3)], + ), + Column::new( + "a_in".to_string(), + vec![ + Value::test_bool(true), + Value::test_bool(true), + Value::test_bool(false), + ], + ), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Checks if elements from a series are contained in right series", + example: r#"let other = ([1 3 6] | polars into-df); + [5 6 6 6 8 8 8] | polars into-df | polars is-in $other"#, + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "is_in".to_string(), + vec![ + Value::test_bool(false), + Value::test_bool(true), + Value::test_bool(true), + Value::test_bool(true), + Value::test_bool(false), + Value::test_bool(false), + Value::test_bool(false), + ], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ] + } + + fn search_terms(&self) -> Vec<&str> { + vec!["check", "contained", "is-contain", "match"] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let value = input.into_value(call.head); + + match PolarsPluginObject::try_from_value(plugin, &value)? 
{ + PolarsPluginObject::NuDataFrame(df) => command_df(plugin, engine, call, df), + PolarsPluginObject::NuLazyFrame(lazy) => { + command_df(plugin, engine, call, lazy.collect(call.head)?) + } + PolarsPluginObject::NuExpression(expr) => command_expr(plugin, engine, call, expr), + _ => Err(cant_convert_err( + &value, + &[ + PolarsPluginType::NuDataFrame, + PolarsPluginType::NuLazyFrame, + PolarsPluginType::NuExpression, + ], + )), + } + .map_err(LabeledError::from) + } +} + +fn command_expr( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + expr: NuExpression, +) -> Result { + let list: Vec = call.req(0)?; + + let values = NuDataFrame::try_from_columns(vec![Column::new("list".to_string(), list)], None)?; + let list = values.as_series(call.head)?; + + if matches!(list.dtype(), DataType::Object(..)) { + return Err(ShellError::IncompatibleParametersSingle { + msg: "Cannot use a mixed list as argument".into(), + span: call.head, + }); + } + + let expr: NuExpression = expr.to_polars().is_in(lit(list)).into(); + to_pipeline_data(plugin, engine, call.head, expr) +} + +fn command_df( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + df: NuDataFrame, +) -> Result { + let other_value: Value = call.req(0)?; + let other_span = other_value.span(); + let other_df = NuDataFrame::try_from_value(plugin, &other_value)?; + let other = other_df.as_series(other_span)?; + let series = df.as_series(call.head)?; + + let mut res = is_in(&series, &other) + .map_err(|e| ShellError::GenericError { + error: "Error finding in other".into(), + msg: e.to_string(), + span: Some(call.head), + help: None, + inner: vec![], + })? 
+ .into_series(); + + res.rename("is_in"); + + let df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?; + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&ExprIsIn) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/expressions/lit.rs b/crates/nu_plugin_polars/src/dataframe/expressions/lit.rs new file mode 100644 index 0000000000..48fc53b35d --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/expressions/lit.rs @@ -0,0 +1,73 @@ +use crate::{ + dataframe::values::NuExpression, + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + record, Category, Example, LabeledError, PipelineData, Signature, SyntaxShape, Type, Value, +}; + +#[derive(Clone)] +pub struct ExprLit; + +impl PluginCommand for ExprLit { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars lit" + } + + fn usage(&self) -> &str { + "Creates a literal expression." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required( + "literal", + SyntaxShape::Any, + "literal to construct the expression", + ) + .input_output_type(Type::Any, Type::Custom("expression".into())) + .category(Category::Custom("expression".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Created a literal expression and converts it to a nu object", + example: "polars lit 2 | polars into-nu", + result: Some(Value::test_record(record! 
{ + "expr" => Value::test_string("literal"), + "value" => Value::test_string("2"), + })), + }] + } + + fn search_terms(&self) -> Vec<&str> { + vec!["string", "literal", "expression"] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + _input: PipelineData, + ) -> Result { + let literal: Value = call.req(0)?; + let expr = NuExpression::try_from_value(plugin, &literal)?; + to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), nu_protocol::ShellError> { + test_polars_plugin_command(&ExprLit) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/expressions/mod.rs b/crates/nu_plugin_polars/src/dataframe/expressions/mod.rs new file mode 100644 index 0000000000..055b836dac --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/expressions/mod.rs @@ -0,0 +1,48 @@ +mod alias; +mod arg_where; +mod col; +mod concat_str; +mod datepart; +mod expressions_macro; +mod is_in; +mod lit; +mod otherwise; +mod when; + +use nu_plugin::PluginCommand; + +pub use crate::dataframe::expressions::alias::ExprAlias; +pub use crate::dataframe::expressions::arg_where::ExprArgWhere; +pub use crate::dataframe::expressions::col::ExprCol; +pub use crate::dataframe::expressions::concat_str::ExprConcatStr; +pub use crate::dataframe::expressions::datepart::ExprDatePart; +pub use crate::dataframe::expressions::expressions_macro::*; +pub use crate::dataframe::expressions::is_in::ExprIsIn; +pub use crate::dataframe::expressions::lit::ExprLit; +pub use crate::dataframe::expressions::otherwise::ExprOtherwise; +pub use crate::dataframe::expressions::when::ExprWhen; +use crate::PolarsPlugin; + +pub(crate) fn expr_commands() -> Vec>> { + vec![ + Box::new(ExprAlias), + Box::new(ExprArgWhere), + Box::new(ExprAggGroups), + Box::new(ExprCol), + Box::new(ExprConcatStr), + 
Box::new(ExprCount), + Box::new(ExprDatePart), + Box::new(ExprIsIn), + Box::new(ExprList), + Box::new(ExprLit), + Box::new(ExprNot), + Box::new(ExprMax), + Box::new(ExprMin), + Box::new(ExprOtherwise), + Box::new(ExprSum), + Box::new(ExprMean), + Box::new(ExprStd), + Box::new(ExprVar), + Box::new(ExprWhen), + ] +} diff --git a/crates/nu_plugin_polars/src/dataframe/expressions/otherwise.rs b/crates/nu_plugin_polars/src/dataframe/expressions/otherwise.rs new file mode 100644 index 0000000000..5a5764b205 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/expressions/otherwise.rs @@ -0,0 +1,122 @@ +use crate::{ + dataframe::values::{Column, NuDataFrame, NuExpression, NuWhen, NuWhenType}, + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value, +}; + +#[derive(Clone)] +pub struct ExprOtherwise; + +impl PluginCommand for ExprOtherwise { + type Plugin = PolarsPlugin; + fn name(&self) -> &str { + "polars otherwise" + } + + fn usage(&self) -> &str { + "Completes a when expression." 
+ } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required( + "otherwise expression", + SyntaxShape::Any, + "expression to apply when no when predicate matches", + ) + .input_output_type(Type::Any, Type::Custom("expression".into())) + .category(Category::Custom("expression".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Create a when conditions", + example: "polars when ((polars col a) > 2) 4 | polars otherwise 5", + result: None, + }, + Example { + description: "Create a when conditions", + example: + "polars when ((polars col a) > 2) 4 | polars when ((polars col a) < 0) 6 | polars otherwise 0", + result: None, + }, + Example { + description: "Create a new column for the dataframe", + example: r#"[[a b]; [6 2] [1 4] [4 1]] + | polars into-lazy + | polars with-column ( + polars when ((polars col a) > 2) 4 | polars otherwise 5 | polars as c + ) + | polars with-column ( + polars when ((polars col a) > 5) 10 | polars when ((polars col a) < 2) 6 | polars otherwise 0 | polars as d + ) + | polars collect"#, + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(6), Value::test_int(1), Value::test_int(4)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(4), Value::test_int(1)], + ), + Column::new( + "c".to_string(), + vec![Value::test_int(4), Value::test_int(5), Value::test_int(4)], + ), + Column::new( + "d".to_string(), + vec![Value::test_int(10), Value::test_int(6), Value::test_int(0)], + ), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ] + } + + fn search_terms(&self) -> Vec<&str> { + vec!["condition", "else"] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let otherwise_predicate: Value = call.req(0)?; + let otherwise_predicate = 
NuExpression::try_from_value(plugin, &otherwise_predicate)?; + + let value = input.into_value(call.head); + let complete: NuExpression = match NuWhen::try_from_value(plugin, &value)?.when_type { + NuWhenType::Then(then) => then.otherwise(otherwise_predicate.to_polars()).into(), + NuWhenType::ChainedThen(chained_when) => chained_when + .otherwise(otherwise_predicate.to_polars()) + .into(), + }; + to_pipeline_data(plugin, engine, call.head, complete).map_err(LabeledError::from) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), nu_protocol::ShellError> { + test_polars_plugin_command(&ExprOtherwise) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/expressions/when.rs b/crates/nu_plugin_polars/src/dataframe/expressions/when.rs new file mode 100644 index 0000000000..7d3b608e61 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/expressions/when.rs @@ -0,0 +1,144 @@ +use crate::{ + dataframe::values::{Column, NuDataFrame, NuExpression, NuWhen}, + values::{to_pipeline_data, CustomValueSupport, NuWhenType}, + PolarsPlugin, +}; +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value, +}; +use polars::prelude::when; + +#[derive(Clone)] +pub struct ExprWhen; + +impl PluginCommand for ExprWhen { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars when" + } + + fn usage(&self) -> &str { + "Creates and modifies a when expression." 
+ } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required( + "when expression", + SyntaxShape::Any, + "when expression used for matching", + ) + .required( + "then expression", + SyntaxShape::Any, + "expression that will be applied when predicate is true", + ) + .input_output_type( + Type::Custom("expression".into()), + Type::Custom("expression".into()), + ) + .category(Category::Custom("expression".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Create a when conditions", + example: "polars when ((polars col a) > 2) 4", + result: None, + }, + Example { + description: "Create a when conditions", + example: "polars when ((polars col a) > 2) 4 | polars when ((polars col a) < 0) 6", + result: None, + }, + Example { + description: "Create a new column for the dataframe", + example: r#"[[a b]; [6 2] [1 4] [4 1]] + | polars into-lazy + | polars with-column ( + polars when ((polars col a) > 2) 4 | polars otherwise 5 | polars as c + ) + | polars with-column ( + polars when ((polars col a) > 5) 10 | polars when ((polars col a) < 2) 6 | polars otherwise 0 | polars as d + ) + | polars collect"#, + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(6), Value::test_int(1), Value::test_int(4)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(4), Value::test_int(1)], + ), + Column::new( + "c".to_string(), + vec![Value::test_int(4), Value::test_int(5), Value::test_int(4)], + ), + Column::new( + "d".to_string(), + vec![Value::test_int(10), Value::test_int(6), Value::test_int(0)], + ), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ] + } + + fn search_terms(&self) -> Vec<&str> { + vec!["condition", "match", "if", "else"] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let 
when_predicate: Value = call.req(0)?; + let when_predicate = NuExpression::try_from_value(plugin, &when_predicate)?; + + let then_predicate: Value = call.req(1)?; + let then_predicate = NuExpression::try_from_value(plugin, &then_predicate)?; + + let value = input.into_value(call.head); + let when_then: NuWhen = match value { + Value::Nothing { .. } => when(when_predicate.to_polars()) + .then(then_predicate.to_polars()) + .into(), + v => match NuWhen::try_from_value(plugin, &v)?.when_type { + NuWhenType::Then(when_then) => when_then + .when(when_predicate.to_polars()) + .then(then_predicate.to_polars()) + .into(), + NuWhenType::ChainedThen(when_then_then) => when_then_then + .when(when_predicate.to_polars()) + .then(then_predicate.to_polars()) + .into(), + }, + }; + + to_pipeline_data(plugin, engine, call.head, when_then).map_err(LabeledError::from) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), nu_protocol::ShellError> { + test_polars_plugin_command(&ExprWhen) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/aggregate.rs b/crates/nu_plugin_polars/src/dataframe/lazy/aggregate.rs new file mode 100644 index 0000000000..25ba033859 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/lazy/aggregate.rs @@ -0,0 +1,210 @@ +use crate::{ + dataframe::values::{NuExpression, NuLazyFrame, NuLazyGroupBy}, + values::{to_pipeline_data, Column, CustomValueSupport, NuDataFrame}, + PolarsPlugin, +}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, + Value, +}; +use polars::{datatypes::DataType, prelude::Expr}; + +#[derive(Clone)] +pub struct LazyAggregate; + +impl PluginCommand for LazyAggregate { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars agg" + } + + fn usage(&self) -> &str { + "Performs a series of 
aggregations from a group-by." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .rest( + "Group-by expressions", + SyntaxShape::Any, + "Expression(s) that define the aggregations to be applied", + ) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("lazyframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Group by and perform an aggregation", + example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]] + | polars into-df + | polars group-by a + | polars agg [ + (polars col b | polars min | polars as "b_min") + (polars col b | polars max | polars as "b_max") + (polars col b | polars sum | polars as "b_sum") + ]"#, + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(1), Value::test_int(2)], + ), + Column::new( + "b_min".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + ), + Column::new( + "b_max".to_string(), + vec![Value::test_int(4), Value::test_int(6)], + ), + Column::new( + "b_sum".to_string(), + vec![Value::test_int(6), Value::test_int(10)], + ), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Group by and perform an aggregation", + example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]] + | polars into-lazy + | polars group-by a + | polars agg [ + (polars col b | polars min | polars as "b_min") + (polars col b | polars max | polars as "b_max") + (polars col b | polars sum | polars as "b_sum") + ] + | polars collect"#, + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(1), Value::test_int(2)], + ), + Column::new( + "b_min".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + ), + Column::new( + "b_max".to_string(), + vec![Value::test_int(4), Value::test_int(6)], + ), + Column::new( + "b_sum".to_string(), + 
vec![Value::test_int(6), Value::test_int(10)], + ), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let vals: Vec = call.rest(0)?; + let value = Value::list(vals, call.head); + let expressions = NuExpression::extract_exprs(plugin, value)?; + + let group_by = NuLazyGroupBy::try_from_pipeline(plugin, input, call.head)?; + + for expr in expressions.iter() { + if let Some(name) = get_col_name(expr) { + let dtype = group_by.schema.schema.get(name.as_str()); + + if matches!(dtype, Some(DataType::Object(..))) { + return Err(ShellError::GenericError { + error: "Object type column not supported for aggregation".into(), + msg: format!("Column '{name}' is type Object"), + span: Some(call.head), + help: Some("Aggregations cannot be performed on Object type columns. Use dtype command to check column types".into()), + inner: vec![], + }).map_err(|e| e.into()); + } + } + } + + let polars = group_by.to_polars(); + let lazy = NuLazyFrame::new(false, polars.agg(&expressions)); + to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from) + } +} + +fn get_col_name(expr: &Expr) -> Option { + match expr { + Expr::Column(column) => Some(column.to_string()), + Expr::Agg(agg) => match agg { + polars::prelude::AggExpr::Min { input: e, .. } + | polars::prelude::AggExpr::Max { input: e, .. 
} + | polars::prelude::AggExpr::Median(e) + | polars::prelude::AggExpr::NUnique(e) + | polars::prelude::AggExpr::First(e) + | polars::prelude::AggExpr::Last(e) + | polars::prelude::AggExpr::Mean(e) + | polars::prelude::AggExpr::Implode(e) + | polars::prelude::AggExpr::Count(e, _) + | polars::prelude::AggExpr::Sum(e) + | polars::prelude::AggExpr::AggGroups(e) + | polars::prelude::AggExpr::Std(e, _) + | polars::prelude::AggExpr::Var(e, _) => get_col_name(e.as_ref()), + polars::prelude::AggExpr::Quantile { expr, .. } => get_col_name(expr.as_ref()), + }, + Expr::Filter { input: expr, .. } + | Expr::Slice { input: expr, .. } + | Expr::Cast { expr, .. } + | Expr::Sort { expr, .. } + | Expr::Gather { expr, .. } + | Expr::SortBy { expr, .. } + | Expr::Exclude(expr, _) + | Expr::Alias(expr, _) + | Expr::KeepName(expr) + | Expr::Explode(expr) => get_col_name(expr.as_ref()), + Expr::Ternary { .. } + | Expr::AnonymousFunction { .. } + | Expr::Function { .. } + | Expr::Columns(_) + | Expr::DtypeColumn(_) + | Expr::Literal(_) + | Expr::BinaryExpr { .. } + | Expr::Window { .. } + | Expr::Wildcard + | Expr::RenameAlias { .. 
} + | Expr::Len + | Expr::Nth(_) + | Expr::SubPlan(_, _) + | Expr::Selector(_) => None, + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&LazyAggregate) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/collect.rs b/crates/nu_plugin_polars/src/dataframe/lazy/collect.rs new file mode 100644 index 0000000000..9cfe903497 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/lazy/collect.rs @@ -0,0 +1,98 @@ +use crate::{ + dataframe::values::{Column, NuDataFrame}, + values::{cant_convert_err, CustomValueSupport, PolarsPluginObject, PolarsPluginType}, + Cacheable, PolarsPlugin, +}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{Category, Example, LabeledError, PipelineData, Signature, Span, Type, Value}; + +#[derive(Clone)] +pub struct LazyCollect; + +impl PluginCommand for LazyCollect { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars collect" + } + + fn usage(&self) -> &str { + "Collect lazy dataframe into eager dataframe." 
+ } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("lazyframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "drop duplicates", + example: "[[a b]; [1 2] [3 4]] | polars into-lazy | polars collect", + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(1), Value::test_int(3)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + ), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let value = input.into_value(call.head); + match PolarsPluginObject::try_from_value(plugin, &value)? { + PolarsPluginObject::NuLazyFrame(lazy) => { + let eager = lazy.collect(call.head)?; + Ok(PipelineData::Value( + eager.cache(plugin, engine)?.into_value(call.head), + None, + )) + } + PolarsPluginObject::NuDataFrame(df) => { + // just return the dataframe, add to cache again to be safe + Ok(PipelineData::Value( + df.cache(plugin, engine)?.into_value(call.head), + None, + )) + } + _ => Err(cant_convert_err( + &value, + &[PolarsPluginType::NuLazyFrame, PolarsPluginType::NuDataFrame], + )), + } + .map_err(LabeledError::from) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), nu_protocol::ShellError> { + test_polars_plugin_command(&LazyCollect) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/explode.rs b/crates/nu_plugin_polars/src/dataframe/lazy/explode.rs new file mode 100644 index 0000000000..e388630a8d --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/lazy/explode.rs @@ -0,0 +1,175 @@ +use 
crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame}; +use crate::values::{to_pipeline_data, CustomValueSupport, PolarsPluginObject}; +use crate::PolarsPlugin; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, + Value, +}; + +#[derive(Clone)] +pub struct LazyExplode; + +impl PluginCommand for LazyExplode { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars explode" + } + + fn usage(&self) -> &str { + "Explodes a dataframe or creates a explode expression." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .rest( + "columns", + SyntaxShape::String, + "columns to explode, only applicable for dataframes", + ) + .input_output_types(vec![ + ( + Type::Custom("expression".into()), + Type::Custom("expression".into()), + ), + ( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ), + ]) + .category(Category::Custom("lazyframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Explode the specified dataframe", + example: "[[id name hobbies]; [1 Mercy [Cycling Knitting]] [2 Bob [Skiing Football]]] | polars into-df | polars explode hobbies | polars collect", + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "id".to_string(), + vec![ + Value::test_int(1), + Value::test_int(1), + Value::test_int(2), + Value::test_int(2), + ]), + Column::new( + "name".to_string(), + vec![ + Value::test_string("Mercy"), + Value::test_string("Mercy"), + Value::test_string("Bob"), + Value::test_string("Bob"), + ]), + Column::new( + "hobbies".to_string(), + vec![ + Value::test_string("Cycling"), + Value::test_string("Knitting"), + Value::test_string("Skiing"), + Value::test_string("Football"), + ]), + ], None) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ) + }, + Example { + description: 
"Select a column and explode the values", + example: "[[id name hobbies]; [1 Mercy [Cycling Knitting]] [2 Bob [Skiing Football]]] | polars into-df | polars select (polars col hobbies | polars explode)", + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "hobbies".to_string(), + vec![ + Value::test_string("Cycling"), + Value::test_string("Knitting"), + Value::test_string("Skiing"), + Value::test_string("Football"), + ]), + ], None) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + explode(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +pub(crate) fn explode( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let value = input.into_value(call.head); + + match PolarsPluginObject::try_from_value(plugin, &value)? 
{ + PolarsPluginObject::NuDataFrame(df) => { + let lazy = df.lazy(); + explode_lazy(plugin, engine, call, lazy) + } + PolarsPluginObject::NuLazyFrame(lazy) => explode_lazy(plugin, engine, call, lazy), + PolarsPluginObject::NuExpression(expr) => explode_expr(plugin, engine, call, expr), + _ => Err(ShellError::CantConvert { + to_type: "dataframe or expression".into(), + from_type: value.get_type().to_string(), + span: call.head, + help: None, + }), + } +} + +pub(crate) fn explode_lazy( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + lazy: NuLazyFrame, +) -> Result { + let columns = call + .positional + .iter() + .map(|e| e.as_str().map(|s| s.to_string())) + .collect::, ShellError>>()?; + + let exploded = lazy + .to_polars() + .explode(columns.iter().map(AsRef::as_ref).collect::>()); + let lazy = NuLazyFrame::from(exploded); + + to_pipeline_data(plugin, engine, call.head, lazy) +} + +pub(crate) fn explode_expr( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + expr: NuExpression, +) -> Result { + let expr: NuExpression = expr.to_polars().explode().into(); + to_pipeline_data(plugin, engine, call.head, expr) +} +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&LazyExplode) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/fetch.rs b/crates/nu_plugin_polars/src/dataframe/lazy/fetch.rs new file mode 100644 index 0000000000..77795778ce --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/lazy/fetch.rs @@ -0,0 +1,100 @@ +use crate::dataframe::values::{Column, NuDataFrame}; +use crate::values::{to_pipeline_data, CustomValueSupport, NuLazyFrame}; +use crate::PolarsPlugin; +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, + Value, +}; + 
+#[derive(Clone)] +pub struct LazyFetch; + +impl PluginCommand for LazyFetch { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars fetch" + } + + fn usage(&self) -> &str { + "Collects the lazyframe to the selected rows." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required( + "rows", + SyntaxShape::Int, + "number of rows to be fetched from lazyframe", + ) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("lazyframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Fetch a rows from the dataframe", + example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars fetch 2", + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(6), Value::test_int(4)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(2)], + ), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let rows: i64 = call.req(0)?; + let value = input.into_value(call.head); + let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?; + + let mut eager: NuDataFrame = lazy + .to_polars() + .fetch(rows as usize) + .map_err(|e| ShellError::GenericError { + error: "Error fetching rows".into(), + msg: e.to_string(), + span: Some(call.head), + help: None, + inner: vec![], + })? 
+ .into(); + + // mark this as not from lazy so it doesn't get converted back to a lazy frame + eager.from_lazy = false; + to_pipeline_data(plugin, engine, call.head, eager).map_err(LabeledError::from) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&LazyFetch) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/fill_nan.rs b/crates/nu_plugin_polars/src/dataframe/lazy/fill_nan.rs new file mode 100644 index 0000000000..8329a7b410 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/lazy/fill_nan.rs @@ -0,0 +1,189 @@ +use crate::{ + dataframe::values::{Column, NuDataFrame, NuExpression}, + values::{ + cant_convert_err, to_pipeline_data, CustomValueSupport, PolarsPluginObject, + PolarsPluginType, + }, + PolarsPlugin, +}; +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, + Value, +}; + +#[derive(Clone)] +pub struct LazyFillNA; + +impl PluginCommand for LazyFillNA { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars fill-nan" + } + + fn usage(&self) -> &str { + "Replaces NaN values with the given expression." 
+ } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required( + "fill", + SyntaxShape::Any, + "Expression to use to fill the NAN values", + ) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("lazyframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Fills the NaN values with 0", + example: "[1 2 NaN 3 NaN] | polars into-df | polars fill-nan 0", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + Value::test_int(1), + Value::test_int(2), + Value::test_int(0), + Value::test_int(3), + Value::test_int(0), + ], + )], + None, + ) + .expect("Df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Fills the NaN values of a whole dataframe", + example: "[[a b]; [0.2 1] [0.1 NaN]] | polars into-df | polars fill-nan 0", + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_float(0.2), Value::test_float(0.1)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(1), Value::test_int(0)], + ), + ], + None, + ) + .expect("Df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let fill: Value = call.req(0)?; + let value = input.into_value(call.head); + + match PolarsPluginObject::try_from_value(plugin, &value)? { + PolarsPluginObject::NuDataFrame(df) => { + cmd_df(plugin, engine, call, df, fill, value.span()) + } + PolarsPluginObject::NuLazyFrame(lazy) => cmd_df( + plugin, + engine, + call, + lazy.collect(value.span())?, + fill, + value.span(), + ), + PolarsPluginObject::NuExpression(expr) => { + Ok(cmd_expr(plugin, engine, call, expr, fill)?) 
+ } + _ => Err(cant_convert_err( + &value, + &[ + PolarsPluginType::NuDataFrame, + PolarsPluginType::NuLazyFrame, + PolarsPluginType::NuExpression, + ], + )), + } + .map_err(LabeledError::from) + } +} + +fn cmd_df( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + frame: NuDataFrame, + fill: Value, + val_span: Span, +) -> Result { + let columns = frame.columns(val_span)?; + let dataframe = columns + .into_iter() + .map(|column| { + let column_name = column.name().to_string(); + let values = column + .into_iter() + .map(|value| { + let span = value.span(); + match value { + Value::Float { val, .. } => { + if val.is_nan() { + fill.clone() + } else { + value + } + } + Value::List { vals, .. } => { + NuDataFrame::fill_list_nan(vals, span, fill.clone()) + } + _ => value, + } + }) + .collect::>(); + Column::new(column_name, values) + }) + .collect::>(); + let df = NuDataFrame::try_from_columns(dataframe, None)?; + to_pipeline_data(plugin, engine, call.head, df) +} + +fn cmd_expr( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + expr: NuExpression, + fill: Value, +) -> Result { + let fill = NuExpression::try_from_value(plugin, &fill)?.to_polars(); + let expr: NuExpression = expr.to_polars().fill_nan(fill).into(); + + to_pipeline_data(plugin, engine, call.head, expr) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&LazyFillNA) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/fill_null.rs b/crates/nu_plugin_polars/src/dataframe/lazy/fill_null.rs new file mode 100644 index 0000000000..5c64dc620b --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/lazy/fill_null.rs @@ -0,0 +1,127 @@ +use crate::{ + dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame}, + values::{ + cant_convert_err, to_pipeline_data, CustomValueSupport, PolarsPluginObject, + 
PolarsPluginType, + }, + PolarsPlugin, +}; +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, + Value, +}; + +#[derive(Clone)] +pub struct LazyFillNull; + +impl PluginCommand for LazyFillNull { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars fill-null" + } + + fn usage(&self) -> &str { + "Replaces NULL values with the given expression." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required( + "fill", + SyntaxShape::Any, + "Expression to use to fill the null values", + ) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("lazyframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Fills the null values by 0", + example: "[1 2 2 3 3] | polars into-df | polars shift 2 | polars fill-null 0", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + Value::test_int(0), + Value::test_int(0), + Value::test_int(1), + Value::test_int(2), + Value::test_int(2), + ], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let fill: Value = call.req(0)?; + let value = input.into_value(call.head); + + match PolarsPluginObject::try_from_value(plugin, &value)? 
{ + PolarsPluginObject::NuDataFrame(df) => cmd_lazy(plugin, engine, call, df.lazy(), fill), + PolarsPluginObject::NuLazyFrame(lazy) => cmd_lazy(plugin, engine, call, lazy, fill), + PolarsPluginObject::NuExpression(expr) => cmd_expr(plugin, engine, call, expr, fill), + _ => Err(cant_convert_err( + &value, + &[ + PolarsPluginType::NuDataFrame, + PolarsPluginType::NuLazyFrame, + PolarsPluginType::NuExpression, + ], + )), + } + .map_err(LabeledError::from) + } +} + +fn cmd_lazy( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + lazy: NuLazyFrame, + fill: Value, +) -> Result { + let expr = NuExpression::try_from_value(plugin, &fill)?.to_polars(); + let lazy = NuLazyFrame::new(lazy.from_eager, lazy.to_polars().fill_null(expr)); + to_pipeline_data(plugin, engine, call.head, lazy) +} + +fn cmd_expr( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + expr: NuExpression, + fill: Value, +) -> Result { + let fill = NuExpression::try_from_value(plugin, &fill)?.to_polars(); + let expr: NuExpression = expr.to_polars().fill_null(fill).into(); + to_pipeline_data(plugin, engine, call.head, expr) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&LazyFillNull) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/filter.rs b/crates/nu_plugin_polars/src/dataframe/lazy/filter.rs new file mode 100644 index 0000000000..481208d7f6 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/lazy/filter.rs @@ -0,0 +1,104 @@ +use crate::{ + dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame}, + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, + Value, +}; + +#[derive(Clone)] +pub 
struct LazyFilter; + +impl PluginCommand for LazyFilter { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars filter" + } + + fn usage(&self) -> &str { + "Filter dataframe based in expression." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required( + "filter expression", + SyntaxShape::Any, + "Expression that define the column selection", + ) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("lazyframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Filter dataframe using an expression", + example: + "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars filter ((polars col a) >= 4)", + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(6), Value::test_int(4)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(2)], + ), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let expr_value: Value = call.req(0)?; + let filter_expr = NuExpression::try_from_value(plugin, &expr_value)?; + let pipeline_value = input.into_value(call.head); + let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?; + command(plugin, engine, call, lazy, filter_expr).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + lazy: NuLazyFrame, + filter_expr: NuExpression, +) -> Result { + let lazy = NuLazyFrame::new( + lazy.from_eager, + lazy.to_polars().filter(filter_expr.to_polars()), + ); + to_pipeline_data(plugin, engine, call.head, lazy) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() 
-> Result<(), ShellError> { + test_polars_plugin_command(&LazyFilter) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/flatten.rs b/crates/nu_plugin_polars/src/dataframe/lazy/flatten.rs new file mode 100644 index 0000000000..93402bbe6a --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/lazy/flatten.rs @@ -0,0 +1,125 @@ +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value, +}; + +use crate::{ + dataframe::values::{Column, NuDataFrame}, + values::CustomValueSupport, + PolarsPlugin, +}; + +use super::explode::explode; + +#[derive(Clone)] +pub struct LazyFlatten; + +impl PluginCommand for LazyFlatten { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars flatten" + } + + fn usage(&self) -> &str { + "An alias for polars explode." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .rest( + "columns", + SyntaxShape::String, + "columns to flatten, only applicable for dataframes", + ) + .input_output_types(vec![ + ( + Type::Custom("expression".into()), + Type::Custom("expression".into()), + ), + ( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ), + ]) + .category(Category::Custom("lazyframe".into())) + } + + fn examples(&self) -> Vec { + vec![ +Example { + description: "Flatten the specified dataframe", + example: "[[id name hobbies]; [1 Mercy [Cycling Knitting]] [2 Bob [Skiing Football]]] | polars into-df | polars flatten hobbies | polars collect", + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "id".to_string(), + vec![ + Value::test_int(1), + Value::test_int(1), + Value::test_int(2), + Value::test_int(2), + ]), + Column::new( + "name".to_string(), + vec![ + Value::test_string("Mercy"), + Value::test_string("Mercy"), + Value::test_string("Bob"), + Value::test_string("Bob"), + ]), + Column::new( + "hobbies".to_string(), + vec![ + 
Value::test_string("Cycling"), + Value::test_string("Knitting"), + Value::test_string("Skiing"), + Value::test_string("Football"), + ]), + ], None) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ) + }, + Example { + description: "Select a column and flatten the values", + example: "[[id name hobbies]; [1 Mercy [Cycling Knitting]] [2 Bob [Skiing Football]]] | polars into-df | polars select (polars col hobbies | polars flatten)", + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "hobbies".to_string(), + vec![ + Value::test_string("Cycling"), + Value::test_string("Knitting"), + Value::test_string("Skiing"), + Value::test_string("Football"), + ]), + ], None) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + explode(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), nu_protocol::ShellError> { + test_polars_plugin_command(&LazyFlatten) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/groupby.rs b/crates/nu_plugin_polars/src/dataframe/lazy/groupby.rs new file mode 100644 index 0000000000..765ffd003a --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/lazy/groupby.rs @@ -0,0 +1,168 @@ +use crate::{ + dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame, NuLazyGroupBy}, + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, + Value, +}; +use polars::prelude::Expr; + +#[derive(Clone)] +pub struct ToLazyGroupBy; + +impl PluginCommand for ToLazyGroupBy { + 
type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars group-by" + } + + fn usage(&self) -> &str { + "Creates a group-by object that can be used for other aggregations." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .rest( + "Group-by expressions", + SyntaxShape::Any, + "Expression(s) that define the lazy group-by", + ) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("lazyframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Group by and perform an aggregation", + example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]] + | polars into-df + | polars group-by a + | polars agg [ + (polars col b | polars min | polars as "b_min") + (polars col b | polars max | polars as "b_max") + (polars col b | polars sum | polars as "b_sum") + ]"#, + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(1), Value::test_int(2)], + ), + Column::new( + "b_min".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + ), + Column::new( + "b_max".to_string(), + vec![Value::test_int(4), Value::test_int(6)], + ), + Column::new( + "b_sum".to_string(), + vec![Value::test_int(6), Value::test_int(10)], + ), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Group by and perform an aggregation", + example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]] + | polars into-lazy + | polars group-by a + | polars agg [ + (polars col b | polars min | polars as "b_min") + (polars col b | polars max | polars as "b_max") + (polars col b | polars sum | polars as "b_sum") + ] + | polars collect"#, + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(1), Value::test_int(2)], + ), + Column::new( + "b_min".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + ), + 
Column::new( + "b_max".to_string(), + vec![Value::test_int(4), Value::test_int(6)], + ), + Column::new( + "b_sum".to_string(), + vec![Value::test_int(6), Value::test_int(10)], + ), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let vals: Vec = call.rest(0)?; + let expr_value = Value::list(vals, call.head); + let expressions = NuExpression::extract_exprs(plugin, expr_value)?; + + if expressions + .iter() + .any(|expr| !matches!(expr, Expr::Column(..))) + { + let value: Value = call.req(0)?; + Err(ShellError::IncompatibleParametersSingle { + msg: "Expected only Col expressions".into(), + span: value.span(), + })?; + } + + let pipeline_value = input.into_value(call.head); + let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?; + command(plugin, engine, call, lazy, expressions).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + lazy: NuLazyFrame, + expressions: Vec, +) -> Result { + let group_by = lazy.to_polars().group_by(expressions); + let group_by = NuLazyGroupBy::new(group_by, lazy.from_eager, lazy.schema()?); + to_pipeline_data(plugin, engine, call.head, group_by) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&ToLazyGroupBy) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/join.rs b/crates/nu_plugin_polars/src/dataframe/lazy/join.rs new file mode 100644 index 0000000000..68784adc81 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/lazy/join.rs @@ -0,0 +1,260 @@ +use crate::{ + dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame}, + values::{to_pipeline_data, CustomValueSupport}, + 
PolarsPlugin, +}; +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, + Value, +}; +use polars::prelude::{Expr, JoinType}; + +#[derive(Clone)] +pub struct LazyJoin; + +impl PluginCommand for LazyJoin { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars join" + } + + fn usage(&self) -> &str { + "Joins a lazy frame with other lazy frame." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required("other", SyntaxShape::Any, "LazyFrame to join with") + .required("left_on", SyntaxShape::Any, "Left column(s) to join on") + .required("right_on", SyntaxShape::Any, "Right column(s) to join on") + .switch( + "inner", + "inner joining between lazyframes (default)", + Some('i'), + ) + .switch("left", "left join between lazyframes", Some('l')) + .switch("outer", "outer join between lazyframes", Some('o')) + .switch("cross", "cross join between lazyframes", Some('c')) + .named( + "suffix", + SyntaxShape::String, + "Suffix to use on columns with same name", + Some('s'), + ) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("lazyframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Join two lazy dataframes", + example: r#"let df_a = ([[a b c];[1 "a" 0] [2 "b" 1] [1 "c" 2] [1 "c" 3]] | polars into-lazy); + let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | polars into-lazy); + $df_a | polars join $df_b a foo | polars collect"#, + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![ + Value::test_int(1), + Value::test_int(2), + Value::test_int(1), + Value::test_int(1), + ], + ), + Column::new( + "b".to_string(), + vec![ + Value::test_string("a"), + Value::test_string("b"), + Value::test_string("c"), + Value::test_string("c"), + 
], + ), + Column::new( + "c".to_string(), + vec![ + Value::test_int(0), + Value::test_int(1), + Value::test_int(2), + Value::test_int(3), + ], + ), + Column::new( + "bar".to_string(), + vec![ + Value::test_string("a"), + Value::test_string("c"), + Value::test_string("a"), + Value::test_string("a"), + ], + ), + Column::new( + "ham".to_string(), + vec![ + Value::test_string("let"), + Value::test_string("var"), + Value::test_string("let"), + Value::test_string("let"), + ], + ), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Join one eager dataframe with a lazy dataframe", + example: r#"let df_a = ([[a b c];[1 "a" 0] [2 "b" 1] [1 "c" 2] [1 "c" 3]] | polars into-df); + let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | polars into-lazy); + $df_a | polars join $df_b a foo"#, + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![ + Value::test_int(1), + Value::test_int(2), + Value::test_int(1), + Value::test_int(1), + ], + ), + Column::new( + "b".to_string(), + vec![ + Value::test_string("a"), + Value::test_string("b"), + Value::test_string("c"), + Value::test_string("c"), + ], + ), + Column::new( + "c".to_string(), + vec![ + Value::test_int(0), + Value::test_int(1), + Value::test_int(2), + Value::test_int(3), + ], + ), + Column::new( + "bar".to_string(), + vec![ + Value::test_string("a"), + Value::test_string("c"), + Value::test_string("a"), + Value::test_string("a"), + ], + ), + Column::new( + "ham".to_string(), + vec![ + Value::test_string("let"), + Value::test_string("var"), + Value::test_string("let"), + Value::test_string("let"), + ], + ), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let left = 
call.has_flag("left")?; + let outer = call.has_flag("outer")?; + let cross = call.has_flag("cross")?; + + let how = if left { + JoinType::Left + } else if outer { + JoinType::Outer { coalesce: true } + } else if cross { + JoinType::Cross + } else { + JoinType::Inner + }; + + let other: Value = call.req(0)?; + let other = NuLazyFrame::try_from_value_coerce(plugin, &other)?; + let other = other.to_polars(); + + let left_on: Value = call.req(1)?; + let left_on = NuExpression::extract_exprs(plugin, left_on)?; + + let right_on: Value = call.req(2)?; + let right_on = NuExpression::extract_exprs(plugin, right_on)?; + + if left_on.len() != right_on.len() { + let right_on: Value = call.req(2)?; + Err(ShellError::IncompatibleParametersSingle { + msg: "The right column list has a different size to the left column list".into(), + span: right_on.span(), + })?; + } + + // Checking that both list of expressions are made out of col expressions or strings + for (index, list) in &[(1usize, &left_on), (2, &left_on)] { + if list.iter().any(|expr| !matches!(expr, Expr::Column(..))) { + let value: Value = call.req(*index)?; + Err(ShellError::IncompatibleParametersSingle { + msg: "Expected only a string, col expressions or list of strings".into(), + span: value.span(), + })?; + } + } + + let suffix: Option = call.get_flag("suffix")?; + let suffix = suffix.unwrap_or_else(|| "_x".into()); + + let value = input.into_value(call.head); + let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?; + let from_eager = lazy.from_eager; + let lazy = lazy.to_polars(); + + let lazy = lazy + .join_builder() + .with(other) + .left_on(left_on) + .right_on(right_on) + .how(how) + .force_parallel(true) + .suffix(suffix) + .finish(); + + let lazy = NuLazyFrame::new(from_eager, lazy); + to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> 
Result<(), ShellError> { + test_polars_plugin_command(&LazyJoin) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/macro_commands.rs b/crates/nu_plugin_polars/src/dataframe/lazy/macro_commands.rs new file mode 100644 index 0000000000..38968c2bda --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/lazy/macro_commands.rs @@ -0,0 +1,225 @@ +/// Definition of multiple lazyframe commands using a macro rule +/// All of these commands have an identical body and only require +/// to have a change in the name, description and function +use crate::dataframe::values::{Column, NuDataFrame, NuLazyFrame}; +use crate::values::{to_pipeline_data, CustomValueSupport}; +use crate::PolarsPlugin; +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{Category, Example, LabeledError, PipelineData, Signature, Span, Type, Value}; + +macro_rules! lazy_command { + ($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident) => { + #[derive(Clone)] + pub struct $command; + + impl PluginCommand for $command { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + $name + } + + fn usage(&self) -> &str { + $desc + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .usage($desc) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("lazyframe".into())) + } + + fn examples(&self) -> Vec { + $examples + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let lazy = NuLazyFrame::try_from_pipeline_coerce(plugin, input, call.head) + .map_err(LabeledError::from)?; + let lazy = NuLazyFrame::new(lazy.from_eager, lazy.to_polars().$func()); + to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from) + } + } + + #[cfg(test)] + mod $test { + use super::*; + use crate::test::test_polars_plugin_command; + use 
nu_protocol::ShellError; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&$command) + } + } + }; + + ($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident, $ddot: expr) => { + #[derive(Clone)] + pub struct $command; + + impl PluginCommand for $command { + type Plugin = PolarsPlugin; + + fn signature(&self) -> Signature { + Signature::build($name) + .usage($desc) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("lazyframe".into())) + .plugin_examples($examples) + } + + fn run( + &self, + _plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let lazy = NuLazyFrame::try_from_pipeline_coerce(plugin, input, call.head) + .map_err(LabeledError::from)?; + let lazy = NuLazyFrame::new(lazy.from_eager, lazy.into_polars().$func($ddot)); + to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from) + } + } + + #[cfg(test)] + mod $test { + use super::*; + use crate::test::test_polars_plugin_command; + use nu_protocol::ShellError; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&$command) + } + } + }; + + ($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident?, $test: ident) => { + #[derive(Clone)] + pub struct $command; + + impl PluginCommand for $command { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + $name + } + + fn usage(&self) -> &str { + $desc + } + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("lazyframe".into())) + } + + fn examples(&self) -> Vec { + $examples + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let lazy = 
NuLazyFrame::try_from_pipeline_coerce(plugin, input, call.head) + .map_err(LabeledError::from)?; + + let lazy = NuLazyFrame::new( + lazy.from_eager, + lazy.to_polars() + .$func() + .map_err(|e| ShellError::GenericError { + error: "Dataframe Error".into(), + msg: e.to_string(), + help: None, + span: None, + inner: vec![], + }) + .map_err(LabeledError::from)?, + ); + + to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from) + } + } + + #[cfg(test)] + mod $test { + use super::*; + use crate::test::test_polars_plugin_command; + use nu_protocol::ShellError; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&$command) + } + } + }; +} + +// LazyReverse command +// Expands to a command definition for reverse +lazy_command!( + LazyReverse, + "polars reverse", + "Reverses the LazyFrame", + vec![Example { + description: "Reverses the dataframe.", + example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars reverse", + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(2), Value::test_int(4), Value::test_int(6),], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(2), Value::test_int(2),], + ), + ], + None + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + },], + reverse, + test_reverse +); + +// LazyCache command +// Expands to a command definition for cache +lazy_command!( + LazyCache, + "polars cache", + "Caches operations in a new LazyFrame.", + vec![Example { + description: "Caches the result into a new LazyFrame", + example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars reverse | polars cache", + result: None, + }], + cache, + test_cache +); diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/median.rs b/crates/nu_plugin_polars/src/dataframe/lazy/median.rs new file mode 100644 index 0000000000..da21a1d7eb --- /dev/null +++ 
b/crates/nu_plugin_polars/src/dataframe/lazy/median.rs @@ -0,0 +1,143 @@ +use crate::{ + dataframe::values::{Column, NuDataFrame, NuLazyFrame}, + values::{ + cant_convert_err, to_pipeline_data, CustomValueSupport, NuExpression, PolarsPluginObject, + PolarsPluginType, + }, + PolarsPlugin, +}; +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value, +}; +#[derive(Clone)] +pub struct LazyMedian; + +impl PluginCommand for LazyMedian { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars median" + } + + fn usage(&self) -> &str { + "Median value from columns in a dataframe or creates expression for an aggregation" + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_types(vec![ + ( + Type::Custom("expression".into()), + Type::Custom("expression".into()), + ), + ( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ), + ]) + .category(Category::Custom("lazyframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Median aggregation for a group-by", + example: r#"[[a b]; [one 2] [one 4] [two 1]] + | polars into-df + | polars group-by a + | polars agg (polars col b | polars median)"#, + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_string("one"), Value::test_string("two")], + ), + Column::new( + "b".to_string(), + vec![Value::test_float(3.0), Value::test_float(1.0)], + ), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Median value from columns in a dataframe", + example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars median", + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new("a".to_string(), vec![Value::test_float(4.0)]), + Column::new("b".to_string(), 
vec![Value::test_float(2.0)]), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let value = input.into_value(call.head); + match PolarsPluginObject::try_from_value(plugin, &value)? { + PolarsPluginObject::NuDataFrame(df) => command(plugin, engine, call, df.lazy()), + PolarsPluginObject::NuLazyFrame(lazy) => command(plugin, engine, call, lazy), + PolarsPluginObject::NuExpression(expr) => { + let expr: NuExpression = expr.to_polars().median().into(); + to_pipeline_data(plugin, engine, call.head, expr) + } + _ => Err(cant_convert_err( + &value, + &[ + PolarsPluginType::NuDataFrame, + PolarsPluginType::NuLazyFrame, + PolarsPluginType::NuExpression, + ], + )), + } + .map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + lazy: NuLazyFrame, +) -> Result { + let polars_lazy = lazy + .to_polars() + .median() + .map_err(|e| ShellError::GenericError { + error: format!("Error in median operation: {e}"), + msg: "".into(), + help: None, + span: None, + inner: vec![], + })?; + let lazy = NuLazyFrame::new(lazy.from_eager, polars_lazy); + to_pipeline_data(plugin, engine, call.head, lazy) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&LazyMedian) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/mod.rs b/crates/nu_plugin_polars/src/dataframe/lazy/mod.rs new file mode 100644 index 0000000000..e70143e6ce --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/lazy/mod.rs @@ -0,0 +1,57 @@ +mod aggregate; +mod collect; +mod explode; +mod fetch; +mod fill_nan; +mod fill_null; +mod filter; +mod flatten; +pub mod groupby; +mod join; +mod macro_commands; +mod 
median; +mod quantile; +mod select; +mod sort_by_expr; +mod to_lazy; + +use nu_plugin::PluginCommand; + +pub use crate::dataframe::lazy::aggregate::LazyAggregate; +pub use crate::dataframe::lazy::collect::LazyCollect; +use crate::dataframe::lazy::fetch::LazyFetch; +use crate::dataframe::lazy::fill_nan::LazyFillNA; +pub use crate::dataframe::lazy::fill_null::LazyFillNull; +use crate::dataframe::lazy::filter::LazyFilter; +use crate::dataframe::lazy::groupby::ToLazyGroupBy; +use crate::dataframe::lazy::join::LazyJoin; +pub(crate) use crate::dataframe::lazy::macro_commands::*; +use crate::dataframe::lazy::quantile::LazyQuantile; +pub(crate) use crate::dataframe::lazy::select::LazySelect; +use crate::dataframe::lazy::sort_by_expr::LazySortBy; +pub use crate::dataframe::lazy::to_lazy::ToLazyFrame; +use crate::PolarsPlugin; +pub use explode::LazyExplode; +pub use flatten::LazyFlatten; + +pub(crate) fn lazy_commands() -> Vec>> { + vec![ + Box::new(LazyAggregate), + Box::new(LazyCache), + Box::new(LazyCollect), + Box::new(LazyExplode), + Box::new(LazyFetch), + Box::new(LazyFillNA), + Box::new(LazyFillNull), + Box::new(LazyFilter), + Box::new(LazyFlatten), + Box::new(LazyJoin), + Box::new(median::LazyMedian), + Box::new(LazyReverse), + Box::new(LazySelect), + Box::new(LazySortBy), + Box::new(LazyQuantile), + Box::new(ToLazyFrame), + Box::new(ToLazyGroupBy), + ] +} diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/quantile.rs b/crates/nu_plugin_polars/src/dataframe/lazy/quantile.rs new file mode 100644 index 0000000000..7dc6bb5c18 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/lazy/quantile.rs @@ -0,0 +1,160 @@ +use crate::{ + dataframe::values::{Column, NuDataFrame, NuLazyFrame}, + values::{ + cant_convert_err, to_pipeline_data, CustomValueSupport, NuExpression, PolarsPluginObject, + PolarsPluginType, + }, + PolarsPlugin, +}; +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, 
ShellError, Signature, Span, SyntaxShape, Type, + Value, +}; +use polars::prelude::{lit, QuantileInterpolOptions}; + +#[derive(Clone)] +pub struct LazyQuantile; + +impl PluginCommand for LazyQuantile { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars quantile" + } + + fn usage(&self) -> &str { + "Aggregates the columns to the selected quantile." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required( + "quantile", + SyntaxShape::Number, + "quantile value for quantile operation", + ) + .input_output_types(vec![ + ( + Type::Custom("expression".into()), + Type::Custom("expression".into()), + ), + ( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ), + ]) + .category(Category::Custom("lazyframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "quantile value from columns in a dataframe", + example: "[[a b]; [6 2] [1 4] [4 1]] | polars into-df | polars quantile 0.5", + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new("a".to_string(), vec![Value::test_float(4.0)]), + Column::new("b".to_string(), vec![Value::test_float(2.0)]), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Quantile aggregation for a group-by", + example: r#"[[a b]; [one 2] [one 4] [two 1]] + | polars into-df + | polars group-by a + | polars agg (polars col b | polars quantile 0.5)"#, + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_string("one"), Value::test_string("two")], + ), + Column::new( + "b".to_string(), + vec![Value::test_float(4.0), Value::test_float(1.0)], + ), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let 
value = input.into_value(call.head); + let quantile: f64 = call.req(0)?; + match PolarsPluginObject::try_from_value(plugin, &value)? { + PolarsPluginObject::NuDataFrame(df) => { + command(plugin, engine, call, df.lazy(), quantile) + } + PolarsPluginObject::NuLazyFrame(lazy) => command(plugin, engine, call, lazy, quantile), + PolarsPluginObject::NuExpression(expr) => { + let expr: NuExpression = expr + .to_polars() + .quantile(lit(quantile), QuantileInterpolOptions::default()) + .into(); + to_pipeline_data(plugin, engine, call.head, expr) + } + _ => Err(cant_convert_err( + &value, + &[ + PolarsPluginType::NuDataFrame, + PolarsPluginType::NuLazyFrame, + PolarsPluginType::NuExpression, + ], + )), + } + .map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + lazy: NuLazyFrame, + quantile: f64, +) -> Result { + let lazy = NuLazyFrame::new( + lazy.from_eager, + lazy.to_polars() + .quantile(lit(quantile), QuantileInterpolOptions::default()) + .map_err(|e| ShellError::GenericError { + error: "Dataframe Error".into(), + msg: e.to_string(), + help: None, + span: None, + inner: vec![], + })?, + ); + + to_pipeline_data(plugin, engine, call.head, lazy) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&LazyQuantile) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/select.rs b/crates/nu_plugin_polars/src/dataframe/lazy/select.rs new file mode 100644 index 0000000000..4da89cf218 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/lazy/select.rs @@ -0,0 +1,85 @@ +use crate::{ + dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame}, + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, 
Signature, Span, SyntaxShape, Type, Value, +}; +#[derive(Clone)] +pub struct LazySelect; + +impl PluginCommand for LazySelect { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars select" + } + + fn usage(&self) -> &str { + "Selects columns from lazyframe." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .rest( + "select expressions", + SyntaxShape::Any, + "Expression(s) that define the column selection", + ) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("lazyframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Select a column from the dataframe", + example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars select a", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "a".to_string(), + vec![Value::test_int(6), Value::test_int(4), Value::test_int(2)], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let vals: Vec = call.rest(0)?; + let expr_value = Value::list(vals, call.head); + let expressions = NuExpression::extract_exprs(plugin, expr_value)?; + + let pipeline_value = input.into_value(call.head); + let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?; + let lazy = NuLazyFrame::new(lazy.from_eager, lazy.to_polars().select(&expressions)); + to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from) + } +} + +#[cfg(test)] +mod test { + use crate::test::test_polars_plugin_command; + + use super::*; + + #[test] + fn test_examples() -> Result<(), nu_protocol::ShellError> { + test_polars_plugin_command(&LazySelect) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/sort_by_expr.rs b/crates/nu_plugin_polars/src/dataframe/lazy/sort_by_expr.rs 
new file mode 100644 index 0000000000..5e4f3a5e88 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/lazy/sort_by_expr.rs @@ -0,0 +1,160 @@ +use super::super::values::NuLazyFrame; +use crate::{ + dataframe::values::{Column, NuDataFrame, NuExpression}, + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, + Value, +}; + +#[derive(Clone)] +pub struct LazySortBy; + +impl PluginCommand for LazySortBy { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars sort-by" + } + + fn usage(&self) -> &str { + "Sorts a lazy dataframe based on expression(s)." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .rest( + "sort expression", + SyntaxShape::Any, + "sort expression for the dataframe", + ) + .named( + "reverse", + SyntaxShape::List(Box::new(SyntaxShape::Boolean)), + "Reverse sorting. 
Default is false", + Some('r'), + ) + .switch( + "nulls-last", + "nulls are shown last in the dataframe", + Some('n'), + ) + .switch("maintain-order", "Maintains order during sort", Some('m')) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("lazyframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Sort dataframe by one column", + example: "[[a b]; [6 2] [1 4] [4 1]] | polars into-df | polars sort-by a", + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(1), Value::test_int(4), Value::test_int(6)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(4), Value::test_int(1), Value::test_int(2)], + ), + ], None) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Sort column using two columns", + example: + "[[a b]; [6 2] [1 1] [1 4] [2 4]] | polars into-df | polars sort-by [a b] -r [false true]", + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "a".to_string(), + vec![ + Value::test_int(1), + Value::test_int(1), + Value::test_int(2), + Value::test_int(6), + ], + ), + Column::new( + "b".to_string(), + vec![ + Value::test_int(4), + Value::test_int(1), + Value::test_int(4), + Value::test_int(2), + ], + ), + ], None) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let vals: Vec = call.rest(0)?; + let expr_value = Value::list(vals, call.head); + let expressions = NuExpression::extract_exprs(plugin, expr_value)?; + let nulls_last = call.has_flag("nulls-last")?; + let maintain_order = call.has_flag("maintain-order")?; + + let reverse: Option> = call.get_flag("reverse")?; + let reverse = match reverse { + Some(list) => 
{ + if expressions.len() != list.len() { + let span = call + .get_flag::("reverse")? + .expect("already checked and it exists") + .span(); + Err(ShellError::GenericError { + error: "Incorrect list size".into(), + msg: "Size doesn't match expression list".into(), + span: Some(span), + help: None, + inner: vec![], + })? + } else { + list + } + } + None => expressions.iter().map(|_| false).collect::>(), + }; + + let pipeline_value = input.into_value(call.head); + let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?; + let lazy = NuLazyFrame::new( + lazy.from_eager, + lazy.to_polars() + .sort_by_exprs(&expressions, reverse, nulls_last, maintain_order), + ); + to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&LazySortBy) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/to_lazy.rs b/crates/nu_plugin_polars/src/dataframe/lazy/to_lazy.rs new file mode 100644 index 0000000000..2991a0fb33 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/lazy/to_lazy.rs @@ -0,0 +1,61 @@ +use crate::{dataframe::values::NuSchema, values::CustomValueSupport, Cacheable, PolarsPlugin}; + +use super::super::values::{NuDataFrame, NuLazyFrame}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{Category, Example, LabeledError, PipelineData, Signature, SyntaxShape, Type}; + +#[derive(Clone)] +pub struct ToLazyFrame; + +impl PluginCommand for ToLazyFrame { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars into-lazy" + } + + fn usage(&self) -> &str { + "Converts a dataframe into a lazy dataframe." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .named( + "schema", + SyntaxShape::Record(vec![]), + r#"Polars Schema in format [{name: str}]. 
CSV, JSON, and JSONL files"#, + Some('s'), + ) + .input_output_type(Type::Any, Type::Custom("dataframe".into())) + .category(Category::Custom("lazyframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Takes a dictionary and creates a lazy dataframe", + example: "[[a b];[1 2] [3 4]] | polars into-lazy", + result: None, + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let maybe_schema = call + .get_flag("schema")? + .map(|schema| NuSchema::try_from(&schema)) + .transpose()?; + + let df = NuDataFrame::try_from_iter(plugin, input.into_iter(), maybe_schema)?; + let lazy = NuLazyFrame::from_dataframe(df); + Ok(PipelineData::Value( + lazy.cache(plugin, engine)?.into_value(call.head), + None, + )) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/mod.rs b/crates/nu_plugin_polars/src/dataframe/mod.rs new file mode 100644 index 0000000000..e41ef5bc1a --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/mod.rs @@ -0,0 +1,19 @@ +use nu_protocol::{ShellError, Span}; + +pub mod eager; +pub mod expressions; +pub mod lazy; +pub mod series; +pub mod stub; +mod utils; +pub mod values; + +pub fn missing_flag_error(flag: &str, span: Span) -> ShellError { + ShellError::GenericError { + error: format!("Missing flag: {flag}"), + msg: "".into(), + span: Some(span), + help: None, + inner: vec![], + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/all_false.rs b/crates/nu_plugin_polars/src/dataframe/series/all_false.rs new file mode 100644 index 0000000000..171aa20333 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/all_false.rs @@ -0,0 +1,119 @@ +use crate::{ + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::values::{Column, NuDataFrame}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, 
ShellError, Signature, Span, Type, Value, +}; + +#[derive(Clone)] +pub struct AllFalse; + +impl PluginCommand for AllFalse { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars all-false" + } + + fn usage(&self) -> &str { + "Returns true if all values are false." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Returns true if all values are false", + example: "[false false false] | polars into-df | polars all-false", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "all_false".to_string(), + vec![Value::test_bool(true)], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Checks the result from a comparison", + example: r#"let s = ([5 6 2 10] | polars into-df); + let res = ($s > 9); + $res | polars all-false"#, + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "all_false".to_string(), + vec![Value::test_bool(false)], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + + let series = df.as_series(call.head)?; + let bool = series.bool().map_err(|_| ShellError::GenericError { + error: "Error converting to bool".into(), + msg: "all-false only works with series of type bool".into(), + span: Some(call.head), 
+ help: None, + inner: vec![], + })?; + + let value = Value::bool(!bool.any(), call.head); + + let df = NuDataFrame::try_from_columns( + vec![Column::new("all_false".to_string(), vec![value])], + None, + )?; + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&AllFalse) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/all_true.rs b/crates/nu_plugin_polars/src/dataframe/series/all_true.rs new file mode 100644 index 0000000000..de69b29d72 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/all_true.rs @@ -0,0 +1,119 @@ +use crate::{ + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::values::{Column, NuDataFrame}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value, +}; + +#[derive(Clone)] +pub struct AllTrue; + +impl PluginCommand for AllTrue { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars all-true" + } + + fn usage(&self) -> &str { + "Returns true if all values are true." 
+ } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Returns true if all values are true", + example: "[true true true] | polars into-df | polars all-true", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "all_true".to_string(), + vec![Value::test_bool(true)], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Checks the result from a comparison", + example: r#"let s = ([5 6 2 8] | polars into-df); + let res = ($s > 9); + $res | polars all-true"#, + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "all_true".to_string(), + vec![Value::test_bool(false)], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + + let series = df.as_series(call.head)?; + let bool = series.bool().map_err(|_| ShellError::GenericError { + error: "Error converting to bool".into(), + msg: "all-true only works with series of type bool".into(), + span: Some(call.head), + help: None, + inner: vec![], + })?; + + let value = Value::bool(bool.all(), call.head); + + let df = NuDataFrame::try_from_columns( + vec![Column::new("all_true".to_string(), vec![value])], + None, + )?; + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { +
use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&AllTrue) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/arg_max.rs b/crates/nu_plugin_polars/src/dataframe/series/arg_max.rs new file mode 100644 index 0000000000..e62999066e --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/arg_max.rs @@ -0,0 +1,96 @@ +use crate::{ + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::values::{Column, NuDataFrame}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value, +}; +use polars::prelude::{ArgAgg, IntoSeries, NewChunkedArray, UInt32Chunked}; + +#[derive(Clone)] +pub struct ArgMax; + +impl PluginCommand for ArgMax { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars arg-max" + } + + fn usage(&self) -> &str { + "Return index for max value in series." 
+ } + + fn search_terms(&self) -> Vec<&str> { + vec!["argmax", "maximum", "most", "largest", "greatest"] + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Returns index for max value", + example: "[1 3 2] | polars into-df | polars arg-max", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new("arg_max".to_string(), vec![Value::test_int(1)])], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + let series = df.as_series(call.head)?; + + let res = series.arg_max(); + let chunked = match res { + Some(index) => UInt32Chunked::from_slice("arg_max", &[index as u32]), + None => UInt32Chunked::from_slice("arg_max", &[]), + }; + + let res = chunked.into_series(); + let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?; + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&ArgMax) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/arg_min.rs b/crates/nu_plugin_polars/src/dataframe/series/arg_min.rs new file mode 100644 index 0000000000..80876e2736 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/arg_min.rs @@ -0,0 +1,96 @@ +use crate::{ + 
values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::values::{Column, NuDataFrame}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value, +}; +use polars::prelude::{ArgAgg, IntoSeries, NewChunkedArray, UInt32Chunked}; + +#[derive(Clone)] +pub struct ArgMin; + +impl PluginCommand for ArgMin { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars arg-min" + } + + fn usage(&self) -> &str { + "Return index for min value in series." + } + + fn search_terms(&self) -> Vec<&str> { + vec!["argmin", "minimum", "least", "smallest", "lowest"] + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Returns index for min value", + example: "[1 3 2] | polars into-df | polars arg-min", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new("arg_min".to_string(), vec![Value::test_int(0)])], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + let series = df.as_series(call.head)?; + + let res = series.arg_min(); + let chunked = match res { + Some(index) => UInt32Chunked::from_slice("arg_min", &[index as u32]), + None => UInt32Chunked::from_slice("arg_min", &[]), + }; + + let res = 
chunked.into_series(); + let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?; + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&ArgMin) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/cumulative.rs b/crates/nu_plugin_polars/src/dataframe/series/cumulative.rs new file mode 100644 index 0000000000..420a98c9c5 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/cumulative.rs @@ -0,0 +1,159 @@ +use crate::{ + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::values::{Column, NuDataFrame}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Spanned, + SyntaxShape, Type, Value, +}; +use polars::prelude::{DataType, IntoSeries}; +use polars_ops::prelude::{cum_max, cum_min, cum_sum}; + +enum CumulativeType { + Min, + Max, + Sum, +} + +impl CumulativeType { + fn from_str(roll_type: &str, span: Span) -> Result { + match roll_type { + "min" => Ok(Self::Min), + "max" => Ok(Self::Max), + "sum" => Ok(Self::Sum), + _ => Err(ShellError::GenericError { + error: "Wrong operation".into(), + msg: "Operation not valid for cumulative".into(), + span: Some(span), + help: Some("Allowed values: max, min, sum".into()), + inner: vec![], + }), + } + } + + fn to_str(&self) -> &'static str { + match self { + CumulativeType::Min => "cumulative_min", + CumulativeType::Max => "cumulative_max", + CumulativeType::Sum => "cumulative_sum", + } + } +} + +#[derive(Clone)] +pub struct Cumulative; + +impl PluginCommand for Cumulative { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars cumulative" + } + + fn usage(&self) -> &str { + "Cumulative calculation for a series." 
+ } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required("type", SyntaxShape::String, "rolling operation") + .switch("reverse", "Reverse cumulative calculation", Some('r')) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Cumulative sum for a series", + example: "[1 2 3 4 5] | polars into-df | polars cumulative sum", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "0_cumulative_sum".to_string(), + vec![ + Value::test_int(1), + Value::test_int(3), + Value::test_int(6), + Value::test_int(10), + Value::test_int(15), + ], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let cum_type: Spanned = call.req(0)?; + let reverse = call.has_flag("reverse")?; + + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + let series = df.as_series(call.head)?; + + if let DataType::Object(..) 
= series.dtype() { + return Err(ShellError::GenericError { + error: "Found object series".into(), + msg: "Series of type object cannot be used for cumulative operation".into(), + span: Some(call.head), + help: None, + inner: vec![], + }); + } + + let cum_type = CumulativeType::from_str(&cum_type.item, cum_type.span)?; + let mut res = match cum_type { + CumulativeType::Max => cum_max(&series, reverse), + CumulativeType::Min => cum_min(&series, reverse), + CumulativeType::Sum => cum_sum(&series, reverse), + } + .map_err(|e| ShellError::GenericError { + error: "Error creating cumulative".into(), + msg: e.to_string(), + span: Some(call.head), + help: None, + inner: vec![], + })?; + + let name = format!("{}_{}", series.name(), cum_type.to_str()); + res.rename(&name); + + let df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?; + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&Cumulative) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/date/as_date.rs b/crates/nu_plugin_polars/src/dataframe/series/date/as_date.rs new file mode 100644 index 0000000000..8d613917b0 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/date/as_date.rs @@ -0,0 +1,101 @@ +use crate::{values::to_pipeline_data, PolarsPlugin}; + +use super::super::super::values::NuDataFrame; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, SyntaxShape, Type, +}; +use polars::prelude::{IntoSeries, StringMethods}; + +#[derive(Clone)] +pub struct AsDate; + +impl PluginCommand for AsDate { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars as-date" + } + + fn usage(&self) -> &str { + r#"Converts string to date."# + } + + fn extra_usage(&self) -> &str { + 
r#"Format example: + "%Y-%m-%d" => 2021-12-31 + "%d-%m-%Y" => 31-12-2021 + "%Y%m%d" => 2021319 (2021-03-19)"# + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required("format", SyntaxShape::String, "formatting date string") + .switch("not-exact", "the format string may be contained in the date (e.g. foo-2021-01-01-bar could match 2021-01-01)", Some('n')) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Converts string to date", + example: r#"["2021-12-30" "2021-12-31"] | polars into-df | polars as-date "%Y-%m-%d""#, + result: None, + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let format: String = call.req(0)?; + let not_exact = call.has_flag("not-exact")?; + + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + let series = df.as_series(call.head)?; + let casted = series.str().map_err(|e| ShellError::GenericError { + error: "Error casting to string".into(), + msg: e.to_string(), + span: Some(call.head), + help: None, + inner: vec![], + })?; + + let res = if not_exact { + casted.as_date_not_exact(Some(format.as_str())) + } else { + casted.as_date(Some(format.as_str()), false) + }; + + let mut res = res + .map_err(|e| ShellError::GenericError { + error: "Error creating datetime".into(), + msg: e.to_string(), + span: Some(call.head), + help: None, + inner: vec![], + })? 
+ .into_series(); + + res.rename("date"); + + let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?; + to_pipeline_data(plugin, engine, call.head, df) +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/date/as_datetime.rs b/crates/nu_plugin_polars/src/dataframe/series/date/as_datetime.rs new file mode 100644 index 0000000000..1f8d151819 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/date/as_datetime.rs @@ -0,0 +1,198 @@ +use crate::{ + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::super::values::{Column, NuDataFrame}; + +use chrono::DateTime; +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, + Value, +}; +use polars::prelude::{IntoSeries, StringMethods, TimeUnit}; + +#[derive(Clone)] +pub struct AsDateTime; + +impl PluginCommand for AsDateTime { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars as-datetime" + } + + fn usage(&self) -> &str { + r#"Converts string to datetime."# + } + + fn extra_usage(&self) -> &str { + r#"Format example: + "%y/%m/%d %H:%M:%S" => 21/12/31 12:54:98 + "%y-%m-%d %H:%M:%S" => 2021-12-31 24:58:01 + "%y/%m/%d %H:%M:%S" => 21/12/31 24:58:01 + "%y%m%d %H:%M:%S" => 210319 23:58:50 + "%Y/%m/%d %H:%M:%S" => 2021/12/31 12:54:98 + "%Y-%m-%d %H:%M:%S" => 2021-12-31 24:58:01 + "%Y/%m/%d %H:%M:%S" => 2021/12/31 24:58:01 + "%Y%m%d %H:%M:%S" => 20210319 23:58:50 + "%FT%H:%M:%S" => 2019-04-18T02:45:55 + "%FT%H:%M:%S.%6f" => microseconds + "%FT%H:%M:%S.%9f" => nanoseconds"# + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required("format", SyntaxShape::String, "formatting date time string") + .switch("not-exact", "the format string may be contained in the date (e.g. 
foo-2021-01-01-bar could match 2021-01-01)", Some('n')) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Converts string to datetime", + example: r#"["2021-12-30 00:00:00" "2021-12-31 00:00:00"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S""#, + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "datetime".to_string(), + vec![ + Value::date( + DateTime::parse_from_str( + "2021-12-30 00:00:00 +0000", + "%Y-%m-%d %H:%M:%S %z", + ) + .expect("date calculation should not fail in test"), + Span::test_data(), + ), + Value::date( + DateTime::parse_from_str( + "2021-12-31 00:00:00 +0000", + "%Y-%m-%d %H:%M:%S %z", + ) + .expect("date calculation should not fail in test"), + Span::test_data(), + ), + ], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Converts string to datetime with high resolutions", + example: r#"["2021-12-30 00:00:00.123456789" "2021-12-31 00:00:00.123456789"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S.%9f""#, + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "datetime".to_string(), + vec![ + Value::date( + DateTime::parse_from_str( + "2021-12-30 00:00:00.123456789 +0000", + "%Y-%m-%d %H:%M:%S.%9f %z", + ) + .expect("date calculation should not fail in test"), + Span::test_data(), + ), + Value::date( + DateTime::parse_from_str( + "2021-12-31 00:00:00.123456789 +0000", + "%Y-%m-%d %H:%M:%S.%9f %z", + ) + .expect("date calculation should not fail in test"), + Span::test_data(), + ), + ], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result 
{ + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let format: String = call.req(0)?; + let not_exact = call.has_flag("not-exact")?; + + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + let series = df.as_series(call.head)?; + let casted = series.str().map_err(|e| ShellError::GenericError { + error: "Error casting to string".into(), + msg: e.to_string(), + span: Some(call.head), + help: None, + inner: vec![], + })?; + + let res = if not_exact { + casted.as_datetime_not_exact( + Some(format.as_str()), + TimeUnit::Nanoseconds, + false, + None, + &Default::default(), + ) + } else { + casted.as_datetime( + Some(format.as_str()), + TimeUnit::Nanoseconds, + false, + false, + None, + &Default::default(), + ) + }; + + let mut res = res + .map_err(|e| ShellError::GenericError { + error: "Error creating datetime".into(), + msg: e.to_string(), + span: Some(call.head), + help: None, + inner: vec![], + })? 
+ .into_series(); + + res.rename("datetime"); + let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?; + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&AsDateTime) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/date/get_day.rs b/crates/nu_plugin_polars/src/dataframe/series/date/get_day.rs new file mode 100644 index 0000000000..fb0d891640 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/date/get_day.rs @@ -0,0 +1,105 @@ +use crate::{ + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::super::values::NuDataFrame; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, +}; +use polars::{ + prelude::{DatetimeMethods, IntoSeries, NamedFrom}, + series::Series, +}; + +#[derive(Clone)] +pub struct GetDay; + +impl PluginCommand for GetDay { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars get-day" + } + + fn usage(&self) -> &str { + "Gets day from date." 
+ } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Returns day from a date", + example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC'); + let df = ([$dt $dt] | polars into-df); + $df | polars get-day"#, + result: Some( + NuDataFrame::try_from_series(Series::new("0", &[4i8, 4]), Span::test_data()) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } + + fn extra_usage(&self) -> &str { + "" + } + + fn search_terms(&self) -> Vec<&str> { + vec![] + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + let series = df.as_series(call.head)?; + + let casted = series.datetime().map_err(|e| ShellError::GenericError { + error: "Error casting to datetime type".into(), + msg: e.to_string(), + span: Some(call.head), + help: None, + inner: vec![], + })?; + + let res = casted.day().into_series(); + + let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?; + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&GetDay) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/date/get_hour.rs b/crates/nu_plugin_polars/src/dataframe/series/date/get_hour.rs new file mode 100644 index 0000000000..2d658f22aa --- /dev/null +++ 
b/crates/nu_plugin_polars/src/dataframe/series/date/get_hour.rs @@ -0,0 +1,97 @@ +use crate::{ + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::super::values::NuDataFrame; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, +}; +use polars::{ + prelude::{DatetimeMethods, IntoSeries, NamedFrom}, + series::Series, +}; + +#[derive(Clone)] +pub struct GetHour; + +impl PluginCommand for GetHour { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars get-hour" + } + + fn usage(&self) -> &str { + "Gets hour from date." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Returns hour from a date", + example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC'); + let df = ([$dt $dt] | polars into-df); + $df | polars get-hour"#, + result: Some( + NuDataFrame::try_from_series(Series::new("0", &[16i8, 16]), Span::test_data()) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + let series = df.as_series(call.head)?; + + let casted = series.datetime().map_err(|e| ShellError::GenericError { + error: "Error casting to datetime type".into(), + msg: e.to_string(), + span: Some(call.head), + help: 
None, + inner: vec![], + })?; + + let res = casted.hour().into_series(); + + let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?; + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&GetHour) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/date/get_minute.rs b/crates/nu_plugin_polars/src/dataframe/series/date/get_minute.rs new file mode 100644 index 0000000000..4a195bba10 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/date/get_minute.rs @@ -0,0 +1,95 @@ +use crate::{ + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; +use polars::{prelude::NamedFrom, series::Series}; + +use super::super::super::values::NuDataFrame; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, +}; +use polars::prelude::{DatetimeMethods, IntoSeries}; + +#[derive(Clone)] +pub struct GetMinute; + +impl PluginCommand for GetMinute { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars get-minute" + } + + fn usage(&self) -> &str { + "Gets minute from date." 
+ } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Returns minute from a date", + example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC'); + let df = ([$dt $dt] | polars into-df); + $df | polars get-minute"#, + result: Some( + NuDataFrame::try_from_series(Series::new("0", &[39i8, 39]), Span::test_data()) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + let series = df.as_series(call.head)?; + + let casted = series.datetime().map_err(|e| ShellError::GenericError { + error: "Error casting to datetime type".into(), + msg: e.to_string(), + span: Some(call.head), + help: None, + inner: vec![], + })?; + + let res = casted.minute().into_series(); + + let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?; + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&GetMinute) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/date/get_month.rs b/crates/nu_plugin_polars/src/dataframe/series/date/get_month.rs new file mode 100644 index 0000000000..b54fec47ec --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/date/get_month.rs @@ -0,0 +1,97 @@ 
+use crate::{ + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::super::values::NuDataFrame; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, +}; +use polars::{ + prelude::{DatetimeMethods, IntoSeries, NamedFrom}, + series::Series, +}; + +#[derive(Clone)] +pub struct GetMonth; + +impl PluginCommand for GetMonth { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars get-month" + } + + fn usage(&self) -> &str { + "Gets month from date." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Returns month from a date", + example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC'); + let df = ([$dt $dt] | polars into-df); + $df | polars get-month"#, + result: Some( + NuDataFrame::try_from_series(Series::new("0", &[8i8, 8]), Span::test_data()) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + let series = df.as_series(call.head)?; + + let casted = series.datetime().map_err(|e| ShellError::GenericError { + error: "Error casting to datetime type".into(), + msg: e.to_string(), + span: Some(call.head), + help: None, + inner: vec![], + })?; + + let res = casted.month().into_series(); + + 
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?; + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&GetMonth) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/date/get_nanosecond.rs b/crates/nu_plugin_polars/src/dataframe/series/date/get_nanosecond.rs new file mode 100644 index 0000000000..586bf3304f --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/date/get_nanosecond.rs @@ -0,0 +1,97 @@ +use crate::{ + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::super::values::NuDataFrame; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, +}; +use polars::{ + prelude::{DatetimeMethods, IntoSeries, NamedFrom}, + series::Series, +}; + +#[derive(Clone)] +pub struct GetNanosecond; + +impl PluginCommand for GetNanosecond { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars get-nanosecond" + } + + fn usage(&self) -> &str { + "Gets nanosecond from date." 
+ } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Returns nanosecond from a date", + example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC'); + let df = ([$dt $dt] | polars into-df); + $df | polars get-nanosecond"#, + result: Some( + NuDataFrame::try_from_series(Series::new("0", &[0i32, 0]), Span::test_data()) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + let series = df.as_series(call.head)?; + + let casted = series.datetime().map_err(|e| ShellError::GenericError { + error: "Error casting to datetime type".into(), + msg: e.to_string(), + span: Some(call.head), + help: None, + inner: vec![], + })?; + + let res = casted.nanosecond().into_series(); + + let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?; + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&GetNanosecond) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/date/get_ordinal.rs b/crates/nu_plugin_polars/src/dataframe/series/date/get_ordinal.rs new file mode 100644 index 0000000000..0261593cbd --- /dev/null +++ 
b/crates/nu_plugin_polars/src/dataframe/series/date/get_ordinal.rs @@ -0,0 +1,97 @@ +use crate::{ + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::super::values::NuDataFrame; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, +}; +use polars::{ + prelude::{DatetimeMethods, IntoSeries, NamedFrom}, + series::Series, +}; + +#[derive(Clone)] +pub struct GetOrdinal; + +impl PluginCommand for GetOrdinal { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars get-ordinal" + } + + fn usage(&self) -> &str { + "Gets ordinal from date." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Returns ordinal from a date", + example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC'); + let df = ([$dt $dt] | polars into-df); + $df | polars get-ordinal"#, + result: Some( + NuDataFrame::try_from_series(Series::new("0", &[217i16, 217]), Span::test_data()) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + let series = df.as_series(call.head)?; + + let casted = series.datetime().map_err(|e| ShellError::GenericError { + error: "Error casting to datetime type".into(), + msg: e.to_string(), + span: 
Some(call.head), + help: None, + inner: vec![], + })?; + + let res = casted.ordinal().into_series(); + + let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?; + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&GetOrdinal) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/date/get_second.rs b/crates/nu_plugin_polars/src/dataframe/series/date/get_second.rs new file mode 100644 index 0000000000..58e4665a44 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/date/get_second.rs @@ -0,0 +1,97 @@ +use crate::{ + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::super::values::NuDataFrame; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, +}; +use polars::{ + prelude::{DatetimeMethods, IntoSeries, NamedFrom}, + series::Series, +}; + +#[derive(Clone)] +pub struct GetSecond; + +impl PluginCommand for GetSecond { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars get-second" + } + + fn usage(&self) -> &str { + "Gets second from date." 
+ } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Returns second from a date", + example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC'); + let df = ([$dt $dt] | polars into-df); + $df | polars get-second"#, + result: Some( + NuDataFrame::try_from_series(Series::new("0", &[18i8, 18]), Span::test_data()) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + let series = df.as_series(call.head)?; + + let casted = series.datetime().map_err(|e| ShellError::GenericError { + error: "Error casting to datetime type".into(), + msg: e.to_string(), + span: Some(call.head), + help: None, + inner: vec![], + })?; + + let res = casted.second().into_series(); + + let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?; + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&GetSecond) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/date/get_week.rs b/crates/nu_plugin_polars/src/dataframe/series/date/get_week.rs new file mode 100644 index 0000000000..d19dc8c3f3 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/date/get_week.rs @@ -0,0 +1,97 @@ +use 
crate::{ + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::super::values::NuDataFrame; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, +}; +use polars::{ + prelude::{DatetimeMethods, IntoSeries, NamedFrom}, + series::Series, +}; + +#[derive(Clone)] +pub struct GetWeek; + +impl PluginCommand for GetWeek { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars get-week" + } + + fn usage(&self) -> &str { + "Gets week from date." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Returns week from a date", + example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC'); + let df = ([$dt $dt] | polars into-df); + $df | polars get-week"#, + result: Some( + NuDataFrame::try_from_series(Series::new("0", &[32i8, 32]), Span::test_data()) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + let series = df.as_series(call.head)?; + + let casted = series.datetime().map_err(|e| ShellError::GenericError { + error: "Error casting to datetime type".into(), + msg: e.to_string(), + span: Some(call.head), + help: None, + inner: vec![], + })?; + + let res = casted.week().into_series(); + + let df = 
NuDataFrame::try_from_series_vec(vec![res], call.head)?; + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&GetWeek) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/date/get_weekday.rs b/crates/nu_plugin_polars/src/dataframe/series/date/get_weekday.rs new file mode 100644 index 0000000000..ed66f3a357 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/date/get_weekday.rs @@ -0,0 +1,97 @@ +use crate::{ + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::super::values::NuDataFrame; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, +}; +use polars::{ + prelude::{DatetimeMethods, IntoSeries, NamedFrom}, + series::Series, +}; + +#[derive(Clone)] +pub struct GetWeekDay; + +impl PluginCommand for GetWeekDay { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars get-weekday" + } + + fn usage(&self) -> &str { + "Gets weekday from date." 
+ } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Returns weekday from a date", + example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC'); + let df = ([$dt $dt] | polars into-df); + $df | polars get-weekday"#, + result: Some( + NuDataFrame::try_from_series(Series::new("0", &[2i8, 2]), Span::test_data()) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + let series = df.as_series(call.head)?; + + let casted = series.datetime().map_err(|e| ShellError::GenericError { + error: "Error casting to datetime type".into(), + msg: e.to_string(), + span: Some(call.head), + help: None, + inner: vec![], + })?; + + let res = casted.weekday().into_series(); + + let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?; + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&GetWeekDay) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/date/get_year.rs b/crates/nu_plugin_polars/src/dataframe/series/date/get_year.rs new file mode 100644 index 0000000000..7cb548eac0 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/date/get_year.rs @@ -0,0 +1,97 @@ 
+use crate::{ + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::super::values::NuDataFrame; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, +}; +use polars::{ + prelude::{DatetimeMethods, IntoSeries, NamedFrom}, + series::Series, +}; + +#[derive(Clone)] +pub struct GetYear; + +impl PluginCommand for GetYear { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars get-year" + } + + fn usage(&self) -> &str { + "Gets year from date." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Returns year from a date", + example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC'); + let df = ([$dt $dt] | polars into-df); + $df | polars get-year"#, + result: Some( + NuDataFrame::try_from_series(Series::new("0", &[2020i32, 2020]), Span::test_data()) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + let series = df.as_series(call.head)?; + + let casted = series.datetime().map_err(|e| ShellError::GenericError { + error: "Error casting to datetime type".into(), + msg: e.to_string(), + span: Some(call.head), + help: None, + inner: vec![], + })?; + + let res = casted.year().into_series(); + + 
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?; + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&GetYear) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/date/mod.rs b/crates/nu_plugin_polars/src/dataframe/series/date/mod.rs new file mode 100644 index 0000000000..ed3895a172 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/date/mod.rs @@ -0,0 +1,25 @@ +mod as_date; +mod as_datetime; +mod get_day; +mod get_hour; +mod get_minute; +mod get_month; +mod get_nanosecond; +mod get_ordinal; +mod get_second; +mod get_week; +mod get_weekday; +mod get_year; + +pub use as_date::AsDate; +pub use as_datetime::AsDateTime; +pub use get_day::GetDay; +pub use get_hour::GetHour; +pub use get_minute::GetMinute; +pub use get_month::GetMonth; +pub use get_nanosecond::GetNanosecond; +pub use get_ordinal::GetOrdinal; +pub use get_second::GetSecond; +pub use get_week::GetWeek; +pub use get_weekday::GetWeekDay; +pub use get_year::GetYear; diff --git a/crates/nu_plugin_polars/src/dataframe/series/indexes/arg_sort.rs b/crates/nu_plugin_polars/src/dataframe/series/indexes/arg_sort.rs new file mode 100644 index 0000000000..c56deb33ed --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/indexes/arg_sort.rs @@ -0,0 +1,140 @@ +use crate::{ + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::super::values::{Column, NuDataFrame}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value, +}; +use polars::prelude::{IntoSeries, SortOptions}; + +#[derive(Clone)] +pub struct ArgSort; + +impl PluginCommand for ArgSort { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars arg-sort" + } + + 
fn usage(&self) -> &str { + "Returns indexes for a sorted series." + } + + fn search_terms(&self) -> Vec<&str> { + vec!["argsort", "order", "arrange"] + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .switch("reverse", "reverse order", Some('r')) + .switch("nulls-last", "nulls ordered last", Some('n')) + .switch( + "maintain-order", + "maintain order on sorted items", + Some('m'), + ) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Returns indexes for a sorted series", + example: "[1 2 2 3 3] | polars into-df | polars arg-sort", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "arg_sort".to_string(), + vec![ + Value::test_int(0), + Value::test_int(1), + Value::test_int(2), + Value::test_int(3), + Value::test_int(4), + ], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Returns indexes for a sorted series", + example: "[1 2 2 3 3] | polars into-df | polars arg-sort --reverse", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "arg_sort".to_string(), + vec![ + Value::test_int(3), + Value::test_int(4), + Value::test_int(1), + Value::test_int(2), + Value::test_int(0), + ], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + + let sort_options = SortOptions { + 
descending: call.has_flag("reverse")?, + nulls_last: call.has_flag("nulls-last")?, + multithreaded: true, + maintain_order: call.has_flag("maintain-order")?, + }; + + let mut res = df + .as_series(call.head)? + .arg_sort(sort_options) + .into_series(); + res.rename("arg_sort"); + + let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?; + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&ArgSort) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/indexes/arg_true.rs b/crates/nu_plugin_polars/src/dataframe/series/indexes/arg_true.rs new file mode 100644 index 0000000000..d004cb9f7f --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/indexes/arg_true.rs @@ -0,0 +1,126 @@ +use crate::{ + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::super::values::{Column, NuDataFrame}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value, +}; +use polars::prelude::{arg_where, col, IntoLazy}; + +#[derive(Clone)] +pub struct ArgTrue; + +impl PluginCommand for ArgTrue { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars arg-true" + } + + fn usage(&self) -> &str { + "Returns indexes where values are true." 
+ } + + fn search_terms(&self) -> Vec<&str> { + vec!["argtrue", "truth", "boolean-true"] + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Returns indexes where values are true", + example: "[false true false] | polars into-df | polars arg-true", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "arg_true".to_string(), + vec![Value::test_int(1)], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + let columns = df.as_ref().get_column_names(); + if columns.len() > 1 { + return Err(ShellError::GenericError { + error: "Error using as series".into(), + msg: "dataframe has more than one column".into(), + span: Some(call.head), + help: None, + inner: vec![], + }); + } + + match columns.first() { + Some(column) => { + let expression = arg_where(col(column).eq(true)).alias("arg_true"); + let res: NuDataFrame = df + .as_ref() + .clone() + .lazy() + .select(&[expression]) + .collect() + .map_err(|err| ShellError::GenericError { + error: "Error creating index column".into(), + msg: err.to_string(), + span: Some(call.head), + help: None, + inner: vec![], + })? 
+ .into(); + + to_pipeline_data(plugin, engine, call.head, res) + } + _ => Err(ShellError::UnsupportedInput { + msg: "Expected the dataframe to have a column".to_string(), + input: "".to_string(), + msg_span: call.head, + input_span: call.head, + }), + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&ArgTrue) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/indexes/arg_unique.rs b/crates/nu_plugin_polars/src/dataframe/series/indexes/arg_unique.rs new file mode 100644 index 0000000000..92ad4a4952 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/indexes/arg_unique.rs @@ -0,0 +1,104 @@ +use crate::{ + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::super::values::{Column, NuDataFrame}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value, +}; +use polars::prelude::IntoSeries; + +#[derive(Clone)] +pub struct ArgUnique; + +impl PluginCommand for ArgUnique { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars arg-unique" + } + + fn usage(&self) -> &str { + "Returns indexes for unique values." 
+ } + + fn search_terms(&self) -> Vec<&str> { + vec!["argunique", "distinct", "noduplicate", "unrepeated"] + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Returns indexes for unique values", + example: "[1 2 2 3 3] | polars into-df | polars arg-unique", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "arg_unique".to_string(), + vec![Value::test_int(0), Value::test_int(1), Value::test_int(3)], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + + let mut res = df + .as_series(call.head)? + .arg_unique() + .map_err(|e| ShellError::GenericError { + error: "Error extracting unique values".into(), + msg: e.to_string(), + span: Some(call.head), + help: None, + inner: vec![], + })? 
+ .into_series(); + res.rename("arg_unique"); + + let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?; + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&ArgUnique) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/indexes/mod.rs b/crates/nu_plugin_polars/src/dataframe/series/indexes/mod.rs new file mode 100644 index 0000000000..c0af8c8653 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/indexes/mod.rs @@ -0,0 +1,9 @@ +mod arg_sort; +mod arg_true; +mod arg_unique; +mod set_with_idx; + +pub use arg_sort::ArgSort; +pub use arg_true::ArgTrue; +pub use arg_unique::ArgUnique; +pub use set_with_idx::SetWithIndex; diff --git a/crates/nu_plugin_polars/src/dataframe/series/indexes/set_with_idx.rs b/crates/nu_plugin_polars/src/dataframe/series/indexes/set_with_idx.rs new file mode 100644 index 0000000000..25dc952f23 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/indexes/set_with_idx.rs @@ -0,0 +1,227 @@ +use crate::{ + missing_flag_error, + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::super::values::{Column, NuDataFrame}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, + Value, +}; +use polars::prelude::{ChunkSet, DataType, IntoSeries}; + +#[derive(Clone)] +pub struct SetWithIndex; + +impl PluginCommand for SetWithIndex { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars set-with-idx" + } + + fn usage(&self) -> &str { + "Sets value in the given index." 
+ } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required("value", SyntaxShape::Any, "value to be inserted in series") + .required_named( + "indices", + SyntaxShape::Any, + "list of indices indicating where to set the value", + Some('i'), + ) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Set value in selected rows from series", + example: r#"let series = ([4 1 5 2 4 3] | polars into-df); + let indices = ([0 2] | polars into-df); + $series | polars set-with-idx 6 --indices $indices"#, + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + Value::test_int(6), + Value::test_int(1), + Value::test_int(6), + Value::test_int(2), + Value::test_int(4), + Value::test_int(3), + ], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let value: Value = call.req(0)?; + + let indices_value: Value = call + .get_flag("indices")? + .ok_or_else(|| missing_flag_error("indices", call.head))?; + + let indices_span = indices_value.span(); + let indices = NuDataFrame::try_from_value_coerce(plugin, &indices_value, call.head)? 
+ .as_series(indices_span)?; + + let casted = match indices.dtype() { + DataType::UInt32 | DataType::UInt64 | DataType::Int32 | DataType::Int64 => indices + .as_ref() + .cast(&DataType::UInt32) + .map_err(|e| ShellError::GenericError { + error: "Error casting indices".into(), + msg: e.to_string(), + span: Some(indices_span), + help: None, + inner: vec![], + }), + _ => Err(ShellError::GenericError { + error: "Incorrect type".into(), + msg: "Series with incorrect type".into(), + span: Some(indices_span), + help: Some("Consider using a Series with type int type".into()), + inner: vec![], + }), + }?; + + let indices = casted + .u32() + .map_err(|e| ShellError::GenericError { + error: "Error casting indices".into(), + msg: e.to_string(), + span: Some(indices_span), + help: None, + inner: vec![], + })? + .into_iter() + .flatten(); + + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + let series = df.as_series(call.head)?; + + let span = value.span(); + let res = match value { + Value::Int { val, .. } => { + let chunked = series.i64().map_err(|e| ShellError::GenericError { + error: "Error casting to i64".into(), + msg: e.to_string(), + span: Some(span), + help: None, + inner: vec![], + })?; + + let res = chunked.scatter_single(indices, Some(val)).map_err(|e| { + ShellError::GenericError { + error: "Error setting value".into(), + msg: e.to_string(), + span: Some(span), + help: None, + inner: vec![], + } + })?; + + NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head) + } + Value::Float { val, .. 
} => { + let chunked = series.f64().map_err(|e| ShellError::GenericError { + error: "Error casting to f64".into(), + msg: e.to_string(), + span: Some(span), + help: None, + inner: vec![], + })?; + + let res = chunked.scatter_single(indices, Some(val)).map_err(|e| { + ShellError::GenericError { + error: "Error setting value".into(), + msg: e.to_string(), + span: Some(span), + help: None, + inner: vec![], + } + })?; + + NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head) + } + Value::String { val, .. } => { + let chunked = series.str().map_err(|e| ShellError::GenericError { + error: "Error casting to string".into(), + msg: e.to_string(), + span: Some(span), + help: None, + inner: vec![], + })?; + + let res = chunked + .scatter_single(indices, Some(val.as_ref())) + .map_err(|e| ShellError::GenericError { + error: "Error setting value".into(), + msg: e.to_string(), + span: Some(span), + help: None, + inner: vec![], + })?; + + let mut res = res.into_series(); + res.rename("string"); + + NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head) + } + _ => Err(ShellError::GenericError { + error: "Incorrect value type".into(), + msg: format!( + "this value cannot be set in a series of type '{}'", + series.dtype() + ), + span: Some(span), + help: None, + inner: vec![], + }), + }?; + + to_pipeline_data(plugin, engine, call.head, res) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&SetWithIndex) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/masks/is_duplicated.rs b/crates/nu_plugin_polars/src/dataframe/series/masks/is_duplicated.rs new file mode 100644 index 0000000000..56cd6e4d70 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/masks/is_duplicated.rs @@ -0,0 +1,133 @@ +use crate::{ + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use 
super::super::super::values::{Column, NuDataFrame}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value, +}; +use polars::prelude::IntoSeries; + +#[derive(Clone)] +pub struct IsDuplicated; + +impl PluginCommand for IsDuplicated { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars is-duplicated" + } + + fn usage(&self) -> &str { + "Creates mask indicating duplicated values." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Create mask indicating duplicated values", + example: "[5 6 6 6 8 8 8] | polars into-df | polars is-duplicated", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "is_duplicated".to_string(), + vec![ + Value::test_bool(false), + Value::test_bool(true), + Value::test_bool(true), + Value::test_bool(true), + Value::test_bool(true), + Value::test_bool(true), + Value::test_bool(true), + ], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Create mask indicating duplicated rows in a dataframe", + example: + "[[a, b]; [1 2] [1 2] [3 3] [3 3] [1 1]] | polars into-df | polars is-duplicated", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "is_duplicated".to_string(), + vec![ + Value::test_bool(true), + Value::test_bool(true), + Value::test_bool(true), + Value::test_bool(true), + Value::test_bool(false), + ], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + 
) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + + let mut res = df + .as_ref() + .is_duplicated() + .map_err(|e| ShellError::GenericError { + error: "Error finding duplicates".into(), + msg: e.to_string(), + span: Some(call.head), + help: None, + inner: vec![], + })? + .into_series(); + + res.rename("is_duplicated"); + + let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?; + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&IsDuplicated) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/masks/is_not_null.rs b/crates/nu_plugin_polars/src/dataframe/series/masks/is_not_null.rs new file mode 100644 index 0000000000..626e3cb4fa --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/masks/is_not_null.rs @@ -0,0 +1,130 @@ +use crate::{ + values::{ + cant_convert_err, to_pipeline_data, CustomValueSupport, PolarsPluginObject, + PolarsPluginType, + }, + PolarsPlugin, +}; + +use super::super::super::values::{Column, NuDataFrame, NuExpression}; +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value, +}; +use polars::prelude::IntoSeries; + +#[derive(Clone)] +pub struct IsNotNull; + +impl PluginCommand for IsNotNull { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars is-not-null" + } + + fn usage(&self) -> &str { + "Creates mask where value is not null." 
+ } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_types(vec![ + ( + Type::Custom("expression".into()), + Type::Custom("expression".into()), + ), + ( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ), + ]) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Create mask where values are not null", + example: r#"let s = ([5 6 0 8] | polars into-df); + let res = ($s / $s); + $res | polars is-not-null"#, + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "is_not_null".to_string(), + vec![ + Value::test_bool(true), + Value::test_bool(true), + Value::test_bool(false), + Value::test_bool(true), + ], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Creates a is not null expression from a column", + example: "polars col a | polars is-not-null", + result: None, + }, + ] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let value = input.into_value(call.head); + + match PolarsPluginObject::try_from_value(plugin, &value)? { + PolarsPluginObject::NuDataFrame(df) => command(plugin, engine, call, df), + PolarsPluginObject::NuLazyFrame(lazy) => { + command(plugin, engine, call, lazy.collect(call.head)?) 
+ } + PolarsPluginObject::NuExpression(expr) => { + let expr: NuExpression = expr.to_polars().is_not_null().into(); + to_pipeline_data(plugin, engine, call.head, expr) + } + _ => Err(cant_convert_err( + &value, + &[ + PolarsPluginType::NuDataFrame, + PolarsPluginType::NuLazyFrame, + PolarsPluginType::NuExpression, + ], + )), + } + .map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + df: NuDataFrame, +) -> Result { + let mut res = df.as_series(call.head)?.is_not_null(); + res.rename("is_not_null"); + + let df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?; + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&IsNotNull) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/masks/is_null.rs b/crates/nu_plugin_polars/src/dataframe/series/masks/is_null.rs new file mode 100644 index 0000000000..5e10d7056c --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/masks/is_null.rs @@ -0,0 +1,130 @@ +use crate::{ + values::{ + cant_convert_err, to_pipeline_data, CustomValueSupport, NuExpression, PolarsPluginObject, + PolarsPluginType, + }, + PolarsPlugin, +}; + +use super::super::super::values::{Column, NuDataFrame}; +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value, +}; +use polars::prelude::IntoSeries; + +#[derive(Clone)] +pub struct IsNull; + +impl PluginCommand for IsNull { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars is-null" + } + + fn usage(&self) -> &str { + "Creates mask where value is null." 
+ } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_types(vec![ + ( + Type::Custom("expression".into()), + Type::Custom("expression".into()), + ), + ( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ), + ]) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Create mask where values are null", + example: r#"let s = ([5 6 0 8] | polars into-df); + let res = ($s / $s); + $res | polars is-null"#, + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "is_null".to_string(), + vec![ + Value::test_bool(false), + Value::test_bool(false), + Value::test_bool(true), + Value::test_bool(false), + ], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Creates a is null expression from a column", + example: "polars col a | polars is-null", + result: None, + }, + ] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let value = input.into_value(call.head); + + match PolarsPluginObject::try_from_value(plugin, &value)? { + PolarsPluginObject::NuDataFrame(df) => command(plugin, engine, call, df), + PolarsPluginObject::NuLazyFrame(lazy) => { + command(plugin, engine, call, lazy.collect(call.head)?) 
+ } + PolarsPluginObject::NuExpression(expr) => { + let expr: NuExpression = expr.to_polars().is_null().into(); + to_pipeline_data(plugin, engine, call.head, expr) + } + _ => Err(cant_convert_err( + &value, + &[ + PolarsPluginType::NuDataFrame, + PolarsPluginType::NuLazyFrame, + PolarsPluginType::NuExpression, + ], + )), + } + .map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + df: NuDataFrame, +) -> Result { + let mut res = df.as_series(call.head)?.is_null(); + res.rename("is_null"); + + let df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?; + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&IsNull) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/masks/is_unique.rs b/crates/nu_plugin_polars/src/dataframe/series/masks/is_unique.rs new file mode 100644 index 0000000000..b2802e8b95 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/masks/is_unique.rs @@ -0,0 +1,133 @@ +use crate::{ + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::super::values::{Column, NuDataFrame}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value, +}; +use polars::prelude::IntoSeries; + +#[derive(Clone)] +pub struct IsUnique; + +impl PluginCommand for IsUnique { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars is-unique" + } + + fn usage(&self) -> &str { + "Creates mask indicating unique values." 
+ } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Create mask indicating unique values", + example: "[5 6 6 6 8 8 8] | polars into-df | polars is-unique", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "is_unique".to_string(), + vec![ + Value::test_bool(true), + Value::test_bool(false), + Value::test_bool(false), + Value::test_bool(false), + Value::test_bool(false), + Value::test_bool(false), + Value::test_bool(false), + ], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Create mask indicating duplicated rows in a dataframe", + example: + "[[a, b]; [1 2] [1 2] [3 3] [3 3] [1 1]] | polars into-df | polars is-unique", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "is_unique".to_string(), + vec![ + Value::test_bool(false), + Value::test_bool(false), + Value::test_bool(false), + Value::test_bool(false), + Value::test_bool(true), + ], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + + let mut res = df + .as_ref() + .is_unique() + .map_err(|e| ShellError::GenericError { + error: "Error finding unique values".into(), + msg: e.to_string(), + span: Some(call.head), + help: None, + inner: vec![], + })? 
+ .into_series(); + + res.rename("is_unique"); + + let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?; + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&IsUnique) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/masks/mod.rs b/crates/nu_plugin_polars/src/dataframe/series/masks/mod.rs new file mode 100644 index 0000000000..985b14eaec --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/masks/mod.rs @@ -0,0 +1,13 @@ +mod is_duplicated; +mod is_not_null; +mod is_null; +mod is_unique; +mod not; +mod set; + +pub use is_duplicated::IsDuplicated; +pub use is_not_null::IsNotNull; +pub use is_null::IsNull; +pub use is_unique::IsUnique; +pub use not::NotSeries; +pub use set::SetSeries; diff --git a/crates/nu_plugin_polars/src/dataframe/series/masks/not.rs b/crates/nu_plugin_polars/src/dataframe/series/masks/not.rs new file mode 100644 index 0000000000..5d43714365 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/masks/not.rs @@ -0,0 +1,103 @@ +use crate::{ + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::super::values::{Column, NuDataFrame}; +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value, +}; +use polars::prelude::IntoSeries; + +use std::ops::Not; + +#[derive(Clone)] +pub struct NotSeries; + +impl PluginCommand for NotSeries { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars not" + } + + fn usage(&self) -> &str { + "Inverts boolean mask." 
+ } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Inverts boolean mask", + example: "[true false true] | polars into-df | polars not", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + Value::test_bool(false), + Value::test_bool(true), + Value::test_bool(false), + ], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + command(plugin, engine, call, df).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + df: NuDataFrame, +) -> Result { + let series = df.as_series(call.head)?; + + let bool = series.bool().map_err(|e| ShellError::GenericError { + error: "Error inverting mask".into(), + msg: e.to_string(), + span: Some(call.head), + help: None, + inner: vec![], + })?; + + let res = bool.not(); + + let df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?; + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&NotSeries) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/masks/set.rs b/crates/nu_plugin_polars/src/dataframe/series/masks/set.rs new file mode 100644 index 0000000000..3172af46c3 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/masks/set.rs @@ -0,0 +1,210 @@ +use crate::{ + missing_flag_error, + 
values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::super::values::{Column, NuDataFrame}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, + Value, +}; +use polars::prelude::{ChunkSet, DataType, IntoSeries}; + +#[derive(Clone)] +pub struct SetSeries; + +impl PluginCommand for SetSeries { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars set" + } + + fn usage(&self) -> &str { + "Sets value where given mask is true." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required("value", SyntaxShape::Any, "value to be inserted in series") + .required_named( + "mask", + SyntaxShape::Any, + "mask indicating insertions", + Some('m'), + ) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Shifts the values by a given period", + example: r#"let s = ([1 2 2 3 3] | polars into-df | polars shift 2); + let mask = ($s | polars is-null); + $s | polars set 0 --mask $mask"#, + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + Value::test_int(0), + Value::test_int(0), + Value::test_int(1), + Value::test_int(2), + Value::test_int(2), + ], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let value: Value = call.req(0)?; + + let mask_value: Value = call + 
.get_flag("mask")? + .ok_or_else(|| missing_flag_error("mask", call.head))?; + + let mask_span = mask_value.span(); + let mask = + NuDataFrame::try_from_value_coerce(plugin, &mask_value, call.head)?.as_series(mask_span)?; + + let bool_mask = match mask.dtype() { + DataType::Boolean => mask.bool().map_err(|e| ShellError::GenericError { + error: "Error casting to bool".into(), + msg: e.to_string(), + span: Some(mask_span), + help: None, + inner: vec![], + }), + _ => Err(ShellError::GenericError { + error: "Incorrect type".into(), + msg: "can only use bool series as mask".into(), + span: Some(mask_span), + help: None, + inner: vec![], + }), + }?; + + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + let series = df.as_series(call.head)?; + let span = value.span(); + let res = match value { + Value::Int { val, .. } => { + let chunked = series.i64().map_err(|e| ShellError::GenericError { + error: "Error casting to i64".into(), + msg: e.to_string(), + span: Some(span), + help: None, + inner: vec![], + })?; + + let res = chunked + .set(bool_mask, Some(val)) + .map_err(|e| ShellError::GenericError { + error: "Error setting value".into(), + msg: e.to_string(), + span: Some(span), + help: None, + inner: vec![], + })?; + + NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head) + } + Value::Float { val, .. } => { + let chunked = series.f64().map_err(|e| ShellError::GenericError { + error: "Error casting to f64".into(), + msg: e.to_string(), + span: Some(span), + help: None, + inner: vec![], + })?; + + let res = chunked + .set(bool_mask, Some(val)) + .map_err(|e| ShellError::GenericError { + error: "Error setting value".into(), + msg: e.to_string(), + span: Some(span), + help: None, + inner: vec![], + })?; + + NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head) + } + Value::String { val, .. 
} => { + let chunked = series.str().map_err(|e| ShellError::GenericError { + error: "Error casting to string".into(), + msg: e.to_string(), + span: Some(span), + help: None, + inner: vec![], + })?; + + let res = chunked.set(bool_mask, Some(val.as_ref())).map_err(|e| { + ShellError::GenericError { + error: "Error setting value".into(), + msg: e.to_string(), + span: Some(span), + help: None, + inner: vec![], + } + })?; + + let mut res = res.into_series(); + res.rename("string"); + + NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head) + } + _ => Err(ShellError::GenericError { + error: "Incorrect value type".into(), + msg: format!( + "this value cannot be set in a series of type '{}'", + series.dtype() + ), + span: Some(span), + help: None, + inner: vec![], + }), + }?; + + to_pipeline_data(plugin, engine, call.head, res) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&SetSeries) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/mod.rs b/crates/nu_plugin_polars/src/dataframe/series/mod.rs new file mode 100644 index 0000000000..94f28b0801 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/mod.rs @@ -0,0 +1,85 @@ +mod date; +pub use date::*; + +mod string; +pub use string::*; + +mod masks; +pub use masks::*; + +mod indexes; +pub use indexes::*; + +mod all_false; +mod all_true; +mod arg_max; +mod arg_min; +mod cumulative; +mod n_null; +mod n_unique; +mod rolling; +mod shift; +mod unique; +mod value_counts; + +pub use all_false::AllFalse; +use nu_plugin::PluginCommand; + +use crate::PolarsPlugin; +pub use all_true::AllTrue; +pub use arg_max::ArgMax; +pub use arg_min::ArgMin; +pub use cumulative::Cumulative; +pub use n_null::NNull; +pub use n_unique::NUnique; +pub use rolling::Rolling; +pub use shift::Shift; +pub use unique::Unique; +pub use value_counts::ValueCount; + +pub(crate) fn 
series_commands() -> Vec>> { + vec![ + Box::new(AllFalse), + Box::new(AllTrue), + Box::new(ArgMax), + Box::new(ArgMin), + Box::new(ArgSort), + Box::new(ArgTrue), + Box::new(ArgUnique), + Box::new(AsDate), + Box::new(AsDateTime), + Box::new(Concatenate), + Box::new(Contains), + Box::new(Cumulative), + Box::new(GetDay), + Box::new(GetHour), + Box::new(GetMinute), + Box::new(GetMonth), + Box::new(GetNanosecond), + Box::new(GetOrdinal), + Box::new(GetSecond), + Box::new(GetWeek), + Box::new(GetWeekDay), + Box::new(GetYear), + Box::new(IsDuplicated), + Box::new(IsNotNull), + Box::new(IsNull), + Box::new(IsUnique), + Box::new(NNull), + Box::new(NUnique), + Box::new(NotSeries), + Box::new(Replace), + Box::new(ReplaceAll), + Box::new(Rolling), + Box::new(SetSeries), + Box::new(SetWithIndex), + Box::new(Shift), + Box::new(StrLengths), + Box::new(StrSlice), + Box::new(StrFTime), + Box::new(ToLowerCase), + Box::new(ToUpperCase), + Box::new(Unique), + Box::new(ValueCount), + ] +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/n_null.rs b/crates/nu_plugin_polars/src/dataframe/series/n_null.rs new file mode 100644 index 0000000000..fc934b56fb --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/n_null.rs @@ -0,0 +1,93 @@ +use crate::{ + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::values::{Column, NuDataFrame}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value, +}; + +#[derive(Clone)] +pub struct NNull; + +impl PluginCommand for NNull { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars count-null" + } + + fn usage(&self) -> &str { + "Counts null values." 
+ } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Counts null values", + example: r#"let s = ([1 1 0 0 3 3 4] | polars into-df); + ($s / $s) | polars count-null"#, + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "count_null".to_string(), + vec![Value::test_int(2)], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + + let res = df.as_series(call.head)?.null_count(); + let value = Value::int(res as i64, call.head); + + let df = NuDataFrame::try_from_columns( + vec![Column::new("count_null".to_string(), vec![value])], + None, + )?; + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&NNull) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/n_unique.rs b/crates/nu_plugin_polars/src/dataframe/series/n_unique.rs new file mode 100644 index 0000000000..89cde4cc36 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/n_unique.rs @@ -0,0 +1,135 @@ +use crate::{ + values::{ + cant_convert_err, to_pipeline_data, CustomValueSupport, PolarsPluginObject, + PolarsPluginType, + }, + PolarsPlugin, +}; + +use super::super::values::{Column, 
NuDataFrame, NuExpression}; +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value, +}; + +#[derive(Clone)] +pub struct NUnique; + +impl PluginCommand for NUnique { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars n-unique" + } + + fn usage(&self) -> &str { + "Counts unique values." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_types(vec![ + ( + Type::Custom("expression".into()), + Type::Custom("expression".into()), + ), + ( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ), + ]) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Counts unique values", + example: "[1 1 2 2 3 3 4] | polars into-df | polars n-unique", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "count_unique".to_string(), + vec![Value::test_int(4)], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Creates a is n-unique expression from a column", + example: "polars col a | polars n-unique", + result: None, + }, + ] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let value = input.into_value(call.head); + + match PolarsPluginObject::try_from_value(plugin, &value)? { + PolarsPluginObject::NuDataFrame(df) => command(plugin, engine, call, df), + PolarsPluginObject::NuLazyFrame(lazy) => { + command(plugin, engine, call, lazy.collect(call.head)?) 
+ } + PolarsPluginObject::NuExpression(expr) => { + let expr: NuExpression = expr.to_polars().n_unique().into(); + to_pipeline_data(plugin, engine, call.head, expr) + } + _ => Err(cant_convert_err( + &value, + &[ + PolarsPluginType::NuDataFrame, + PolarsPluginType::NuLazyFrame, + PolarsPluginType::NuExpression, + ], + )), + } + .map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + df: NuDataFrame, +) -> Result { + let res = df + .as_series(call.head)? + .n_unique() + .map_err(|e| ShellError::GenericError { + error: "Error counting unique values".into(), + msg: e.to_string(), + span: Some(call.head), + help: None, + inner: vec![], + })?; + + let value = Value::int(res as i64, call.head); + + let df = NuDataFrame::try_from_columns( + vec![Column::new("count_unique".to_string(), vec![value])], + None, + )?; + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&NUnique) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/rolling.rs b/crates/nu_plugin_polars/src/dataframe/series/rolling.rs new file mode 100644 index 0000000000..a64e6f1e70 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/rolling.rs @@ -0,0 +1,196 @@ +use crate::{ + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::values::{Column, NuDataFrame}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Spanned, + SyntaxShape, Type, Value, +}; +use polars::prelude::{DataType, Duration, IntoSeries, RollingOptionsImpl, SeriesOpsTime}; + +enum RollType { + Min, + Max, + Sum, + Mean, +} + +impl RollType { + fn from_str(roll_type: &str, span: Span) -> Result { + match roll_type { + "min" 
=> Ok(Self::Min), + "max" => Ok(Self::Max), + "sum" => Ok(Self::Sum), + "mean" => Ok(Self::Mean), + _ => Err(ShellError::GenericError { + error: "Wrong operation".into(), + msg: "Operation not valid for cumulative".into(), + span: Some(span), + help: Some("Allowed values: min, max, sum, mean".into()), + inner: vec![], + }), + } + } + + fn to_str(&self) -> &'static str { + match self { + RollType::Min => "rolling_min", + RollType::Max => "rolling_max", + RollType::Sum => "rolling_sum", + RollType::Mean => "rolling_mean", + } + } +} + +#[derive(Clone)] +pub struct Rolling; + +impl PluginCommand for Rolling { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars rolling" + } + + fn usage(&self) -> &str { + "Rolling calculation for a series." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required("type", SyntaxShape::String, "rolling operation") + .required("window", SyntaxShape::Int, "Window size for rolling") + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Rolling sum for a series", + example: "[1 2 3 4 5] | polars into-df | polars rolling sum 2 | polars drop-nulls", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "0_rolling_sum".to_string(), + vec![ + Value::test_int(3), + Value::test_int(5), + Value::test_int(7), + Value::test_int(9), + ], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Rolling max for a series", + example: "[1 2 3 4 5] | polars into-df | polars rolling max 2 | polars drop-nulls", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "0_rolling_max".to_string(), + vec![ + Value::test_int(2), + Value::test_int(3), + Value::test_int(4), + Value::test_int(5), + ], + )], + None, + ) + .expect("simple df for 
test should not fail") + .into_value(Span::test_data()), + ), + }, + ] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let roll_type: Spanned = call.req(0)?; + let window_size: i64 = call.req(1)?; + + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + let series = df.as_series(call.head)?; + + if let DataType::Object(..) = series.dtype() { + return Err(ShellError::GenericError { + error: "Found object series".into(), + msg: "Series of type object cannot be used for rolling operation".into(), + span: Some(call.head), + help: None, + inner: vec![], + }); + } + + let roll_type = RollType::from_str(&roll_type.item, roll_type.span)?; + + let rolling_opts = RollingOptionsImpl { + window_size: Duration::new(window_size), + min_periods: window_size as usize, + weights: None, + center: false, + by: None, + closed_window: None, + tu: None, + tz: None, + fn_params: None, + }; + let res = match roll_type { + RollType::Max => series.rolling_max(rolling_opts), + RollType::Min => series.rolling_min(rolling_opts), + RollType::Sum => series.rolling_sum(rolling_opts), + RollType::Mean => series.rolling_mean(rolling_opts), + }; + + let mut res = res.map_err(|e| ShellError::GenericError { + error: "Error calculating rolling values".into(), + msg: e.to_string(), + span: Some(call.head), + help: None, + inner: vec![], + })?; + + let name = format!("{}_{}", series.name(), roll_type.to_str()); + res.rename(&name); + + let df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?; + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() 
-> Result<(), ShellError> { + test_polars_plugin_command(&Rolling) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/shift.rs b/crates/nu_plugin_polars/src/dataframe/series/shift.rs new file mode 100644 index 0000000000..ee35072f84 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/shift.rs @@ -0,0 +1,136 @@ +use crate::{ + dataframe::values::{NuExpression, NuLazyFrame}, + values::{ + cant_convert_err, to_pipeline_data, CustomValueSupport, PolarsPluginObject, + PolarsPluginType, + }, + PolarsPlugin, +}; + +use super::super::values::{Column, NuDataFrame}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, + Value, +}; + +use polars_plan::prelude::lit; + +#[derive(Clone)] +pub struct Shift; + +impl PluginCommand for Shift { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars shift" + } + + fn usage(&self) -> &str { + "Shifts the values by a given period." 
+ } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required("period", SyntaxShape::Int, "shift period") + .named( + "fill", + SyntaxShape::Any, + "Expression used to fill the null values (lazy df)", + Some('f'), + ) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe or lazyframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Shifts the values by a given period", + example: "[1 2 2 3 3] | polars into-df | polars shift 2 | polars drop-nulls", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![Value::test_int(1), Value::test_int(2), Value::test_int(2)], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let value = input.into_value(call.head); + + match PolarsPluginObject::try_from_value(plugin, &value)? 
{ + PolarsPluginObject::NuDataFrame(df) => command_eager(plugin, engine, call, df), + PolarsPluginObject::NuLazyFrame(lazy) => command_lazy(plugin, engine, call, lazy), + _ => Err(cant_convert_err( + &value, + &[ + PolarsPluginType::NuDataFrame, + PolarsPluginType::NuLazyGroupBy, + ], + )), + } + .map_err(LabeledError::from) + } +} + +fn command_eager( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + df: NuDataFrame, +) -> Result { + let period: i64 = call.req(0)?; + let series = df.as_series(call.head)?.shift(period); + + let df = NuDataFrame::try_from_series_vec(vec![series], call.head)?; + to_pipeline_data(plugin, engine, call.head, df) +} + +fn command_lazy( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + lazy: NuLazyFrame, +) -> Result { + let shift: i64 = call.req(0)?; + let fill: Option = call.get_flag("fill")?; + + let lazy = lazy.to_polars(); + + let lazy: NuLazyFrame = match fill { + Some(ref fill) => { + let expr = NuExpression::try_from_value(plugin, fill)?.to_polars(); + lazy.shift_and_fill(lit(shift), expr).into() + } + None => lazy.shift(shift).into(), + }; + + to_pipeline_data(plugin, engine, call.head, lazy) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&Shift) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/string/concatenate.rs b/crates/nu_plugin_polars/src/dataframe/series/string/concatenate.rs new file mode 100644 index 0000000000..25ecbf01b5 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/string/concatenate.rs @@ -0,0 +1,124 @@ +use crate::{ + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::super::values::{Column, NuDataFrame}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, 
Signature, Span, SyntaxShape, Type, + Value, +}; +use polars::prelude::{IntoSeries, StringNameSpaceImpl}; + +#[derive(Clone)] +pub struct Concatenate; + +impl PluginCommand for Concatenate { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars concatenate" + } + + fn usage(&self) -> &str { + "Concatenates strings with other array." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required( + "other", + SyntaxShape::Any, + "Other array with string to be concatenated", + ) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Concatenate string", + example: r#"let other = ([za xs cd] | polars into-df); + [abc abc abc] | polars into-df | polars concatenate $other"#, + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + Value::test_string("abcza"), + Value::test_string("abcxs"), + Value::test_string("abccd"), + ], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + + let other: Value = call.req(0)?; + let other_span = other.span(); + let other_df = NuDataFrame::try_from_value_coerce(plugin, &other, other_span)?; + + let other_series = other_df.as_series(other_span)?; + let other_chunked = other_series.str().map_err(|e| ShellError::GenericError { + error: "The concatenate only with string columns".into(), + msg: e.to_string(), + span: 
Some(other_span), + help: None, + inner: vec![], + })?; + + let series = df.as_series(call.head)?; + let chunked = series.str().map_err(|e| ShellError::GenericError { + error: "The concatenate only with string columns".into(), + msg: e.to_string(), + span: Some(call.head), + help: None, + inner: vec![], + })?; + + let mut res = chunked.concat(other_chunked); + + res.rename(series.name()); + + let df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?; + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&Concatenate) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/string/contains.rs b/crates/nu_plugin_polars/src/dataframe/series/string/contains.rs new file mode 100644 index 0000000000..701da87bfc --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/string/contains.rs @@ -0,0 +1,117 @@ +use crate::{ + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::super::values::{Column, NuDataFrame}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, + Value, +}; +use polars::prelude::{IntoSeries, StringNameSpaceImpl}; + +#[derive(Clone)] +pub struct Contains; + +impl PluginCommand for Contains { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars contains" + } + + fn usage(&self) -> &str { + "Checks if a pattern is contained in a string." 
+ } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required( + "pattern", + SyntaxShape::String, + "Regex pattern to be searched", + ) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Returns boolean indicating if pattern was found", + example: "[abc acb acb] | polars into-df | polars contains ab", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + Value::test_bool(true), + Value::test_bool(false), + Value::test_bool(false), + ], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + let pattern: String = call.req(0)?; + + let series = df.as_series(call.head)?; + let chunked = series.str().map_err(|e| ShellError::GenericError { + error: "The contains command only with string columns".into(), + msg: e.to_string(), + span: Some(call.head), + help: None, + inner: vec![], + })?; + + let res = chunked + .contains(&pattern, false) + .map_err(|e| ShellError::GenericError { + error: "Error searching in series".into(), + msg: e.to_string(), + span: Some(call.head), + help: None, + inner: vec![], + })?; + + let df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?; + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> 
Result<(), ShellError> { + test_polars_plugin_command(&Contains) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/string/mod.rs b/crates/nu_plugin_polars/src/dataframe/series/string/mod.rs new file mode 100644 index 0000000000..f2fa19cbaf --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/string/mod.rs @@ -0,0 +1,19 @@ +mod concatenate; +mod contains; +mod replace; +mod replace_all; +mod str_lengths; +mod str_slice; +mod strftime; +mod to_lowercase; +mod to_uppercase; + +pub use concatenate::Concatenate; +pub use contains::Contains; +pub use replace::Replace; +pub use replace_all::ReplaceAll; +pub use str_lengths::StrLengths; +pub use str_slice::StrSlice; +pub use strftime::StrFTime; +pub use to_lowercase::ToLowerCase; +pub use to_uppercase::ToUpperCase; diff --git a/crates/nu_plugin_polars/src/dataframe/series/string/replace.rs b/crates/nu_plugin_polars/src/dataframe/series/string/replace.rs new file mode 100644 index 0000000000..9a9e02e62c --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/string/replace.rs @@ -0,0 +1,132 @@ +use crate::{ + missing_flag_error, + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::super::values::{Column, NuDataFrame}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, + Value, +}; +use polars::prelude::{IntoSeries, StringNameSpaceImpl}; + +#[derive(Clone)] +pub struct Replace; + +impl PluginCommand for Replace { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars replace" + } + + fn usage(&self) -> &str { + "Replace the leftmost (sub)string by a regex pattern." 
+ } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required_named( + "pattern", + SyntaxShape::String, + "Regex pattern to be matched", + Some('p'), + ) + .required_named( + "replace", + SyntaxShape::String, + "replacing string", + Some('r'), + ) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Replaces string", + example: "[abc abc abc] | polars into-df | polars replace --pattern ab --replace AB", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + Value::test_string("ABc"), + Value::test_string("ABc"), + Value::test_string("ABc"), + ], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let pattern: String = call + .get_flag("pattern")? + .ok_or_else(|| missing_flag_error("pattern", call.head))?; + let replace: String = call + .get_flag("replace")? 
+ .ok_or_else(|| missing_flag_error("replace", call.head))?; + + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + let series = df.as_series(call.head)?; + let chunked = series.str().map_err(|e| ShellError::GenericError { + error: "Error conversion to string".into(), + msg: e.to_string(), + span: Some(call.head), + help: None, + inner: vec![], + })?; + + let mut res = chunked + .replace(&pattern, &replace) + .map_err(|e| ShellError::GenericError { + error: "Error finding pattern other".into(), + msg: e.to_string(), + span: Some(call.head), + help: None, + inner: vec![], + })?; + + res.rename(series.name()); + + let df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?; + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&Replace) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/string/replace_all.rs b/crates/nu_plugin_polars/src/dataframe/series/string/replace_all.rs new file mode 100644 index 0000000000..5011d936f2 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/string/replace_all.rs @@ -0,0 +1,134 @@ +use crate::{ + missing_flag_error, + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::super::values::{Column, NuDataFrame}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, + Value, +}; +use polars::prelude::{IntoSeries, StringNameSpaceImpl}; + +#[derive(Clone)] +pub struct ReplaceAll; + +impl PluginCommand for ReplaceAll { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars replace-all" + } + + fn usage(&self) -> &str { + "Replace all (sub)strings by a regex pattern." 
+ } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required_named( + "pattern", + SyntaxShape::String, + "Regex pattern to be matched", + Some('p'), + ) + .required_named( + "replace", + SyntaxShape::String, + "replacing string", + Some('r'), + ) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Replaces string", + example: + "[abac abac abac] | polars into-df | polars replace-all --pattern a --replace A", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + Value::test_string("AbAc"), + Value::test_string("AbAc"), + Value::test_string("AbAc"), + ], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine_state: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let pattern: String = call + .get_flag("pattern")? + .ok_or_else(|| missing_flag_error("pattern", call.head))?; + let replace: String = call + .get_flag("replace")? 
+ .ok_or_else(|| missing_flag_error("replace", call.head))?; + + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + let series = df.as_series(call.head)?; + let chunked = series.str().map_err(|e| ShellError::GenericError { + error: "Error conversion to string".into(), + msg: e.to_string(), + span: Some(call.head), + help: None, + inner: vec![], + })?; + + let mut res = + chunked + .replace_all(&pattern, &replace) + .map_err(|e| ShellError::GenericError { + error: "Error finding pattern other".into(), + msg: e.to_string(), + span: Some(call.head), + help: None, + inner: vec![], + })?; + + res.rename(series.name()); + + let df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?; + to_pipeline_data(plugin, engine_state, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&ReplaceAll) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/string/str_lengths.rs b/crates/nu_plugin_polars/src/dataframe/series/string/str_lengths.rs new file mode 100644 index 0000000000..28babbb14b --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/string/str_lengths.rs @@ -0,0 +1,98 @@ +use crate::{ + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::super::values::{Column, NuDataFrame}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value, +}; +use polars::prelude::{IntoSeries, StringNameSpaceImpl}; + +#[derive(Clone)] +pub struct StrLengths; + +impl PluginCommand for StrLengths { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars str-lengths" + } + + fn usage(&self) -> &str { + "Get lengths of all strings." 
+ } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Returns string lengths", + example: "[a ab abc] | polars into-df | polars str-lengths", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![Value::test_int(1), Value::test_int(2), Value::test_int(3)], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + let series = df.as_series(call.head)?; + + let chunked = series.str().map_err(|e| ShellError::GenericError { + error: "Error casting to string".into(), + msg: e.to_string(), + span: Some(call.head), + help: Some("The str-lengths command can only be used with string columns".into()), + inner: vec![], + })?; + + let res = chunked.as_ref().str_len_bytes().into_series(); + + let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?; + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&StrLengths) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/string/str_slice.rs b/crates/nu_plugin_polars/src/dataframe/series/string/str_slice.rs new file mode 100644 index 0000000000..51313935d7 --- /dev/null +++ 
b/crates/nu_plugin_polars/src/dataframe/series/string/str_slice.rs @@ -0,0 +1,147 @@ +use crate::{ + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::super::values::{Column, NuDataFrame}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, + Value, +}; +use polars::{ + prelude::{IntoSeries, NamedFrom, StringNameSpaceImpl}, + series::Series, +}; + +#[derive(Clone)] +pub struct StrSlice; + +impl PluginCommand for StrSlice { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars str-slice" + } + + fn usage(&self) -> &str { + "Slices the string from the start position until the selected length." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required("start", SyntaxShape::Int, "start of slice") + .named("length", SyntaxShape::Int, "optional length", Some('l')) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Creates slices from the strings", + example: "[abcded abc321 abc123] | polars into-df | polars str-slice 1 --length 2", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + Value::test_string("bc"), + Value::test_string("bc"), + Value::test_string("bc"), + ], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Creates slices from the strings without length", + example: "[abcded abc321 abc123] | polars into-df | polars str-slice 1", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + Value::test_string("bcded"), + Value::test_string("bc321"), + Value::test_string("bc123"), + ], + )], + None, + ) + 
.expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let start: i64 = call.req(0)?; + let start = Series::new("", &[start]); + + let length: Option = call.get_flag("length")?; + let length = match length { + Some(v) => Series::new("", &[v as u64]), + None => Series::new_null("", 1), + }; + + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + let series = df.as_series(call.head)?; + + let chunked = series.str().map_err(|e| ShellError::GenericError { + error: "Error casting to string".into(), + msg: e.to_string(), + span: Some(call.head), + help: Some("The str-slice command can only be used with string columns".into()), + inner: vec![], + })?; + + let res = chunked + .str_slice(&start, &length) + .map_err(|e| ShellError::GenericError { + error: "Dataframe Error".into(), + msg: e.to_string(), + span: Some(call.head), + help: None, + inner: vec![], + })? 
+ .with_name(series.name()); + + let df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?; + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&StrSlice) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/string/strftime.rs b/crates/nu_plugin_polars/src/dataframe/series/string/strftime.rs new file mode 100644 index 0000000000..0171be8544 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/string/strftime.rs @@ -0,0 +1,116 @@ +use crate::{ + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::super::values::{Column, NuDataFrame}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, + Value, +}; +use polars::prelude::IntoSeries; + +#[derive(Clone)] +pub struct StrFTime; + +impl PluginCommand for StrFTime { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars strftime" + } + + fn usage(&self) -> &str { + "Formats date based on string rule." 
+ } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required("fmt", SyntaxShape::String, "Format rule") + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Formats date", + example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC'); + let df = ([$dt $dt] | polars into-df); + $df | polars strftime "%Y/%m/%d""#, + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + Value::test_string("2020/08/04"), + Value::test_string("2020/08/04"), + ], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let fmt: String = call.req(0)?; + + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + let series = df.as_series(call.head)?; + + let casted = series.datetime().map_err(|e| ShellError::GenericError { + error: "Error casting to date".into(), + msg: e.to_string(), + span: Some(call.head), + help: Some("The str-slice command can only be used with string columns".into()), + inner: vec![], + })?; + + let res = casted + .strftime(&fmt) + .map_err(|e| ShellError::GenericError { + error: "Error formatting datetime".into(), + msg: e.to_string(), + span: Some(call.head), + help: None, + inner: vec![], + })? 
+ .into_series(); + + let df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?; + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&StrFTime) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/string/to_lowercase.rs b/crates/nu_plugin_polars/src/dataframe/series/string/to_lowercase.rs new file mode 100644 index 0000000000..7ff59ab2e8 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/string/to_lowercase.rs @@ -0,0 +1,103 @@ +use crate::{ + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::super::values::{Column, NuDataFrame}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value, +}; +use polars::prelude::{IntoSeries, StringNameSpaceImpl}; + +#[derive(Clone)] +pub struct ToLowerCase; + +impl PluginCommand for ToLowerCase { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars lowercase" + } + + fn usage(&self) -> &str { + "Lowercase the strings in the column." 
+ } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Modifies strings to lowercase", + example: "[Abc aBc abC] | polars into-df | polars lowercase", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + Value::test_string("abc"), + Value::test_string("abc"), + Value::test_string("abc"), + ], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + let series = df.as_series(call.head)?; + + let casted = series.str().map_err(|e| ShellError::GenericError { + error: "Error casting to string".into(), + msg: e.to_string(), + span: Some(call.head), + help: Some("The str-slice command can only be used with string columns".into()), + inner: vec![], + })?; + + let mut res = casted.to_lowercase(); + res.rename(series.name()); + + let df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?; + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&ToLowerCase) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/string/to_uppercase.rs b/crates/nu_plugin_polars/src/dataframe/series/string/to_uppercase.rs new file mode 100644 
index 0000000000..c6913d8427 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/string/to_uppercase.rs @@ -0,0 +1,107 @@ +use crate::{ + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::super::values::{Column, NuDataFrame}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value, +}; +use polars::prelude::{IntoSeries, StringNameSpaceImpl}; + +#[derive(Clone)] +pub struct ToUpperCase; + +impl PluginCommand for ToUpperCase { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars uppercase" + } + + fn usage(&self) -> &str { + "Uppercase the strings in the column." + } + + fn search_terms(&self) -> Vec<&str> { + vec!["capitalize, caps, capital"] + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Modifies strings to uppercase", + example: "[Abc aBc abC] | polars into-df | polars uppercase", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + Value::test_string("ABC"), + Value::test_string("ABC"), + Value::test_string("ABC"), + ], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + let series = df.as_series(call.head)?; + + let 
casted = series.str().map_err(|e| ShellError::GenericError { + error: "Error casting to string".into(), + msg: e.to_string(), + span: Some(call.head), + help: Some("The str-slice command can only be used with string columns".into()), + inner: vec![], + })?; + + let mut res = casted.to_uppercase(); + res.rename(series.name()); + + let df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?; + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&ToUpperCase) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/unique.rs b/crates/nu_plugin_polars/src/dataframe/series/unique.rs new file mode 100644 index 0000000000..c95648159d --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/unique.rs @@ -0,0 +1,163 @@ +use crate::{ + dataframe::{utils::extract_strings, values::NuLazyFrame}, + values::{ + cant_convert_err, to_pipeline_data, CustomValueSupport, PolarsPluginObject, + PolarsPluginType, + }, + PolarsPlugin, +}; + +use super::super::values::{Column, NuDataFrame}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, + Value, +}; +use polars::prelude::{IntoSeries, UniqueKeepStrategy}; + +#[derive(Clone)] +pub struct Unique; + +impl PluginCommand for Unique { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars unique" + } + + fn usage(&self) -> &str { + "Returns unique values from a dataframe." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .named( + "subset", + SyntaxShape::Any, + "Subset of column(s) to use to maintain rows (lazy df)", + Some('s'), + ) + .switch( + "last", + "Keeps last unique value. 
Default keeps first value (lazy df)", + Some('l'), + ) + .switch( + "maintain-order", + "Keep the same order as the original DataFrame (lazy df)", + Some('k'), + ) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe or lazyframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Returns unique values from a series", + example: "[2 2 2 2 2] | polars into-df | polars unique", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new("0".to_string(), vec![Value::test_int(2)])], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Creates a is unique expression from a column", + example: "col a | unique", + result: None, + }, + ] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let value = input.into_value(call.head); + + match PolarsPluginObject::try_from_value(plugin, &value)? 
{ + PolarsPluginObject::NuDataFrame(df) => command_eager(plugin, engine, call, df), + PolarsPluginObject::NuLazyFrame(lazy) => command_lazy(plugin, engine, call, lazy), + _ => Err(cant_convert_err( + &value, + &[ + PolarsPluginType::NuDataFrame, + PolarsPluginType::NuLazyGroupBy, + ], + )), + } + .map_err(LabeledError::from) + } +} + +fn command_eager( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + df: NuDataFrame, +) -> Result { + let series = df.as_series(call.head)?; + + let res = series.unique().map_err(|e| ShellError::GenericError { + error: "Error calculating unique values".into(), + msg: e.to_string(), + span: Some(call.head), + help: Some("The str-slice command can only be used with string columns".into()), + inner: vec![], + })?; + + let df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?; + to_pipeline_data(plugin, engine, call.head, df) +} + +fn command_lazy( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + lazy: NuLazyFrame, +) -> Result { + let last = call.has_flag("last")?; + let maintain = call.has_flag("maintain-order")?; + + let subset: Option = call.get_flag("subset")?; + let subset = match subset { + Some(value) => Some(extract_strings(value)?), + None => None, + }; + + let strategy = if last { + UniqueKeepStrategy::Last + } else { + UniqueKeepStrategy::First + }; + + let lazy = lazy.to_polars(); + let lazy: NuLazyFrame = if maintain { + lazy.unique(subset, strategy).into() + } else { + lazy.unique_stable(subset, strategy).into() + }; + to_pipeline_data(plugin, engine, call.head, lazy) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&Unique) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/value_counts.rs b/crates/nu_plugin_polars/src/dataframe/series/value_counts.rs new file mode 100644 index 
0000000000..3fc7978f49 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/value_counts.rs @@ -0,0 +1,105 @@ +use crate::{ + values::{to_pipeline_data, CustomValueSupport}, + PolarsPlugin, +}; + +use super::super::values::{Column, NuDataFrame}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value, +}; + +use polars::prelude::SeriesMethods; + +#[derive(Clone)] +pub struct ValueCount; + +impl PluginCommand for ValueCount { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars value-counts" + } + + fn usage(&self) -> &str { + "Returns a dataframe with the counts for unique values in series." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Calculates value counts", + example: "[5 5 5 5 6 6] | polars into-df | polars value-counts", + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "0".to_string(), + vec![Value::test_int(5), Value::test_int(6)], + ), + Column::new( + "count".to_string(), + vec![Value::test_int(4), Value::test_int(2)], + ), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + command(plugin, engine, call, input).map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, +) -> Result { + let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + let series = df.as_series(call.head)?; + + let res = series + .value_counts(false, false) + 
.map_err(|e| ShellError::GenericError { + error: "Error calculating value counts values".into(), + msg: e.to_string(), + span: Some(call.head), + help: Some("The str-slice command can only be used with string columns".into()), + inner: vec![], + })?; + + let df: NuDataFrame = res.into(); + to_pipeline_data(plugin, engine, call.head, df) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&ValueCount) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/stub.rs b/crates/nu_plugin_polars/src/dataframe/stub.rs new file mode 100644 index 0000000000..7b50d23e4e --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/stub.rs @@ -0,0 +1,51 @@ +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{Category, LabeledError, PipelineData, Signature, Type, Value}; + +use crate::PolarsPlugin; + +#[derive(Clone)] +pub struct PolarsCmd; + +impl PluginCommand for PolarsCmd { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars" + } + + fn usage(&self) -> &str { + "Operate with data in a dataframe format." + } + + fn signature(&self) -> nu_protocol::Signature { + Signature::build("polars") + .category(Category::Custom("dataframe".into())) + .input_output_types(vec![(Type::Nothing, Type::String)]) + } + + fn extra_usage(&self) -> &str { + "You must use one of the following subcommands. Using this command as-is will only produce this help message." 
+ } + + fn run( + &self, + _plugin: &Self::Plugin, + _engine: &EngineInterface, + call: &EvaluatedCall, + _input: PipelineData, + ) -> Result { + // todo - find a replacement for get_full_help + // Ok(Value::string( + // get_full_help( + // &PolarsCmd.signature(), + // &PolarsCmd.examples(), + // engine_state, + // stack, + // self.is_parser_keyword(), + // ), + // call.head, + // ) + // .into_pipeline_data()) + Ok(PipelineData::Value(Value::nothing(call.head), None)) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/utils.rs b/crates/nu_plugin_polars/src/dataframe/utils.rs new file mode 100644 index 0000000000..db99d550a9 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/utils.rs @@ -0,0 +1,16 @@ +use nu_protocol::{FromValue, ShellError, Value}; + +pub fn extract_strings(value: Value) -> Result, ShellError> { + let span = value.span(); + match ( + ::from_value(value.clone()), + as FromValue>::from_value(value), + ) { + (Ok(col), Err(_)) => Ok(vec![col]), + (Err(_), Ok(cols)) => Ok(cols), + _ => Err(ShellError::IncompatibleParametersSingle { + msg: "Expected a string or list of strings".into(), + span, + }), + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/values/mod.rs b/crates/nu_plugin_polars/src/dataframe/values/mod.rs new file mode 100644 index 0000000000..6f984c6bf8 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/values/mod.rs @@ -0,0 +1,345 @@ +mod nu_dataframe; +mod nu_expression; +mod nu_lazyframe; +mod nu_lazygroupby; +mod nu_schema; +mod nu_when; +pub mod utils; + +use std::{cmp::Ordering, fmt}; + +pub use nu_dataframe::{Axis, Column, NuDataFrame, NuDataFrameCustomValue}; +pub use nu_expression::{NuExpression, NuExpressionCustomValue}; +pub use nu_lazyframe::{NuLazyFrame, NuLazyFrameCustomValue}; +pub use nu_lazygroupby::{NuLazyGroupBy, NuLazyGroupByCustomValue}; +use nu_plugin::EngineInterface; +use nu_protocol::{ast::Operator, CustomValue, PipelineData, ShellError, Span, Spanned, Value}; +pub use 
nu_schema::{str_to_dtype, NuSchema}; +pub use nu_when::{NuWhen, NuWhenCustomValue, NuWhenType}; +use uuid::Uuid; + +use crate::{Cacheable, PolarsPlugin}; + +#[derive(Debug, Clone)] +pub enum PolarsPluginType { + NuDataFrame, + NuLazyFrame, + NuExpression, + NuLazyGroupBy, + NuWhen, +} + +impl fmt::Display for PolarsPluginType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::NuDataFrame => write!(f, "NuDataFrame"), + Self::NuLazyFrame => write!(f, "NuLazyFrame"), + Self::NuExpression => write!(f, "NuExpression"), + Self::NuLazyGroupBy => write!(f, "NuLazyGroupBy"), + Self::NuWhen => write!(f, "NuWhen"), + } + } +} + +#[derive(Debug, Clone)] +pub enum PolarsPluginObject { + NuDataFrame(NuDataFrame), + NuLazyFrame(NuLazyFrame), + NuExpression(NuExpression), + NuLazyGroupBy(NuLazyGroupBy), + NuWhen(NuWhen), +} + +impl PolarsPluginObject { + pub fn try_from_value( + plugin: &PolarsPlugin, + value: &Value, + ) -> Result { + if NuDataFrame::can_downcast(value) { + NuDataFrame::try_from_value(plugin, value).map(PolarsPluginObject::NuDataFrame) + } else if NuLazyFrame::can_downcast(value) { + NuLazyFrame::try_from_value(plugin, value).map(PolarsPluginObject::NuLazyFrame) + } else if NuExpression::can_downcast(value) { + NuExpression::try_from_value(plugin, value).map(PolarsPluginObject::NuExpression) + } else if NuLazyGroupBy::can_downcast(value) { + NuLazyGroupBy::try_from_value(plugin, value).map(PolarsPluginObject::NuLazyGroupBy) + } else if NuWhen::can_downcast(value) { + NuWhen::try_from_value(plugin, value).map(PolarsPluginObject::NuWhen) + } else { + Err(cant_convert_err( + value, + &[ + PolarsPluginType::NuDataFrame, + PolarsPluginType::NuLazyFrame, + PolarsPluginType::NuExpression, + PolarsPluginType::NuLazyGroupBy, + PolarsPluginType::NuWhen, + ], + )) + } + } + + pub fn try_from_pipeline( + plugin: &PolarsPlugin, + input: PipelineData, + span: Span, + ) -> Result { + let value = input.into_value(span); + 
Self::try_from_value(plugin, &value) + } + + pub fn get_type(&self) -> PolarsPluginType { + match self { + Self::NuDataFrame(_) => PolarsPluginType::NuDataFrame, + Self::NuLazyFrame(_) => PolarsPluginType::NuLazyFrame, + Self::NuExpression(_) => PolarsPluginType::NuExpression, + Self::NuLazyGroupBy(_) => PolarsPluginType::NuLazyGroupBy, + Self::NuWhen(_) => PolarsPluginType::NuWhen, + } + } + + pub fn id(&self) -> Uuid { + match self { + PolarsPluginObject::NuDataFrame(df) => df.id, + PolarsPluginObject::NuLazyFrame(lf) => lf.id, + PolarsPluginObject::NuExpression(e) => e.id, + PolarsPluginObject::NuLazyGroupBy(lg) => lg.id, + PolarsPluginObject::NuWhen(w) => w.id, + } + } +} + +#[derive(Debug, Clone)] +pub enum CustomValueType { + NuDataFrame(NuDataFrameCustomValue), + NuLazyFrame(NuLazyFrameCustomValue), + NuExpression(NuExpressionCustomValue), + NuLazyGroupBy(NuLazyGroupByCustomValue), + NuWhen(NuWhenCustomValue), +} + +impl CustomValueType { + pub fn id(&self) -> Uuid { + match self { + CustomValueType::NuDataFrame(df_cv) => df_cv.id, + CustomValueType::NuLazyFrame(lf_cv) => lf_cv.id, + CustomValueType::NuExpression(e_cv) => e_cv.id, + CustomValueType::NuLazyGroupBy(lg_cv) => lg_cv.id, + CustomValueType::NuWhen(w_cv) => w_cv.id, + } + } + + pub fn try_from_custom_value(val: Box) -> Result { + if let Some(df_cv) = val.as_any().downcast_ref::() { + Ok(CustomValueType::NuDataFrame(df_cv.clone())) + } else if let Some(lf_cv) = val.as_any().downcast_ref::() { + Ok(CustomValueType::NuLazyFrame(lf_cv.clone())) + } else if let Some(e_cv) = val.as_any().downcast_ref::() { + Ok(CustomValueType::NuExpression(e_cv.clone())) + } else if let Some(lg_cv) = val.as_any().downcast_ref::() { + Ok(CustomValueType::NuLazyGroupBy(lg_cv.clone())) + } else if let Some(w_cv) = val.as_any().downcast_ref::() { + Ok(CustomValueType::NuWhen(w_cv.clone())) + } else { + Err(ShellError::CantConvert { + to_type: "physical type".into(), + from_type: "value".into(), + span: Span::unknown(), + 
help: None, + }) + } + } +} + +pub fn cant_convert_err(value: &Value, types: &[PolarsPluginType]) -> ShellError { + let type_string = types + .iter() + .map(ToString::to_string) + .collect::>() + .join(", "); + + ShellError::CantConvert { + to_type: type_string, + from_type: value.get_type().to_string(), + span: value.span(), + help: None, + } +} + +pub trait PolarsPluginCustomValue: CustomValue { + type PolarsPluginObjectType: Clone; + + fn id(&self) -> &Uuid; + + fn internal(&self) -> &Option; + + fn custom_value_to_base_value( + &self, + plugin: &PolarsPlugin, + engine: &EngineInterface, + ) -> Result; + + fn custom_value_operation( + &self, + _plugin: &PolarsPlugin, + _engine: &EngineInterface, + _lhs_span: Span, + operator: Spanned, + _right: Value, + ) -> Result { + Err(ShellError::UnsupportedOperator { + operator: operator.item, + span: operator.span, + }) + } + + fn custom_value_follow_path_int( + &self, + _plugin: &PolarsPlugin, + _engine: &EngineInterface, + self_span: Span, + _index: Spanned, + ) -> Result { + Err(ShellError::IncompatiblePathAccess { + type_name: self.type_name(), + span: self_span, + }) + } + + fn custom_value_follow_path_string( + &self, + _plugin: &PolarsPlugin, + _engine: &EngineInterface, + self_span: Span, + _column_name: Spanned, + ) -> Result { + Err(ShellError::IncompatiblePathAccess { + type_name: self.type_name(), + span: self_span, + }) + } + + fn custom_value_partial_cmp( + &self, + _plugin: &PolarsPlugin, + _engine: &EngineInterface, + _other_value: Value, + ) -> Result, ShellError> { + Ok(None) + } +} + +/// Handles the ability for a PolarsObjectType implementations to convert between +/// their respective CustValue type. +/// PolarsPluginObjectType's (NuDataFrame, NuLazyFrame) should +/// implement this trait. 
+pub trait CustomValueSupport: Cacheable { + type CV: PolarsPluginCustomValue + CustomValue + 'static; + + fn get_type(&self) -> PolarsPluginType { + Self::get_type_static() + } + + fn get_type_static() -> PolarsPluginType; + + fn custom_value(self) -> Self::CV; + + fn base_value(self, span: Span) -> Result; + + fn into_value(self, span: Span) -> Value { + Value::custom(Box::new(self.custom_value()), span) + } + + fn try_from_custom_value(plugin: &PolarsPlugin, cv: &Self::CV) -> Result { + if let Some(internal) = cv.internal() { + Ok(internal.clone()) + } else { + Self::get_cached(plugin, cv.id())?.ok_or_else(|| ShellError::GenericError { + error: format!("Dataframe {:?} not found in cache", cv.id()), + msg: "".into(), + span: None, + help: None, + inner: vec![], + }) + } + } + + fn try_from_value(plugin: &PolarsPlugin, value: &Value) -> Result { + if let Value::Custom { val, .. } = value { + if let Some(cv) = val.as_any().downcast_ref::() { + Self::try_from_custom_value(plugin, cv) + } else { + Err(ShellError::CantConvert { + to_type: Self::get_type_static().to_string(), + from_type: value.get_type().to_string(), + span: value.span(), + help: None, + }) + } + } else { + Err(ShellError::CantConvert { + to_type: Self::get_type_static().to_string(), + from_type: value.get_type().to_string(), + span: value.span(), + help: None, + }) + } + } + + fn try_from_pipeline( + plugin: &PolarsPlugin, + input: PipelineData, + span: Span, + ) -> Result { + let value = input.into_value(span); + Self::try_from_value(plugin, &value) + } + + fn can_downcast(value: &Value) -> bool { + if let Value::Custom { val, .. } = value { + val.as_any().downcast_ref::().is_some() + } else { + false + } + } +} + +/// Wraps the cache and into_value calls. +/// This function also does mapping back and forth +/// between lazy and eager values and makes sure they +/// are cached appropriately. 
+pub fn cache_and_to_value( + plugin: &PolarsPlugin, + engine: &EngineInterface, + span: Span, + cv: impl CustomValueSupport, +) -> Result { + match cv.to_cache_value()? { + // if it was from a lazy value, make it lazy again + PolarsPluginObject::NuDataFrame(df) if df.from_lazy => { + let df = df.lazy(); + Ok(df.cache(plugin, engine)?.into_value(span)) + } + // if it was from an eager value, make it eager again + PolarsPluginObject::NuLazyFrame(lf) if lf.from_eager => { + let lf = lf.collect(span)?; + Ok(lf.cache(plugin, engine)?.into_value(span)) + } + _ => Ok(cv.cache(plugin, engine)?.into_value(span)), + } +} + +/// Caches the object, converts it to a it's CustomValue counterpart +/// And creates a pipeline data object out of it +#[inline] +pub fn to_pipeline_data( + plugin: &PolarsPlugin, + engine: &EngineInterface, + span: Span, + cv: impl CustomValueSupport, +) -> Result { + Ok(PipelineData::Value( + cache_and_to_value(plugin, engine, span, cv)?, + None, + )) +} diff --git a/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/between_values.rs b/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/between_values.rs new file mode 100644 index 0000000000..df0854ffee --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/between_values.rs @@ -0,0 +1,889 @@ +use super::{operations::Axis, NuDataFrame}; +use nu_protocol::{ + ast::{Boolean, Comparison, Math, Operator}, + span, ShellError, Span, Spanned, Value, +}; +use num::Zero; +use polars::prelude::{ + BooleanType, ChunkCompare, ChunkedArray, DataType, Float64Type, Int64Type, IntoSeries, + NumOpsDispatchChecked, PolarsError, Series, StringNameSpaceImpl, +}; +use std::ops::{Add, BitAnd, BitOr, Div, Mul, Sub}; + +pub(super) fn between_dataframes( + operator: Spanned, + left: &Value, + lhs: &NuDataFrame, + right: &Value, + rhs: &NuDataFrame, +) -> Result { + let operation_span = span(&[left.span(), right.span()]); + match operator.item { + Operator::Math(Math::Plus) => 
lhs.append_df(rhs, Axis::Row, operation_span), + _ => Err(ShellError::OperatorMismatch { + op_span: operator.span, + lhs_ty: left.get_type().to_string(), + lhs_span: left.span(), + rhs_ty: right.get_type().to_string(), + rhs_span: right.span(), + }), + } +} + +pub(super) fn compute_between_series( + operator: Spanned, + left: &Value, + lhs: &Series, + right: &Value, + rhs: &Series, +) -> Result { + let operation_span = span(&[left.span(), right.span()]); + match operator.item { + Operator::Math(Math::Plus) => { + let mut res = lhs + rhs; + let name = format!("sum_{}_{}", lhs.name(), rhs.name()); + res.rename(&name); + NuDataFrame::try_from_series(res, operation_span) + } + Operator::Math(Math::Minus) => { + let mut res = lhs - rhs; + let name = format!("sub_{}_{}", lhs.name(), rhs.name()); + res.rename(&name); + NuDataFrame::try_from_series(res, operation_span) + } + Operator::Math(Math::Multiply) => { + let mut res = lhs * rhs; + let name = format!("mul_{}_{}", lhs.name(), rhs.name()); + res.rename(&name); + NuDataFrame::try_from_series(res, operation_span) + } + Operator::Math(Math::Divide) => { + let res = lhs.checked_div(rhs); + match res { + Ok(mut res) => { + let name = format!("div_{}_{}", lhs.name(), rhs.name()); + res.rename(&name); + NuDataFrame::try_from_series(res, operation_span) + } + Err(e) => Err(ShellError::GenericError { + error: "Division error".into(), + msg: e.to_string(), + span: Some(right.span()), + help: None, + inner: vec![], + }), + } + } + Operator::Comparison(Comparison::Equal) => { + let name = format!("eq_{}_{}", lhs.name(), rhs.name()); + let res = compare_series(lhs, rhs, name.as_str(), right.span(), Series::equal)?; + NuDataFrame::try_from_series(res, operation_span) + } + Operator::Comparison(Comparison::NotEqual) => { + let name = format!("neq_{}_{}", lhs.name(), rhs.name()); + let res = compare_series(lhs, rhs, name.as_str(), right.span(), Series::not_equal)?; + NuDataFrame::try_from_series(res, operation_span) + } + 
Operator::Comparison(Comparison::LessThan) => { + let name = format!("lt_{}_{}", lhs.name(), rhs.name()); + let res = compare_series(lhs, rhs, name.as_str(), right.span(), Series::lt)?; + NuDataFrame::try_from_series(res, operation_span) + } + Operator::Comparison(Comparison::LessThanOrEqual) => { + let name = format!("lte_{}_{}", lhs.name(), rhs.name()); + let res = compare_series(lhs, rhs, name.as_str(), right.span(), Series::lt_eq)?; + NuDataFrame::try_from_series(res, operation_span) + } + Operator::Comparison(Comparison::GreaterThan) => { + let name = format!("gt_{}_{}", lhs.name(), rhs.name()); + let res = compare_series(lhs, rhs, name.as_str(), right.span(), Series::gt)?; + NuDataFrame::try_from_series(res, operation_span) + } + Operator::Comparison(Comparison::GreaterThanOrEqual) => { + let name = format!("gte_{}_{}", lhs.name(), rhs.name()); + let res = compare_series(lhs, rhs, name.as_str(), right.span(), Series::gt_eq)?; + NuDataFrame::try_from_series(res, operation_span) + } + Operator::Boolean(Boolean::And) => match lhs.dtype() { + DataType::Boolean => { + let lhs_cast = lhs.bool(); + let rhs_cast = rhs.bool(); + + match (lhs_cast, rhs_cast) { + (Ok(l), Ok(r)) => { + let mut res = l.bitand(r).into_series(); + let name = format!("and_{}_{}", lhs.name(), rhs.name()); + res.rename(&name); + NuDataFrame::try_from_series(res, operation_span) + } + _ => Err(ShellError::GenericError { + error: "Incompatible types".into(), + msg: "unable to cast to boolean".into(), + span: Some(right.span()), + help: None, + inner: vec![], + }), + } + } + _ => Err(ShellError::IncompatibleParametersSingle { + msg: format!( + "Operation {} can only be done with boolean values", + operator.item + ), + span: operation_span, + }), + }, + Operator::Boolean(Boolean::Or) => match lhs.dtype() { + DataType::Boolean => { + let lhs_cast = lhs.bool(); + let rhs_cast = rhs.bool(); + + match (lhs_cast, rhs_cast) { + (Ok(l), Ok(r)) => { + let mut res = l.bitor(r).into_series(); + let name = 
format!("or_{}_{}", lhs.name(), rhs.name()); + res.rename(&name); + NuDataFrame::try_from_series(res, operation_span) + } + _ => Err(ShellError::GenericError { + error: "Incompatible types".into(), + msg: "unable to cast to boolean".into(), + span: Some(right.span()), + help: None, + inner: vec![], + }), + } + } + _ => Err(ShellError::IncompatibleParametersSingle { + msg: format!( + "Operation {} can only be done with boolean values", + operator.item + ), + span: operation_span, + }), + }, + _ => Err(ShellError::OperatorMismatch { + op_span: operator.span, + lhs_ty: left.get_type().to_string(), + lhs_span: left.span(), + rhs_ty: right.get_type().to_string(), + rhs_span: right.span(), + }), + } +} + +fn compare_series<'s, F>( + lhs: &'s Series, + rhs: &'s Series, + name: &'s str, + span: Span, + f: F, +) -> Result +where + F: Fn(&'s Series, &'s Series) -> Result, PolarsError>, +{ + let mut res = f(lhs, rhs) + .map_err(|e| ShellError::GenericError { + error: "Equality error".into(), + msg: e.to_string(), + span: Some(span), + help: None, + inner: vec![], + })? + .into_series(); + + res.rename(name); + Ok(res) +} + +pub(super) fn compute_series_single_value( + operator: Spanned, + left: &Value, + lhs: &NuDataFrame, + right: &Value, +) -> Result { + if !lhs.is_series() { + return Err(ShellError::OperatorMismatch { + op_span: operator.span, + lhs_ty: left.get_type().to_string(), + lhs_span: left.span(), + rhs_ty: right.get_type().to_string(), + rhs_span: right.span(), + }); + } + + let lhs_span = left.span(); + let lhs = lhs.as_series(lhs_span)?; + + match operator.item { + Operator::Math(Math::Plus) => match &right { + Value::Int { val, .. } => { + compute_series_i64(&lhs, *val, >::add, lhs_span) + } + Value::Float { val, .. } => { + compute_series_float(&lhs, *val, >::add, lhs_span) + } + Value::String { val, .. 
} => add_string_to_series(&lhs, val, lhs_span), + _ => Err(ShellError::OperatorMismatch { + op_span: operator.span, + lhs_ty: left.get_type().to_string(), + lhs_span: left.span(), + rhs_ty: right.get_type().to_string(), + rhs_span: right.span(), + }), + }, + Operator::Math(Math::Minus) => match &right { + Value::Int { val, .. } => { + compute_series_i64(&lhs, *val, >::sub, lhs_span) + } + Value::Float { val, .. } => { + compute_series_float(&lhs, *val, >::sub, lhs_span) + } + _ => Err(ShellError::OperatorMismatch { + op_span: operator.span, + lhs_ty: left.get_type().to_string(), + lhs_span: left.span(), + rhs_ty: right.get_type().to_string(), + rhs_span: right.span(), + }), + }, + Operator::Math(Math::Multiply) => match &right { + Value::Int { val, .. } => { + compute_series_i64(&lhs, *val, >::mul, lhs_span) + } + Value::Float { val, .. } => { + compute_series_float(&lhs, *val, >::mul, lhs_span) + } + _ => Err(ShellError::OperatorMismatch { + op_span: operator.span, + lhs_ty: left.get_type().to_string(), + lhs_span: left.span(), + rhs_ty: right.get_type().to_string(), + rhs_span: right.span(), + }), + }, + Operator::Math(Math::Divide) => { + let span = right.span(); + match &right { + Value::Int { val, .. } => { + if *val == 0 { + Err(ShellError::DivisionByZero { span }) + } else { + compute_series_i64(&lhs, *val, >::div, lhs_span) + } + } + Value::Float { val, .. } => { + if val.is_zero() { + Err(ShellError::DivisionByZero { span }) + } else { + compute_series_float(&lhs, *val, >::div, lhs_span) + } + } + _ => Err(ShellError::OperatorMismatch { + op_span: operator.span, + lhs_ty: left.get_type().to_string(), + lhs_span: left.span(), + rhs_ty: right.get_type().to_string(), + rhs_span: right.span(), + }), + } + } + Operator::Comparison(Comparison::Equal) => match &right { + Value::Int { val, .. } => compare_series_i64(&lhs, *val, ChunkedArray::equal, lhs_span), + Value::Float { val, .. 
} => { + compare_series_float(&lhs, *val, ChunkedArray::equal, lhs_span) + } + Value::String { val, .. } => { + let equal_pattern = format!("^{}$", fancy_regex::escape(val)); + contains_series_pat(&lhs, &equal_pattern, lhs_span) + } + Value::Date { val, .. } => { + compare_series_i64(&lhs, val.timestamp_millis(), ChunkedArray::equal, lhs_span) + } + _ => Err(ShellError::OperatorMismatch { + op_span: operator.span, + lhs_ty: left.get_type().to_string(), + lhs_span: left.span(), + rhs_ty: right.get_type().to_string(), + rhs_span: right.span(), + }), + }, + Operator::Comparison(Comparison::NotEqual) => match &right { + Value::Int { val, .. } => { + compare_series_i64(&lhs, *val, ChunkedArray::not_equal, lhs_span) + } + Value::Float { val, .. } => { + compare_series_float(&lhs, *val, ChunkedArray::not_equal, lhs_span) + } + Value::Date { val, .. } => compare_series_i64( + &lhs, + val.timestamp_millis(), + ChunkedArray::not_equal, + lhs_span, + ), + _ => Err(ShellError::OperatorMismatch { + op_span: operator.span, + lhs_ty: left.get_type().to_string(), + lhs_span: left.span(), + rhs_ty: right.get_type().to_string(), + rhs_span: right.span(), + }), + }, + Operator::Comparison(Comparison::LessThan) => match &right { + Value::Int { val, .. } => compare_series_i64(&lhs, *val, ChunkedArray::lt, lhs_span), + Value::Float { val, .. } => { + compare_series_float(&lhs, *val, ChunkedArray::lt, lhs_span) + } + Value::Date { val, .. } => { + compare_series_i64(&lhs, val.timestamp_millis(), ChunkedArray::lt, lhs_span) + } + _ => Err(ShellError::OperatorMismatch { + op_span: operator.span, + lhs_ty: left.get_type().to_string(), + lhs_span: left.span(), + rhs_ty: right.get_type().to_string(), + rhs_span: right.span(), + }), + }, + Operator::Comparison(Comparison::LessThanOrEqual) => match &right { + Value::Int { val, .. } => compare_series_i64(&lhs, *val, ChunkedArray::lt_eq, lhs_span), + Value::Float { val, .. 
} => { + compare_series_float(&lhs, *val, ChunkedArray::lt_eq, lhs_span) + } + Value::Date { val, .. } => { + compare_series_i64(&lhs, val.timestamp_millis(), ChunkedArray::lt_eq, lhs_span) + } + _ => Err(ShellError::OperatorMismatch { + op_span: operator.span, + lhs_ty: left.get_type().to_string(), + lhs_span: left.span(), + rhs_ty: right.get_type().to_string(), + rhs_span: right.span(), + }), + }, + Operator::Comparison(Comparison::GreaterThan) => match &right { + Value::Int { val, .. } => compare_series_i64(&lhs, *val, ChunkedArray::gt, lhs_span), + Value::Float { val, .. } => { + compare_series_float(&lhs, *val, ChunkedArray::gt, lhs_span) + } + Value::Date { val, .. } => { + compare_series_i64(&lhs, val.timestamp_millis(), ChunkedArray::gt, lhs_span) + } + _ => Err(ShellError::OperatorMismatch { + op_span: operator.span, + lhs_ty: left.get_type().to_string(), + lhs_span: left.span(), + rhs_ty: right.get_type().to_string(), + rhs_span: right.span(), + }), + }, + Operator::Comparison(Comparison::GreaterThanOrEqual) => match &right { + Value::Int { val, .. } => compare_series_i64(&lhs, *val, ChunkedArray::gt_eq, lhs_span), + Value::Float { val, .. } => { + compare_series_float(&lhs, *val, ChunkedArray::gt_eq, lhs_span) + } + Value::Date { val, .. } => { + compare_series_i64(&lhs, val.timestamp_millis(), ChunkedArray::gt_eq, lhs_span) + } + _ => Err(ShellError::OperatorMismatch { + op_span: operator.span, + lhs_ty: left.get_type().to_string(), + lhs_span: left.span(), + rhs_ty: right.get_type().to_string(), + rhs_span: right.span(), + }), + }, + // TODO: update this to do a regex match instead of a simple contains? + Operator::Comparison(Comparison::RegexMatch) => match &right { + Value::String { val, .. 
} => contains_series_pat(&lhs, val, lhs_span), + _ => Err(ShellError::OperatorMismatch { + op_span: operator.span, + lhs_ty: left.get_type().to_string(), + lhs_span: left.span(), + rhs_ty: right.get_type().to_string(), + rhs_span: right.span(), + }), + }, + Operator::Comparison(Comparison::StartsWith) => match &right { + Value::String { val, .. } => { + let starts_with_pattern = format!("^{}", fancy_regex::escape(val)); + contains_series_pat(&lhs, &starts_with_pattern, lhs_span) + } + _ => Err(ShellError::OperatorMismatch { + op_span: operator.span, + lhs_ty: left.get_type().to_string(), + lhs_span: left.span(), + rhs_ty: right.get_type().to_string(), + rhs_span: right.span(), + }), + }, + Operator::Comparison(Comparison::EndsWith) => match &right { + Value::String { val, .. } => { + let ends_with_pattern = format!("{}$", fancy_regex::escape(val)); + contains_series_pat(&lhs, &ends_with_pattern, lhs_span) + } + _ => Err(ShellError::OperatorMismatch { + op_span: operator.span, + lhs_ty: left.get_type().to_string(), + lhs_span: left.span(), + rhs_ty: right.get_type().to_string(), + rhs_span: right.span(), + }), + }, + _ => Err(ShellError::OperatorMismatch { + op_span: operator.span, + lhs_ty: left.get_type().to_string(), + lhs_span: left.span(), + rhs_ty: right.get_type().to_string(), + rhs_span: right.span(), + }), + } +} + +fn compute_series_i64( + series: &Series, + val: i64, + f: F, + span: Span, +) -> Result +where + F: Fn(ChunkedArray, i64) -> ChunkedArray, +{ + match series.dtype() { + DataType::UInt32 | DataType::Int32 | DataType::UInt64 => { + let to_i64 = series.cast(&DataType::Int64); + + match to_i64 { + Ok(series) => { + let casted = series.i64(); + compute_casted_i64(casted, val, f, span) + } + Err(e) => Err(ShellError::GenericError { + error: "Unable to cast to i64".into(), + msg: e.to_string(), + span: Some(span), + help: None, + inner: vec![], + }), + } + } + DataType::Int64 => { + let casted = series.i64(); + compute_casted_i64(casted, val, f, span) 
+ } + _ => Err(ShellError::GenericError { + error: "Incorrect type".into(), + msg: format!( + "Series of type {} can not be used for operations with an i64 value", + series.dtype() + ), + span: Some(span), + help: None, + inner: vec![], + }), + } +} + +fn compute_casted_i64( + casted: Result<&ChunkedArray, PolarsError>, + val: i64, + f: F, + span: Span, +) -> Result +where + F: Fn(ChunkedArray, i64) -> ChunkedArray, +{ + match casted { + Ok(casted) => { + let res = f(casted.clone(), val); + let res = res.into_series(); + NuDataFrame::try_from_series(res, span) + } + Err(e) => Err(ShellError::GenericError { + error: "Unable to cast to i64".into(), + msg: e.to_string(), + span: Some(span), + help: None, + inner: vec![], + }), + } +} + +fn compute_series_float( + series: &Series, + val: f64, + f: F, + span: Span, +) -> Result +where + F: Fn(ChunkedArray, f64) -> ChunkedArray, +{ + match series.dtype() { + DataType::Float32 => { + let to_f64 = series.cast(&DataType::Float64); + + match to_f64 { + Ok(series) => { + let casted = series.f64(); + compute_casted_f64(casted, val, f, span) + } + Err(e) => Err(ShellError::GenericError { + error: "Unable to cast to f64".into(), + msg: e.to_string(), + span: Some(span), + help: None, + inner: vec![], + }), + } + } + DataType::Float64 => { + let casted = series.f64(); + compute_casted_f64(casted, val, f, span) + } + _ => Err(ShellError::GenericError { + error: "Incorrect type".into(), + msg: format!( + "Series of type {} can not be used for operations with a float value", + series.dtype() + ), + span: Some(span), + help: None, + inner: vec![], + }), + } +} + +fn compute_casted_f64( + casted: Result<&ChunkedArray, PolarsError>, + val: f64, + f: F, + span: Span, +) -> Result +where + F: Fn(ChunkedArray, f64) -> ChunkedArray, +{ + match casted { + Ok(casted) => { + let res = f(casted.clone(), val); + let res = res.into_series(); + NuDataFrame::try_from_series(res, span) + } + Err(e) => Err(ShellError::GenericError { + error: "Unable 
to cast to f64".into(), + msg: e.to_string(), + span: Some(span), + help: None, + inner: vec![], + }), + } +} + +fn compare_series_i64( + series: &Series, + val: i64, + f: F, + span: Span, +) -> Result +where + F: Fn(&ChunkedArray, i64) -> ChunkedArray, +{ + match series.dtype() { + DataType::UInt32 | DataType::Int32 | DataType::UInt64 | DataType::Datetime(_, _) => { + let to_i64 = series.cast(&DataType::Int64); + + match to_i64 { + Ok(series) => { + let casted = series.i64(); + compare_casted_i64(casted, val, f, span) + } + Err(e) => Err(ShellError::GenericError { + error: "Unable to cast to f64".into(), + msg: e.to_string(), + span: Some(span), + help: None, + inner: vec![], + }), + } + } + DataType::Date => { + let to_i64 = series.cast(&DataType::Int64); + + match to_i64 { + Ok(series) => { + let nanosecs_per_day: i64 = 24 * 60 * 60 * 1_000_000_000; + let casted = series + .i64() + .map(|chunked| chunked.mul(nanosecs_per_day)) + .expect("already checked for casting"); + compare_casted_i64(Ok(&casted), val, f, span) + } + Err(e) => Err(ShellError::GenericError { + error: "Unable to cast to f64".into(), + msg: e.to_string(), + span: Some(span), + help: None, + inner: vec![], + }), + } + } + DataType::Int64 => { + let casted = series.i64(); + compare_casted_i64(casted, val, f, span) + } + _ => Err(ShellError::GenericError { + error: "Incorrect type".into(), + msg: format!( + "Series of type {} can not be used for operations with an i64 value", + series.dtype() + ), + span: Some(span), + help: None, + inner: vec![], + }), + } +} + +fn compare_casted_i64( + casted: Result<&ChunkedArray, PolarsError>, + val: i64, + f: F, + span: Span, +) -> Result +where + F: Fn(&ChunkedArray, i64) -> ChunkedArray, +{ + match casted { + Ok(casted) => { + let res = f(casted, val); + let res = res.into_series(); + NuDataFrame::try_from_series(res, span) + } + Err(e) => Err(ShellError::GenericError { + error: "Unable to cast to i64".into(), + msg: e.to_string(), + span: Some(span), + 
help: None, + inner: vec![], + }), + } +} + +fn compare_series_float( + series: &Series, + val: f64, + f: F, + span: Span, +) -> Result +where + F: Fn(&ChunkedArray, f64) -> ChunkedArray, +{ + match series.dtype() { + DataType::Float32 => { + let to_f64 = series.cast(&DataType::Float64); + + match to_f64 { + Ok(series) => { + let casted = series.f64(); + compare_casted_f64(casted, val, f, span) + } + Err(e) => Err(ShellError::GenericError { + error: "Unable to cast to i64".into(), + msg: e.to_string(), + span: Some(span), + help: None, + inner: vec![], + }), + } + } + DataType::Float64 => { + let casted = series.f64(); + compare_casted_f64(casted, val, f, span) + } + _ => Err(ShellError::GenericError { + error: "Incorrect type".into(), + msg: format!( + "Series of type {} can not be used for operations with a float value", + series.dtype() + ), + span: Some(span), + help: None, + inner: vec![], + }), + } +} + +fn compare_casted_f64( + casted: Result<&ChunkedArray, PolarsError>, + val: f64, + f: F, + span: Span, +) -> Result +where + F: Fn(&ChunkedArray, f64) -> ChunkedArray, +{ + match casted { + Ok(casted) => { + let res = f(casted, val); + let res = res.into_series(); + NuDataFrame::try_from_series(res, span) + } + Err(e) => Err(ShellError::GenericError { + error: "Unable to cast to f64".into(), + msg: e.to_string(), + span: Some(span), + help: None, + inner: vec![], + }), + } +} + +fn contains_series_pat(series: &Series, pat: &str, span: Span) -> Result { + let casted = series.str(); + match casted { + Ok(casted) => { + let res = casted.contains(pat, false); + + match res { + Ok(res) => { + let res = res.into_series(); + NuDataFrame::try_from_series(res, span) + } + Err(e) => Err(ShellError::GenericError { + error: "Error using contains".into(), + msg: e.to_string(), + span: Some(span), + help: None, + inner: vec![], + }), + } + } + Err(e) => Err(ShellError::GenericError { + error: "Unable to cast to string".into(), + msg: e.to_string(), + span: Some(span), + 
help: None, + inner: vec![], + }), + } +} + +fn add_string_to_series(series: &Series, pat: &str, span: Span) -> Result { + let casted = series.str(); + match casted { + Ok(casted) => { + let res = casted + pat; + let res = res.into_series(); + + NuDataFrame::try_from_series(res, span) + } + Err(e) => Err(ShellError::GenericError { + error: "Unable to cast to string".into(), + msg: e.to_string(), + span: Some(span), + help: None, + inner: vec![], + }), + } +} + +#[cfg(test)] +mod test { + use super::*; + use nu_protocol::Span; + use polars::{prelude::NamedFrom, series::Series}; + + use crate::{dataframe::values::NuDataFrame, values::CustomValueSupport}; + + #[test] + fn test_compute_between_series_comparisons() { + let series = Series::new("c", &[1, 2]); + let df = NuDataFrame::try_from_series_vec(vec![series], Span::test_data()) + .expect("should be able to create a simple dataframe"); + + let c0 = df + .column("c", Span::test_data()) + .expect("should be able to get column c"); + + let c0_series = c0 + .as_series(Span::test_data()) + .expect("should be able to get series"); + + let c0_value = c0.into_value(Span::test_data()); + + let c1 = df + .column("c", Span::test_data()) + .expect("should be able to get column c"); + + let c1_series = c1 + .as_series(Span::test_data()) + .expect("should be able to get series"); + + let c1_value = c1.into_value(Span::test_data()); + + let op = Spanned { + item: Operator::Comparison(Comparison::NotEqual), + span: Span::test_data(), + }; + let result = compute_between_series(op, &c0_value, &c0_series, &c1_value, &c1_series) + .expect("compare should not fail"); + let result = result + .as_series(Span::test_data()) + .expect("should be convert to a series"); + assert_eq!(result, Series::new("neq_c_c", &[false, false])); + + let op = Spanned { + item: Operator::Comparison(Comparison::Equal), + span: Span::test_data(), + }; + let result = compute_between_series(op, &c0_value, &c0_series, &c1_value, &c1_series) + .expect("compare 
should not fail"); + let result = result + .as_series(Span::test_data()) + .expect("should be convert to a series"); + assert_eq!(result, Series::new("eq_c_c", &[true, true])); + + let op = Spanned { + item: Operator::Comparison(Comparison::LessThan), + span: Span::test_data(), + }; + let result = compute_between_series(op, &c0_value, &c0_series, &c1_value, &c1_series) + .expect("compare should not fail"); + let result = result + .as_series(Span::test_data()) + .expect("should be convert to a series"); + assert_eq!(result, Series::new("lt_c_c", &[false, false])); + + let op = Spanned { + item: Operator::Comparison(Comparison::LessThanOrEqual), + span: Span::test_data(), + }; + let result = compute_between_series(op, &c0_value, &c0_series, &c1_value, &c1_series) + .expect("compare should not fail"); + let result = result + .as_series(Span::test_data()) + .expect("should be convert to a series"); + assert_eq!(result, Series::new("lte_c_c", &[true, true])); + + let op = Spanned { + item: Operator::Comparison(Comparison::GreaterThan), + span: Span::test_data(), + }; + let result = compute_between_series(op, &c0_value, &c0_series, &c1_value, &c1_series) + .expect("compare should not fail"); + let result = result + .as_series(Span::test_data()) + .expect("should be convert to a series"); + assert_eq!(result, Series::new("gt_c_c", &[false, false])); + + let op = Spanned { + item: Operator::Comparison(Comparison::GreaterThanOrEqual), + span: Span::test_data(), + }; + let result = compute_between_series(op, &c0_value, &c0_series, &c1_value, &c1_series) + .expect("compare should not fail"); + let result = result + .as_series(Span::test_data()) + .expect("should be convert to a series"); + assert_eq!(result, Series::new("gte_c_c", &[true, true])); + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/conversion.rs b/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/conversion.rs new file mode 100644 index 0000000000..9474e9cb78 --- /dev/null 
+++ b/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/conversion.rs @@ -0,0 +1,1437 @@ +use std::ops::{Deref, DerefMut}; + +use chrono::{DateTime, Duration, FixedOffset, NaiveTime, TimeZone, Utc}; +use chrono_tz::Tz; +use indexmap::map::{Entry, IndexMap}; +use polars::chunked_array::builder::AnonymousOwnedListBuilder; +use polars::chunked_array::object::builder::ObjectChunkedBuilder; +use polars::chunked_array::ChunkedArray; +use polars::datatypes::AnyValue; +use polars::export::arrow::Either; +use polars::prelude::{ + DataFrame, DataType, DatetimeChunked, Float32Type, Float64Type, Int16Type, Int32Type, + Int64Type, Int8Type, IntoSeries, ListBooleanChunkedBuilder, ListBuilderTrait, + ListPrimitiveChunkedBuilder, ListStringChunkedBuilder, ListType, NamedFrom, NewChunkedArray, + ObjectType, Schema, Series, StructChunked, TemporalMethods, TimeUnit, UInt16Type, UInt32Type, + UInt64Type, UInt8Type, +}; + +use nu_protocol::{Record, ShellError, Span, Value}; + +use crate::dataframe::values::NuSchema; + +use super::{DataFrameValue, NuDataFrame}; + +const NANOS_PER_DAY: i64 = 86_400_000_000_000; + +// The values capacity is for the size of an vec. +// Since this is impossible to determine without traversing every value +// I just picked one. Since this is for converting back and forth +// between nushell tables the values shouldn't be too extremely large for +// practical reasons (~ a few thousand rows). +const VALUES_CAPACITY: usize = 10; + +macro_rules! 
value_to_primitive { + ($value:ident, u8) => { + $value.as_i64().map(|v| v as u8) + }; + ($value:ident, u16) => { + $value.as_i64().map(|v| v as u16) + }; + ($value:ident, u32) => { + $value.as_i64().map(|v| v as u32) + }; + ($value:ident, u64) => { + $value.as_i64().map(|v| v as u64) + }; + ($value:ident, i8) => { + $value.as_i64().map(|v| v as i8) + }; + ($value:ident, i16) => { + $value.as_i64().map(|v| v as i16) + }; + ($value:ident, i32) => { + $value.as_i64().map(|v| v as i32) + }; + ($value:ident, i64) => { + $value.as_i64() + }; + ($value:ident, f32) => { + $value.as_f64().map(|v| v as f32) + }; + ($value:ident, f64) => { + $value.as_f64() + }; +} + +#[derive(Debug)] +pub struct Column { + name: String, + values: Vec, +} + +impl Column { + pub fn new(name: String, values: Vec) -> Self { + Self { name, values } + } + + pub fn new_empty(name: String) -> Self { + Self { + name, + values: Vec::new(), + } + } + + pub fn name(&self) -> &str { + self.name.as_str() + } +} + +impl IntoIterator for Column { + type Item = Value; + type IntoIter = std::vec::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.values.into_iter() + } +} + +impl Deref for Column { + type Target = Vec; + + fn deref(&self) -> &Self::Target { + &self.values + } +} + +impl DerefMut for Column { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.values + } +} + +#[derive(Debug)] +pub struct TypedColumn { + column: Column, + column_type: Option, +} + +impl TypedColumn { + fn new_empty(name: String) -> Self { + Self { + column: Column::new_empty(name), + column_type: None, + } + } +} + +impl Deref for TypedColumn { + type Target = Column; + + fn deref(&self) -> &Self::Target { + &self.column + } +} + +impl DerefMut for TypedColumn { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.column + } +} + +pub type ColumnMap = IndexMap; + +pub fn create_column( + series: &Series, + from_row: usize, + to_row: usize, + span: Span, +) -> Result { + let size = to_row - from_row; 
+ let values = series_to_values(series, Some(from_row), Some(size), span)?; + Ok(Column::new(series.name().into(), values)) +} + +// Adds a separator to the vector of values using the column names from the +// dataframe to create the Values Row +pub fn add_separator(values: &mut Vec, df: &DataFrame, span: Span) { + let mut record = Record::new(); + + record.push("index", Value::string("...", span)); + + for name in df.get_column_names() { + record.push(name, Value::string("...", span)) + } + + values.push(Value::record(record, span)); +} + +// Inserting the values found in a Value::List or Value::Record +pub fn insert_record( + column_values: &mut ColumnMap, + record: Record, + maybe_schema: &Option, +) -> Result<(), ShellError> { + for (col, value) in record { + insert_value(value, col, column_values, maybe_schema)?; + } + + Ok(()) +} + +pub fn insert_value( + value: Value, + key: String, + column_values: &mut ColumnMap, + maybe_schema: &Option, +) -> Result<(), ShellError> { + let col_val = match column_values.entry(key.clone()) { + Entry::Vacant(entry) => entry.insert(TypedColumn::new_empty(key.clone())), + Entry::Occupied(entry) => entry.into_mut(), + }; + + // Checking that the type for the value is the same + // for the previous value in the column + if col_val.values.is_empty() { + if let Some(schema) = maybe_schema { + if let Some(field) = schema.schema.get_field(&key) { + col_val.column_type = Some(field.data_type().clone()); + } + } + + if col_val.column_type.is_none() { + col_val.column_type = Some(value_to_data_type(&value)); + } + + col_val.values.push(value); + } else { + let prev_value = &col_val.values[col_val.values.len() - 1]; + + match (&prev_value, &value) { + (Value::Int { .. }, Value::Int { .. }) + | (Value::Float { .. }, Value::Float { .. }) + | (Value::String { .. }, Value::String { .. }) + | (Value::Bool { .. }, Value::Bool { .. }) + | (Value::Date { .. }, Value::Date { .. }) + | (Value::Filesize { .. }, Value::Filesize { .. 
}) + | (Value::Duration { .. }, Value::Duration { .. }) => col_val.values.push(value), + (Value::List { .. }, _) => { + col_val.column_type = Some(value_to_data_type(&value)); + col_val.values.push(value); + } + _ => { + col_val.column_type = Some(DataType::Object("Value", None)); + col_val.values.push(value); + } + } + } + + Ok(()) +} + +fn value_to_data_type(value: &Value) -> DataType { + match &value { + Value::Int { .. } => DataType::Int64, + Value::Float { .. } => DataType::Float64, + Value::String { .. } => DataType::String, + Value::Bool { .. } => DataType::Boolean, + Value::Date { .. } => DataType::Date, + Value::Duration { .. } => DataType::Duration(TimeUnit::Nanoseconds), + Value::Filesize { .. } => DataType::Int64, + Value::List { vals, .. } => { + // We need to determined the type inside of the list. + // Since Value::List does not have any kind of + // type information, we need to look inside the list. + // This will cause errors if lists have inconsistent types. + // Basically, if a list column needs to be converted to dataframe, + // needs to have consistent types. + let list_type = vals + .iter() + .filter(|v| !matches!(v, Value::Nothing { .. 
})) + .map(value_to_data_type) + .nth(1) + .unwrap_or(DataType::Object("Value", None)); + + DataType::List(Box::new(list_type)) + } + _ => DataType::Object("Value", None), + } +} + +fn typed_column_to_series(name: &str, column: TypedColumn) -> Result { + if let Some(column_type) = &column.column_type { + match column_type { + DataType::Float32 => { + let series_values: Result, _> = column + .values + .iter() + .map(|v| v.as_f64().map(|v| v as f32)) + .collect(); + Ok(Series::new(name, series_values?)) + } + DataType::Float64 => { + let series_values: Result, _> = + column.values.iter().map(|v| v.as_f64()).collect(); + Ok(Series::new(name, series_values?)) + } + DataType::UInt8 => { + let series_values: Result, _> = column + .values + .iter() + .map(|v| v.as_i64().map(|v| v as u8)) + .collect(); + Ok(Series::new(name, series_values?)) + } + DataType::UInt16 => { + let series_values: Result, _> = column + .values + .iter() + .map(|v| v.as_i64().map(|v| v as u16)) + .collect(); + Ok(Series::new(name, series_values?)) + } + DataType::UInt32 => { + let series_values: Result, _> = column + .values + .iter() + .map(|v| v.as_i64().map(|v| v as u32)) + .collect(); + Ok(Series::new(name, series_values?)) + } + DataType::UInt64 => { + let series_values: Result, _> = column + .values + .iter() + .map(|v| v.as_i64().map(|v| v as u64)) + .collect(); + Ok(Series::new(name, series_values?)) + } + DataType::Int8 => { + let series_values: Result, _> = column + .values + .iter() + .map(|v| v.as_i64().map(|v| v as i8)) + .collect(); + Ok(Series::new(name, series_values?)) + } + DataType::Int16 => { + let series_values: Result, _> = column + .values + .iter() + .map(|v| v.as_i64().map(|v| v as i16)) + .collect(); + Ok(Series::new(name, series_values?)) + } + DataType::Int32 => { + let series_values: Result, _> = column + .values + .iter() + .map(|v| v.as_i64().map(|v| v as i32)) + .collect(); + Ok(Series::new(name, series_values?)) + } + DataType::Int64 => { + let series_values: 
Result, _> = + column.values.iter().map(|v| v.as_i64()).collect(); + Ok(Series::new(name, series_values?)) + } + DataType::Boolean => { + let series_values: Result, _> = + column.values.iter().map(|v| v.as_bool()).collect(); + Ok(Series::new(name, series_values?)) + } + DataType::String => { + let series_values: Result, _> = + column.values.iter().map(|v| v.coerce_string()).collect(); + Ok(Series::new(name, series_values?)) + } + DataType::Object(_, _) => value_to_series(name, &column.values), + DataType::Duration(time_unit) => { + let series_values: Result, _> = column + .values + .iter() + .map(|v| v.as_i64().map(|v| nanos_from_timeunit(v, *time_unit))) + .collect(); + Ok(Series::new(name, series_values?)) + } + DataType::List(list_type) => { + match input_type_list_to_series(name, list_type.as_ref(), &column.values) { + Ok(series) => Ok(series), + Err(_) => { + // An error case will occur when there are lists of mixed types. + // If this happens, fallback to object list + input_type_list_to_series( + name, + &DataType::Object("unknown", None), + &column.values, + ) + } + } + } + DataType::Date => { + let it = column.values.iter().map(|v| { + if let Value::Date { val, .. } = &v { + Some(val.timestamp_nanos_opt().unwrap_or_default()) + } else { + None + } + }); + + let res: DatetimeChunked = ChunkedArray::::from_iter_options(name, it) + .into_datetime(TimeUnit::Nanoseconds, None); + + Ok(res.into_series()) + } + DataType::Datetime(tu, maybe_tz) => { + let dates = column + .values + .iter() + .map(|v| { + if let Value::Date { val, .. } = &v { + // If there is a timezone specified, make sure + // the value is converted to it + Ok(maybe_tz + .as_ref() + .map(|tz| tz.parse::().map(|tz| val.with_timezone(&tz))) + .transpose() + .map_err(|e| ShellError::GenericError { + error: "Error parsing timezone".into(), + msg: "".into(), + span: None, + help: Some(e.to_string()), + inner: vec![], + })? 
+ .and_then(|dt| dt.timestamp_nanos_opt()) + .map(|nanos| nanos_from_timeunit(nanos, *tu))) + } else { + Ok(None) + } + }) + .collect::>, ShellError>>()?; + + let res: DatetimeChunked = + ChunkedArray::::from_iter_options(name, dates.into_iter()) + .into_datetime(*tu, maybe_tz.clone()); + + Ok(res.into_series()) + } + DataType::Struct(fields) => { + let schema = Some(NuSchema::new(Schema::from_iter(fields.clone()))); + let mut structs: Vec = Vec::new(); + + for v in column.values.iter() { + let mut column_values: ColumnMap = IndexMap::new(); + let record = v.as_record()?; + insert_record(&mut column_values, record.clone(), &schema)?; + let df = from_parsed_columns(column_values)?; + structs.push(df.as_series(Span::unknown())?); + } + + let chunked = StructChunked::new(column.name(), structs.as_ref()).map_err(|e| { + ShellError::GenericError { + error: format!("Error creating struct: {e}"), + msg: "".into(), + span: None, + help: None, + inner: vec![], + } + })?; + Ok(chunked.into_series()) + } + _ => Err(ShellError::GenericError { + error: format!("Error creating dataframe: Unsupported type: {column_type:?}"), + msg: "".into(), + span: None, + help: None, + inner: vec![], + }), + } + } else { + Err(ShellError::GenericError { + error: "Passed a type column with no type".into(), + msg: "".into(), + span: None, + help: None, + inner: vec![], + }) + } +} + +// The ColumnMap has the parsed data from the StreamInput +// This data can be used to create a Series object that can initialize +// the dataframe based on the type of data that is found +pub fn from_parsed_columns(column_values: ColumnMap) -> Result { + let mut df_series: Vec = Vec::new(); + for (name, column) in column_values { + let series = typed_column_to_series(&name, column)?; + df_series.push(series); + } + + DataFrame::new(df_series) + .map(|df| NuDataFrame::new(false, df)) + .map_err(|e| ShellError::GenericError { + error: "Error creating dataframe".into(), + msg: e.to_string(), + span: None, + help: 
None, + inner: vec![], + }) +} + +fn value_to_series(name: &str, values: &[Value]) -> Result { + let mut builder = ObjectChunkedBuilder::::new(name, values.len()); + + for v in values { + builder.append_value(DataFrameValue::new(v.clone())); + } + + let res = builder.finish(); + Ok(res.into_series()) +} + +fn input_type_list_to_series( + name: &str, + data_type: &DataType, + values: &[Value], +) -> Result { + let inconsistent_error = |_| ShellError::GenericError { + error: format!( + "column {name} contains a list with inconsistent types: Expecting: {data_type:?}" + ), + msg: "".into(), + span: None, + help: None, + inner: vec![], + }; + + macro_rules! primitive_list_series { + ($list_type:ty, $vec_type:tt) => {{ + let mut builder = ListPrimitiveChunkedBuilder::<$list_type>::new( + name, + values.len(), + VALUES_CAPACITY, + data_type.clone(), + ); + + for v in values { + let value_list = v + .as_list()? + .iter() + .map(|v| value_to_primitive!(v, $vec_type)) + .collect::, _>>() + .map_err(inconsistent_error)?; + builder.append_iter_values(value_list.iter().copied()); + } + let res = builder.finish(); + Ok(res.into_series()) + }}; + } + + match *data_type { + // list of boolean values + DataType::Boolean => { + let mut builder = ListBooleanChunkedBuilder::new(name, values.len(), VALUES_CAPACITY); + for v in values { + let value_list = v + .as_list()? 
+ .iter() + .map(|v| v.as_bool()) + .collect::, _>>() + .map_err(inconsistent_error)?; + builder.append_iter(value_list.iter().map(|v| Some(*v))); + } + let res = builder.finish(); + Ok(res.into_series()) + } + DataType::Duration(_) => primitive_list_series!(Int64Type, i64), + DataType::UInt8 => primitive_list_series!(UInt8Type, u8), + DataType::UInt16 => primitive_list_series!(UInt16Type, u16), + DataType::UInt32 => primitive_list_series!(UInt32Type, u32), + DataType::UInt64 => primitive_list_series!(UInt64Type, u64), + DataType::Int8 => primitive_list_series!(Int8Type, i8), + DataType::Int16 => primitive_list_series!(Int16Type, i16), + DataType::Int32 => primitive_list_series!(Int32Type, i32), + DataType::Int64 => primitive_list_series!(Int64Type, i64), + DataType::Float32 => primitive_list_series!(Float32Type, f32), + DataType::Float64 => primitive_list_series!(Float64Type, f64), + DataType::String => { + let mut builder = ListStringChunkedBuilder::new(name, values.len(), VALUES_CAPACITY); + for v in values { + let value_list = v + .as_list()? + .iter() + .map(|v| v.coerce_string()) + .collect::, _>>() + .map_err(inconsistent_error)?; + builder.append_values_iter(value_list.iter().map(AsRef::as_ref)); + } + let res = builder.finish(); + Ok(res.into_series()) + } + DataType::Date => { + let mut builder = AnonymousOwnedListBuilder::new( + name, + values.len(), + Some(DataType::Datetime(TimeUnit::Nanoseconds, None)), + ); + for (i, v) in values.iter().enumerate() { + let list_name = i.to_string(); + + let it = v.as_list()?.iter().map(|v| { + if let Value::Date { val, .. 
} = &v { + Some(val.timestamp_nanos_opt().unwrap_or_default()) + } else { + None + } + }); + let dt_chunked = ChunkedArray::::from_iter_options(&list_name, it) + .into_datetime(TimeUnit::Nanoseconds, None); + + builder + .append_series(&dt_chunked.into_series()) + .map_err(|e| ShellError::GenericError { + error: "Error appending to series".into(), + msg: "".into(), + span: None, + help: Some(e.to_string()), + inner: vec![], + })? + } + let res = builder.finish(); + Ok(res.into_series()) + } + DataType::List(ref sub_list_type) => { + Ok(input_type_list_to_series(name, sub_list_type, values)?) + } + // treat everything else as an object + _ => Ok(value_to_series(name, values)?), + } +} + +fn series_to_values( + series: &Series, + maybe_from_row: Option, + maybe_size: Option, + span: Span, +) -> Result, ShellError> { + match series.dtype() { + DataType::Null => { + let it = std::iter::repeat(Value::nothing(span)); + let values = if let Some(size) = maybe_size { + Either::Left(it.take(size)) + } else { + Either::Right(it) + } + .collect::>(); + + Ok(values) + } + DataType::UInt8 => { + let casted = series.u8().map_err(|e| ShellError::GenericError { + error: "Error casting column to u8".into(), + msg: "".into(), + span: None, + help: Some(e.to_string()), + inner: vec![], + })?; + + let it = casted.into_iter(); + let values = if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { + Either::Left(it.skip(from_row).take(size)) + } else { + Either::Right(it) + } + .map(|v| match v { + Some(a) => Value::int(a as i64, span), + None => Value::nothing(span), + }) + .collect::>(); + + Ok(values) + } + DataType::UInt16 => { + let casted = series.u16().map_err(|e| ShellError::GenericError { + error: "Error casting column to u16".into(), + msg: "".into(), + span: None, + help: Some(e.to_string()), + inner: vec![], + })?; + + let it = casted.into_iter(); + let values = if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { + 
Either::Left(it.skip(from_row).take(size)) + } else { + Either::Right(it) + } + .map(|v| match v { + Some(a) => Value::int(a as i64, span), + None => Value::nothing(span), + }) + .collect::>(); + + Ok(values) + } + DataType::UInt32 => { + let casted = series.u32().map_err(|e| ShellError::GenericError { + error: "Error casting column to u32".into(), + msg: "".into(), + span: None, + help: Some(e.to_string()), + inner: vec![], + })?; + + let it = casted.into_iter(); + let values = if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { + Either::Left(it.skip(from_row).take(size)) + } else { + Either::Right(it) + } + .map(|v| match v { + Some(a) => Value::int(a as i64, span), + None => Value::nothing(span), + }) + .collect::>(); + + Ok(values) + } + DataType::UInt64 => { + let casted = series.u64().map_err(|e| ShellError::GenericError { + error: "Error casting column to u64".into(), + msg: "".into(), + span: None, + help: Some(e.to_string()), + inner: vec![], + })?; + + let it = casted.into_iter(); + let values = if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { + Either::Left(it.skip(from_row).take(size)) + } else { + Either::Right(it) + } + .map(|v| match v { + Some(a) => Value::int(a as i64, span), + None => Value::nothing(span), + }) + .collect::>(); + + Ok(values) + } + DataType::Int8 => { + let casted = series.i8().map_err(|e| ShellError::GenericError { + error: "Error casting column to i8".into(), + msg: "".into(), + span: None, + help: Some(e.to_string()), + inner: vec![], + })?; + + let it = casted.into_iter(); + let values = if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { + Either::Left(it.skip(from_row).take(size)) + } else { + Either::Right(it) + } + .map(|v| match v { + Some(a) => Value::int(a as i64, span), + None => Value::nothing(span), + }) + .collect::>(); + + Ok(values) + } + DataType::Int16 => { + let casted = series.i16().map_err(|e| ShellError::GenericError { + error: "Error casting column to 
i16".into(), + msg: "".into(), + span: None, + help: Some(e.to_string()), + inner: vec![], + })?; + + let it = casted.into_iter(); + let values = if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { + Either::Left(it.skip(from_row).take(size)) + } else { + Either::Right(it) + } + .map(|v| match v { + Some(a) => Value::int(a as i64, span), + None => Value::nothing(span), + }) + .collect::>(); + + Ok(values) + } + DataType::Int32 => { + let casted = series.i32().map_err(|e| ShellError::GenericError { + error: "Error casting column to i32".into(), + msg: "".into(), + span: None, + help: Some(e.to_string()), + inner: vec![], + })?; + + let it = casted.into_iter(); + let values = if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { + Either::Left(it.skip(from_row).take(size)) + } else { + Either::Right(it) + } + .map(|v| match v { + Some(a) => Value::int(a as i64, span), + None => Value::nothing(span), + }) + .collect::>(); + + Ok(values) + } + DataType::Int64 => { + let casted = series.i64().map_err(|e| ShellError::GenericError { + error: "Error casting column to i64".into(), + msg: "".into(), + span: None, + help: Some(e.to_string()), + inner: vec![], + })?; + + let it = casted.into_iter(); + let values = if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { + Either::Left(it.skip(from_row).take(size)) + } else { + Either::Right(it) + } + .map(|v| match v { + Some(a) => Value::int(a, span), + None => Value::nothing(span), + }) + .collect::>(); + + Ok(values) + } + DataType::Float32 => { + let casted = series.f32().map_err(|e| ShellError::GenericError { + error: "Error casting column to f32".into(), + msg: "".into(), + span: None, + help: Some(e.to_string()), + inner: vec![], + })?; + + let it = casted.into_iter(); + let values = if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { + Either::Left(it.skip(from_row).take(size)) + } else { + Either::Right(it) + } + .map(|v| match v { + Some(a) => 
Value::float(a as f64, span), + None => Value::nothing(span), + }) + .collect::>(); + + Ok(values) + } + DataType::Float64 => { + let casted = series.f64().map_err(|e| ShellError::GenericError { + error: "Error casting column to f64".into(), + msg: "".into(), + span: None, + help: Some(e.to_string()), + inner: vec![], + })?; + + let it = casted.into_iter(); + let values = if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { + Either::Left(it.skip(from_row).take(size)) + } else { + Either::Right(it) + } + .map(|v| match v { + Some(a) => Value::float(a, span), + None => Value::nothing(span), + }) + .collect::>(); + + Ok(values) + } + DataType::Boolean => { + let casted = series.bool().map_err(|e| ShellError::GenericError { + error: "Error casting column to bool".into(), + msg: "".into(), + span: None, + help: Some(e.to_string()), + inner: vec![], + })?; + + let it = casted.into_iter(); + let values = if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { + Either::Left(it.skip(from_row).take(size)) + } else { + Either::Right(it) + } + .map(|v| match v { + Some(a) => Value::bool(a, span), + None => Value::nothing(span), + }) + .collect::>(); + + Ok(values) + } + DataType::String => { + let casted = series.str().map_err(|e| ShellError::GenericError { + error: "Error casting column to string".into(), + msg: "".into(), + span: None, + help: Some(e.to_string()), + inner: vec![], + })?; + + let it = casted.into_iter(); + let values = if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { + Either::Left(it.skip(from_row).take(size)) + } else { + Either::Right(it) + } + .map(|v| match v { + Some(a) => Value::string(a.to_string(), span), + None => Value::nothing(span), + }) + .collect::>(); + + Ok(values) + } + DataType::Object(x, _) => { + let casted = series + .as_any() + .downcast_ref::>>(); + + match casted { + None => Err(ShellError::GenericError { + error: "Error casting object from series".into(), + msg: "".into(), + span: 
None, + help: Some(format!("Object not supported for conversion: {x}")), + inner: vec![], + }), + Some(ca) => { + let it = ca.into_iter(); + let values = if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) + { + Either::Left(it.skip(from_row).take(size)) + } else { + Either::Right(it) + } + .map(|v| match v { + Some(a) => a.get_value(), + None => Value::nothing(span), + }) + .collect::>(); + + Ok(values) + } + } + } + DataType::List(x) => { + let casted = series.as_any().downcast_ref::>(); + match casted { + None => Err(ShellError::GenericError { + error: "Error casting list from series".into(), + msg: "".into(), + span: None, + help: Some(format!("List not supported for conversion: {x}")), + inner: vec![], + }), + Some(ca) => { + let it = ca.into_iter(); + if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { + Either::Left(it.skip(from_row).take(size)) + } else { + Either::Right(it) + } + .map(|ca| { + let sublist: Vec = if let Some(ref s) = ca { + series_to_values(s, None, None, Span::unknown())? 
+ } else { + // empty item + vec![] + }; + Ok(Value::list(sublist, span)) + }) + .collect::, ShellError>>() + } + } + } + DataType::Date => { + let casted = series.date().map_err(|e| ShellError::GenericError { + error: "Error casting column to date".into(), + msg: "".into(), + span: None, + help: Some(e.to_string()), + inner: vec![], + })?; + + let it = casted.into_iter(); + let values = if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { + Either::Left(it.skip(from_row).take(size)) + } else { + Either::Right(it) + } + .map(|v| match v { + Some(a) => { + let nanos = nanos_per_day(a); + let datetime = datetime_from_epoch_nanos(nanos, &None, span)?; + Ok(Value::date(datetime, span)) + } + None => Ok(Value::nothing(span)), + }) + .collect::, ShellError>>()?; + Ok(values) + } + DataType::Datetime(time_unit, tz) => { + let casted = series.datetime().map_err(|e| ShellError::GenericError { + error: "Error casting column to datetime".into(), + msg: "".into(), + span: None, + help: Some(e.to_string()), + inner: vec![], + })?; + + let it = casted.into_iter(); + let values = if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { + Either::Left(it.skip(from_row).take(size)) + } else { + Either::Right(it) + } + .map(|v| match v { + Some(a) => { + // elapsed time in nano/micro/milliseconds since 1970-01-01 + let nanos = nanos_from_timeunit(a, *time_unit); + let datetime = datetime_from_epoch_nanos(nanos, tz, span)?; + Ok(Value::date(datetime, span)) + } + None => Ok(Value::nothing(span)), + }) + .collect::, ShellError>>()?; + Ok(values) + } + DataType::Struct(polar_fields) => { + let casted = series.struct_().map_err(|e| ShellError::GenericError { + error: "Error casting column to struct".into(), + msg: "".to_string(), + span: None, + help: Some(e.to_string()), + inner: Vec::new(), + })?; + let it = casted.into_iter(); + let values: Result, ShellError> = + if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { + 
Either::Left(it.skip(from_row).take(size)) + } else { + Either::Right(it) + } + .map(|any_values| { + let record = polar_fields + .iter() + .zip(any_values) + .map(|(field, val)| { + any_value_to_value(val, span).map(|val| (field.name.to_string(), val)) + }) + .collect::>()?; + + Ok(Value::record(record, span)) + }) + .collect(); + values + } + DataType::Time => { + let casted = + series + .timestamp(TimeUnit::Nanoseconds) + .map_err(|e| ShellError::GenericError { + error: "Error casting column to time".into(), + msg: "".into(), + span: None, + help: Some(e.to_string()), + inner: vec![], + })?; + + let it = casted.into_iter(); + let values = if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { + Either::Left(it.skip(from_row).take(size)) + } else { + Either::Right(it) + } + .map(|v| match v { + Some(nanoseconds) => Value::duration(nanoseconds, span), + None => Value::nothing(span), + }) + .collect::>(); + + Ok(values) + } + e => Err(ShellError::GenericError { + error: "Error creating Dataframe".into(), + msg: "".to_string(), + span: None, + help: Some(format!("Value not supported in nushell: {e}")), + inner: vec![], + }), + } +} + +fn any_value_to_value(any_value: &AnyValue, span: Span) -> Result { + match any_value { + AnyValue::Null => Ok(Value::nothing(span)), + AnyValue::Boolean(b) => Ok(Value::bool(*b, span)), + AnyValue::String(s) => Ok(Value::string(s.to_string(), span)), + AnyValue::UInt8(i) => Ok(Value::int(*i as i64, span)), + AnyValue::UInt16(i) => Ok(Value::int(*i as i64, span)), + AnyValue::UInt32(i) => Ok(Value::int(*i as i64, span)), + AnyValue::UInt64(i) => Ok(Value::int(*i as i64, span)), + AnyValue::Int8(i) => Ok(Value::int(*i as i64, span)), + AnyValue::Int16(i) => Ok(Value::int(*i as i64, span)), + AnyValue::Int32(i) => Ok(Value::int(*i as i64, span)), + AnyValue::Int64(i) => Ok(Value::int(*i, span)), + AnyValue::Float32(f) => Ok(Value::float(*f as f64, span)), + AnyValue::Float64(f) => Ok(Value::float(*f, span)), + 
AnyValue::Date(d) => { + let nanos = nanos_per_day(*d); + datetime_from_epoch_nanos(nanos, &None, span) + .map(|datetime| Value::date(datetime, span)) + } + AnyValue::Datetime(a, time_unit, tz) => { + let nanos = nanos_from_timeunit(*a, *time_unit); + datetime_from_epoch_nanos(nanos, tz, span).map(|datetime| Value::date(datetime, span)) + } + AnyValue::Duration(a, time_unit) => { + let nanos = match time_unit { + TimeUnit::Nanoseconds => *a, + TimeUnit::Microseconds => *a * 1_000, + TimeUnit::Milliseconds => *a * 1_000_000, + }; + Ok(Value::duration(nanos, span)) + } + // AnyValue::Time represents the current time since midnight. + // Unfortunately, there is no timezone related information. + // Given this, calculate the current date from UTC and add the time. + AnyValue::Time(nanos) => time_from_midnight(*nanos, span), + AnyValue::List(series) => { + series_to_values(series, None, None, span).map(|values| Value::list(values, span)) + } + AnyValue::Struct(_idx, _struct_array, _s_fields) => { + // This should convert to a StructOwned object. 
+ let static_value = + any_value + .clone() + .into_static() + .map_err(|e| ShellError::GenericError { + error: "Cannot convert polars struct to static value".into(), + msg: e.to_string(), + span: Some(span), + help: None, + inner: Vec::new(), + })?; + any_value_to_value(&static_value, span) + } + AnyValue::StructOwned(struct_tuple) => { + let record = struct_tuple + .1 + .iter() + .zip(&struct_tuple.0) + .map(|(field, val)| { + any_value_to_value(val, span).map(|val| (field.name.to_string(), val)) + }) + .collect::>()?; + + Ok(Value::record(record, span)) + } + AnyValue::StringOwned(s) => Ok(Value::string(s.to_string(), span)), + AnyValue::Binary(bytes) => Ok(Value::binary(*bytes, span)), + AnyValue::BinaryOwned(bytes) => Ok(Value::binary(bytes.to_owned(), span)), + e => Err(ShellError::GenericError { + error: "Error creating Value".into(), + msg: "".to_string(), + span: None, + help: Some(format!("Value not supported in nushell: {e}")), + inner: Vec::new(), + }), + } +} + +fn nanos_per_day(days: i32) -> i64 { + days as i64 * NANOS_PER_DAY +} + +fn nanos_from_timeunit(a: i64, time_unit: TimeUnit) -> i64 { + a * match time_unit { + TimeUnit::Microseconds => 1_000, // Convert microseconds to nanoseconds + TimeUnit::Milliseconds => 1_000_000, // Convert milliseconds to nanoseconds + TimeUnit::Nanoseconds => 1, // Already in nanoseconds + } +} + +fn datetime_from_epoch_nanos( + nanos: i64, + timezone: &Option, + span: Span, +) -> Result, ShellError> { + let tz: Tz = if let Some(polars_tz) = timezone { + polars_tz + .parse::() + .map_err(|_| ShellError::GenericError { + error: format!("Could not parse polars timezone: {polars_tz}"), + msg: "".to_string(), + span: Some(span), + help: None, + inner: vec![], + })? 
+ } else { + Tz::UTC + }; + + Ok(tz.timestamp_nanos(nanos).fixed_offset()) +} + +fn time_from_midnight(nanos: i64, span: Span) -> Result { + let today = Utc::now().date_naive(); + NaiveTime::from_hms_opt(0, 0, 0) // midnight + .map(|time| time + Duration::nanoseconds(nanos)) // current time + .map(|time| today.and_time(time)) // current date and time + .and_then(|datetime| { + FixedOffset::east_opt(0) // utc + .map(|offset| { + DateTime::::from_naive_utc_and_offset(datetime, offset) + }) + }) + .map(|datetime| Value::date(datetime, span)) // current date and time + .ok_or(ShellError::CantConvert { + to_type: "datetime".to_string(), + from_type: "polars time".to_string(), + span, + help: Some("Could not convert polars time of {nanos} to datetime".to_string()), + }) +} + +#[cfg(test)] +mod tests { + use indexmap::indexmap; + use nu_protocol::record; + use polars::export::arrow::array::{BooleanArray, PrimitiveArray}; + use polars::prelude::Field; + use polars_io::prelude::StructArray; + + use super::*; + + #[test] + fn test_parsed_column_string_list() -> Result<(), Box> { + let values = vec![ + Value::list( + vec![Value::string("bar".to_string(), Span::test_data())], + Span::test_data(), + ), + Value::list( + vec![Value::string("baz".to_string(), Span::test_data())], + Span::test_data(), + ), + ]; + let column = Column { + name: "foo".to_string(), + values: values.clone(), + }; + let typed_column = TypedColumn { + column, + column_type: Some(DataType::List(Box::new(DataType::String))), + }; + + let column_map = indexmap!("foo".to_string() => typed_column); + let parsed_df = from_parsed_columns(column_map)?; + let parsed_columns = parsed_df.columns(Span::test_data())?; + assert_eq!(parsed_columns.len(), 1); + let column = parsed_columns + .first() + .expect("There should be a first value in columns"); + assert_eq!(column.name(), "foo"); + assert_eq!(column.values, values); + + Ok(()) + } + + #[test] + fn test_any_value_to_value() -> Result<(), Box> { + let span = 
Span::test_data(); + assert_eq!( + any_value_to_value(&AnyValue::Null, span)?, + Value::nothing(span) + ); + + let test_bool = true; + assert_eq!( + any_value_to_value(&AnyValue::Boolean(test_bool), span)?, + Value::bool(test_bool, span) + ); + + let test_str = "foo"; + assert_eq!( + any_value_to_value(&AnyValue::String(test_str), span)?, + Value::string(test_str.to_string(), span) + ); + assert_eq!( + any_value_to_value(&AnyValue::StringOwned(test_str.into()), span)?, + Value::string(test_str.to_owned(), span) + ); + + let tests_uint8 = 4; + assert_eq!( + any_value_to_value(&AnyValue::UInt8(tests_uint8), span)?, + Value::int(tests_uint8 as i64, span) + ); + + let tests_uint16 = 233; + assert_eq!( + any_value_to_value(&AnyValue::UInt16(tests_uint16), span)?, + Value::int(tests_uint16 as i64, span) + ); + + let tests_uint32 = 897688233; + assert_eq!( + any_value_to_value(&AnyValue::UInt32(tests_uint32), span)?, + Value::int(tests_uint32 as i64, span) + ); + + let tests_uint64 = 903225135897388233; + assert_eq!( + any_value_to_value(&AnyValue::UInt64(tests_uint64), span)?, + Value::int(tests_uint64 as i64, span) + ); + + let tests_float32 = 903225135897388233.3223353; + assert_eq!( + any_value_to_value(&AnyValue::Float32(tests_float32), span)?, + Value::float(tests_float32 as f64, span) + ); + + let tests_float64 = 9064251358973882322333.64233533232; + assert_eq!( + any_value_to_value(&AnyValue::Float64(tests_float64), span)?, + Value::float(tests_float64, span) + ); + + let test_days = 10_957; + let comparison_date = Utc + .with_ymd_and_hms(2000, 1, 1, 0, 0, 0) + .unwrap() + .fixed_offset(); + assert_eq!( + any_value_to_value(&AnyValue::Date(test_days), span)?, + Value::date(comparison_date, span) + ); + + let test_millis = 946_684_800_000; + assert_eq!( + any_value_to_value( + &AnyValue::Datetime(test_millis, TimeUnit::Milliseconds, &None), + span + )?, + Value::date(comparison_date, span) + ); + + let test_duration_millis = 99_999; + let test_duration_micros = 
99_999_000; + let test_duration_nanos = 99_999_000_000; + assert_eq!( + any_value_to_value( + &AnyValue::Duration(test_duration_nanos, TimeUnit::Nanoseconds), + span + )?, + Value::duration(test_duration_nanos, span) + ); + assert_eq!( + any_value_to_value( + &AnyValue::Duration(test_duration_micros, TimeUnit::Microseconds), + span + )?, + Value::duration(test_duration_nanos, span) + ); + assert_eq!( + any_value_to_value( + &AnyValue::Duration(test_duration_millis, TimeUnit::Milliseconds), + span + )?, + Value::duration(test_duration_nanos, span) + ); + + let test_binary = b"sdf2332f32q3f3afwaf3232f32"; + assert_eq!( + any_value_to_value(&AnyValue::Binary(test_binary), span)?, + Value::binary(test_binary.to_vec(), span) + ); + assert_eq!( + any_value_to_value(&AnyValue::BinaryOwned(test_binary.to_vec()), span)?, + Value::binary(test_binary.to_vec(), span) + ); + + let test_time_nanos = 54_000_000_000_000; + let test_time = DateTime::::from_naive_utc_and_offset( + Utc::now() + .date_naive() + .and_time(NaiveTime::from_hms_opt(15, 00, 00).unwrap()), + FixedOffset::east_opt(0).unwrap(), + ); + assert_eq!( + any_value_to_value(&AnyValue::Time(test_time_nanos), span)?, + Value::date(test_time, span) + ); + + let test_list_series = Series::new("int series", &[1, 2, 3]); + let comparison_list_series = Value::list( + vec![ + Value::int(1, span), + Value::int(2, span), + Value::int(3, span), + ], + span, + ); + assert_eq!( + any_value_to_value(&AnyValue::List(test_list_series), span)?, + comparison_list_series + ); + + let field_value_0 = AnyValue::Int32(1); + let field_value_1 = AnyValue::Boolean(true); + let values = vec![field_value_0, field_value_1]; + let field_name_0 = "num_field"; + let field_name_1 = "bool_field"; + let fields = vec![ + Field::new(field_name_0, DataType::Int32), + Field::new(field_name_1, DataType::Boolean), + ]; + let test_owned_struct = AnyValue::StructOwned(Box::new((values, fields.clone()))); + let comparison_owned_record = 
Value::test_record(record!( + field_name_0 => Value::int(1, span), + field_name_1 => Value::bool(true, span), + )); + assert_eq!( + any_value_to_value(&test_owned_struct, span)?, + comparison_owned_record.clone() + ); + + let test_int_arr = PrimitiveArray::from([Some(1_i32)]); + let test_bool_arr = BooleanArray::from([Some(true)]); + let test_struct_arr = StructArray::new( + DataType::Struct(fields.clone()).to_arrow(true), + vec![Box::new(test_int_arr), Box::new(test_bool_arr)], + None, + ); + assert_eq!( + any_value_to_value( + &AnyValue::Struct(0, &test_struct_arr, fields.as_slice()), + span + )?, + comparison_owned_record + ); + + Ok(()) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/custom_value.rs b/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/custom_value.rs new file mode 100644 index 0000000000..a76683c114 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/custom_value.rs @@ -0,0 +1,122 @@ +use std::cmp::Ordering; + +use nu_plugin::EngineInterface; +use nu_protocol::{CustomValue, ShellError, Span, Spanned, Value}; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +use crate::{ + values::{CustomValueSupport, PolarsPluginCustomValue}, + Cacheable, PolarsPlugin, +}; + +use super::NuDataFrame; + +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct NuDataFrameCustomValue { + pub id: Uuid, + #[serde(skip)] + pub dataframe: Option, +} + +// CustomValue implementation for NuDataFrame +#[typetag::serde] +impl CustomValue for NuDataFrameCustomValue { + fn clone_value(&self, span: nu_protocol::Span) -> Value { + Value::custom(Box::new(self.clone()), span) + } + + fn type_name(&self) -> String { + "NuDataFrameCustomValue".into() + } + + fn to_base_value(&self, span: Span) -> Result { + Ok(Value::string( + "NuDataFrameValue: custom_value_to_base_value should've been called", + span, + )) + } + + fn as_mut_any(&mut self) -> &mut dyn std::any::Any { + self + } + + fn as_any(&self) -> &dyn 
std::any::Any { + self + } + + fn notify_plugin_on_drop(&self) -> bool { + true + } +} + +impl PolarsPluginCustomValue for NuDataFrameCustomValue { + type PolarsPluginObjectType = NuDataFrame; + + fn id(&self) -> &Uuid { + &self.id + } + + fn internal(&self) -> &Option { + &self.dataframe + } + + fn custom_value_to_base_value( + &self, + plugin: &crate::PolarsPlugin, + _engine: &nu_plugin::EngineInterface, + ) -> Result { + let df = NuDataFrame::try_from_custom_value(plugin, self)?; + df.base_value(Span::unknown()) + } + + fn custom_value_operation( + &self, + plugin: &crate::PolarsPlugin, + engine: &nu_plugin::EngineInterface, + lhs_span: Span, + operator: nu_protocol::Spanned, + right: Value, + ) -> Result { + let df = NuDataFrame::try_from_custom_value(plugin, self)?; + Ok(df + .compute_with_value(plugin, lhs_span, operator.item, operator.span, &right)? + .cache(plugin, engine)? + .into_value(lhs_span)) + } + + fn custom_value_follow_path_int( + &self, + plugin: &PolarsPlugin, + _engine: &EngineInterface, + _self_span: Span, + index: Spanned, + ) -> Result { + let df = NuDataFrame::try_from_custom_value(plugin, self)?; + df.get_value(index.item, index.span) + } + + fn custom_value_follow_path_string( + &self, + plugin: &PolarsPlugin, + engine: &EngineInterface, + self_span: Span, + column_name: Spanned, + ) -> Result { + let df = NuDataFrame::try_from_custom_value(plugin, self)?; + let column = df.column(&column_name.item, self_span)?; + Ok(column.cache(plugin, engine)?.into_value(self_span)) + } + + fn custom_value_partial_cmp( + &self, + plugin: &PolarsPlugin, + _engine: &EngineInterface, + other_value: Value, + ) -> Result, ShellError> { + let df = NuDataFrame::try_from_custom_value(plugin, self)?; + let other = NuDataFrame::try_from_value_coerce(plugin, &other_value, other_value.span())?; + let res = df.is_equal(&other); + Ok(res) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/mod.rs 
b/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/mod.rs new file mode 100644 index 0000000000..ea121994c8 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/mod.rs @@ -0,0 +1,576 @@ +mod between_values; +mod conversion; +mod custom_value; +mod operations; + +pub use conversion::{Column, ColumnMap}; +pub use operations::Axis; + +use indexmap::map::IndexMap; +use nu_protocol::{did_you_mean, PipelineData, Record, ShellError, Span, Value}; +use polars::prelude::{DataFrame, DataType, IntoLazy, PolarsObject, Series}; +use polars_plan::prelude::{lit, Expr, Null}; +use polars_utils::total_ord::TotalEq; +use std::{cmp::Ordering, collections::HashSet, fmt::Display, hash::Hasher, sync::Arc}; +use uuid::Uuid; + +use crate::{Cacheable, PolarsPlugin}; + +pub use self::custom_value::NuDataFrameCustomValue; + +use super::{ + cant_convert_err, nu_schema::NuSchema, utils::DEFAULT_ROWS, CustomValueSupport, NuLazyFrame, + PolarsPluginObject, PolarsPluginType, +}; + +// DataFrameValue is an encapsulation of Nushell Value that can be used +// to define the PolarsObject Trait. The polars object trait allows to +// create dataframes with mixed datatypes +#[derive(Clone, Debug)] +pub struct DataFrameValue(Value); + +impl DataFrameValue { + fn new(value: Value) -> Self { + Self(value) + } + + fn get_value(&self) -> Value { + self.0.clone() + } +} + +impl Display for DataFrameValue { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.0.get_type()) + } +} + +impl Default for DataFrameValue { + fn default() -> Self { + Self(Value::nothing(Span::unknown())) + } +} + +impl PartialEq for DataFrameValue { + fn eq(&self, other: &Self) -> bool { + self.0.partial_cmp(&other.0).map_or(false, Ordering::is_eq) + } +} +impl Eq for DataFrameValue {} + +impl std::hash::Hash for DataFrameValue { + fn hash(&self, state: &mut H) { + match &self.0 { + Value::Nothing { .. } => 0.hash(state), + Value::Int { val, .. 
} => val.hash(state), + Value::String { val, .. } => val.hash(state), + // TODO. Define hash for the rest of types + _ => {} + } + } +} + +impl TotalEq for DataFrameValue { + fn tot_eq(&self, other: &Self) -> bool { + self == other + } +} + +impl PolarsObject for DataFrameValue { + fn type_name() -> &'static str { + "object" + } +} + +#[derive(Debug, Default, Clone)] +pub struct NuDataFrame { + pub id: Uuid, + pub df: Arc, + pub from_lazy: bool, +} + +impl AsRef for NuDataFrame { + fn as_ref(&self) -> &polars::prelude::DataFrame { + &self.df + } +} + +impl From for NuDataFrame { + fn from(df: DataFrame) -> Self { + Self::new(false, df) + } +} + +impl NuDataFrame { + pub fn new(from_lazy: bool, df: DataFrame) -> Self { + let id = Uuid::new_v4(); + Self { + id, + df: Arc::new(df), + from_lazy, + } + } + + pub fn to_polars(&self) -> DataFrame { + (*self.df).clone() + } + + pub fn lazy(&self) -> NuLazyFrame { + NuLazyFrame::new(true, self.to_polars().lazy()) + } + + pub fn try_from_series(series: Series, span: Span) -> Result { + match DataFrame::new(vec![series]) { + Ok(dataframe) => Ok(NuDataFrame::new(false, dataframe)), + Err(e) => Err(ShellError::GenericError { + error: "Error creating dataframe".into(), + msg: e.to_string(), + span: Some(span), + help: None, + inner: vec![], + }), + } + } + + pub fn try_from_iter( + plugin: &PolarsPlugin, + iter: T, + maybe_schema: Option, + ) -> Result + where + T: Iterator, + { + // Dictionary to store the columnar data extracted from + // the input. During the iteration we check if the values + // have different type + let mut column_values: ColumnMap = IndexMap::new(); + + for value in iter { + match value { + Value::Custom { .. } => return Self::try_from_value(plugin, &value), + Value::List { vals, .. } => { + let record = vals + .into_iter() + .enumerate() + .map(|(i, val)| (format!("{i}"), val)) + .collect(); + + conversion::insert_record(&mut column_values, record, &maybe_schema)? + } + Value::Record { val: record, .. 
} => { + conversion::insert_record(&mut column_values, *record, &maybe_schema)? + } + _ => { + let key = "0".to_string(); + conversion::insert_value(value, key, &mut column_values, &maybe_schema)? + } + } + } + + let df = conversion::from_parsed_columns(column_values)?; + add_missing_columns(df, &maybe_schema, Span::unknown()) + } + + pub fn try_from_series_vec(columns: Vec, span: Span) -> Result { + let dataframe = DataFrame::new(columns).map_err(|e| ShellError::GenericError { + error: "Error creating dataframe".into(), + msg: format!("Unable to create DataFrame: {e}"), + span: Some(span), + help: None, + inner: vec![], + })?; + + Ok(Self::new(false, dataframe)) + } + + pub fn try_from_columns( + columns: Vec, + maybe_schema: Option, + ) -> Result { + let mut column_values: ColumnMap = IndexMap::new(); + + for column in columns { + let name = column.name().to_string(); + for value in column { + conversion::insert_value(value, name.clone(), &mut column_values, &maybe_schema)?; + } + } + + let df = conversion::from_parsed_columns(column_values)?; + add_missing_columns(df, &maybe_schema, Span::unknown()) + } + + pub fn fill_list_nan(list: Vec, list_span: Span, fill: Value) -> Value { + let newlist = list + .into_iter() + .map(|value| { + let span = value.span(); + match value { + Value::Float { val, .. } => { + if val.is_nan() { + fill.clone() + } else { + value + } + } + Value::List { vals, .. 
} => Self::fill_list_nan(vals, span, fill.clone()), + _ => value, + } + }) + .collect::>(); + Value::list(newlist, list_span) + } + + pub fn columns(&self, span: Span) -> Result, ShellError> { + let height = self.df.height(); + self.df + .get_columns() + .iter() + .map(|col| conversion::create_column(col, 0, height, span)) + .collect::, ShellError>>() + } + + pub fn column(&self, column: &str, span: Span) -> Result { + let s = self.df.column(column).map_err(|_| { + let possibilities = self + .df + .get_column_names() + .iter() + .map(|name| name.to_string()) + .collect::>(); + + let option = did_you_mean(&possibilities, column).unwrap_or_else(|| column.to_string()); + ShellError::DidYouMean { + suggestion: option, + span, + } + })?; + + let df = DataFrame::new(vec![s.clone()]).map_err(|e| ShellError::GenericError { + error: "Error creating dataframe".into(), + msg: e.to_string(), + span: Some(span), + help: None, + inner: vec![], + })?; + + Ok(Self::new(false, df)) + } + + pub fn is_series(&self) -> bool { + self.df.width() == 1 + } + + pub fn as_series(&self, span: Span) -> Result { + if !self.is_series() { + return Err(ShellError::GenericError { + error: "Error using as series".into(), + msg: "dataframe has more than one column".into(), + span: Some(span), + help: None, + inner: vec![], + }); + } + + let series = self + .df + .get_columns() + .first() + .expect("We have already checked that the width is 1"); + + Ok(series.clone()) + } + + pub fn get_value(&self, row: usize, span: Span) -> Result { + let series = self.as_series(span)?; + let column = conversion::create_column(&series, row, row + 1, span)?; + + if column.len() == 0 { + Err(ShellError::AccessEmptyContent { span }) + } else { + let value = column + .into_iter() + .next() + .expect("already checked there is a value"); + Ok(value) + } + } + + // Print is made out a head and if the dataframe is too large, then a tail + pub fn print(&self, span: Span) -> Result, ShellError> { + let df = &self.df; + let 
size: usize = 20; + + if df.height() > size { + let sample_size = size / 2; + let mut values = self.head(Some(sample_size), span)?; + conversion::add_separator(&mut values, df, span); + let remaining = df.height() - sample_size; + let tail_size = remaining.min(sample_size); + let mut tail_values = self.tail(Some(tail_size), span)?; + values.append(&mut tail_values); + + Ok(values) + } else { + Ok(self.head(Some(size), span)?) + } + } + + pub fn height(&self) -> usize { + self.df.height() + } + + pub fn head(&self, rows: Option, span: Span) -> Result, ShellError> { + let to_row = rows.unwrap_or(5); + let values = self.to_rows(0, to_row, span)?; + + Ok(values) + } + + pub fn tail(&self, rows: Option, span: Span) -> Result, ShellError> { + let df = &self.df; + let to_row = df.height(); + let size = rows.unwrap_or(DEFAULT_ROWS); + let from_row = to_row.saturating_sub(size); + + let values = self.to_rows(from_row, to_row, span)?; + + Ok(values) + } + + pub fn to_rows( + &self, + from_row: usize, + to_row: usize, + span: Span, + ) -> Result, ShellError> { + let df = &self.df; + let upper_row = to_row.min(df.height()); + + let mut size: usize = 0; + let columns = self + .df + .get_columns() + .iter() + .map( + |col| match conversion::create_column(col, from_row, upper_row, span) { + Ok(col) => { + size = col.len(); + Ok(col) + } + Err(e) => Err(e), + }, + ) + .collect::, ShellError>>()?; + + let mut iterators = columns + .into_iter() + .map(|col| (col.name().to_string(), col.into_iter())) + .collect::)>>(); + + let values = (0..size) + .map(|i| { + let mut record = Record::new(); + + record.push("index", Value::int((i + from_row) as i64, span)); + + for (name, col) in &mut iterators { + record.push(name.clone(), col.next().unwrap_or(Value::nothing(span))); + } + + Value::record(record, span) + }) + .collect::>(); + + Ok(values) + } + + // Dataframes are considered equal if they have the same shape, column name and values + pub fn is_equal(&self, other: &Self) -> Option { 
+ if self.as_ref().width() == 0 { + // checking for empty dataframe + return None; + } + + if self.as_ref().get_column_names() != other.as_ref().get_column_names() { + // checking both dataframes share the same names + return None; + } + + if self.as_ref().height() != other.as_ref().height() { + // checking both dataframes have the same row size + return None; + } + + // sorting dataframe by the first column + let column_names = self.as_ref().get_column_names(); + let first_col = column_names + .first() + .expect("already checked that dataframe is different than 0"); + + // if unable to sort, then unable to compare + let lhs = match self.as_ref().sort(vec![*first_col], false, false) { + Ok(df) => df, + Err(_) => return None, + }; + + let rhs = match other.as_ref().sort(vec![*first_col], false, false) { + Ok(df) => df, + Err(_) => return None, + }; + + for name in self.as_ref().get_column_names() { + let self_series = lhs.column(name).expect("name from dataframe names"); + + let other_series = rhs + .column(name) + .expect("already checked that name in other"); + + // Casting needed to compare other numeric types with nushell numeric type. 
+ // In nushell we only have i64 integer numeric types and any array created + // with nushell untagged primitives will be of type i64 + let self_series = match self_series.dtype() { + DataType::UInt32 | DataType::Int32 if *other_series.dtype() == DataType::Int64 => { + match self_series.cast(&DataType::Int64) { + Ok(series) => series, + Err(_) => return None, + } + } + _ => self_series.clone(), + }; + + let other_series = match other_series.dtype() { + DataType::UInt32 | DataType::Int32 if *self_series.dtype() == DataType::Int64 => { + match other_series.cast(&DataType::Int64) { + Ok(series) => series, + Err(_) => return None, + } + } + _ => other_series.clone(), + }; + + if !self_series.equals(&other_series) { + return None; + } + } + + Some(Ordering::Equal) + } + + pub fn schema(&self) -> NuSchema { + NuSchema::new(self.df.schema()) + } + + /// This differs from try_from_value as it will attempt to coerce the type into a NuDataFrame. + /// So, if the pipeline type is a NuLazyFrame it will be collected and returned as NuDataFrame. + pub fn try_from_value_coerce( + plugin: &PolarsPlugin, + value: &Value, + span: Span, + ) -> Result { + match PolarsPluginObject::try_from_value(plugin, value)? { + PolarsPluginObject::NuDataFrame(df) => Ok(df), + PolarsPluginObject::NuLazyFrame(lazy) => lazy.collect(span), + _ => Err(cant_convert_err( + value, + &[PolarsPluginType::NuDataFrame, PolarsPluginType::NuLazyFrame], + )), + } + } + + /// This differs from try_from_pipeline as it will attempt to coerce the type into a NuDataFrame. + /// So, if the pipeline type is a NuLazyFrame it will be collected and returned as NuDataFrame. 
+ pub fn try_from_pipeline_coerce( + plugin: &PolarsPlugin, + input: PipelineData, + span: Span, + ) -> Result { + let value = input.into_value(span); + Self::try_from_value_coerce(plugin, &value, span) + } +} + +fn add_missing_columns( + df: NuDataFrame, + maybe_schema: &Option, + span: Span, +) -> Result { + // If there are fields that are in the schema, but not in the dataframe + // add them to the dataframe. + if let Some(schema) = maybe_schema { + let fields = df.df.fields(); + let df_field_names: HashSet<&str> = fields.iter().map(|f| f.name().as_str()).collect(); + + let missing: Vec<(&str, &DataType)> = schema + .schema + .iter() + .filter_map(|(name, dtype)| { + let name = name.as_str(); + if !df_field_names.contains(name) { + Some((name, dtype)) + } else { + None + } + }) + .collect(); + + // todo - fix + let missing_exprs: Vec = missing + .iter() + .map(|(name, dtype)| lit(Null {}).cast((*dtype).to_owned()).alias(name)) + .collect(); + + let df = if !missing.is_empty() { + let lazy: NuLazyFrame = df.lazy().to_polars().with_columns(missing_exprs).into(); + lazy.collect(span)? 
+ } else { + df + }; + Ok(df) + } else { + Ok(df) + } +} + +impl Cacheable for NuDataFrame { + fn cache_id(&self) -> &Uuid { + &self.id + } + + fn to_cache_value(&self) -> Result { + Ok(PolarsPluginObject::NuDataFrame(self.clone())) + } + + fn from_cache_value(cv: PolarsPluginObject) -> Result { + match cv { + PolarsPluginObject::NuDataFrame(df) => Ok(df), + _ => Err(ShellError::GenericError { + error: "Cache value is not a dataframe".into(), + msg: "".into(), + span: None, + help: None, + inner: vec![], + }), + } + } +} + +impl CustomValueSupport for NuDataFrame { + type CV = NuDataFrameCustomValue; + + fn custom_value(self) -> Self::CV { + NuDataFrameCustomValue { + id: self.id, + dataframe: Some(self), + } + } + + fn base_value(self, span: Span) -> Result { + let vals = self.print(span)?; + Ok(Value::list(vals, span)) + } + + fn get_type_static() -> PolarsPluginType { + PolarsPluginType::NuDataFrame + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/operations.rs b/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/operations.rs new file mode 100644 index 0000000000..ecdcf73595 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/operations.rs @@ -0,0 +1,212 @@ +use nu_protocol::{ast::Operator, ShellError, Span, Spanned, Value}; +use polars::prelude::{DataFrame, Series}; + +use crate::values::CustomValueSupport; +use crate::PolarsPlugin; + +use super::between_values::{ + between_dataframes, compute_between_series, compute_series_single_value, +}; + +use super::NuDataFrame; + +pub enum Axis { + Row, + Column, +} + +impl NuDataFrame { + pub fn compute_with_value( + &self, + plugin: &PolarsPlugin, + lhs_span: Span, + operator: Operator, + op_span: Span, + right: &Value, + ) -> Result { + let rhs_span = right.span(); + match right { + Value::Custom { .. 
} => { + let rhs = NuDataFrame::try_from_value(plugin, right)?; + + match (self.is_series(), rhs.is_series()) { + (true, true) => { + let lhs = &self + .as_series(lhs_span) + .expect("Already checked that is a series"); + let rhs = &rhs + .as_series(rhs_span) + .expect("Already checked that is a series"); + + if lhs.dtype() != rhs.dtype() { + return Err(ShellError::IncompatibleParameters { + left_message: format!("datatype {}", lhs.dtype()), + left_span: lhs_span, + right_message: format!("datatype {}", lhs.dtype()), + right_span: rhs_span, + }); + } + + if lhs.len() != rhs.len() { + return Err(ShellError::IncompatibleParameters { + left_message: format!("len {}", lhs.len()), + left_span: lhs_span, + right_message: format!("len {}", rhs.len()), + right_span: rhs_span, + }); + } + + let op = Spanned { + item: operator, + span: op_span, + }; + + compute_between_series( + op, + &NuDataFrame::default().into_value(lhs_span), + lhs, + right, + rhs, + ) + } + _ => { + if self.df.height() != rhs.df.height() { + return Err(ShellError::IncompatibleParameters { + left_message: format!("rows {}", self.df.height()), + left_span: lhs_span, + right_message: format!("rows {}", rhs.df.height()), + right_span: rhs_span, + }); + } + + let op = Spanned { + item: operator, + span: op_span, + }; + + between_dataframes( + op, + &NuDataFrame::default().into_value(lhs_span), + self, + right, + &rhs, + ) + } + } + } + _ => { + let op = Spanned { + item: operator, + span: op_span, + }; + + compute_series_single_value( + op, + &NuDataFrame::default().into_value(lhs_span), + self, + right, + ) + } + } + } + + pub fn append_df( + &self, + other: &NuDataFrame, + axis: Axis, + span: Span, + ) -> Result { + match axis { + Axis::Row => { + let mut columns: Vec<&str> = Vec::new(); + + let new_cols = self + .df + .get_columns() + .iter() + .chain(other.df.get_columns()) + .map(|s| { + let name = if columns.contains(&s.name()) { + format!("{}_{}", s.name(), "x") + } else { + columns.push(s.name()); + 
s.name().to_string() + }; + + let mut series = s.clone(); + series.rename(&name); + series + }) + .collect::>(); + + let df_new = DataFrame::new(new_cols).map_err(|e| ShellError::GenericError { + error: "Error creating dataframe".into(), + msg: e.to_string(), + span: Some(span), + help: None, + inner: vec![], + })?; + + Ok(NuDataFrame::new(false, df_new)) + } + Axis::Column => { + if self.df.width() != other.df.width() { + return Err(ShellError::IncompatibleParametersSingle { + msg: "Dataframes with different number of columns".into(), + span, + }); + } + + if !self + .df + .get_column_names() + .iter() + .all(|col| other.df.get_column_names().contains(col)) + { + return Err(ShellError::IncompatibleParametersSingle { + msg: "Dataframes with different columns names".into(), + span, + }); + } + + let new_cols = self + .df + .get_columns() + .iter() + .map(|s| { + let other_col = other + .df + .column(s.name()) + .expect("Already checked that dataframes have same columns"); + + let mut tmp = s.clone(); + let res = tmp.append(other_col); + + match res { + Ok(s) => Ok(s.clone()), + Err(e) => Err({ + ShellError::GenericError { + error: "Error appending dataframe".into(), + msg: format!("Unable to append: {e}"), + span: Some(span), + help: None, + inner: vec![], + } + }), + } + }) + .collect::, ShellError>>()?; + + let df_new = DataFrame::new(new_cols).map_err(|e| ShellError::GenericError { + error: "Error appending dataframe".into(), + msg: format!("Unable to append dataframes: {e}"), + span: Some(span), + help: None, + inner: vec![], + })?; + + Ok(NuDataFrame::new(false, df_new)) + } + } + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/values/nu_expression/custom_value.rs b/crates/nu_plugin_polars/src/dataframe/values/nu_expression/custom_value.rs new file mode 100644 index 0000000000..8f5f478c8c --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/values/nu_expression/custom_value.rs @@ -0,0 +1,229 @@ +use crate::{ + values::{CustomValueSupport, 
PolarsPluginCustomValue}, + Cacheable, PolarsPlugin, +}; +use std::ops::{Add, Div, Mul, Rem, Sub}; + +use super::NuExpression; +use nu_plugin::EngineInterface; +use nu_protocol::{ + ast::{Comparison, Math, Operator}, + CustomValue, ShellError, Span, Type, Value, +}; +use polars::prelude::Expr; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +const TYPE_NAME: &str = "NuExpression"; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct NuExpressionCustomValue { + pub id: Uuid, + #[serde(skip)] + pub expr: Option, +} + +// CustomValue implementation for NuDataFrame +#[typetag::serde] +impl CustomValue for NuExpressionCustomValue { + fn clone_value(&self, span: nu_protocol::Span) -> Value { + let cloned = self.clone(); + Value::custom(Box::new(cloned), span) + } + + fn type_name(&self) -> String { + TYPE_NAME.into() + } + + fn to_base_value(&self, span: Span) -> Result { + Ok(Value::string( + "NuExpressionCustomValue: custom_value_to_base_value should've been called", + span, + )) + } + + fn as_mut_any(&mut self) -> &mut dyn std::any::Any { + self + } + + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn notify_plugin_on_drop(&self) -> bool { + true + } +} + +fn compute_with_value( + (plugin, engine): (&PolarsPlugin, &EngineInterface), + left: &NuExpression, + lhs_span: Span, + operator: Operator, + op: Span, + right: &Value, +) -> Result { + let rhs_span = right.span(); + match right { + Value::Custom { val: rhs, .. } => { + let rhs = rhs.as_any().downcast_ref::().ok_or_else(|| { + ShellError::DowncastNotPossible { + msg: "Unable to create expression".into(), + span: rhs_span, + } + })?; + + match rhs.as_ref() { + polars::prelude::Expr::Literal(..) 
=> with_operator( + (plugin, engine), + operator, + left, + rhs, + lhs_span, + right.span(), + op, + ), + _ => Err(ShellError::TypeMismatch { + err_message: "Only literal expressions or number".into(), + span: right.span(), + }), + } + } + _ => { + let rhs = NuExpression::try_from_value(plugin, right)?; + with_operator( + (plugin, engine), + operator, + left, + &rhs, + lhs_span, + right.span(), + op, + ) + } + } +} + +fn with_operator( + (plugin, engine): (&PolarsPlugin, &EngineInterface), + operator: Operator, + left: &NuExpression, + right: &NuExpression, + lhs_span: Span, + rhs_span: Span, + op_span: Span, +) -> Result { + match operator { + Operator::Math(Math::Plus) => { + apply_arithmetic(plugin, engine, left, right, lhs_span, Add::add) + } + Operator::Math(Math::Minus) => { + apply_arithmetic(plugin, engine, left, right, lhs_span, Sub::sub) + } + Operator::Math(Math::Multiply) => { + apply_arithmetic(plugin, engine, left, right, lhs_span, Mul::mul) + } + Operator::Math(Math::Divide) => { + apply_arithmetic(plugin, engine, left, right, lhs_span, Div::div) + } + Operator::Math(Math::Modulo) => { + apply_arithmetic(plugin, engine, left, right, lhs_span, Rem::rem) + } + Operator::Math(Math::FloorDivision) => { + apply_arithmetic(plugin, engine, left, right, lhs_span, Div::div) + } + Operator::Comparison(Comparison::Equal) => Ok(left + .clone() + .apply_with_expr(right.clone(), Expr::eq) + .cache(plugin, engine)? + .into_value(lhs_span)), + Operator::Comparison(Comparison::NotEqual) => Ok(left + .clone() + .apply_with_expr(right.clone(), Expr::neq) + .cache(plugin, engine)? + .into_value(lhs_span)), + Operator::Comparison(Comparison::GreaterThan) => Ok(left + .clone() + .apply_with_expr(right.clone(), Expr::gt) + .cache(plugin, engine)? + .into_value(lhs_span)), + Operator::Comparison(Comparison::GreaterThanOrEqual) => Ok(left + .clone() + .apply_with_expr(right.clone(), Expr::gt_eq) + .cache(plugin, engine)? 
+ .into_value(lhs_span)), + Operator::Comparison(Comparison::LessThan) => Ok(left + .clone() + .apply_with_expr(right.clone(), Expr::lt) + .cache(plugin, engine)? + .into_value(lhs_span)), + Operator::Comparison(Comparison::LessThanOrEqual) => Ok(left + .clone() + .apply_with_expr(right.clone(), Expr::lt_eq) + .cache(plugin, engine)? + .into_value(lhs_span)), + _ => Err(ShellError::OperatorMismatch { + op_span, + lhs_ty: Type::Custom(TYPE_NAME.into()).to_string(), + lhs_span, + rhs_ty: Type::Custom(TYPE_NAME.into()).to_string(), + rhs_span, + }), + } +} + +fn apply_arithmetic( + plugin: &PolarsPlugin, + engine: &EngineInterface, + left: &NuExpression, + right: &NuExpression, + span: Span, + f: F, +) -> Result +where + F: Fn(Expr, Expr) -> Expr, +{ + let expr: NuExpression = f(left.as_ref().clone(), right.as_ref().clone()).into(); + + Ok(expr.cache(plugin, engine)?.into_value(span)) +} + +impl PolarsPluginCustomValue for NuExpressionCustomValue { + type PolarsPluginObjectType = NuExpression; + + fn custom_value_operation( + &self, + plugin: &crate::PolarsPlugin, + engine: &nu_plugin::EngineInterface, + lhs_span: Span, + operator: nu_protocol::Spanned, + right: Value, + ) -> Result { + let expr = NuExpression::try_from_custom_value(plugin, self)?; + compute_with_value( + (plugin, engine), + &expr, + lhs_span, + operator.item, + operator.span, + &right, + ) + } + + fn custom_value_to_base_value( + &self, + plugin: &crate::PolarsPlugin, + _engine: &nu_plugin::EngineInterface, + ) -> Result { + let expr = NuExpression::try_from_custom_value(plugin, self)?; + expr.base_value(Span::unknown()) + } + + fn id(&self) -> &Uuid { + &self.id + } + + fn internal(&self) -> &Option { + &self.expr + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/values/nu_expression/mod.rs b/crates/nu_plugin_polars/src/dataframe/values/nu_expression/mod.rs new file mode 100644 index 0000000000..a6e26efc8d --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/values/nu_expression/mod.rs 
@@ -0,0 +1,495 @@ +mod custom_value; + +use nu_protocol::{record, ShellError, Span, Value}; +use polars::prelude::{col, AggExpr, Expr, Literal}; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use uuid::Uuid; + +use crate::{Cacheable, PolarsPlugin}; + +pub use self::custom_value::NuExpressionCustomValue; + +use super::{CustomValueSupport, PolarsPluginObject, PolarsPluginType}; + +// Polars Expression wrapper for Nushell operations +// Object is behind and Option to allow easy implementation of +// the Deserialize trait +#[derive(Default, Clone, Debug)] +pub struct NuExpression { + pub id: Uuid, + expr: Option, +} + +// Mocked serialization of the LazyFrame object +impl Serialize for NuExpression { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_none() + } +} + +// Mocked deserialization of the LazyFrame object +impl<'de> Deserialize<'de> for NuExpression { + fn deserialize(_deserializer: D) -> Result + where + D: Deserializer<'de>, + { + Ok(NuExpression::default()) + } +} + +// Referenced access to the real LazyFrame +impl AsRef for NuExpression { + fn as_ref(&self) -> &polars::prelude::Expr { + // The only case when there cannot be an expr is if it is created + // using the default function or if created by deserializing something + self.expr.as_ref().expect("there should always be a frame") + } +} + +impl AsMut for NuExpression { + fn as_mut(&mut self) -> &mut polars::prelude::Expr { + // The only case when there cannot be an expr is if it is created + // using the default function or if created by deserializing something + self.expr.as_mut().expect("there should always be a frame") + } +} + +impl From for NuExpression { + fn from(expr: Expr) -> Self { + Self::new(Some(expr)) + } +} + +impl NuExpression { + fn new(expr: Option) -> Self { + Self { + id: Uuid::new_v4(), + expr, + } + } + + pub fn to_polars(self) -> Expr { + self.expr.expect("Expression cannot be none to convert") + } + + pub fn 
apply_with_expr(self, other: NuExpression, f: F) -> Self + where + F: Fn(Expr, Expr) -> Expr, + { + let expr = self + .expr + .expect("Lazy expression must not be empty to apply"); + let other = other + .expr + .expect("Lazy expression must not be empty to apply"); + + f(expr, other).into() + } + + pub fn to_value(&self, span: Span) -> Result { + expr_to_value(self.as_ref(), span) + } + + // Convenient function to extract multiple Expr that could be inside a nushell Value + pub fn extract_exprs(plugin: &PolarsPlugin, value: Value) -> Result, ShellError> { + ExtractedExpr::extract_exprs(plugin, value).map(ExtractedExpr::into_exprs) + } +} + +#[derive(Debug)] +// Enum to represent the parsing of the expressions from Value +enum ExtractedExpr { + Single(Expr), + List(Vec), +} + +impl ExtractedExpr { + fn into_exprs(self) -> Vec { + match self { + Self::Single(expr) => vec![expr], + Self::List(expressions) => expressions + .into_iter() + .flat_map(ExtractedExpr::into_exprs) + .collect(), + } + } + + fn extract_exprs(plugin: &PolarsPlugin, value: Value) -> Result { + match value { + Value::String { val, .. } => Ok(ExtractedExpr::Single(col(val.as_str()))), + Value::Custom { .. } => NuExpression::try_from_value(plugin, &value) + .map(NuExpression::to_polars) + .map(ExtractedExpr::Single), + Value::List { vals, .. } => vals + .into_iter() + .map(|x| Self::extract_exprs(plugin, x)) + .collect::, ShellError>>() + .map(ExtractedExpr::List), + x => Err(ShellError::CantConvert { + to_type: "expression".into(), + from_type: x.get_type().to_string(), + span: x.span(), + help: None, + }), + } + } +} + +pub fn expr_to_value(expr: &Expr, span: Span) -> Result { + match expr { + Expr::Alias(expr, alias) => Ok(Value::record( + record! { + "expr" => expr_to_value(expr.as_ref(), span)?, + "alias" => Value::string(alias.as_ref(), span), + }, + span, + )), + Expr::Column(name) => Ok(Value::record( + record! 
{ + "expr" => Value::string("column", span), + "value" => Value::string(name.to_string(), span), + }, + span, + )), + Expr::Columns(columns) => { + let value = columns.iter().map(|col| Value::string(col, span)).collect(); + Ok(Value::record( + record! { + "expr" => Value::string("columns", span), + "value" => Value::list(value, span), + }, + span, + )) + } + Expr::Literal(literal) => Ok(Value::record( + record! { + "expr" => Value::string("literal", span), + "value" => Value::string(format!("{literal:?}"), span), + }, + span, + )), + Expr::BinaryExpr { left, op, right } => Ok(Value::record( + record! { + "left" => expr_to_value(left, span)?, + "op" => Value::string(format!("{op:?}"), span), + "right" => expr_to_value(right, span)?, + }, + span, + )), + Expr::Ternary { + predicate, + truthy, + falsy, + } => Ok(Value::record( + record! { + "predicate" => expr_to_value(predicate.as_ref(), span)?, + "truthy" => expr_to_value(truthy.as_ref(), span)?, + "falsy" => expr_to_value(falsy.as_ref(), span)?, + }, + span, + )), + Expr::Agg(agg_expr) => { + let value = match agg_expr { + AggExpr::Min { input: expr, .. } + | AggExpr::Max { input: expr, .. } + | AggExpr::Median(expr) + | AggExpr::NUnique(expr) + | AggExpr::First(expr) + | AggExpr::Last(expr) + | AggExpr::Mean(expr) + | AggExpr::Implode(expr) + | AggExpr::Count(expr, _) + | AggExpr::Sum(expr) + | AggExpr::AggGroups(expr) + | AggExpr::Std(expr, _) + | AggExpr::Var(expr, _) => expr_to_value(expr.as_ref(), span), + AggExpr::Quantile { + expr, + quantile, + interpol, + } => Ok(Value::record( + record! { + "expr" => expr_to_value(expr.as_ref(), span)?, + "quantile" => expr_to_value(quantile.as_ref(), span)?, + "interpol" => Value::string(format!("{interpol:?}"), span), + }, + span, + )), + }; + + Ok(Value::record( + record! { + "expr" => Value::string("agg", span), + "value" => value?, + }, + span, + )) + } + Expr::Len => Ok(Value::record( + record! 
{ "expr" => Value::string("count", span) }, + span, + )), + Expr::Wildcard => Ok(Value::record( + record! { "expr" => Value::string("wildcard", span) }, + span, + )), + Expr::Explode(expr) => Ok(Value::record( + record! { "expr" => expr_to_value(expr.as_ref(), span)? }, + span, + )), + Expr::KeepName(expr) => Ok(Value::record( + record! { "expr" => expr_to_value(expr.as_ref(), span)? }, + span, + )), + Expr::Nth(i) => Ok(Value::record( + record! { "expr" => Value::int(*i, span) }, + span, + )), + Expr::DtypeColumn(dtypes) => { + let vals = dtypes + .iter() + .map(|d| Value::string(format!("{d}"), span)) + .collect(); + + Ok(Value::list(vals, span)) + } + Expr::Sort { expr, options } => Ok(Value::record( + record! { + "expr" => expr_to_value(expr.as_ref(), span)?, + "options" => Value::string(format!("{options:?}"), span), + }, + span, + )), + Expr::Cast { + expr, + data_type, + strict, + } => Ok(Value::record( + record! { + "expr" => expr_to_value(expr.as_ref(), span)?, + "dtype" => Value::string(format!("{data_type:?}"), span), + "strict" => Value::bool(*strict, span), + }, + span, + )), + Expr::Gather { + expr, + idx, + returns_scalar: _, + } => Ok(Value::record( + record! { + "expr" => expr_to_value(expr.as_ref(), span)?, + "idx" => expr_to_value(idx.as_ref(), span)?, + }, + span, + )), + Expr::SortBy { + expr, + by, + descending, + } => { + let by: Result, ShellError> = + by.iter().map(|b| expr_to_value(b, span)).collect(); + let descending: Vec = descending.iter().map(|r| Value::bool(*r, span)).collect(); + + Ok(Value::record( + record! { + "expr" => expr_to_value(expr.as_ref(), span)?, + "by" => Value::list(by?, span), + "descending" => Value::list(descending, span), + }, + span, + )) + } + Expr::Filter { input, by } => Ok(Value::record( + record! { + "input" => expr_to_value(input.as_ref(), span)?, + "by" => expr_to_value(by.as_ref(), span)?, + }, + span, + )), + Expr::Slice { + input, + offset, + length, + } => Ok(Value::record( + record! 
{ + "input" => expr_to_value(input.as_ref(), span)?, + "offset" => expr_to_value(offset.as_ref(), span)?, + "length" => expr_to_value(length.as_ref(), span)?, + }, + span, + )), + Expr::Exclude(expr, excluded) => { + let excluded = excluded + .iter() + .map(|e| Value::string(format!("{e:?}"), span)) + .collect(); + + Ok(Value::record( + record! { + "expr" => expr_to_value(expr.as_ref(), span)?, + "excluded" => Value::list(excluded, span), + }, + span, + )) + } + Expr::RenameAlias { expr, function } => Ok(Value::record( + record! { + "expr" => expr_to_value(expr.as_ref(), span)?, + "function" => Value::string(format!("{function:?}"), span), + }, + span, + )), + Expr::AnonymousFunction { + input, + function, + output_type, + options, + } => { + let input: Result, ShellError> = + input.iter().map(|e| expr_to_value(e, span)).collect(); + Ok(Value::record( + record! { + "input" => Value::list(input?, span), + "function" => Value::string(format!("{function:?}"), span), + "output_type" => Value::string(format!("{output_type:?}"), span), + "options" => Value::string(format!("{options:?}"), span), + }, + span, + )) + } + Expr::Function { + input, + function, + options, + } => { + let input: Result, ShellError> = + input.iter().map(|e| expr_to_value(e, span)).collect(); + Ok(Value::record( + record! { + "input" => Value::list(input?, span), + "function" => Value::string(format!("{function:?}"), span), + "options" => Value::string(format!("{options:?}"), span), + }, + span, + )) + } + Expr::Window { + function, + partition_by, + options, + } => { + let partition_by: Result, ShellError> = partition_by + .iter() + .map(|e| expr_to_value(e, span)) + .collect(); + + Ok(Value::record( + record! 
{ + "function" => expr_to_value(function, span)?, + "partition_by" => Value::list(partition_by?, span), + "options" => Value::string(format!("{options:?}"), span), + }, + span, + )) + } + Expr::SubPlan(_, _) => Err(ShellError::UnsupportedInput { + msg: "Expressions of type SubPlan are not yet supported".to_string(), + input: format!("Expression is {expr:?}"), + msg_span: span, + input_span: Span::unknown(), + }), + // the parameter polars_plan::dsl::selector::Selector is not publicly exposed. + // I am not sure what we can meaningfully do with this at this time. + Expr::Selector(_) => Err(ShellError::UnsupportedInput { + msg: "Expressions of type Selector to Nu Values is not yet supported".to_string(), + input: format!("Expression is {expr:?}"), + msg_span: span, + input_span: Span::unknown(), + }), + } +} + +impl Cacheable for NuExpression { + fn cache_id(&self) -> &Uuid { + &self.id + } + + fn to_cache_value(&self) -> Result { + Ok(PolarsPluginObject::NuExpression(self.clone())) + } + + fn from_cache_value(cv: PolarsPluginObject) -> Result { + match cv { + PolarsPluginObject::NuExpression(df) => Ok(df), + _ => Err(ShellError::GenericError { + error: "Cache value is not an expression".into(), + msg: "".into(), + span: None, + help: None, + inner: vec![], + }), + } + } +} + +impl CustomValueSupport for NuExpression { + type CV = NuExpressionCustomValue; + + fn custom_value(self) -> Self::CV { + NuExpressionCustomValue { + id: self.id, + expr: Some(self), + } + } + + fn get_type_static() -> PolarsPluginType { + PolarsPluginType::NuExpression + } + + fn try_from_value(plugin: &PolarsPlugin, value: &Value) -> Result { + match value { + Value::Custom { val, .. } => { + if let Some(cv) = val.as_any().downcast_ref::() { + Self::try_from_custom_value(plugin, cv) + } else { + Err(ShellError::CantConvert { + to_type: Self::get_type_static().to_string(), + from_type: value.get_type().to_string(), + span: value.span(), + help: None, + }) + } + } + Value::String { val, .. 
} => Ok(val.to_owned().lit().into()), + Value::Int { val, .. } => Ok(val.to_owned().lit().into()), + Value::Bool { val, .. } => Ok(val.to_owned().lit().into()), + Value::Float { val, .. } => Ok(val.to_owned().lit().into()), + x => Err(ShellError::CantConvert { + to_type: "lazy expression".into(), + from_type: x.get_type().to_string(), + span: x.span(), + help: None, + }), + } + } + + fn can_downcast(value: &Value) -> bool { + match value { + Value::Custom { val, .. } => val.as_any().downcast_ref::().is_some(), + Value::List { vals, .. } => vals.iter().all(Self::can_downcast), + Value::String { .. } | Value::Int { .. } | Value::Bool { .. } | Value::Float { .. } => { + true + } + _ => false, + } + } + + fn base_value(self, _span: Span) -> Result { + self.to_value(Span::unknown()) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/values/nu_lazyframe/custom_value.rs b/crates/nu_plugin_polars/src/dataframe/values/nu_lazyframe/custom_value.rs new file mode 100644 index 0000000000..731d210dd8 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/values/nu_lazyframe/custom_value.rs @@ -0,0 +1,86 @@ +use std::cmp::Ordering; + +use nu_plugin::EngineInterface; +use nu_protocol::{CustomValue, ShellError, Span, Value}; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +use crate::{ + values::{CustomValueSupport, NuDataFrame, PolarsPluginCustomValue}, + PolarsPlugin, +}; + +use super::NuLazyFrame; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct NuLazyFrameCustomValue { + pub id: Uuid, + #[serde(skip)] + pub lazyframe: Option, +} + +// CustomValue implementation for NuDataFrame +#[typetag::serde] +impl CustomValue for NuLazyFrameCustomValue { + fn clone_value(&self, span: nu_protocol::Span) -> Value { + Value::custom(Box::new(self.clone()), span) + } + + fn type_name(&self) -> String { + "NuLazyFrameCustomValue".into() + } + + fn to_base_value(&self, span: Span) -> Result { + Ok(Value::string( + "NuLazyFrameCustomValue: custom_value_to_base_value 
should've been called", + span, + )) + } + + fn as_mut_any(&mut self) -> &mut dyn std::any::Any { + self + } + + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn notify_plugin_on_drop(&self) -> bool { + true + } +} + +impl PolarsPluginCustomValue for NuLazyFrameCustomValue { + type PolarsPluginObjectType = NuLazyFrame; + + fn custom_value_to_base_value( + &self, + plugin: &crate::PolarsPlugin, + _engine: &nu_plugin::EngineInterface, + ) -> Result { + let lazy = NuLazyFrame::try_from_custom_value(plugin, self)?; + lazy.base_value(Span::unknown()) + } + + fn id(&self) -> &Uuid { + &self.id + } + + fn internal(&self) -> &Option { + &self.lazyframe + } + + fn custom_value_partial_cmp( + &self, + plugin: &PolarsPlugin, + _engine: &EngineInterface, + other_value: Value, + ) -> Result, ShellError> { + // to compare, we need to convert to NuDataframe + let df = NuLazyFrame::try_from_custom_value(plugin, self)?; + let df = df.collect(other_value.span())?; + let other = NuDataFrame::try_from_value_coerce(plugin, &other_value, other_value.span())?; + let res = df.is_equal(&other); + Ok(res) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/values/nu_lazyframe/mod.rs b/crates/nu_plugin_polars/src/dataframe/values/nu_lazyframe/mod.rs new file mode 100644 index 0000000000..75f4791032 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/values/nu_lazyframe/mod.rs @@ -0,0 +1,169 @@ +mod custom_value; + +use crate::{Cacheable, PolarsPlugin}; + +use super::{ + cant_convert_err, CustomValueSupport, NuDataFrame, NuExpression, NuSchema, PolarsPluginObject, + PolarsPluginType, +}; +use core::fmt; +use nu_protocol::{record, PipelineData, ShellError, Span, Value}; +use polars::prelude::{Expr, IntoLazy, LazyFrame}; +use std::sync::Arc; +use uuid::Uuid; + +pub use custom_value::NuLazyFrameCustomValue; + +// Lazyframe wrapper for Nushell operations +// Polars LazyFrame is behind and Option to allow easy implementation of +// the Deserialize trait +#[derive(Default, 
Clone)] +pub struct NuLazyFrame { + pub id: Uuid, + pub lazy: Arc, + pub from_eager: bool, +} + +impl fmt::Debug for NuLazyFrame { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "NuLazyframe") + } +} + +impl From for NuLazyFrame { + fn from(lazy_frame: LazyFrame) -> Self { + NuLazyFrame::new(false, lazy_frame) + } +} + +impl NuLazyFrame { + pub fn new(from_eager: bool, lazy: LazyFrame) -> Self { + Self { + id: Uuid::new_v4(), + lazy: Arc::new(lazy), + from_eager, + } + } + + pub fn from_dataframe(df: NuDataFrame) -> Self { + let lazy = df.as_ref().clone().lazy(); + NuLazyFrame::new(true, lazy) + } + + pub fn to_polars(&self) -> LazyFrame { + (*self.lazy).clone() + } + + pub fn collect(self, span: Span) -> Result { + self.to_polars() + .collect() + .map_err(|e| ShellError::GenericError { + error: "Error collecting lazy frame".into(), + msg: e.to_string(), + span: Some(span), + help: None, + inner: vec![], + }) + .map(|df| NuDataFrame::new(!self.from_eager, df)) + } + + pub fn apply_with_expr(self, expr: NuExpression, f: F) -> Self + where + F: Fn(LazyFrame, Expr) -> LazyFrame, + { + let df = self.to_polars(); + let expr = expr.to_polars(); + let new_frame = f(df, expr); + Self::new(self.from_eager, new_frame) + } + + pub fn schema(&self) -> Result { + let internal_schema = self.lazy.schema().map_err(|e| ShellError::GenericError { + error: "Error getting schema from lazy frame".into(), + msg: e.to_string(), + span: None, + help: None, + inner: vec![], + })?; + Ok(internal_schema.into()) + } + + /// Get a NuLazyFrame from the value. This differs from try_from_value as it will coerce a + /// NuDataFrame into a NuLazyFrame + pub fn try_from_value_coerce( + plugin: &PolarsPlugin, + value: &Value, + ) -> Result { + match PolarsPluginObject::try_from_value(plugin, value)? 
{ + PolarsPluginObject::NuDataFrame(df) => Ok(df.lazy()), + PolarsPluginObject::NuLazyFrame(lazy) => Ok(lazy), + _ => Err(cant_convert_err( + value, + &[PolarsPluginType::NuDataFrame, PolarsPluginType::NuLazyFrame], + )), + } + } + + /// This differs from try_from_pipeline as it will attempt to coerce the type into a NuDataFrame. + /// So, if the pipeline type is a NuLazyFrame it will be collected and returned as NuDataFrame. + pub fn try_from_pipeline_coerce( + plugin: &PolarsPlugin, + input: PipelineData, + span: Span, + ) -> Result { + let value = input.into_value(span); + Self::try_from_value_coerce(plugin, &value) + } +} + +impl Cacheable for NuLazyFrame { + fn cache_id(&self) -> &Uuid { + &self.id + } + + fn to_cache_value(&self) -> Result { + Ok(PolarsPluginObject::NuLazyFrame(self.clone())) + } + + fn from_cache_value(cv: PolarsPluginObject) -> Result { + match cv { + PolarsPluginObject::NuLazyFrame(df) => Ok(df), + _ => Err(ShellError::GenericError { + error: "Cache value is not a lazyframe".into(), + msg: "".into(), + span: None, + help: None, + inner: vec![], + }), + } + } +} + +impl CustomValueSupport for NuLazyFrame { + type CV = NuLazyFrameCustomValue; + + fn custom_value(self) -> Self::CV { + NuLazyFrameCustomValue { + id: self.id, + lazyframe: Some(self), + } + } + + fn get_type_static() -> PolarsPluginType { + PolarsPluginType::NuLazyFrame + } + + fn base_value(self, span: Span) -> Result { + let optimized_plan = self + .lazy + .describe_optimized_plan() + .unwrap_or_else(|_| "".to_string()); + Ok(Value::record( + record! 
{ + "plan" => Value::string(self.lazy.describe_plan(), span), + "optimized_plan" => Value::string(optimized_plan, span), + }, + span, + )) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/values/nu_lazygroupby/custom_value.rs b/crates/nu_plugin_polars/src/dataframe/values/nu_lazygroupby/custom_value.rs new file mode 100644 index 0000000000..03327259bb --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/values/nu_lazygroupby/custom_value.rs @@ -0,0 +1,63 @@ +use crate::values::{CustomValueSupport, PolarsPluginCustomValue}; + +use super::NuLazyGroupBy; +use nu_protocol::{CustomValue, ShellError, Span, Value}; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct NuLazyGroupByCustomValue { + pub id: Uuid, + #[serde(skip)] + pub groupby: Option, +} + +#[typetag::serde] +impl CustomValue for NuLazyGroupByCustomValue { + fn clone_value(&self, span: nu_protocol::Span) -> Value { + Value::custom(Box::new(self.clone()), span) + } + + fn type_name(&self) -> String { + "NuLazyGroupByCustomValue".into() + } + + fn to_base_value(&self, span: Span) -> Result { + Ok(Value::string( + "NuLazyGroupByCustomValue: custom_value_to_base_value should've been called", + span, + )) + } + + fn as_mut_any(&mut self) -> &mut dyn std::any::Any { + self + } + + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn notify_plugin_on_drop(&self) -> bool { + true + } +} + +impl PolarsPluginCustomValue for NuLazyGroupByCustomValue { + type PolarsPluginObjectType = NuLazyGroupBy; + + fn custom_value_to_base_value( + &self, + plugin: &crate::PolarsPlugin, + _engine: &nu_plugin::EngineInterface, + ) -> Result { + NuLazyGroupBy::try_from_custom_value(plugin, self)?.base_value(Span::unknown()) + } + + fn id(&self) -> &Uuid { + &self.id + } + + fn internal(&self) -> &Option { + &self.groupby + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/values/nu_lazygroupby/mod.rs 
b/crates/nu_plugin_polars/src/dataframe/values/nu_lazygroupby/mod.rs new file mode 100644 index 0000000000..8540d13c6f --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/values/nu_lazygroupby/mod.rs @@ -0,0 +1,92 @@ +mod custom_value; + +use core::fmt; +use nu_protocol::{record, ShellError, Span, Value}; +use polars::prelude::LazyGroupBy; +use std::sync::Arc; +use uuid::Uuid; + +use crate::Cacheable; + +pub use self::custom_value::NuLazyGroupByCustomValue; + +use super::{CustomValueSupport, NuSchema, PolarsPluginObject, PolarsPluginType}; + +// Lazyframe wrapper for Nushell operations +// Polars LazyFrame is behind and Option to allow easy implementation of +// the Deserialize trait +#[derive(Clone)] +pub struct NuLazyGroupBy { + pub id: Uuid, + pub group_by: Arc, + pub schema: NuSchema, + pub from_eager: bool, +} + +impl fmt::Debug for NuLazyGroupBy { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "NuLazyGroupBy") + } +} + +impl NuLazyGroupBy { + pub fn new(group_by: LazyGroupBy, from_eager: bool, schema: NuSchema) -> Self { + NuLazyGroupBy { + id: Uuid::new_v4(), + group_by: Arc::new(group_by), + from_eager, + schema, + } + } + + pub fn to_polars(&self) -> LazyGroupBy { + (*self.group_by).clone() + } +} + +impl Cacheable for NuLazyGroupBy { + fn cache_id(&self) -> &Uuid { + &self.id + } + + fn to_cache_value(&self) -> Result { + Ok(PolarsPluginObject::NuLazyGroupBy(self.clone())) + } + + fn from_cache_value(cv: PolarsPluginObject) -> Result { + match cv { + PolarsPluginObject::NuLazyGroupBy(df) => Ok(df), + _ => Err(ShellError::GenericError { + error: "Cache value is not a group by".into(), + msg: "".into(), + span: None, + help: None, + inner: vec![], + }), + } + } +} + +impl CustomValueSupport for NuLazyGroupBy { + type CV = NuLazyGroupByCustomValue; + + fn custom_value(self) -> Self::CV { + NuLazyGroupByCustomValue { + id: self.id, + groupby: Some(self), + } + } + + fn get_type_static() -> PolarsPluginType { + 
PolarsPluginType::NuLazyGroupBy + } + + fn base_value(self, _span: nu_protocol::Span) -> Result { + Ok(Value::record( + record! { + "LazyGroupBy" => Value::string("apply aggregation to complete execution plan", Span::unknown()) + }, + Span::unknown(), + )) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/values/nu_schema.rs b/crates/nu_plugin_polars/src/dataframe/values/nu_schema.rs new file mode 100644 index 0000000000..f684b8bb38 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/values/nu_schema.rs @@ -0,0 +1,383 @@ +use std::sync::Arc; + +use nu_protocol::{ShellError, Span, Value}; +use polars::prelude::{DataType, Field, Schema, SchemaRef, TimeUnit}; + +#[derive(Debug, Clone)] +pub struct NuSchema { + pub schema: SchemaRef, +} + +impl NuSchema { + pub fn new(schema: Schema) -> Self { + Self { + schema: Arc::new(schema), + } + } +} + +impl TryFrom<&Value> for NuSchema { + type Error = ShellError; + fn try_from(value: &Value) -> Result { + let schema = value_to_schema(value, Span::unknown())?; + Ok(Self::new(schema)) + } +} + +impl From for Value { + fn from(schema: NuSchema) -> Self { + fields_to_value(schema.schema.iter_fields(), Span::unknown()) + } +} + +impl From for SchemaRef { + fn from(val: NuSchema) -> Self { + Arc::clone(&val.schema) + } +} + +impl From for NuSchema { + fn from(val: SchemaRef) -> Self { + Self { schema: val } + } +} + +fn fields_to_value(fields: impl Iterator, span: Span) -> Value { + let record = fields + .map(|field| { + let col = field.name().to_string(); + let val = dtype_to_value(field.data_type(), span); + (col, val) + }) + .collect(); + + Value::record(record, Span::unknown()) +} + +fn dtype_to_value(dtype: &DataType, span: Span) -> Value { + match dtype { + DataType::Struct(fields) => fields_to_value(fields.iter().cloned(), span), + _ => Value::string(dtype.to_string().replace('[', "<").replace(']', ">"), span), + } +} + +fn value_to_schema(value: &Value, span: Span) -> Result { + let fields = 
value_to_fields(value, span)?; + let schema = Schema::from_iter(fields); + Ok(schema) +} + +fn value_to_fields(value: &Value, span: Span) -> Result, ShellError> { + let fields = value + .as_record()? + .into_iter() + .map(|(col, val)| match val { + Value::Record { .. } => { + let fields = value_to_fields(val, span)?; + let dtype = DataType::Struct(fields); + Ok(Field::new(col, dtype)) + } + _ => { + let dtype = str_to_dtype(&val.coerce_string()?, span)?; + Ok(Field::new(col, dtype)) + } + }) + .collect::, ShellError>>()?; + Ok(fields) +} + +pub fn str_to_dtype(dtype: &str, span: Span) -> Result { + match dtype { + "bool" => Ok(DataType::Boolean), + "u8" => Ok(DataType::UInt8), + "u16" => Ok(DataType::UInt16), + "u32" => Ok(DataType::UInt32), + "u64" => Ok(DataType::UInt64), + "i8" => Ok(DataType::Int8), + "i16" => Ok(DataType::Int16), + "i32" => Ok(DataType::Int32), + "i64" => Ok(DataType::Int64), + "f32" => Ok(DataType::Float32), + "f64" => Ok(DataType::Float64), + "str" => Ok(DataType::String), + "binary" => Ok(DataType::Binary), + "date" => Ok(DataType::Date), + "time" => Ok(DataType::Time), + "null" => Ok(DataType::Null), + "unknown" => Ok(DataType::Unknown), + "object" => Ok(DataType::Object("unknown", None)), + _ if dtype.starts_with("list") => { + let dtype = dtype + .trim_start_matches("list") + .trim_start_matches('<') + .trim_end_matches('>') + .trim(); + let dtype = str_to_dtype(dtype, span)?; + Ok(DataType::List(Box::new(dtype))) + } + _ if dtype.starts_with("datetime") => { + let dtype = dtype + .trim_start_matches("datetime") + .trim_start_matches('<') + .trim_end_matches('>'); + let mut split = dtype.split(','); + let next = split + .next() + .ok_or_else(|| ShellError::GenericError { + error: "Invalid polars data type".into(), + msg: "Missing time unit".into(), + span: Some(span), + help: None, + inner: vec![], + })? 
+ .trim(); + let time_unit = str_to_time_unit(next, span)?; + let next = split + .next() + .ok_or_else(|| ShellError::GenericError { + error: "Invalid polars data type".into(), + msg: "Missing time zone".into(), + span: Some(span), + help: None, + inner: vec![], + })? + .trim(); + let timezone = if "*" == next { + None + } else { + Some(next.to_string()) + }; + Ok(DataType::Datetime(time_unit, timezone)) + } + _ if dtype.starts_with("duration") => { + let inner = dtype.trim_start_matches("duration<").trim_end_matches('>'); + let next = inner + .split(',') + .next() + .ok_or_else(|| ShellError::GenericError { + error: "Invalid polars data type".into(), + msg: "Missing time unit".into(), + span: Some(span), + help: None, + inner: vec![], + })? + .trim(); + let time_unit = str_to_time_unit(next, span)?; + Ok(DataType::Duration(time_unit)) + } + _ => Err(ShellError::GenericError { + error: "Invalid polars data type".into(), + msg: format!("Unknown type: {dtype}"), + span: Some(span), + help: None, + inner: vec![], + }), + } +} + +fn str_to_time_unit(ts_string: &str, span: Span) -> Result { + match ts_string { + "ms" => Ok(TimeUnit::Milliseconds), + "us" | "μs" => Ok(TimeUnit::Microseconds), + "ns" => Ok(TimeUnit::Nanoseconds), + _ => Err(ShellError::GenericError { + error: "Invalid polars data type".into(), + msg: "Invalid time unit".into(), + span: Some(span), + help: None, + inner: vec![], + }), + } +} + +#[cfg(test)] +mod test { + + use nu_protocol::record; + + use super::*; + + #[test] + fn test_value_to_schema() { + let address = record! { + "street" => Value::test_string("str"), + "city" => Value::test_string("str"), + }; + + let value = Value::test_record(record! 
{ + "name" => Value::test_string("str"), + "age" => Value::test_string("i32"), + "address" => Value::test_record(address) + }); + + let schema = value_to_schema(&value, Span::unknown()).unwrap(); + let expected = Schema::from_iter(vec![ + Field::new("name", DataType::String), + Field::new("age", DataType::Int32), + Field::new( + "address", + DataType::Struct(vec![ + Field::new("street", DataType::String), + Field::new("city", DataType::String), + ]), + ), + ]); + assert_eq!(schema, expected); + } + + #[test] + fn test_dtype_str_to_schema_simple_types() { + let dtype = "bool"; + let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); + let expected = DataType::Boolean; + assert_eq!(schema, expected); + + let dtype = "u8"; + let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); + let expected = DataType::UInt8; + assert_eq!(schema, expected); + + let dtype = "u16"; + let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); + let expected = DataType::UInt16; + assert_eq!(schema, expected); + + let dtype = "u32"; + let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); + let expected = DataType::UInt32; + assert_eq!(schema, expected); + + let dtype = "u64"; + let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); + let expected = DataType::UInt64; + assert_eq!(schema, expected); + + let dtype = "i8"; + let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); + let expected = DataType::Int8; + assert_eq!(schema, expected); + + let dtype = "i16"; + let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); + let expected = DataType::Int16; + assert_eq!(schema, expected); + + let dtype = "i32"; + let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); + let expected = DataType::Int32; + assert_eq!(schema, expected); + + let dtype = "i64"; + let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); + let expected = DataType::Int64; + assert_eq!(schema, expected); + + let dtype = "str"; + let schema = str_to_dtype(dtype, 
Span::unknown()).unwrap(); + let expected = DataType::String; + assert_eq!(schema, expected); + + let dtype = "binary"; + let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); + let expected = DataType::Binary; + assert_eq!(schema, expected); + + let dtype = "date"; + let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); + let expected = DataType::Date; + assert_eq!(schema, expected); + + let dtype = "time"; + let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); + let expected = DataType::Time; + assert_eq!(schema, expected); + + let dtype = "null"; + let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); + let expected = DataType::Null; + assert_eq!(schema, expected); + + let dtype = "unknown"; + let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); + let expected = DataType::Unknown; + assert_eq!(schema, expected); + + let dtype = "object"; + let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); + let expected = DataType::Object("unknown", None); + assert_eq!(schema, expected); + } + + #[test] + fn test_dtype_str_schema_datetime() { + let dtype = "datetime"; + let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); + let expected = DataType::Datetime(TimeUnit::Milliseconds, None); + assert_eq!(schema, expected); + + let dtype = "datetime"; + let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); + let expected = DataType::Datetime(TimeUnit::Microseconds, None); + assert_eq!(schema, expected); + + let dtype = "datetime<μs, *>"; + let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); + let expected = DataType::Datetime(TimeUnit::Microseconds, None); + assert_eq!(schema, expected); + + let dtype = "datetime"; + let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); + let expected = DataType::Datetime(TimeUnit::Nanoseconds, None); + assert_eq!(schema, expected); + + let dtype = "datetime"; + let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); + let expected = DataType::Datetime(TimeUnit::Milliseconds, 
Some("UTC".into())); + assert_eq!(schema, expected); + + let dtype = "invalid"; + let schema = str_to_dtype(dtype, Span::unknown()); + assert!(schema.is_err()) + } + + #[test] + fn test_dtype_str_schema_duration() { + let dtype = "duration"; + let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); + let expected = DataType::Duration(TimeUnit::Milliseconds); + assert_eq!(schema, expected); + + let dtype = "duration"; + let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); + let expected = DataType::Duration(TimeUnit::Microseconds); + assert_eq!(schema, expected); + + let dtype = "duration<μs>"; + let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); + let expected = DataType::Duration(TimeUnit::Microseconds); + assert_eq!(schema, expected); + + let dtype = "duration"; + let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); + let expected = DataType::Duration(TimeUnit::Nanoseconds); + assert_eq!(schema, expected); + } + + #[test] + fn test_dtype_str_to_schema_list_types() { + let dtype = "list"; + let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); + let expected = DataType::List(Box::new(DataType::Int32)); + assert_eq!(schema, expected); + + let dtype = "list>"; + let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); + let expected = DataType::List(Box::new(DataType::Duration(TimeUnit::Milliseconds))); + assert_eq!(schema, expected); + + let dtype = "list>"; + let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); + let expected = DataType::List(Box::new(DataType::Datetime(TimeUnit::Milliseconds, None))); + assert_eq!(schema, expected); + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/values/nu_when/custom_value.rs b/crates/nu_plugin_polars/src/dataframe/values/nu_when/custom_value.rs new file mode 100644 index 0000000000..5d64bbf011 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/values/nu_when/custom_value.rs @@ -0,0 +1,65 @@ +use crate::values::{CustomValueSupport, PolarsPluginCustomValue}; + +use 
super::NuWhen; +use nu_protocol::{CustomValue, ShellError, Span, Value}; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct NuWhenCustomValue { + pub id: uuid::Uuid, + #[serde(skip)] + pub when: Option, +} + +// CustomValue implementation for NuWhen +#[typetag::serde] +impl CustomValue for NuWhenCustomValue { + fn clone_value(&self, span: nu_protocol::Span) -> Value { + Value::custom(Box::new(self.clone()), span) + } + + fn type_name(&self) -> String { + "NuWhenCustomValue".into() + } + + fn to_base_value(&self, span: Span) -> Result { + Ok(Value::string( + "NuWhenCustomValue: custom_value_to_base_value should've been called", + span, + )) + } + + fn as_mut_any(&mut self) -> &mut dyn std::any::Any { + self + } + + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn notify_plugin_on_drop(&self) -> bool { + true + } +} + +impl PolarsPluginCustomValue for NuWhenCustomValue { + type PolarsPluginObjectType = NuWhen; + + fn custom_value_to_base_value( + &self, + plugin: &crate::PolarsPlugin, + _engine: &nu_plugin::EngineInterface, + ) -> Result { + let when = NuWhen::try_from_custom_value(plugin, self)?; + when.base_value(Span::unknown()) + } + + fn id(&self) -> &Uuid { + &self.id + } + + fn internal(&self) -> &Option { + &self.when + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/values/nu_when/mod.rs b/crates/nu_plugin_polars/src/dataframe/values/nu_when/mod.rs new file mode 100644 index 0000000000..89ee748454 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/values/nu_when/mod.rs @@ -0,0 +1,128 @@ +mod custom_value; + +use core::fmt; +use nu_protocol::{ShellError, Span, Value}; +use polars::prelude::{ChainedThen, Then}; +use serde::{Serialize, Serializer}; +use uuid::Uuid; + +use crate::Cacheable; + +pub use self::custom_value::NuWhenCustomValue; + +use super::{CustomValueSupport, PolarsPluginObject, PolarsPluginType}; + +#[derive(Debug, Clone)] +pub struct NuWhen { + pub id: 
Uuid, + pub when_type: NuWhenType, +} + +#[derive(Clone)] +pub enum NuWhenType { + Then(Box), + ChainedThen(ChainedThen), +} + +// Mocked serialization of the LazyFrame object +impl Serialize for NuWhen { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_none() + } +} + +impl fmt::Debug for NuWhenType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "NuWhen") + } +} + +impl From for NuWhenType { + fn from(then: Then) -> Self { + NuWhenType::Then(Box::new(then)) + } +} + +impl From for NuWhenType { + fn from(chained_when: ChainedThen) -> Self { + NuWhenType::ChainedThen(chained_when) + } +} + +impl From for NuWhen { + fn from(when_type: NuWhenType) -> Self { + Self::new(when_type) + } +} + +impl From for NuWhen { + fn from(then: Then) -> Self { + Self::new(then.into()) + } +} + +impl From for NuWhen { + fn from(chained_then: ChainedThen) -> Self { + Self::new(chained_then.into()) + } +} + +impl NuWhen { + pub fn new(when_type: NuWhenType) -> Self { + Self { + id: Uuid::new_v4(), + when_type, + } + } +} + +impl Cacheable for NuWhen { + fn cache_id(&self) -> &Uuid { + &self.id + } + + fn to_cache_value(&self) -> Result { + Ok(PolarsPluginObject::NuWhen(self.clone())) + } + + fn from_cache_value(cv: PolarsPluginObject) -> Result { + match cv { + PolarsPluginObject::NuWhen(when) => Ok(when), + _ => Err(ShellError::GenericError { + error: "Cache value is not a dataframe".into(), + msg: "".into(), + span: None, + help: None, + inner: vec![], + }), + } + } +} + +impl CustomValueSupport for NuWhen { + type CV = NuWhenCustomValue; + + fn custom_value(self) -> Self::CV { + NuWhenCustomValue { + id: self.id, + when: Some(self), + } + } + + fn get_type_static() -> PolarsPluginType { + PolarsPluginType::NuWhen + } + + fn base_value(self, _span: nu_protocol::Span) -> Result { + let val: String = match self.when_type { + NuWhenType::Then(_) => "whenthen".into(), + NuWhenType::ChainedThen(_) => 
"whenthenthen".into(), + }; + + let value = Value::string(val, Span::unknown()); + Ok(value) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/values/utils.rs b/crates/nu_plugin_polars/src/dataframe/values/utils.rs new file mode 100644 index 0000000000..17e641cadc --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/values/utils.rs @@ -0,0 +1,90 @@ +use nu_protocol::{span as span_join, ShellError, Span, Spanned, Value}; + +// Default value used when selecting rows from dataframe +pub const DEFAULT_ROWS: usize = 5; + +// Converts a Vec to a Vec> with a Span marking the whole +// location of the columns for error referencing +// todo - fix +#[allow(dead_code)] +pub(crate) fn convert_columns( + columns: Vec, + span: Span, +) -> Result<(Vec>, Span), ShellError> { + // First column span + let mut col_span = columns + .first() + .ok_or_else(|| ShellError::GenericError { + error: "Empty column list".into(), + msg: "Empty list found for command".into(), + span: Some(span), + help: None, + inner: vec![], + }) + .map(|v| v.span())?; + + let res = columns + .into_iter() + .map(|value| { + let span = value.span(); + match value { + Value::String { val, .. 
} => { + col_span = span_join(&[col_span, span]); + Ok(Spanned { item: val, span }) + } + _ => Err(ShellError::GenericError { + error: "Incorrect column format".into(), + msg: "Only string as column name".into(), + span: Some(span), + help: None, + inner: vec![], + }), + } + }) + .collect::>, _>>()?; + + Ok((res, col_span)) +} + +// Converts a Vec to a Vec with a Span marking the whole +// location of the columns for error referencing +// todo - fix +#[allow(dead_code)] +pub(crate) fn convert_columns_string( + columns: Vec, + span: Span, +) -> Result<(Vec, Span), ShellError> { + // First column span + let mut col_span = columns + .first() + .ok_or_else(|| ShellError::GenericError { + error: "Empty column list".into(), + msg: "Empty list found for command".into(), + span: Some(span), + help: None, + inner: vec![], + }) + .map(|v| v.span())?; + + let res = columns + .into_iter() + .map(|value| { + let span = value.span(); + match value { + Value::String { val, .. } => { + col_span = span_join(&[col_span, span]); + Ok(val) + } + _ => Err(ShellError::GenericError { + error: "Incorrect column format".into(), + msg: "Only string as column name".into(), + span: Some(span), + help: None, + inner: vec![], + }), + } + }) + .collect::, _>>()?; + + Ok((res, col_span)) +} diff --git a/crates/nu_plugin_polars/src/lib.rs b/crates/nu_plugin_polars/src/lib.rs new file mode 100644 index 0000000000..3b8679b606 --- /dev/null +++ b/crates/nu_plugin_polars/src/lib.rs @@ -0,0 +1,207 @@ +use std::cmp::Ordering; + +pub use cache::{Cache, Cacheable}; +use dataframe::{stub::PolarsCmd, values::CustomValueType}; +use nu_plugin::{EngineInterface, Plugin, PluginCommand}; + +mod cache; +pub mod dataframe; +pub use dataframe::*; +use nu_protocol::{ast::Operator, CustomValue, LabeledError, Spanned, Value}; + +use crate::{ + eager::eager_commands, expressions::expr_commands, lazy::lazy_commands, + series::series_commands, values::PolarsPluginCustomValue, +}; + +#[macro_export] +macro_rules! 
plugin_debug { + ($($arg:tt)*) => {{ + if std::env::var("POLARS_PLUGIN_DEBUG") + .ok() + .filter(|x| x == "1" || x == "true") + .is_some() { + eprintln!($($arg)*); + } + }}; +} + +#[derive(Default)] +pub struct PolarsPlugin { + pub(crate) cache: Cache, + /// For testing purposes only + pub(crate) disable_cache_drop: bool, +} + +impl Plugin for PolarsPlugin { + fn commands(&self) -> Vec>> { + let mut commands: Vec>> = vec![Box::new(PolarsCmd)]; + commands.append(&mut eager_commands()); + commands.append(&mut lazy_commands()); + commands.append(&mut expr_commands()); + commands.append(&mut series_commands()); + commands + } + + fn custom_value_dropped( + &self, + engine: &EngineInterface, + custom_value: Box, + ) -> Result<(), LabeledError> { + if !self.disable_cache_drop { + let id = CustomValueType::try_from_custom_value(custom_value)?.id(); + let _ = self.cache.remove(Some(engine), &id); + } + Ok(()) + } + + fn custom_value_to_base_value( + &self, + engine: &EngineInterface, + custom_value: Spanned>, + ) -> Result { + let result = match CustomValueType::try_from_custom_value(custom_value.item)? { + CustomValueType::NuDataFrame(cv) => cv.custom_value_to_base_value(self, engine), + CustomValueType::NuLazyFrame(cv) => cv.custom_value_to_base_value(self, engine), + CustomValueType::NuExpression(cv) => cv.custom_value_to_base_value(self, engine), + CustomValueType::NuLazyGroupBy(cv) => cv.custom_value_to_base_value(self, engine), + CustomValueType::NuWhen(cv) => cv.custom_value_to_base_value(self, engine), + }; + Ok(result?) + } + + fn custom_value_operation( + &self, + engine: &EngineInterface, + left: Spanned>, + operator: Spanned, + right: Value, + ) -> Result { + let result = match CustomValueType::try_from_custom_value(left.item)? 
{ + CustomValueType::NuDataFrame(cv) => { + cv.custom_value_operation(self, engine, left.span, operator, right) + } + CustomValueType::NuLazyFrame(cv) => { + cv.custom_value_operation(self, engine, left.span, operator, right) + } + CustomValueType::NuExpression(cv) => { + cv.custom_value_operation(self, engine, left.span, operator, right) + } + CustomValueType::NuLazyGroupBy(cv) => { + cv.custom_value_operation(self, engine, left.span, operator, right) + } + CustomValueType::NuWhen(cv) => { + cv.custom_value_operation(self, engine, left.span, operator, right) + } + }; + Ok(result?) + } + + fn custom_value_follow_path_int( + &self, + engine: &EngineInterface, + custom_value: Spanned>, + index: Spanned, + ) -> Result { + let result = match CustomValueType::try_from_custom_value(custom_value.item)? { + CustomValueType::NuDataFrame(cv) => { + cv.custom_value_follow_path_int(self, engine, custom_value.span, index) + } + CustomValueType::NuLazyFrame(cv) => { + cv.custom_value_follow_path_int(self, engine, custom_value.span, index) + } + CustomValueType::NuExpression(cv) => { + cv.custom_value_follow_path_int(self, engine, custom_value.span, index) + } + CustomValueType::NuLazyGroupBy(cv) => { + cv.custom_value_follow_path_int(self, engine, custom_value.span, index) + } + CustomValueType::NuWhen(cv) => { + cv.custom_value_follow_path_int(self, engine, custom_value.span, index) + } + }; + Ok(result?) + } + + fn custom_value_follow_path_string( + &self, + engine: &EngineInterface, + custom_value: Spanned>, + column_name: Spanned, + ) -> Result { + let result = match CustomValueType::try_from_custom_value(custom_value.item)? 
{ + CustomValueType::NuDataFrame(cv) => { + cv.custom_value_follow_path_string(self, engine, custom_value.span, column_name) + } + CustomValueType::NuLazyFrame(cv) => { + cv.custom_value_follow_path_string(self, engine, custom_value.span, column_name) + } + CustomValueType::NuExpression(cv) => { + cv.custom_value_follow_path_string(self, engine, custom_value.span, column_name) + } + CustomValueType::NuLazyGroupBy(cv) => { + cv.custom_value_follow_path_string(self, engine, custom_value.span, column_name) + } + CustomValueType::NuWhen(cv) => { + cv.custom_value_follow_path_string(self, engine, custom_value.span, column_name) + } + }; + Ok(result?) + } + + fn custom_value_partial_cmp( + &self, + engine: &EngineInterface, + custom_value: Box, + other_value: Value, + ) -> Result, LabeledError> { + let result = match CustomValueType::try_from_custom_value(custom_value)? { + CustomValueType::NuDataFrame(cv) => { + cv.custom_value_partial_cmp(self, engine, other_value) + } + CustomValueType::NuLazyFrame(cv) => { + cv.custom_value_partial_cmp(self, engine, other_value) + } + CustomValueType::NuExpression(cv) => { + cv.custom_value_partial_cmp(self, engine, other_value) + } + CustomValueType::NuLazyGroupBy(cv) => { + cv.custom_value_partial_cmp(self, engine, other_value) + } + CustomValueType::NuWhen(cv) => cv.custom_value_partial_cmp(self, engine, other_value), + }; + Ok(result?) 
+ } +} + +#[cfg(test)] +pub mod test { + use super::*; + use crate::values::PolarsPluginObject; + use nu_command::IntoDatetime; + use nu_plugin_test_support::PluginTest; + use nu_protocol::ShellError; + + pub fn test_polars_plugin_command(command: &impl PluginCommand) -> Result<(), ShellError> { + let mut plugin = PolarsPlugin::default(); + plugin.disable_cache_drop = true; + let examples = command.examples(); + + // we need to cache values in the examples + for example in &examples { + if let Some(ref result) = example.result { + // if it's a polars plugin object, try to cache it + if let Ok(obj) = PolarsPluginObject::try_from_value(&plugin, result) { + let id = obj.id(); + plugin.cache.insert(None, id, obj).unwrap(); + } + } + } + + PluginTest::new("polars", plugin.into())? + .add_decl(Box::new(IntoDatetime))? + .test_examples(&examples)?; + + Ok(()) + } +} diff --git a/crates/nu_plugin_polars/src/main.rs b/crates/nu_plugin_polars/src/main.rs new file mode 100644 index 0000000000..e060d7cd6d --- /dev/null +++ b/crates/nu_plugin_polars/src/main.rs @@ -0,0 +1,6 @@ +use nu_plugin::{serve_plugin, MsgPackSerializer}; +use nu_plugin_polars::PolarsPlugin; + +fn main() { + serve_plugin(&PolarsPlugin::default(), MsgPackSerializer {}) +}