diff --git a/Cargo.lock b/Cargo.lock index f431a9635c..16942f9c40 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -27,6 +27,17 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "ahash" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" +dependencies = [ + "getrandom 0.2.3", + "once_cell", + "version_check 0.9.3", +] + [[package]] name = "aho-corasick" version = "0.7.18" @@ -36,6 +47,21 @@ dependencies = [ "memchr", ] +[[package]] +name = "alloc-no-stdlib" +version = "2.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35ef4730490ad1c4eae5c4325b2a95f521d023e5c885853ff7aca0a6a1631db3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "697ed7edc0f1711de49ce108c541623a0af97c6c60b2f6e2b65229847ac843c2" +dependencies = [ + "alloc-no-stdlib", +] + [[package]] name = "ansi-cut" version = "0.1.1" @@ -65,12 +91,58 @@ dependencies = [ "winapi", ] +[[package]] +name = "anyhow" +version = "1.0.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38d9ff5d688f1c13395289f67db01d4826b46dd694e7580accdc3e8430f2d98e" + +[[package]] +name = "arrayref" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544" + [[package]] name = "arrayvec" version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" +[[package]] +name = "arrow-format" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f7da2d9660bfaebbdb0a44a33b3bd1dcb5a952fafa02c0dfc6a51ea471fef2a" 
+dependencies = [ + "flatbuffers", +] + +[[package]] +name = "arrow2" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d873e2775c3d87a4e8d77aa544cbd43f34a0779d5164c59e7c6a1dd0678eb395" +dependencies = [ + "ahash", + "arrow-format", + "base64", + "chrono", + "csv", + "futures", + "hash_hasher", + "indexmap", + "lexical-core", + "multiversion", + "num-traits", + "parquet2", + "serde", + "serde_json", + "simdutf8", + "streaming-iterator", + "strength_reduce", +] + [[package]] name = "as-slice" version = "0.1.5" @@ -97,6 +169,38 @@ dependencies = [ "wait-timeout", ] +[[package]] +name = "async-stream" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "171374e7e3b2504e0e5236e3b59260560f9fe94bfe9ac39ba5e4e929c5590625" +dependencies = [ + "async-stream-impl", + "futures-core", +] + +[[package]] +name = "async-stream-impl" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "648ed8c8d2ce5409ccd57453d9d1b214b342a0d69376a6feda1fd6cae3299308" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "async-trait" +version = "0.1.51" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44318e776df68115a881de9a8fd1b9e53368d7a4a5ce4cc48517da3393233a5e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "atty" version = "0.2.14" @@ -129,6 +233,12 @@ dependencies = [ "rustc-demangle", ] +[[package]] +name = "base64" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" + [[package]] name = "bitflags" version = "1.3.2" @@ -144,6 +254,47 @@ dependencies = [ "typenum", ] +[[package]] +name = "bitpacking" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a8c7d2ac73c167c06af4a5f37e6e59d84148d57ccbe4480b76f0273eefea82d7" +dependencies = [ + "crunchy", +] + +[[package]] +name = "blake2b_simd" +version = "0.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "afa748e348ad3be8263be728124b24a24f268266f6f5d58af9d75f6a40b5c587" +dependencies = [ + "arrayref", + "arrayvec", + "constant_time_eq", +] + +[[package]] +name = "brotli" +version = "3.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71cb90ade945043d3d53597b2fc359bb063db8ade2bcffe7997351d0756e9d50" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ad2d4653bf5ca36ae797b1f4bb4dbddb60ce49ca4aed8a2ce4829f60425b80" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + [[package]] name = "bstr" version = "0.2.17" @@ -212,6 +363,9 @@ name = "cc" version = "1.0.72" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22a9137b95ea06864e018375b72adfb7db6e6f68cfc8df5a04d00288050485ee" +dependencies = [ + "jobserver", +] [[package]] name = "cfg-if" @@ -288,6 +442,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "constant_time_eq" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" + [[package]] name = "core-foundation-sys" version = "0.8.3" @@ -347,22 +507,6 @@ dependencies = [ "lazy_static", ] -[[package]] -name = "crossterm" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "486d44227f71a1ef39554c0dc47e44b9f4139927c75043312690c3f476d1d788" -dependencies = [ - "bitflags", - "crossterm_winapi 0.8.0", - "libc", - "mio", - "parking_lot", - "signal-hook", - "signal-hook-mio", - "winapi", -] - [[package]] name = "crossterm" version = "0.22.1" 
@@ -370,7 +514,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c85525306c4291d1b73ce93c8acf9c339f9b213aef6c1d85c3830cbf1c16325c" dependencies = [ "bitflags", - "crossterm_winapi 0.9.0", + "crossterm_winapi", "libc", "mio", "parking_lot", @@ -380,15 +524,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "crossterm_winapi" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a6966607622438301997d3dac0d2f6e9a90c68bb6bc1785ea98456ab93c0507" -dependencies = [ - "winapi", -] - [[package]] name = "crossterm_winapi" version = "0.9.0" @@ -398,6 +533,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + [[package]] name = "csv" version = "1.1.6" @@ -464,6 +605,26 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" +[[package]] +name = "dirs" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fd78930633bd1c6e35c4b42b1df7b0cbc6bc191146e512bb3bedf243fcc3901" +dependencies = [ + "libc", + "redox_users 0.3.5", + "winapi", +] + +[[package]] +name = "dirs" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30baa043103c9d0c2a57cf537cc2f35623889dc0d405e6c3cccfadbc81c71309" +dependencies = [ + "dirs-sys", +] + [[package]] name = "dirs-next" version = "2.0.0" @@ -474,6 +635,17 @@ dependencies = [ "dirs-sys-next", ] +[[package]] +name = "dirs-sys" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03d86534ed367a67548dc68113a0f5db55432fdfbb6e6f9d77704397d95d5780" +dependencies = [ + "libc", + "redox_users 0.4.0", + "winapi", +] + [[package]] name = "dirs-sys-next" version = 
"0.1.2" @@ -481,7 +653,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" dependencies = [ "libc", - "redox_users", + "redox_users 0.4.0", "winapi", ] @@ -538,7 +710,7 @@ name = "engine-q" version = "0.1.0" dependencies = [ "assert_cmd", - "crossterm 0.22.1", + "crossterm", "ctrlc", "dialoguer", "miette", @@ -558,6 +730,32 @@ dependencies = [ "tempfile", ] +[[package]] +name = "erased-serde" +version = "0.3.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3de9ad4541d99dc22b59134e7ff8dc3d6c988c89ecd7324bf10a8362b07a2afa" +dependencies = [ + "serde", +] + +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + +[[package]] +name = "flatbuffers" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef4c5738bcd7fad10315029c50026f83c9da5e4a21f8ed66826f43e0e2bde5f6" +dependencies = [ + "bitflags", + "smallvec", + "thiserror", +] + [[package]] name = "flate2" version = "1.0.22" @@ -586,6 +784,100 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "futures" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a12aa0eb539080d55c3f2d45a67c3b58b6b0773c1a3ca2dfec66d58c97fd66ca" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5da6ba8c3bb3c165d3c7319fc1cc8304facf1fb8db99c5de877183c08a273888" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.17" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "88d1c26957f23603395cd326b0ffe64124b818f4449552f960d815cfba83a53d" + +[[package]] +name = "futures-executor" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45025be030969d763025784f7f355043dc6bc74093e4ecc5000ca4dc50d8745c" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "522de2a0fe3e380f1bc577ba0474108faf3f6b18321dbf60b3b9c39a75073377" + +[[package]] +name = "futures-macro" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18e4a4b95cea4b4ccbcf1c5675ca7c4ee4e9e75eb79944d07defde18068f79bb" +dependencies = [ + "autocfg", + "proc-macro-hack", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36ea153c13024fe480590b3e3d4cad89a0cfacecc24577b68f86c6ced9c2bc11" + +[[package]] +name = "futures-task" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d3d00f4eddb73e498a54394f228cd55853bdf059259e8e7bc6e69d408892e99" + +[[package]] +name = "futures-util" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36568465210a3a6ee45e1f165136d68671471a501e632e9a98d96872222b5481" +dependencies = [ + "autocfg", + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "proc-macro-hack", + "proc-macro-nested", + "slab", +] + [[package]] name = "generic-array" version = "0.12.4" @@ -614,6 +906,17 @@ dependencies = [ "version_check 0.9.3", ] +[[package]] +name = "getrandom" +version = "0.1.16" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.9.0+wasi-snapshot-preview1", +] + [[package]] name = "getrandom" version = "0.2.3" @@ -622,7 +925,18 @@ checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" dependencies = [ "cfg-if", "libc", - "wasi", + "wasi 0.10.0+wasi-snapshot-preview1", +] + +[[package]] +name = "ghost" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a5bcf1bbeab73aa4cf2fde60a846858dc036163c7c33bec309f8d17de785479" +dependencies = [ + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -646,11 +960,21 @@ dependencies = [ "byteorder", ] +[[package]] +name = "hash_hasher" +version = "2.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74721d007512d0cb3338cd20f0654ac913920061a4c4d0d8708edb3f2a698c0c" + [[package]] name = "hashbrown" version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" +dependencies = [ + "ahash", + "rayon", +] [[package]] name = "heapless" @@ -707,6 +1031,26 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "integer-encoding" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90c11140ffea82edce8dcd74137ce9324ec24b3cf0175fc9d7e29164da9915b8" +dependencies = [ + "async-trait", + "futures-util", +] + +[[package]] +name = "inventory" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1367fed6750ff2a5bcb967a631528303bb85631f167a75eb1bf7762d57eb7678" +dependencies = [ + "ctor", + "ghost", +] + [[package]] name = "is_ci" version = "1.1.1" @@ -728,6 +1072,15 @@ version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" +[[package]] +name = "jobserver" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af25a77299a7f711a01975c35a6a424eb6862092cc2d6c72c4ed6cbc56dfc1fa" +dependencies = [ + "libc", +] + [[package]] name = "lazy_static" version = "1.4.0" @@ -735,10 +1088,83 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] -name = "libc" -version = "0.2.107" +name = "lexical" +version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbe5e23404da5b4f555ef85ebed98fb4083e55a00c317800bc2a50ede9f3d219" +checksum = "c34e981f88d060a67815388470172638f1af16b3a12e581cb75142f190161bf9" +dependencies = [ + "lexical-core", +] + +[[package]] +name = "lexical-core" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a3926d8f156019890be4abe5fd3785e0cff1001e06f59c597641fd513a5a284" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = "lexical-parse-float" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4d066d004fa762d9da995ed21aa8845bb9f6e4265f540d716fb4b315197bf0e" +dependencies = [ + "lexical-parse-integer", + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-parse-integer" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2c92badda8cc0fc4f3d3cc1c30aaefafb830510c8781ce4e8669881f3ed53ac" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-util" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ff669ccaae16ee33af90dc51125755efed17f1309626ba5c12052512b11e291" +dependencies = [ + 
"static_assertions", +] + +[[package]] +name = "lexical-write-float" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b5186948c7b297abaaa51560f2581dae625e5ce7dfc2d8fdc56345adb6dc576" +dependencies = [ + "lexical-util", + "lexical-write-integer", + "static_assertions", +] + +[[package]] +name = "lexical-write-integer" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ece956492e0e40fd95ef8658a34d53a3b8c2015762fdcaaff2167b28de1f56ef" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "libc" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8521a1b57e76b1ec69af7599e75e38e7b7fad6610f037db8c79b127201b5d119" [[package]] name = "linked-hash-map" @@ -770,12 +1196,32 @@ dependencies = [ [[package]] name = "lscolors" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd0aa49b10c47f9a4391a99198b5e65c74f9ca771c0dcc856bb75a3f46c8627d" +checksum = "9dd58d8727f3035fa6d5272f16b519741fd4875936b99d8a7cde21291b7d9174" dependencies = [ "ansi_term", - "crossterm 0.21.0", + "crossterm", +] + +[[package]] +name = "lz4" +version = "1.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aac20ed6991e01bf6a2e68cc73df2b389707403662a8ba89f68511fb340f724c" +dependencies = [ + "libc", + "lz4-sys", +] + +[[package]] +name = "lz4-sys" +version = "1.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dca79aa95d8b3226213ad454d328369853be3a1382d89532a854f4d69640acae" +dependencies = [ + "cc", + "libc", ] [[package]] @@ -790,6 +1236,15 @@ version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" +[[package]] +name = "memmap2" +version = "0.5.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "4647a11b578fead29cdbb34d4adef8dd3dc35b876c9c6d5240d83f205abfe96e" +dependencies = [ + "libc", +] + [[package]] name = "memoffset" version = "0.6.4" @@ -864,11 +1319,31 @@ dependencies = [ [[package]] name = "miow" -version = "0.3.7" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9f1c5b025cda876f66ef43a113f91ebc9f4ccef34843000e0adf6ebbab84e21" +checksum = "8c9b9524d5b9d60d55bd3f6ca13180cfc06b5d7e54df308c8842d3f66c914cc4" dependencies = [ - "winapi", + "windows-sys", +] + +[[package]] +name = "multiversion" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "025c962a3dd3cc5e0e520aa9c612201d127dcdf28616974961a649dca64f5373" +dependencies = [ + "multiversion-macros", +] + +[[package]] +name = "multiversion-macros" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8a3e2bde382ebf960c1f3e79689fa5941625fe9bf694a1cb64af3e85faff3af" +dependencies = [ + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -964,6 +1439,7 @@ dependencies = [ "lscolors", "meval", "nu-ansi-term 0.39.0", + "nu-dataframe", "nu-engine", "nu-json", "nu-parser", @@ -971,6 +1447,7 @@ dependencies = [ "nu-protocol", "nu-table", "nu-term-grid", + "polars", "rand", "rayon", "regex", @@ -986,6 +1463,19 @@ dependencies = [ "unicode-segmentation", ] +[[package]] +name = "nu-dataframe" +version = "0.1.0" +dependencies = [ + "chrono", + "indexmap", + "nu-json", + "nu-protocol", + "num", + "polars", + "serde", +] + [[package]] name = "nu-engine" version = "0.1.0" @@ -1048,9 +1538,11 @@ dependencies = [ "chrono-humanize", "im", "miette", + "nu-json", "serde", "serde_json", "thiserror", + "typetag", ] [[package]] @@ -1082,6 +1574,40 @@ dependencies = [ "semver", ] +[[package]] +name = "num" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"43db66d1170d347f9a065114077f7dccb00c1b9478c89384490a3425279a4606" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26873667bbbb7c5182d4a37c1add32cdf09f841af72da53318fdb81543c15085" +dependencies = [ + "num-traits", +] + [[package]] name = "num-integer" version = "0.1.44" @@ -1092,6 +1618,29 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-iter" +version = "0.1.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2021c8337a54d21aca0d59a92577a029af9431cb59b909b03252b9c164fad59" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d41702bd167c2df5520b384281bc111a4b5efcf7fbc4c9c222c815b07e0a6a6a" +dependencies = [ + "autocfg", + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.14" @@ -1126,6 +1675,15 @@ version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "692fcb63b64b1758029e0a96ee63e049ce8c5948587f2f7208df04625e5f6b56" +[[package]] +name = "ordered-float" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3305af35278dd29f46fcdd139e0b1fbfae2153f0e5928b39b035542dd31e37b7" +dependencies = [ + "num-traits", +] + [[package]] name = "output_vt100" version = "0.1.2" @@ -1167,11 +1725,42 @@ dependencies = [ "cfg-if", "instant", "libc", - "redox_syscall", + "redox_syscall 0.2.10", 
"smallvec", "winapi", ] +[[package]] +name = "parquet-format-async-temp" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03abc2f9c83fe9ceec83f47c76cc071bfd56caba33794340330f35623ab1f544" +dependencies = [ + "async-trait", + "byteorder", + "futures", + "integer-encoding", + "ordered-float", +] + +[[package]] +name = "parquet2" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db82df54cdd88931d29b850190915b9069bb93fba8e1aefc0d59d8ca81603d6d" +dependencies = [ + "async-stream", + "bitpacking", + "brotli", + "flate2", + "futures", + "lz4", + "parquet-format-async-temp", + "snap", + "streaming-decompression", + "zstd", +] + [[package]] name = "parse-zoneinfo" version = "0.3.0" @@ -1235,6 +1824,103 @@ dependencies = [ "uncased", ] +[[package]] +name = "pin-project-lite" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d31d11c69a6b52a174b42bdc0c30e5e11670f90788b2c471c31c1d17d449443" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "polars" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c94a25d46e93b64eac7848c028a545dc08fa01e148e4942c5442b3843c3a598" +dependencies = [ + "polars-core", + "polars-io", + "polars-lazy", +] + +[[package]] +name = "polars-arrow" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cc4488d2f2d6b901bb6e5728e58966013a272cae48861070b676215a79b4a99" +dependencies = [ + "arrow2", + "num", + "thiserror", +] + +[[package]] +name = "polars-core" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6771524063d742a08163d96875ca5df71dff7113f27da58db5ec5fa164165bf6" +dependencies = [ + "ahash", + 
"anyhow", + "arrow2", + "chrono", + "hashbrown", + "itertools", + "lazy_static", + "num", + "num_cpus", + "polars-arrow", + "prettytable-rs", + "rayon", + "regex", + "serde", + "serde_json", + "thiserror", + "unsafe_unwrap", +] + +[[package]] +name = "polars-io" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11a5f5f51525043ee7befd49e586e6919345237826a5f17b53956f8242100957" +dependencies = [ + "ahash", + "anyhow", + "arrow2", + "csv-core", + "dirs 3.0.2", + "lazy_static", + "lexical", + "memchr", + "memmap2", + "num", + "num_cpus", + "polars-arrow", + "polars-core", + "rayon", + "regex", + "simdutf8", +] + +[[package]] +name = "polars-lazy" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da3ea647e2fa59d1bbbf90929c5d10ef6a9018aac256d1c6d0e8248211804b61" +dependencies = [ + "ahash", + "itertools", + "polars-arrow", + "polars-core", + "polars-io", + "rayon", +] + [[package]] name = "ppv-lite86" version = "0.2.15" @@ -1280,6 +1966,32 @@ dependencies = [ "output_vt100", ] +[[package]] +name = "prettytable-rs" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fd04b170004fa2daccf418a7f8253aaf033c27760b5f225889024cf66d7ac2e" +dependencies = [ + "atty", + "csv", + "encode_unicode", + "lazy_static", + "term", + "unicode-width", +] + +[[package]] +name = "proc-macro-hack" +version = "0.5.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" + +[[package]] +name = "proc-macro-nested" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc881b2c22681370c6a780e47af9840ef841837bc98118431d4e1868bd0c1086" + [[package]] name = "proc-macro2" version = "1.0.32" @@ -1342,7 +2054,7 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" dependencies = [ - "getrandom", + "getrandom 0.2.3", ] [[package]] @@ -1388,6 +2100,12 @@ dependencies = [ "num_cpus", ] +[[package]] +name = "redox_syscall" +version = "0.1.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce" + [[package]] name = "redox_syscall" version = "0.2.10" @@ -1397,14 +2115,25 @@ dependencies = [ "bitflags", ] +[[package]] +name = "redox_users" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de0737333e7a9502c789a36d7c7fa6092a49895d4faa31ca5df163857ded2e9d" +dependencies = [ + "getrandom 0.1.16", + "redox_syscall 0.1.57", + "rust-argon2", +] + [[package]] name = "redox_users" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "528532f3d801c87aec9def2add9ca802fe569e44a544afe633765267840abe64" dependencies = [ - "getrandom", - "redox_syscall", + "getrandom 0.2.3", + "redox_syscall 0.2.10", ] [[package]] @@ -1413,7 +2142,7 @@ version = "0.2.0" source = "git+https://github.com/nushell/reedline?branch=main#c11aef2d9b4eaf0c762f1349f641534597815295" dependencies = [ "chrono", - "crossterm 0.22.1", + "crossterm", "nu-ansi-term 0.39.0 (registry+https://github.com/rust-lang/crates.io-index)", "serde", "unicode-segmentation", @@ -1452,6 +2181,18 @@ dependencies = [ "winapi", ] +[[package]] +name = "rust-argon2" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b18820d944b33caa75a71378964ac46f58517c92b6ae5f762636247c09e78fb" +dependencies = [ + "base64", + "blake2b_simd", + "constant_time_eq", + "crossbeam-utils", +] + [[package]] name = "rustc-demangle" version = "0.1.21" @@ -1510,10 +2251,11 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.70" +version = "1.0.71" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "e277c495ac6cd1a01a58d0a0c574568b4d1ddf14f59965c6a58b8d96400b54f3" +checksum = "063bf466a64011ac24040a49009724ee60a57da1b437617ceb32e53ad61bfb19" dependencies = [ + "indexmap", "itoa", "ryu", "serde", @@ -1582,6 +2324,12 @@ dependencies = [ "libc", ] +[[package]] +name = "simdutf8" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c970da16e7c682fa90a261cf0724dee241c9f7831635ecc4e988ae8f3b505559" + [[package]] name = "siphasher" version = "0.3.7" @@ -1598,6 +2346,12 @@ dependencies = [ "typenum", ] +[[package]] +name = "slab" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9def91fd1e018fe007022791f865d0ccc9b3a0d5001e01aabb8b40e46000afb5" + [[package]] name = "smallvec" version = "1.7.0" @@ -1610,12 +2364,45 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f67ad224767faa3c7d8b6d91985b78e70a1324408abcb1cfcc2be4c06bc06043" +[[package]] +name = "snap" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45456094d1983e2ee2a18fdfebce3189fa451699d0502cb8e3b49dba5ba41451" + [[package]] name = "stable_deref_trait" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "streaming-decompression" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9bc687acd5dc742c4a7094f2927a8614a68e4743ef682e7a2f9f0f711656cc92" +dependencies = [ + "fallible-streaming-iterator", +] + +[[package]] +name = "streaming-iterator" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"303235c177994a476226b80d076bd333b7b560fb05bd242a10609d11b07f81f5" + +[[package]] +name = "strength_reduce" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3ff2f71c82567c565ba4b3009a9350a96a7269eaa4001ebedae926230bc2254" + [[package]] name = "strip-ansi-escapes" version = "0.1.1" @@ -1688,11 +2475,22 @@ dependencies = [ "cfg-if", "libc", "rand", - "redox_syscall", + "redox_syscall 0.2.10", "remove_dir_all", "winapi", ] +[[package]] +name = "term" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edd106a334b7657c10b7c540a0106114feadeb4dc314513e97df481d5d966f42" +dependencies = [ + "byteorder", + "dirs 1.0.5", + "winapi", +] + [[package]] name = "term_size" version = "0.3.2" @@ -1757,7 +2555,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255" dependencies = [ "libc", - "wasi", + "wasi 0.10.0+wasi-snapshot-preview1", "winapi", ] @@ -1795,6 +2593,30 @@ version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b63708a265f51345575b27fe43f9500ad611579e764c79edbc2037b1121959ec" +[[package]] +name = "typetag" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4080564c5b2241b5bff53ab610082234e0c57b0417f4bd10596f183001505b8a" +dependencies = [ + "erased-serde", + "inventory", + "once_cell", + "serde", + "typetag-impl", +] + +[[package]] +name = "typetag-impl" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e60147782cc30833c05fba3bab1d9b5771b2685a2557672ac96fa5d154099c0e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "ucd-trie" version = "0.1.3" @@ -1837,6 +2659,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" +[[package]] +name = "unsafe_unwrap" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1230ec65f13e0f9b28d789da20d2d419511893ea9dac2c1f4ef67b8b14e5da80" + [[package]] name = "utf8-width" version = "0.1.5" @@ -1891,6 +2719,12 @@ dependencies = [ "libc", ] +[[package]] +name = "wasi" +version = "0.9.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" + [[package]] name = "wasi" version = "0.10.0+wasi-snapshot-preview1" @@ -1919,6 +2753,49 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-sys" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82ca39602d5cbfa692c4b67e3bcbb2751477355141c1ed434c94da4186836ff6" +dependencies = [ + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_msvc" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52695a41e536859d5308cc613b4a022261a274390b25bd29dfff4bf08505f3c2" + +[[package]] +name = "windows_i686_gnu" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f54725ac23affef038fecb177de6c9bf065787c2f432f79e3c373da92f3e1d8a" + +[[package]] +name = "windows_i686_msvc" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d5158a43cc43623c0729d1ad6647e62fa384a3d135fd15108d37c683461f64" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"bc31f409f565611535130cfe7ee8e6655d3fa99c1c61013981e491921b5ce954" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f2b8c7cbd3bfdddd9ab98769f9746a7fad1bca236554cd032b78d768bc0e89f" + [[package]] name = "yaml-rust" version = "0.4.5" @@ -1945,3 +2822,32 @@ dependencies = [ "flate2", "thiserror", ] + +[[package]] +name = "zstd" +version = "0.9.0+zstd.1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07749a5dc2cb6b36661290245e350f15ec3bbb304e493db54a1d354480522ccd" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "4.1.1+zstd.1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c91c90f2c593b003603e5e0493c837088df4469da25aafff8bce42ba48caf079" +dependencies = [ + "libc", + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "1.6.1+zstd.1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "615120c7a2431d16cf1cf979e7fc31ba7a5b5e5707b29c8a99e5dbf8a8392a33" +dependencies = [ + "cc", + "libc", +] diff --git a/Cargo.toml b/Cargo.toml index 4ea6555a4a..12eb7e1ad0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,7 @@ members = [ "crates/nu-command", "crates/nu-protocol", "crates/nu-plugin", + "crates/nu-dataframe", "crates/nu_plugin_inc", ] @@ -37,7 +38,9 @@ ctrlc = "3.2.1" [features] plugin = ["nu-plugin", "nu-parser/plugin", "nu-command/plugin", "nu-protocol/plugin"] -default = ["plugin"] +custom = ["nu-command/custom", "nu-protocol/custom"] +dataframe = ["custom", "nu-command/dataframe"] +default = ["plugin", "custom"] [dev-dependencies] tempfile = "3.2.0" diff --git a/crates/nu-command/Cargo.toml b/crates/nu-command/Cargo.toml index 005d650825..73708e2171 100644 --- a/crates/nu-command/Cargo.toml +++ b/crates/nu-command/Cargo.toml @@ -13,8 +13,8 @@ nu-protocol = { path = "../nu-protocol" } nu-table = { path = "../nu-table" } 
nu-term-grid = { path = "../nu-term-grid" } nu-parser = { path = "../nu-parser" } +nu-dataframe = { path = "../nu-dataframe", optional = true } nu-ansi-term = { path = "../nu-ansi-term" } - trash = { version = "1.3.0", optional = true } unicode-segmentation = "1.8.0" @@ -45,6 +45,13 @@ itertools = "0.10.0" calamine = "0.18.0" rand = "0.8" +[dependencies.polars] +version = "0.17.0" +optional = true +features = ["default", "parquet", "json"] + [features] trash-support = ["trash"] plugin = ["nu-parser/plugin"] +custom = ["nu-protocol/custom"] +dataframe = ["custom", "nu-dataframe", "polars"] diff --git a/crates/nu-command/src/dataframe/mod.rs b/crates/nu-command/src/dataframe/mod.rs new file mode 100644 index 0000000000..b9cd1bbcfc --- /dev/null +++ b/crates/nu-command/src/dataframe/mod.rs @@ -0,0 +1,5 @@ +mod open; +mod to_df; + +pub use open::OpenDataFrame; +pub use to_df::ToDataFrame; diff --git a/crates/nu-command/src/dataframe/open.rs b/crates/nu-command/src/dataframe/open.rs new file mode 100644 index 0000000000..1c926f1e61 --- /dev/null +++ b/crates/nu-command/src/dataframe/open.rs @@ -0,0 +1,195 @@ +use std::{fs::File, path::PathBuf}; + +use nu_dataframe::NuDataFrame; +use nu_engine::CallExt; +use nu_protocol::{ + ast::Call, + engine::{Command, EngineState, Stack}, + Category, Example, PipelineData, ShellError, Signature, Spanned, SyntaxShape, +}; + +use polars::prelude::{CsvEncoding, CsvReader, JsonReader, ParquetReader, SerReader}; + +#[derive(Clone)] +pub struct OpenDataFrame; + +impl Command for OpenDataFrame { + fn name(&self) -> &str { + "open-df" + } + + fn usage(&self) -> &str { + "Opens csv, json or parquet file to create dataframe" + } + + fn signature(&self) -> Signature { + Signature::build("open-df") + .required( + "file", + SyntaxShape::Filepath, + "file path to load values from", + ) + .named( + "delimiter", + SyntaxShape::String, + "file delimiter character. 
CSV file", + Some('d'), + ) + .switch( + "no-header", + "Indicates if file doesn't have header. CSV file", + None, + ) + .named( + "infer-schema", + SyntaxShape::Number, + "Number of rows to infer the schema of the file. CSV file", + None, + ) + .named( + "skip-rows", + SyntaxShape::Number, + "Number of rows to skip from file. CSV file", + None, + ) + .named( + "columns", + SyntaxShape::List(Box::new(SyntaxShape::String)), + "Columns to be selected from csv file. CSV file", + None, + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Takes a file name and creates a dataframe", + example: "dataframe open test.csv", + result: None, + }] + } + + fn run( + &self, + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, + _input: PipelineData, + ) -> Result { + command(engine_state, stack, call) + } +} + +fn command( + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, +) -> Result { + let span = call.head; + let file: Spanned = call.req(engine_state, stack, 0)?; + + let df = match file.item.extension() { + Some(e) => match e.to_str() { + Some("csv") => from_csv(engine_state, stack, call), + Some("parquet") => from_parquet(engine_state, stack, call), + Some("json") => from_json(engine_state, stack, call), + _ => Err(ShellError::FileNotFoundCustom( + "Not a csv, parquet or json file".into(), + file.span, + )), + }, + None => Err(ShellError::FileNotFoundCustom( + "File without extension".into(), + file.span, + )), + }?; + + Ok(PipelineData::Value(NuDataFrame::dataframe_into_value( + df, span, + ))) +} + +fn from_parquet( + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, +) -> Result { + let file: Spanned = call.req(engine_state, stack, 0)?; + let r = File::open(&file.item).map_err(|e| ShellError::InternalError(e.to_string()))?; + let reader = ParquetReader::new(r); + + reader + .finish() + .map_err(|e| ShellError::InternalError(format!("{:?}", e))) +} + +fn 
from_json( + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, +) -> Result { + let file: Spanned = call.req(engine_state, stack, 0)?; + + let r = File::open(&file.item).map_err(|e| ShellError::InternalError(e.to_string()))?; + + let reader = JsonReader::new(r); + + reader + .finish() + .map_err(|e| ShellError::InternalError(e.to_string())) +} + +fn from_csv( + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, +) -> Result { + let file: Spanned = call.req(engine_state, stack, 0)?; + let delimiter: Option> = call.get_flag(engine_state, stack, "delimiter")?; + let no_header: bool = call.has_flag("no_header"); + let infer_schema: Option = call.get_flag(engine_state, stack, "infer_schema")?; + let skip_rows: Option = call.get_flag(engine_state, stack, "skip_rows")?; + let columns: Option> = call.get_flag(engine_state, stack, "columns")?; + + let csv_reader = CsvReader::from_path(&file.item) + .map_err(|e| ShellError::InternalError(e.to_string()))? + .with_encoding(CsvEncoding::LossyUtf8); + + let csv_reader = match delimiter { + None => csv_reader, + Some(d) => { + if d.item.len() != 1 { + return Err(ShellError::InternalError( + "Delimiter has to be one char".into(), + )); + } else { + let delimiter = match d.item.chars().next() { + Some(d) => d as u8, + None => unreachable!(), + }; + csv_reader.with_delimiter(delimiter) + } + } + }; + + let csv_reader = csv_reader.has_header(!no_header); + + let csv_reader = match infer_schema { + None => csv_reader, + Some(r) => csv_reader.infer_schema(Some(r)), + }; + + let csv_reader = match skip_rows { + None => csv_reader, + Some(r) => csv_reader.with_skip_rows(r), + }; + + let csv_reader = match columns { + None => csv_reader, + Some(columns) => csv_reader.with_columns(Some(columns)), + }; + + csv_reader + .finish() + .map_err(|e| ShellError::InternalError(e.to_string())) +} diff --git a/crates/nu-command/src/dataframe/to_df.rs b/crates/nu-command/src/dataframe/to_df.rs new file mode 100644 index 
0000000000..a9cd5034ed --- /dev/null +++ b/crates/nu-command/src/dataframe/to_df.rs @@ -0,0 +1,59 @@ +use nu_dataframe::NuDataFrame; +use nu_protocol::{ + ast::Call, + engine::{Command, EngineState, Stack}, + Category, Example, PipelineData, ShellError, Signature, +}; + +#[derive(Clone)] +pub struct ToDataFrame; + +impl Command for ToDataFrame { + fn name(&self) -> &str { + "to-df" + } + + fn usage(&self) -> &str { + "Converts a List, Table or Dictionary into a dataframe" + } + + fn signature(&self) -> Signature { + Signature::build("to-df").category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Takes a dictionary and creates a dataframe", + example: "[[a b];[1 2] [3 4]] | to-df", + result: None, + }, + Example { + description: "Takes a list of tables and creates a dataframe", + example: "[[1 2 a] [3 4 b] [5 6 c]] | to-df", + result: None, + }, + Example { + description: "Takes a list and creates a dataframe", + example: "[a b c] | to-df", + result: None, + }, + Example { + description: "Takes a list of booleans and creates a dataframe", + example: "[$true $true $false] | to-df", + result: None, + }, + ] + } + + fn run( + &self, + _engine_state: &EngineState, + _stack: &mut Stack, + call: &Call, + input: PipelineData, + ) -> Result { + let df = NuDataFrame::try_from_iter(input.into_iter())?; + Ok(PipelineData::Value(NuDataFrame::to_value(df, call.head))) + } +} diff --git a/crates/nu-command/src/default_context.rs b/crates/nu-command/src/default_context.rs index 9121ef05d0..e5bc4a0a44 100644 --- a/crates/nu-command/src/default_context.rs +++ b/crates/nu-command/src/default_context.rs @@ -143,6 +143,9 @@ pub fn create_default_context() -> EngineState { #[cfg(feature = "plugin")] bind_command!(Register); + #[cfg(feature = "dataframe")] + bind_command!(OpenDataFrame, ToDataFrame); + // This is a WIP proof of concept bind_command!(ListGitBranches, Git, GitCheckout, Source); diff --git 
a/crates/nu-command/src/formats/to/json.rs b/crates/nu-command/src/formats/to/json.rs index 6988af9028..15804f1ec3 100644 --- a/crates/nu-command/src/formats/to/json.rs +++ b/crates/nu-command/src/formats/to/json.rs @@ -70,8 +70,6 @@ pub fn value_to_json_value(v: &Value) -> Result { Value::List { vals, .. } => nu_json::Value::Array(json_list(vals)?), Value::Error { error } => return Err(error.clone()), Value::Block { .. } | Value::Range { .. } => nu_json::Value::Null, - #[cfg(feature = "dataframe")] - UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => serde_json::Value::Null, Value::Binary { val, .. } => { nu_json::Value::Array(val.iter().map(|x| nu_json::Value::U64(*x as u64)).collect()) } @@ -82,6 +80,8 @@ pub fn value_to_json_value(v: &Value) -> Result { } nu_json::Value::Object(m) } + #[cfg(feature = "custom")] + Value::CustomValue { val, .. } => val.to_json(), }) } diff --git a/crates/nu-command/src/lib.rs b/crates/nu-command/src/lib.rs index 667d99be09..bb7e60d9d3 100644 --- a/crates/nu-command/src/lib.rs +++ b/crates/nu-command/src/lib.rs @@ -13,6 +13,9 @@ mod strings; mod system; mod viewers; +#[cfg(feature = "dataframe")] +mod dataframe; + pub use conversions::*; pub use core_commands::*; pub use date::*; @@ -27,3 +30,6 @@ pub use math::*; pub use strings::*; pub use system::*; pub use viewers::*; + +#[cfg(feature = "dataframe")] +pub use dataframe::*; diff --git a/crates/nu-command/src/viewers/table.rs b/crates/nu-command/src/viewers/table.rs index 38d16efdab..16a08a24cc 100644 --- a/crates/nu-command/src/viewers/table.rs +++ b/crates/nu-command/src/viewers/table.rs @@ -107,6 +107,11 @@ impl Command for Table { .into_pipeline_data()) } PipelineData::Value(Value::Error { error }) => Err(error), + #[cfg(feature = "custom")] + PipelineData::Value(Value::CustomValue { val, span }) => { + let base_pipeline = val.to_base_value(span)?.into_pipeline_data(); + self.run(engine_state, stack, call, base_pipeline) + } x => Ok(x), } } diff --git 
a/crates/nu-dataframe/Cargo.toml b/crates/nu-dataframe/Cargo.toml new file mode 100644 index 0000000000..86b86f3b82 --- /dev/null +++ b/crates/nu-dataframe/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "nu-dataframe" +version = "0.1.0" +edition = "2018" + +[dependencies] +chrono = { version="0.4.19", features=["serde"] } +serde = {version = "1.0.130", features = ["derive"]} +num = "0.4.0" +nu-protocol = { path = "../nu-protocol", features = ["custom"] } +nu-json = { path = "../nu-json"} +indexmap = { version="1.7.0", features=["serde-1"] } +polars = { version = "0.17.0", features = ["default", "serde", "object", "checked_arithmetic", "strings"] } + diff --git a/crates/nu-dataframe/README.md b/crates/nu-dataframe/README.md new file mode 100644 index 0000000000..e20fd519fd --- /dev/null +++ b/crates/nu-dataframe/README.md @@ -0,0 +1,3 @@ +# nu-dataframe + +The nu-dataframe crate holds the definitions of the dataframe structure diff --git a/crates/nu-dataframe/src/between_values.rs b/crates/nu-dataframe/src/between_values.rs new file mode 100644 index 0000000000..25e7135c29 --- /dev/null +++ b/crates/nu-dataframe/src/between_values.rs @@ -0,0 +1,568 @@ +use super::{operations::Axis, NuDataFrame}; + +use nu_protocol::{ast::Operator, ShellError, Span, Spanned, Value}; +use num::Zero; +use polars::prelude::{ + BooleanType, ChunkCompare, ChunkedArray, DataType, Float64Type, Int64Type, IntoSeries, + NumOpsDispatchChecked, PolarsError, Series, +}; +use std::ops::{Add, BitAnd, BitOr, Div, Mul, Sub}; + +pub fn between_dataframes( + operator: Spanned, + left: Value, + lhs: &NuDataFrame, + right: &Value, + rhs: &NuDataFrame, + operation_span: Span, +) -> Result { + match operator.item { + Operator::Plus => match lhs.append_df(rhs, Axis::Row, operation_span) { + Ok(df) => Ok(df.to_value(operation_span)), + Err(e) => Err(e), + }, + _ => Err(ShellError::OperatorMismatch { + op_span: operator.span, + lhs_ty: left.get_type(), + lhs_span: left.span()?, + rhs_ty: right.get_type(), + 
rhs_span: right.span()?, + }), + } +} + +pub fn compute_between_series( + operator: Spanned, + left: Value, + lhs: &Series, + right: &Value, + rhs: &Series, + operation_span: Span, +) -> Result { + match operator.item { + Operator::Plus => { + let mut res = lhs + rhs; + let name = format!("sum_{}_{}", lhs.name(), rhs.name()); + res.rename(&name); + NuDataFrame::series_to_value(res, operation_span) + } + Operator::Minus => { + let mut res = lhs - rhs; + let name = format!("sub_{}_{}", lhs.name(), rhs.name()); + res.rename(&name); + NuDataFrame::series_to_value(res, operation_span) + } + Operator::Multiply => { + let mut res = lhs * rhs; + let name = format!("mul_{}_{}", lhs.name(), rhs.name()); + res.rename(&name); + NuDataFrame::series_to_value(res, operation_span) + } + Operator::Divide => { + let res = lhs.checked_div(rhs); + match res { + Ok(mut res) => { + let name = format!("div_{}_{}", lhs.name(), rhs.name()); + res.rename(&name); + NuDataFrame::series_to_value(res, operation_span) + } + Err(e) => Err(ShellError::InternalError(e.to_string())), + } + } + Operator::Equal => { + let mut res = Series::eq(lhs, rhs).into_series(); + let name = format!("eq_{}_{}", lhs.name(), rhs.name()); + res.rename(&name); + NuDataFrame::series_to_value(res, operation_span) + } + Operator::NotEqual => { + let mut res = Series::neq(lhs, rhs).into_series(); + let name = format!("neq_{}_{}", lhs.name(), rhs.name()); + res.rename(&name); + NuDataFrame::series_to_value(res, operation_span) + } + Operator::LessThan => { + let mut res = Series::lt(lhs, rhs).into_series(); + let name = format!("lt_{}_{}", lhs.name(), rhs.name()); + res.rename(&name); + NuDataFrame::series_to_value(res, operation_span) + } + Operator::LessThanOrEqual => { + let mut res = Series::lt_eq(lhs, rhs).into_series(); + let name = format!("lte_{}_{}", lhs.name(), rhs.name()); + res.rename(&name); + NuDataFrame::series_to_value(res, operation_span) + } + Operator::GreaterThan => { + let mut res = Series::gt(lhs, 
rhs).into_series(); + let name = format!("gt_{}_{}", lhs.name(), rhs.name()); + res.rename(&name); + NuDataFrame::series_to_value(res, operation_span) + } + Operator::GreaterThanOrEqual => { + let mut res = Series::gt_eq(lhs, rhs).into_series(); + let name = format!("gte_{}_{}", lhs.name(), rhs.name()); + res.rename(&name); + NuDataFrame::series_to_value(res, operation_span) + } + Operator::And => match lhs.dtype() { + DataType::Boolean => { + let lhs_cast = lhs.bool(); + let rhs_cast = rhs.bool(); + + match (lhs_cast, rhs_cast) { + (Ok(l), Ok(r)) => { + let mut res = l.bitand(r).into_series(); + let name = format!("and_{}_{}", lhs.name(), rhs.name()); + res.rename(&name); + NuDataFrame::series_to_value(res, operation_span) + } + _ => Err(ShellError::InternalError( + "unable to cast to boolean".into(), + )), + } + } + _ => Err(ShellError::IncompatibleParametersSingle( + format!( + "Operation {} can only be done with boolean values", + operator.item + ), + operation_span, + )), + }, + Operator::Or => match lhs.dtype() { + DataType::Boolean => { + let lhs_cast = lhs.bool(); + let rhs_cast = rhs.bool(); + + match (lhs_cast, rhs_cast) { + (Ok(l), Ok(r)) => { + let mut res = l.bitor(r).into_series(); + let name = format!("or_{}_{}", lhs.name(), rhs.name()); + res.rename(&name); + NuDataFrame::series_to_value(res, operation_span) + } + _ => Err(ShellError::InternalError( + "unable to cast to boolean".into(), + )), + } + } + _ => Err(ShellError::IncompatibleParametersSingle( + format!( + "Operation {} can only be done with boolean values", + operator.item + ), + operation_span, + )), + }, + _ => Err(ShellError::OperatorMismatch { + op_span: operator.span, + lhs_ty: left.get_type(), + lhs_span: left.span()?, + rhs_ty: right.get_type(), + rhs_span: right.span()?, + }), + } +} + +pub fn compute_series_single_value( + operator: Spanned, + lhs: &NuDataFrame, + lhs_span: &Span, + left: Value, + right: &Value, +) -> Result { + if !lhs.is_series() { + return 
Err(ShellError::OperatorMismatch { + op_span: operator.span, + lhs_ty: left.get_type(), + lhs_span: left.span()?, + rhs_ty: right.get_type(), + rhs_span: right.span()?, + }); + } + + let lhs = lhs.as_series(*lhs_span)?; + + match operator.item { + Operator::Plus => match &right { + Value::Int { val, .. } => { + compute_series_i64(&lhs, *val, >::add, *lhs_span) + } + Value::Float { val, .. } => { + compute_series_decimal(&lhs, *val, >::add, *lhs_span) + } + _ => Err(ShellError::OperatorMismatch { + op_span: operator.span, + lhs_ty: left.get_type(), + lhs_span: left.span()?, + rhs_ty: right.get_type(), + rhs_span: right.span()?, + }), + }, + Operator::Minus => match &right { + Value::Int { val, .. } => { + compute_series_i64(&lhs, *val, >::sub, *lhs_span) + } + Value::Float { val, .. } => { + compute_series_decimal(&lhs, *val, >::sub, *lhs_span) + } + _ => Err(ShellError::OperatorMismatch { + op_span: operator.span, + lhs_ty: left.get_type(), + lhs_span: left.span()?, + rhs_ty: right.get_type(), + rhs_span: right.span()?, + }), + }, + Operator::Multiply => match &right { + Value::Int { val, .. } => { + compute_series_i64(&lhs, *val, >::mul, *lhs_span) + } + Value::Float { val, .. 
} => { + compute_series_decimal(&lhs, *val, >::mul, *lhs_span) + } + _ => Err(ShellError::OperatorMismatch { + op_span: operator.span, + lhs_ty: left.get_type(), + lhs_span: left.span()?, + rhs_ty: right.get_type(), + rhs_span: right.span()?, + }), + }, + Operator::Divide => match &right { + Value::Int { val, span } => { + if *val == 0 { + Err(ShellError::DivisionByZero(*span)) + } else { + compute_series_i64(&lhs, *val, >::div, *lhs_span) + } + } + Value::Float { val, span } => { + if val.is_zero() { + Err(ShellError::DivisionByZero(*span)) + } else { + compute_series_decimal(&lhs, *val, >::div, *lhs_span) + } + } + _ => Err(ShellError::OperatorMismatch { + op_span: operator.span, + lhs_ty: left.get_type(), + lhs_span: left.span()?, + rhs_ty: right.get_type(), + rhs_span: right.span()?, + }), + }, + Operator::Equal => match &right { + Value::Int { val, .. } => compare_series_i64(&lhs, *val, ChunkedArray::eq, *lhs_span), + Value::Float { val, .. } => { + compare_series_decimal(&lhs, *val, ChunkedArray::eq, *lhs_span) + } + _ => Err(ShellError::OperatorMismatch { + op_span: operator.span, + lhs_ty: left.get_type(), + lhs_span: left.span()?, + rhs_ty: right.get_type(), + rhs_span: right.span()?, + }), + }, + Operator::NotEqual => match &right { + Value::Int { val, .. } => compare_series_i64(&lhs, *val, ChunkedArray::neq, *lhs_span), + Value::Float { val, .. } => { + compare_series_decimal(&lhs, *val, ChunkedArray::neq, *lhs_span) + } + _ => Err(ShellError::OperatorMismatch { + op_span: operator.span, + lhs_ty: left.get_type(), + lhs_span: left.span()?, + rhs_ty: right.get_type(), + rhs_span: right.span()?, + }), + }, + Operator::LessThan => match &right { + Value::Int { val, .. } => compare_series_i64(&lhs, *val, ChunkedArray::lt, *lhs_span), + Value::Float { val, .. 
} => { + compare_series_decimal(&lhs, *val, ChunkedArray::lt, *lhs_span) + } + _ => Err(ShellError::OperatorMismatch { + op_span: operator.span, + lhs_ty: left.get_type(), + lhs_span: left.span()?, + rhs_ty: right.get_type(), + rhs_span: right.span()?, + }), + }, + Operator::LessThanOrEqual => match &right { + Value::Int { val, .. } => { + compare_series_i64(&lhs, *val, ChunkedArray::lt_eq, *lhs_span) + } + Value::Float { val, .. } => { + compare_series_decimal(&lhs, *val, ChunkedArray::lt_eq, *lhs_span) + } + _ => Err(ShellError::OperatorMismatch { + op_span: operator.span, + lhs_ty: left.get_type(), + lhs_span: left.span()?, + rhs_ty: right.get_type(), + rhs_span: right.span()?, + }), + }, + Operator::GreaterThan => match &right { + Value::Int { val, .. } => compare_series_i64(&lhs, *val, ChunkedArray::gt, *lhs_span), + Value::Float { val, .. } => { + compare_series_decimal(&lhs, *val, ChunkedArray::gt, *lhs_span) + } + _ => Err(ShellError::OperatorMismatch { + op_span: operator.span, + lhs_ty: left.get_type(), + lhs_span: left.span()?, + rhs_ty: right.get_type(), + rhs_span: right.span()?, + }), + }, + Operator::GreaterThanOrEqual => match &right { + Value::Int { val, .. } => { + compare_series_i64(&lhs, *val, ChunkedArray::gt_eq, *lhs_span) + } + Value::Float { val, .. } => { + compare_series_decimal(&lhs, *val, ChunkedArray::gt_eq, *lhs_span) + } + _ => Err(ShellError::OperatorMismatch { + op_span: operator.span, + lhs_ty: left.get_type(), + lhs_span: left.span()?, + rhs_ty: right.get_type(), + rhs_span: right.span()?, + }), + }, + Operator::Contains => match &right { + Value::String { val, .. 
} => contains_series_pat(&lhs, val, *lhs_span), + _ => Err(ShellError::OperatorMismatch { + op_span: operator.span, + lhs_ty: left.get_type(), + lhs_span: left.span()?, + rhs_ty: right.get_type(), + rhs_span: right.span()?, + }), + }, + _ => Err(ShellError::OperatorMismatch { + op_span: operator.span, + lhs_ty: left.get_type(), + lhs_span: left.span()?, + rhs_ty: right.get_type(), + rhs_span: right.span()?, + }), + } +} + +fn compute_series_i64(series: &Series, val: i64, f: F, span: Span) -> Result +where + F: Fn(ChunkedArray, i64) -> ChunkedArray, +{ + match series.dtype() { + DataType::UInt32 | DataType::Int32 | DataType::UInt64 => { + let to_i64 = series.cast(&DataType::Int64); + + match to_i64 { + Ok(series) => { + let casted = series.i64(); + compute_casted_i64(casted, val, f, span) + } + Err(e) => Err(ShellError::InternalError(e.to_string())), + } + } + DataType::Int64 => { + let casted = series.i64(); + compute_casted_i64(casted, val, f, span) + } + _ => Err(ShellError::InternalError(format!( + "Series of type {} can not be used for operations with an i64 value", + series.dtype() + ))), + } +} + +fn compute_casted_i64( + casted: Result<&ChunkedArray, PolarsError>, + val: i64, + f: F, + span: Span, +) -> Result +where + F: Fn(ChunkedArray, i64) -> ChunkedArray, +{ + match casted { + Ok(casted) => { + let res = f(casted.clone(), val); + let res = res.into_series(); + NuDataFrame::series_to_value(res, span) + } + Err(e) => Err(ShellError::InternalError(e.to_string())), + } +} + +fn compute_series_decimal( + series: &Series, + val: f64, + f: F, + span: Span, +) -> Result +where + F: Fn(ChunkedArray, f64) -> ChunkedArray, +{ + match series.dtype() { + DataType::Float32 => { + let to_f64 = series.cast(&DataType::Float64); + + match to_f64 { + Ok(series) => { + let casted = series.f64(); + compute_casted_f64(casted, val, f, span) + } + Err(e) => Err(ShellError::InternalError(e.to_string())), + } + } + DataType::Float64 => { + let casted = series.f64(); + 
compute_casted_f64(casted, val, f, span) + } + _ => Err(ShellError::InternalError(format!( + "Series of type {} can not be used for operations with a decimal value", + series.dtype() + ))), + } +} + +fn compute_casted_f64( + casted: Result<&ChunkedArray, PolarsError>, + val: f64, + f: F, + span: Span, +) -> Result +where + F: Fn(ChunkedArray, f64) -> ChunkedArray, +{ + match casted { + Ok(casted) => { + let res = f(casted.clone(), val); + let res = res.into_series(); + NuDataFrame::series_to_value(res, span) + } + Err(e) => Err(ShellError::InternalError(e.to_string())), + } +} + +fn compare_series_i64(series: &Series, val: i64, f: F, span: Span) -> Result +where + F: Fn(&ChunkedArray, i64) -> ChunkedArray, +{ + match series.dtype() { + DataType::UInt32 | DataType::Int32 | DataType::UInt64 => { + let to_i64 = series.cast(&DataType::Int64); + + match to_i64 { + Ok(series) => { + let casted = series.i64(); + compare_casted_i64(casted, val, f, span) + } + Err(e) => Err(ShellError::InternalError(e.to_string())), + } + } + DataType::Int64 => { + let casted = series.i64(); + compare_casted_i64(casted, val, f, span) + } + _ => Err(ShellError::InternalError(format!( + "Series of type {} can not be used for operations with an i64 value", + series.dtype() + ))), + } +} + +fn compare_casted_i64( + casted: Result<&ChunkedArray, PolarsError>, + val: i64, + f: F, + span: Span, +) -> Result +where + F: Fn(&ChunkedArray, i64) -> ChunkedArray, +{ + match casted { + Ok(casted) => { + let res = f(casted, val); + let res = res.into_series(); + NuDataFrame::series_to_value(res, span) + } + Err(e) => Err(ShellError::InternalError(e.to_string())), + } +} + +fn compare_series_decimal( + series: &Series, + val: f64, + f: F, + span: Span, +) -> Result +where + F: Fn(&ChunkedArray, f64) -> ChunkedArray, +{ + match series.dtype() { + DataType::Float32 => { + let to_f64 = series.cast(&DataType::Float64); + + match to_f64 { + Ok(series) => { + let casted = series.f64(); + 
compare_casted_f64(casted, val, f, span) + } + Err(e) => Err(ShellError::InternalError(e.to_string())), + } + } + DataType::Float64 => { + let casted = series.f64(); + compare_casted_f64(casted, val, f, span) + } + _ => Err(ShellError::InternalError(format!( + "Series of type {} can not be used for operations with a decimal value", + series.dtype() + ))), + } +} + +fn compare_casted_f64( + casted: Result<&ChunkedArray, PolarsError>, + val: f64, + f: F, + span: Span, +) -> Result +where + F: Fn(&ChunkedArray, f64) -> ChunkedArray, +{ + match casted { + Ok(casted) => { + let res = f(casted, val); + let res = res.into_series(); + NuDataFrame::series_to_value(res, span) + } + Err(e) => Err(ShellError::InternalError(e.to_string())), + } +} + +fn contains_series_pat(series: &Series, pat: &str, span: Span) -> Result { + let casted = series.utf8(); + match casted { + Ok(casted) => { + let res = casted.contains(pat); + + match res { + Ok(res) => { + let res = res.into_series(); + NuDataFrame::series_to_value(res, span) + } + Err(e) => Err(ShellError::InternalError(e.to_string())), + } + } + Err(e) => Err(ShellError::InternalError(e.to_string())), + } +} diff --git a/crates/nu-dataframe/src/conversion.rs b/crates/nu-dataframe/src/conversion.rs new file mode 100644 index 0000000000..fdd834f179 --- /dev/null +++ b/crates/nu-dataframe/src/conversion.rs @@ -0,0 +1,537 @@ +use super::NuDataFrame; +use crate::DataFrameValue; +use chrono::{DateTime, FixedOffset, NaiveDateTime}; +use indexmap::map::{Entry, IndexMap}; +use nu_protocol::{ShellError, Span, Value}; +use polars::chunked_array::object::builder::ObjectChunkedBuilder; +use polars::chunked_array::ChunkedArray; +use polars::prelude::{ + DataFrame, DataType, DatetimeChunked, Int64Type, IntoSeries, NamedFrom, NewChunkedArray, + ObjectType, PolarsNumericType, Series, +}; +use std::ops::{Deref, DerefMut}; + +const SECS_PER_DAY: i64 = 86_400; + +#[derive(Debug)] +pub struct Column { + name: String, + values: Vec, +} + +impl Column 
{ + pub fn new(name: String, values: Vec) -> Self { + Self { name, values } + } + + pub fn new_empty(name: String) -> Self { + Self { + name, + values: Vec::new(), + } + } + + pub fn name(&self) -> &str { + self.name.as_str() + } + + pub fn iter(&self) -> impl Iterator { + self.values.iter() + } +} + +impl IntoIterator for Column { + type Item = Value; + type IntoIter = std::vec::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.values.into_iter() + } +} + +impl Deref for Column { + type Target = Vec; + + fn deref(&self) -> &Self::Target { + &self.values + } +} + +impl DerefMut for Column { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.values + } +} + +#[derive(Debug)] +pub enum InputType { + Integer, + Float, + String, + Boolean, + Object, + Date, + Duration, +} + +#[derive(Debug)] +pub struct TypedColumn { + column: Column, + column_type: Option, +} + +impl TypedColumn { + fn new_empty(name: String) -> Self { + Self { + column: Column::new_empty(name), + column_type: None, + } + } +} + +impl Deref for TypedColumn { + type Target = Column; + + fn deref(&self) -> &Self::Target { + &self.column + } +} + +impl DerefMut for TypedColumn { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.column + } +} + +pub type ColumnMap = IndexMap; + +pub fn create_column( + series: &Series, + from_row: usize, + to_row: usize, +) -> Result { + let size = to_row - from_row; + match series.dtype() { + DataType::Null => { + let values = std::iter::repeat(Value::Nothing { + span: Span::unknown(), + }) + .take(size) + .collect::>(); + + Ok(Column::new(series.name().into(), values)) + } + DataType::UInt8 => { + let casted = series + .u8() + .map_err(|e| ShellError::InternalError(e.to_string()))?; + Ok(column_from_casted(casted, from_row, size)) + } + DataType::UInt16 => { + let casted = series + .u16() + .map_err(|e| ShellError::InternalError(e.to_string()))?; + Ok(column_from_casted(casted, from_row, size)) + } + DataType::UInt32 => { + let casted = 
series + .u32() + .map_err(|e| ShellError::InternalError(e.to_string()))?; + Ok(column_from_casted(casted, from_row, size)) + } + DataType::UInt64 => { + let casted = series + .u64() + .map_err(|e| ShellError::InternalError(e.to_string()))?; + Ok(column_from_casted(casted, from_row, size)) + } + DataType::Int8 => { + let casted = series + .i8() + .map_err(|e| ShellError::InternalError(e.to_string()))?; + Ok(column_from_casted(casted, from_row, size)) + } + DataType::Int16 => { + let casted = series + .i16() + .map_err(|e| ShellError::InternalError(e.to_string()))?; + Ok(column_from_casted(casted, from_row, size)) + } + DataType::Int32 => { + let casted = series + .i32() + .map_err(|e| ShellError::InternalError(e.to_string()))?; + Ok(column_from_casted(casted, from_row, size)) + } + DataType::Int64 => { + let casted = series + .i64() + .map_err(|e| ShellError::InternalError(e.to_string()))?; + Ok(column_from_casted(casted, from_row, size)) + } + DataType::Float32 => { + let casted = series + .f32() + .map_err(|e| ShellError::InternalError(e.to_string()))?; + Ok(column_from_casted(casted, from_row, size)) + } + DataType::Float64 => { + let casted = series + .f64() + .map_err(|e| ShellError::InternalError(e.to_string()))?; + Ok(column_from_casted(casted, from_row, size)) + } + DataType::Boolean => { + let casted = series + .bool() + .map_err(|e| ShellError::InternalError(e.to_string()))?; + + let values = casted + .into_iter() + .skip(from_row) + .take(size) + .map(|v| match v { + Some(a) => Value::Bool { + val: a, + span: Span::unknown(), + }, + None => Value::Nothing { + span: Span::unknown(), + }, + }) + .collect::>(); + + Ok(Column::new(casted.name().into(), values)) + } + DataType::Utf8 => { + let casted = series + .utf8() + .map_err(|e| ShellError::InternalError(e.to_string()))?; + + let values = casted + .into_iter() + .skip(from_row) + .take(size) + .map(|v| match v { + Some(a) => Value::String { + val: a.into(), + span: Span::unknown(), + }, + None => 
Value::Nothing { + span: Span::unknown(), + }, + }) + .collect::>(); + + Ok(Column::new(casted.name().into(), values)) + } + DataType::Object(x) => { + let casted = series + .as_any() + .downcast_ref::>>(); + + match casted { + None => Err(ShellError::InternalError(format!( + "Object not supported for conversion: {}", + x + ))), + Some(ca) => { + let values = ca + .into_iter() + .skip(from_row) + .take(size) + .map(|v| match v { + Some(a) => a.get_value(), + None => Value::Nothing { + span: Span::unknown(), + }, + }) + .collect::>(); + + Ok(Column::new(ca.name().into(), values)) + } + } + } + DataType::Date => { + let casted = series + .date() + .map_err(|e| ShellError::InternalError(e.to_string()))?; + + let values = casted + .into_iter() + .skip(from_row) + .take(size) + .map(|v| match v { + Some(a) => { + // elapsed time in day since 1970-01-01 + let seconds = a as i64 * SECS_PER_DAY; + let naive_datetime = NaiveDateTime::from_timestamp(seconds, 0); + + // Zero length offset + let offset = FixedOffset::east(0); + let datetime = DateTime::::from_utc(naive_datetime, offset); + + Value::Date { + val: datetime, + span: Span::unknown(), + } + } + None => Value::Nothing { + span: Span::unknown(), + }, + }) + .collect::>(); + + Ok(Column::new(casted.name().into(), values)) + } + DataType::Datetime => { + let casted = series + .datetime() + .map_err(|e| ShellError::InternalError(e.to_string()))?; + + let values = casted + .into_iter() + .skip(from_row) + .take(size) + .map(|v| match v { + Some(a) => { + // elapsed time in milliseconds since 1970-01-01 + let seconds = a / 1000; + let naive_datetime = NaiveDateTime::from_timestamp(seconds, 0); + + // Zero length offset + let offset = FixedOffset::east(0); + let datetime = DateTime::::from_utc(naive_datetime, offset); + + Value::Date { + val: datetime, + span: Span::unknown(), + } + } + None => Value::Nothing { + span: Span::unknown(), + }, + }) + .collect::>(); + + Ok(Column::new(casted.name().into(), values)) + } + 
DataType::Time => { + let casted = series + .time() + .map_err(|e| ShellError::InternalError(e.to_string()))?; + + let values = casted + .into_iter() + .skip(from_row) + .take(size) + .map(|v| match v { + Some(nanoseconds) => Value::Duration { + val: nanoseconds, + span: Span::unknown(), + }, + None => Value::Nothing { + span: Span::unknown(), + }, + }) + .collect::>(); + + Ok(Column::new(casted.name().into(), values)) + } + e => Err(ShellError::InternalError(format!( + "Value not supported in nushell: {}", + e + ))), + } +} + +fn column_from_casted(casted: &ChunkedArray, from_row: usize, size: usize) -> Column +where + T: PolarsNumericType, + T::Native: Into, +{ + let values = casted + .into_iter() + .skip(from_row) + .take(size) + .map(|v| match v { + Some(a) => a.into(), + None => Value::Nothing { + span: Span::unknown(), + }, + }) + .collect::>(); + + Column::new(casted.name().into(), values) +} + +// Adds a separator to the vector of values using the column names from the +// dataframe to create the Values Row +pub fn add_separator(values: &mut Vec, df: &DataFrame) { + let mut cols = vec![]; + let mut vals = vec![]; + + for name in df.get_column_names() { + cols.push(name.to_string()); + vals.push(Value::String { + val: "...".into(), + span: Span::unknown(), + }) + } + + let extra_record = Value::Record { + cols, + vals, + span: Span::unknown(), + }; + + values.push(extra_record); +} + +// Inserting the values found in a Value::List +pub fn insert_record( + column_values: &mut ColumnMap, + cols: &[String], + values: &[Value], +) -> Result<(), ShellError> { + for (col, value) in cols.iter().zip(values.iter()) { + insert_value(value.clone(), col.clone(), column_values)?; + } + + Ok(()) +} + +pub fn insert_value( + value: Value, + key: String, + column_values: &mut ColumnMap, +) -> Result<(), ShellError> { + let col_val = match column_values.entry(key.clone()) { + Entry::Vacant(entry) => entry.insert(TypedColumn::new_empty(key)), + Entry::Occupied(entry) => 
entry.into_mut(), + }; + + // Checking that the type for the value is the same + // for the previous value in the column + if col_val.values.is_empty() { + match &value { + Value::Int { .. } => { + col_val.column_type = Some(InputType::Integer); + } + Value::Float { .. } => { + col_val.column_type = Some(InputType::Float); + } + Value::String { .. } => { + col_val.column_type = Some(InputType::String); + } + Value::Bool { .. } => { + col_val.column_type = Some(InputType::Boolean); + } + Value::Date { .. } => { + col_val.column_type = Some(InputType::Date); + } + Value::Duration { .. } => { + col_val.column_type = Some(InputType::Duration); + } + _ => col_val.column_type = Some(InputType::Object), + } + col_val.values.push(value); + } else { + let prev_value = &col_val.values[col_val.values.len() - 1]; + + match (&prev_value, &value) { + (Value::Int { .. }, Value::Int { .. }) + | (Value::Float { .. }, Value::Float { .. }) + | (Value::String { .. }, Value::String { .. }) + | (Value::Bool { .. }, Value::Bool { .. }) + | (Value::Date { .. }, Value::Date { .. }) + | (Value::Duration { .. }, Value::Duration { .. 
}) => col_val.values.push(value), + _ => { + col_val.column_type = Some(InputType::Object); + col_val.values.push(value); + } + } + } + + Ok(()) +} + +// The ColumnMap has the parsed data from the StreamInput +// This data can be used to create a Series object that can initialize +// the dataframe based on the type of data that is found +pub fn from_parsed_columns(column_values: ColumnMap) -> Result { + let mut df_series: Vec = Vec::new(); + for (name, column) in column_values { + if let Some(column_type) = &column.column_type { + match column_type { + InputType::Float => { + let series_values: Result, _> = + column.values.iter().map(|v| v.as_f64()).collect(); + let series = Series::new(&name, series_values?); + df_series.push(series) + } + InputType::Integer => { + let series_values: Result, _> = + column.values.iter().map(|v| v.as_i64()).collect(); + let series = Series::new(&name, series_values?); + df_series.push(series) + } + InputType::String => { + let series_values: Result, _> = + column.values.iter().map(|v| v.as_string()).collect(); + let series = Series::new(&name, series_values?); + df_series.push(series) + } + InputType::Boolean => { + let series_values: Result, _> = + column.values.iter().map(|v| v.as_bool()).collect(); + let series = Series::new(&name, series_values?); + df_series.push(series) + } + InputType::Object => { + let mut builder = + ObjectChunkedBuilder::::new(&name, column.values.len()); + + for v in &column.values { + builder.append_value(DataFrameValue::new(v.clone())); + } + + let res = builder.finish(); + df_series.push(res.into_series()) + } + InputType::Date => { + let it = column.values.iter().map(|v| { + if let Value::Date { val, .. } = &v { + Some(val.timestamp_millis()) + } else { + None + } + }); + + let res: DatetimeChunked = + ChunkedArray::::new_from_opt_iter(&name, it).into(); + + df_series.push(res.into_series()) + } + InputType::Duration => { + let it = column.values.iter().map(|v| { + if let Value::Duration { val, .. 
} = &v { + Some(*val) + } else { + None + } + }); + + let res = ChunkedArray::::new_from_opt_iter(&name, it); + + df_series.push(res.into_series()) + } + } + } + } + + match DataFrame::new(df_series) { + Ok(df) => Ok(NuDataFrame::new(df)), + Err(e) => Err(ShellError::InternalError(e.to_string())), + } +} diff --git a/crates/nu-dataframe/src/custom_value.rs b/crates/nu-dataframe/src/custom_value.rs new file mode 100644 index 0000000000..6ffb0673cf --- /dev/null +++ b/crates/nu-dataframe/src/custom_value.rs @@ -0,0 +1,59 @@ +use crate::NuDataFrame; +use nu_protocol::{ast::Operator, CustomValue, ShellError, Span, Value}; + +// CustomValue implementation for NuDataFrame +impl CustomValue for NuDataFrame { + fn typetag_name(&self) -> &'static str { + "dataframe" + } + + fn typetag_deserialize(&self) { + unimplemented!("typetag_deserialize") + } + + fn clone_value(&self, span: nu_protocol::Span) -> Value { + let cloned = NuDataFrame(self.0.clone()); + + Value::CustomValue { + val: Box::new(cloned), + span, + } + } + + fn value_string(&self) -> String { + self.typetag_name().to_string() + } + + fn to_base_value(&self, span: Span) -> Result { + let vals = self.print()?; + + Ok(Value::List { vals, span }) + } + + fn to_json(&self) -> nu_json::Value { + nu_json::Value::Null + } + + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn follow_path_int(&self, count: usize, span: Span) -> Result { + self.get_value(count, span) + } + + fn follow_path_string(&self, column_name: String, span: Span) -> Result { + let column = self.column(&column_name, span)?; + Ok(column.to_value(span)) + } + + fn operation( + &self, + lhs_span: Span, + operator: Operator, + op: Span, + right: &Value, + ) -> Result { + self.compute_with_value(lhs_span, operator, op, right) + } +} diff --git a/crates/nu-dataframe/src/lib.rs b/crates/nu-dataframe/src/lib.rs new file mode 100644 index 0000000000..70196d676f --- /dev/null +++ b/crates/nu-dataframe/src/lib.rs @@ -0,0 +1,298 @@ +mod between_values; 
+mod conversion; +mod custom_value; +mod operations; + +use std::{cmp::Ordering, fmt::Display, hash::Hasher}; + +use conversion::{Column, ColumnMap}; +use indexmap::map::IndexMap; +use nu_protocol::{did_you_mean, ShellError, Span, Value}; +use polars::prelude::{DataFrame, PolarsObject, Series}; +use serde::{Deserialize, Serialize}; + +// DataFrameValue is an encapsulation of Nushell Value that can be used +// to define the PolarsObject Trait. The polars object trait allows to +// create dataframes with mixed datatypes +#[derive(Clone, Debug)] +pub struct DataFrameValue(Value); + +impl DataFrameValue { + fn new(value: Value) -> Self { + Self(value) + } + + fn get_value(&self) -> Value { + self.0.clone() + } +} + +impl Display for DataFrameValue { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.0.get_type()) + } +} + +impl Default for DataFrameValue { + fn default() -> Self { + Self(Value::Nothing { + span: Span::unknown(), + }) + } +} + +impl PartialEq for DataFrameValue { + fn eq(&self, other: &Self) -> bool { + self.0.partial_cmp(&other.0).map_or(false, Ordering::is_eq) + } +} +impl Eq for DataFrameValue {} + +impl std::hash::Hash for DataFrameValue { + fn hash(&self, state: &mut H) { + match &self.0 { + Value::Nothing { .. } => 0.hash(state), + Value::Int { val, .. } => val.hash(state), + Value::String { val, .. } => val.hash(state), + // TODO. 
Define hash for the rest of types + _ => {} + } + } +} + +impl PolarsObject for DataFrameValue { + fn type_name() -> &'static str { + "value" + } +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct NuDataFrame(DataFrame); + +impl NuDataFrame { + pub fn new(dataframe: DataFrame) -> Self { + Self(dataframe) + } + + fn default_value(span: Span) -> Value { + let dataframe = DataFrame::default(); + NuDataFrame::dataframe_into_value(dataframe, span) + } + + pub fn dataframe_into_value(dataframe: DataFrame, span: Span) -> Value { + Value::CustomValue { + val: Box::new(Self::new(dataframe)), + span, + } + } + + pub fn to_value(self, span: Span) -> Value { + Value::CustomValue { + val: Box::new(self), + span, + } + } + + pub fn series_to_value(series: Series, span: Span) -> Result { + match DataFrame::new(vec![series]) { + Ok(dataframe) => Ok(NuDataFrame::dataframe_into_value(dataframe, span)), + Err(e) => Err(ShellError::InternalError(e.to_string())), + } + } + + pub fn try_from_iter(iter: T) -> Result + where + T: Iterator, + { + // Dictionary to store the columnar data extracted from + // the input. During the iteration we check if the values + // have different type + let mut column_values: ColumnMap = IndexMap::new(); + + for value in iter { + match value { + Value::List { vals, .. } => { + let cols = (0..vals.len()) + .map(|i| format!("{}", i)) + .collect::>(); + + conversion::insert_record(&mut column_values, &cols, &vals)? + } + Value::Record { cols, vals, .. } => { + conversion::insert_record(&mut column_values, &cols, &vals)? + } + _ => { + let key = "0".to_string(); + conversion::insert_value(value, key, &mut column_values)? 
+ } + } + } + + conversion::from_parsed_columns(column_values) + } + + pub fn try_from_series(columns: Vec) -> Result { + let dataframe = DataFrame::new(columns) + .map_err(|e| ShellError::InternalError(format!("Unable to create DataFrame: {}", e)))?; + + Ok(Self::new(dataframe)) + } + + pub fn try_from_columns(columns: Vec) -> Result { + let mut column_values: ColumnMap = IndexMap::new(); + + for column in columns { + let name = column.name().to_string(); + for value in column { + conversion::insert_value(value, name.clone(), &mut column_values)?; + } + } + + conversion::from_parsed_columns(column_values) + } + + pub fn column(&self, column: &str, span: Span) -> Result { + let s = self.0.column(column).map_err(|_| { + let possibilities = self + .0 + .get_column_names() + .iter() + .map(|name| name.to_string()) + .collect::>(); + + let option = did_you_mean(&possibilities, column).unwrap_or_else(|| column.to_string()); + ShellError::DidYouMean(option, span) + })?; + + let dataframe = DataFrame::new(vec![s.clone()]) + .map_err(|e| ShellError::InternalError(e.to_string()))?; + + Ok(Self(dataframe)) + } + + pub fn is_series(&self) -> bool { + self.0.width() == 1 + } + + pub fn as_series(&self, _span: Span) -> Result { + if !self.is_series() { + return Err(ShellError::InternalError( + "DataFrame cannot be used as Series".into(), + )); + } + + let series = self + .0 + .get_columns() + .get(0) + .expect("We have already checked that the width is 1"); + + Ok(series.clone()) + } + + pub fn get_value(&self, row: usize, span: Span) -> Result { + let series = self.as_series(Span::unknown())?; + let column = conversion::create_column(&series, row, row + 1)?; + + if column.len() == 0 { + Err(ShellError::AccessBeyondEnd(series.len(), span)) + } else { + let value = column + .into_iter() + .next() + .expect("already checked there is a value"); + Ok(value) + } + } + + // Print is made out a head and if the dataframe is too large, then a tail + pub fn print(&self) -> Result, 
ShellError> { + let df = &self.0; + let size: usize = 20; + + if df.height() > size { + let sample_size = size / 2; + let mut values = self.head(Some(sample_size))?; + conversion::add_separator(&mut values, df); + let remaining = df.height() - sample_size; + let tail_size = remaining.min(sample_size); + let mut tail_values = self.tail(Some(tail_size))?; + values.append(&mut tail_values); + + Ok(values) + } else { + Ok(self.head(Some(size))?) + } + } + + pub fn head(&self, rows: Option) -> Result, ShellError> { + let to_row = rows.unwrap_or(5); + let values = self.to_rows(0, to_row)?; + + Ok(values) + } + + pub fn tail(&self, rows: Option) -> Result, ShellError> { + let df = &self.0; + let to_row = df.height(); + let size = rows.unwrap_or(5); + let from_row = to_row.saturating_sub(size); + + let values = self.to_rows(from_row, to_row)?; + + Ok(values) + } + + pub fn to_rows(&self, from_row: usize, to_row: usize) -> Result, ShellError> { + let df = &self.0; + let upper_row = to_row.min(df.height()); + + let mut size: usize = 0; + let columns = self + .0 + .get_columns() + .iter() + .map( + |col| match conversion::create_column(col, from_row, upper_row) { + Ok(col) => { + size = col.len(); + Ok(col) + } + Err(e) => Err(e), + }, + ) + .collect::, ShellError>>()?; + + let mut iterators = columns + .into_iter() + .map(|col| (col.name().to_string(), col.into_iter())) + .collect::)>>(); + + let values = (0..size) + .into_iter() + .map(|_| { + let mut cols = vec![]; + let mut vals = vec![]; + + for (name, col) in &mut iterators { + cols.push(name.clone()); + + match col.next() { + Some(v) => vals.push(v), + None => vals.push(Value::Nothing { + span: Span::unknown(), + }), + }; + } + + Value::Record { + cols, + vals, + span: Span::unknown(), + } + }) + .collect::>(); + + Ok(values) + } +} diff --git a/crates/nu-dataframe/src/operations.rs b/crates/nu-dataframe/src/operations.rs new file mode 100644 index 0000000000..bcd57f3adc --- /dev/null +++ 
b/crates/nu-dataframe/src/operations.rs @@ -0,0 +1,220 @@ +use nu_protocol::{ast::Operator, span, ShellError, Span, Spanned, Value}; +use polars::prelude::{DataFrame, Series}; + +use crate::between_values::{ + between_dataframes, compute_between_series, compute_series_single_value, +}; + +use super::NuDataFrame; + +pub enum Axis { + Row, + Column, +} + +impl Axis { + pub fn try_from_str(axis: &str, span: Span) -> Result { + match axis { + "row" => Ok(Axis::Row), + "col" => Ok(Axis::Column), + _ => Err(ShellError::DidYouMean("'row' or 'col'".into(), span)), + } + } +} + +impl NuDataFrame { + pub fn compute_with_value( + &self, + lhs_span: Span, + operator: Operator, + op_span: Span, + right: &Value, + ) -> Result { + match right { + Value::CustomValue { + val: rhs, + span: rhs_span, + } => { + let rhs = rhs.as_any().downcast_ref::().ok_or_else(|| { + ShellError::DowncastNotPossible( + "Unable to create dataframe".to_string(), + *rhs_span, + ) + })?; + + let operation_span = span(&[lhs_span, *rhs_span]); + match (self.is_series(), rhs.is_series()) { + (true, true) => { + let lhs = &self + .as_series(lhs_span) + .expect("Already checked that is a series"); + let rhs = &rhs + .as_series(*rhs_span) + .expect("Already checked that is a series"); + + if lhs.dtype() != rhs.dtype() { + return Err(ShellError::IncompatibleParameters { + left_message: format!("datatype {}", lhs.dtype()), + left_span: lhs_span, + right_message: format!("datatype {}", lhs.dtype()), + right_span: *rhs_span, + }); + } + + if lhs.len() != rhs.len() { + return Err(ShellError::IncompatibleParameters { + left_message: format!("len {}", lhs.len()), + left_span: lhs_span, + right_message: format!("len {}", rhs.len()), + right_span: *rhs_span, + }); + } + + let op = Spanned { + item: operator, + span: op_span, + }; + + compute_between_series( + op, + NuDataFrame::default_value(lhs_span), + lhs, + right, + rhs, + operation_span, + ) + } + _ => { + if self.0.height() != rhs.0.height() { + return 
Err(ShellError::IncompatibleParameters { + left_message: format!("rows {}", self.0.height()), + left_span: lhs_span, + right_message: format!("rows {}", rhs.0.height()), + right_span: *rhs_span, + }); + } + + let op = Spanned { + item: operator, + span: op_span, + }; + + between_dataframes( + op, + NuDataFrame::default_value(lhs_span), + self, + right, + rhs, + operation_span, + ) + } + } + } + _ => { + let op = Spanned { + item: operator, + span: op_span, + }; + + compute_series_single_value( + op, + self, + &lhs_span, + NuDataFrame::default_value(lhs_span), + right, + ) + } + } + } + + pub fn append_df( + &self, + other: &NuDataFrame, + axis: Axis, + span: Span, + ) -> Result { + match axis { + Axis::Row => { + let mut columns: Vec<&str> = Vec::new(); + + let new_cols = self + .0 + .get_columns() + .iter() + .chain(other.0.get_columns()) + .map(|s| { + let name = if columns.contains(&s.name()) { + format!("{}_{}", s.name(), "x") + } else { + columns.push(s.name()); + s.name().to_string() + }; + + let mut series = s.clone(); + series.rename(&name); + series + }) + .collect::>(); + + let df_new = DataFrame::new(new_cols) + .map_err(|e| ShellError::InternalError(e.to_string()))?; + + Ok(NuDataFrame::new(df_new)) + } + Axis::Column => { + if self.0.width() != other.0.width() { + return Err(ShellError::IncompatibleParametersSingle( + "Dataframes with different number of columns".into(), + span, + )); + } + + if !self + .0 + .get_column_names() + .iter() + .all(|col| other.0.get_column_names().contains(col)) + { + return Err(ShellError::IncompatibleParametersSingle( + "Dataframes with different columns names".into(), + span, + )); + } + + let new_cols = self + .0 + .get_columns() + .iter() + .map(|s| { + let other_col = other + .0 + .column(s.name()) + .expect("Already checked that dataframes have same columns"); + + let mut tmp = s.clone(); + let res = tmp.append(other_col); + + match res { + Ok(s) => Ok(s.clone()), + Err(e) => Err({ + 
ShellError::InternalError(format!( + "Unable to append dataframes: {}", + e + )) + }), + } + }) + .collect::, ShellError>>()?; + + let df_new = DataFrame::new(new_cols).map_err(|e| { + ShellError::InternalError(format!( + "Unable to append dataframes: {}", + e.to_string() + )) + })?; + + Ok(NuDataFrame::new(df_new)) + } + } + } +} diff --git a/crates/nu-engine/src/from_value.rs b/crates/nu-engine/src/from_value.rs index 281796a7ce..54dfc100bc 100644 --- a/crates/nu-engine/src/from_value.rs +++ b/crates/nu-engine/src/from_value.rs @@ -1,5 +1,8 @@ // use std::path::PathBuf; +use std::path::PathBuf; +use std::str::FromStr; + use chrono::{DateTime, FixedOffset}; // use nu_path::expand_path; use nu_protocol::ast::{CellPath, PathMember}; @@ -92,6 +95,47 @@ impl FromValue for f64 { } } +impl FromValue for Spanned { + fn from_value(v: &Value) -> Result { + match v { + Value::Int { val, span } => Ok(Spanned { + item: *val as usize, + span: *span, + }), + Value::Filesize { val, span } => Ok(Spanned { + item: *val as usize, + span: *span, + }), + Value::Duration { val, span } => Ok(Spanned { + item: *val as usize, + span: *span, + }), + + v => Err(ShellError::CantConvert( + "integer".into(), + v.get_type().to_string(), + v.span()?, + )), + } + } +} + +impl FromValue for usize { + fn from_value(v: &Value) -> Result { + match v { + Value::Int { val, .. } => Ok(*val as usize), + Value::Filesize { val, .. } => Ok(*val as usize), + Value::Duration { val, .. } => Ok(*val as usize), + + v => Err(ShellError::CantConvert( + "integer".into(), + v.get_type().to_string(), + v.span()?, + )), + } + } +} + impl FromValue for String { fn from_value(v: &Value) -> Result { // FIXME: we may want to fail a little nicer here @@ -126,6 +170,30 @@ impl FromValue for Spanned { } } +impl FromValue for Vec { + fn from_value(v: &Value) -> Result { + // FIXME: we may want to fail a little nicer here + match v { + Value::List { vals, .. 
} => vals + .iter() + .map(|val| match val { + Value::String { val, .. } => Ok(val.clone()), + c => Err(ShellError::CantConvert( + "string".into(), + c.get_type().to_string(), + c.span()?, + )), + }) + .collect::, ShellError>>(), + v => Err(ShellError::CantConvert( + "string".into(), + v.get_type().to_string(), + v.span()?, + )), + } + } +} + impl FromValue for CellPath { fn from_value(v: &Value) -> Result { let span = v.span()?; @@ -253,6 +321,23 @@ impl FromValue for Vec { } } +impl FromValue for Spanned { + fn from_value(v: &Value) -> Result { + match v { + Value::String { val, span } => Ok(Spanned { + item: PathBuf::from_str(val) + .map_err(|err| ShellError::FileNotFoundCustom(err.to_string(), *span))?, + span: *span, + }), + v => Err(ShellError::CantConvert( + "range".into(), + v.get_type().to_string(), + v.span()?, + )), + } + } +} + // impl FromValue for Dictionary { // fn from_value(v: &Value) -> Result { // match v { diff --git a/crates/nu-plugin/src/serializers/signature.rs b/crates/nu-plugin/src/serializers/signature.rs index e54c1ec39a..539e23ca8f 100644 --- a/crates/nu-plugin/src/serializers/signature.rs +++ b/crates/nu-plugin/src/serializers/signature.rs @@ -22,6 +22,7 @@ pub(crate) fn serialize_signature(signature: &Signature, mut builder: signature: Category::Strings => builder.set_category(PluginCategory::Strings), Category::System => builder.set_category(PluginCategory::System), Category::Viewers => builder.set_category(PluginCategory::Viewers), + _ => builder.set_category(PluginCategory::Default), } // Serializing list of required arguments diff --git a/crates/nu-protocol/Cargo.toml b/crates/nu-protocol/Cargo.toml index 3656c0d456..84767c5bc7 100644 --- a/crates/nu-protocol/Cargo.toml +++ b/crates/nu-protocol/Cargo.toml @@ -14,9 +14,12 @@ chrono-humanize = "0.2.1" byte-unit = "4.0.9" im = "15.0.0" serde_json = { version = "1.0", optional = true } +nu-json = { path = "../nu-json" } +typetag = "0.1.8" [features] plugin = ["serde_json"] +custom = [] 
[dev-dependencies] serde_json = "1.0" diff --git a/crates/nu-protocol/src/lib.rs b/crates/nu-protocol/src/lib.rs index d2328c82da..f8d81b6faf 100644 --- a/crates/nu-protocol/src/lib.rs +++ b/crates/nu-protocol/src/lib.rs @@ -27,3 +27,6 @@ pub use span::*; pub use syntax_shape::*; pub use ty::*; pub use value::*; + +#[cfg(feature = "custom")] +pub use value::CustomValue; diff --git a/crates/nu-protocol/src/shell_error.rs b/crates/nu-protocol/src/shell_error.rs index ddf83e0efa..cdc1a8427d 100644 --- a/crates/nu-protocol/src/shell_error.rs +++ b/crates/nu-protocol/src/shell_error.rs @@ -203,9 +203,13 @@ pub enum ShellError { #[diagnostic(code(nu::shell::name_not_found), url(docsrs))] DidYouMean(String, #[label("did you mean '{0}'?")] Span), - #[error("Non-UTF8 string.")] + #[error("Non-UTF8 string")] #[diagnostic(code(nu::parser::non_utf8), url(docsrs))] NonUtf8(#[label = "non-UTF8 string"] Span), + + #[error("Casting error")] + #[diagnostic(code(nu::parser::downcast_not_possible), url(docsrs))] + DowncastNotPossible(String, #[label("{0}")] Span), } impl From for ShellError { diff --git a/crates/nu-protocol/src/signature.rs b/crates/nu-protocol/src/signature.rs index 44b564a52f..81d8152a1e 100644 --- a/crates/nu-protocol/src/signature.rs +++ b/crates/nu-protocol/src/signature.rs @@ -45,6 +45,7 @@ pub enum Category { Strings, System, Viewers, + Custom(String), } impl std::fmt::Display for Category { @@ -63,6 +64,7 @@ impl std::fmt::Display for Category { Category::Strings => "strings", Category::System => "system", Category::Viewers => "viewers", + Category::Custom(name) => name, }; write!(f, "{}", msg) diff --git a/crates/nu-protocol/src/ty.rs b/crates/nu-protocol/src/ty.rs index 36011e50ce..27893d26f4 100644 --- a/crates/nu-protocol/src/ty.rs +++ b/crates/nu-protocol/src/ty.rs @@ -23,6 +23,7 @@ pub enum Type { Unknown, Error, Binary, + Custom, } impl Display for Type { @@ -55,6 +56,7 @@ impl Display for Type { Type::Unknown => write!(f, "unknown"), Type::Error => 
write!(f, "error"), Type::Binary => write!(f, "binary"), + Type::Custom => write!(f, "custom"), } } } diff --git a/crates/nu-protocol/src/value/custom_value.rs b/crates/nu-protocol/src/value/custom_value.rs new file mode 100644 index 0000000000..c11d3526ba --- /dev/null +++ b/crates/nu-protocol/src/value/custom_value.rs @@ -0,0 +1,42 @@ +use std::fmt; + +use crate::{ast::Operator, ShellError, Span, Value}; + +// Trait definition for a custom value +#[typetag::serde(tag = "type")] +pub trait CustomValue: fmt::Debug + Send + Sync { + fn clone_value(&self, span: Span) -> Value; + + // Define string representation of the custom value + fn value_string(&self) -> String; + + // Converts the custom value to a base nushell value + // This is used to represent the custom value using the table representations + // That already exist in nushell + fn to_base_value(&self, span: Span) -> Result; + + // Json representation of custom value + fn to_json(&self) -> nu_json::Value { + nu_json::Value::Null + } + + // Any representation used to downcast object to its original type + fn as_any(&self) -> &dyn std::any::Any; + + // Follow cell path functions + fn follow_path_int(&self, count: usize, span: Span) -> Result; + fn follow_path_string(&self, column_name: String, span: Span) -> Result; + + // Definition of an operation between the object that implements the trait + // and another Value. 
+ // The Operator enum is used to indicate the expected operation + fn operation( + &self, + _lhs_span: Span, + operator: Operator, + op: Span, + _right: &Value, + ) -> Result { + Err(ShellError::UnsupportedOperator(operator, op)) + } +} diff --git a/crates/nu-protocol/src/value/from.rs b/crates/nu-protocol/src/value/from.rs new file mode 100644 index 0000000000..c5e3c253b3 --- /dev/null +++ b/crates/nu-protocol/src/value/from.rs @@ -0,0 +1,115 @@ +use crate::{ShellError, Span, Value}; + +impl From for Value { + fn from(val: u8) -> Self { + Value::Int { + val: val as i64, + span: Span::unknown(), + } + } +} + +impl From for Value { + fn from(val: u16) -> Self { + Value::Int { + val: val as i64, + span: Span::unknown(), + } + } +} + +impl From for Value { + fn from(val: u32) -> Self { + Value::Int { + val: val as i64, + span: Span::unknown(), + } + } +} + +impl From for Value { + fn from(val: u64) -> Self { + Value::Int { + val: val as i64, + span: Span::unknown(), + } + } +} + +impl From for Value { + fn from(val: i8) -> Self { + Value::Int { + val: val as i64, + span: Span::unknown(), + } + } +} + +impl From for Value { + fn from(val: i16) -> Self { + Value::Int { + val: val as i64, + span: Span::unknown(), + } + } +} + +impl From for Value { + fn from(val: i32) -> Self { + Value::Int { + val: val as i64, + span: Span::unknown(), + } + } +} + +impl From for Value { + fn from(val: i64) -> Self { + Value::Int { + val: val as i64, + span: Span::unknown(), + } + } +} + +impl From for Value { + fn from(val: f32) -> Self { + Value::Float { + val: val as f64, + span: Span::unknown(), + } + } +} + +impl From for Value { + fn from(val: f64) -> Self { + Value::Float { + val: val as f64, + span: Span::unknown(), + } + } +} + +impl Value { + pub fn as_f64(&self) -> Result { + match self { + Value::Float { val, .. 
} => Ok(*val), + x => Err(ShellError::CantConvert( + "f64".into(), + x.get_type().to_string(), + self.span()?, + )), + } + } + + pub fn as_i64(&self) -> Result { + match self { + Value::Int { val, .. } => Ok(*val), + x => Err(ShellError::CantConvert( + "rf64".into(), + x.get_type().to_string(), + self.span()?, + )), + } + } +} diff --git a/crates/nu-protocol/src/value/mod.rs b/crates/nu-protocol/src/value/mod.rs index 03325e096b..720581699d 100644 --- a/crates/nu-protocol/src/value/mod.rs +++ b/crates/nu-protocol/src/value/mod.rs @@ -1,3 +1,5 @@ +mod custom_value; +mod from; mod range; mod stream; mod unit; @@ -15,10 +17,16 @@ use std::{cmp::Ordering, fmt::Debug}; use crate::ast::{CellPath, PathMember}; use crate::{did_you_mean, span, BlockId, Config, Span, Spanned, Type}; +#[cfg(feature = "custom")] +use crate::ast::Operator; + +#[cfg(feature = "custom")] +pub use custom_value::CustomValue; + use crate::ShellError; /// Core structured values that pass through the pipeline in engine-q -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Serialize, Deserialize)] pub enum Value { Bool { val: bool, @@ -79,6 +87,77 @@ pub enum Value { val: CellPath, span: Span, }, + #[cfg(feature = "custom")] + CustomValue { + val: Box, + span: Span, + }, +} + +impl Clone for Value { + fn clone(&self) -> Self { + match self { + Value::Bool { val, span } => Value::Bool { + val: *val, + span: *span, + }, + Value::Int { val, span } => Value::Int { + val: *val, + span: *span, + }, + Value::Filesize { val, span } => Value::Filesize { + val: *val, + span: *span, + }, + Value::Duration { val, span } => Value::Duration { + val: *val, + span: *span, + }, + Value::Date { val, span } => Value::Date { + val: *val, + span: *span, + }, + Value::Range { val, span } => Value::Range { + val: val.clone(), + span: *span, + }, + Value::Float { val, span } => Value::Float { + val: *val, + span: *span, + }, + Value::String { val, span } => Value::String { + val: val.clone(), + span: *span, + }, 
+ Value::Record { cols, vals, span } => Value::Record { + cols: cols.clone(), + vals: vals.clone(), + span: *span, + }, + Value::List { vals, span } => Value::List { + vals: vals.clone(), + span: *span, + }, + Value::Block { val, span } => Value::Block { + val: *val, + span: *span, + }, + Value::Nothing { span } => Value::Nothing { span: *span }, + Value::Error { error } => Value::Error { + error: error.clone(), + }, + Value::Binary { val, span } => Value::Binary { + val: val.clone(), + span: *span, + }, + Value::CellPath { val, span } => Value::CellPath { + val: val.clone(), + span: *span, + }, + #[cfg(feature = "custom")] + Value::CustomValue { val, span } => val.clone_value(*span), + } + } } impl Value { @@ -144,6 +223,8 @@ impl Value { Value::Nothing { span, .. } => Ok(*span), Value::Binary { span, .. } => Ok(*span), Value::CellPath { span, .. } => Ok(*span), + #[cfg(feature = "custom")] + Value::CustomValue { span, .. } => Ok(*span), } } @@ -165,6 +246,8 @@ impl Value { Value::Error { .. } => {} Value::Binary { span, .. } => *span = new_span, Value::CellPath { span, .. } => *span = new_span, + #[cfg(feature = "custom")] + Value::CustomValue { span, .. } => *span = new_span, } self @@ -193,6 +276,8 @@ impl Value { Value::Error { .. } => Type::Error, Value::Binary { .. } => Type::Binary, Value::CellPath { .. } => Type::CellPath, + #[cfg(feature = "custom")] + Value::CustomValue { .. } => Type::Custom, } } @@ -233,6 +318,8 @@ impl Value { Value::Error { error } => format!("{:?}", error), Value::Binary { val, .. } => format!("{:?}", val), Value::CellPath { val, .. } => val.into_string(), + #[cfg(feature = "custom")] + Value::CustomValue { val, .. } => val.value_string(), } } @@ -273,6 +360,8 @@ impl Value { Value::Error { error } => format!("{:?}", error), Value::Binary { val, .. } => format!("{:?}", val), Value::CellPath { val, .. } => val.into_string(), + #[cfg(feature = "custom")] + Value::CustomValue { val, .. 
} => val.value_string(), } } @@ -318,6 +407,10 @@ impl Value { return Err(ShellError::AccessBeyondEndOfStream(*origin_span)); } } + #[cfg(feature = "custom")] + Value::CustomValue { val, .. } => { + current = val.follow_path_int(*count, *origin_span)?; + } x => { return Err(ShellError::IncompatiblePathAccess( format!("{}", x.get_type()), @@ -365,6 +458,10 @@ impl Value { span: *span, }; } + #[cfg(feature = "custom")] + Value::CustomValue { val, .. } => { + current = val.follow_path_string(column_name.clone(), *origin_span)?; + } x => { return Err(ShellError::IncompatiblePathAccess( format!("{}", x.get_type()), @@ -627,6 +724,11 @@ impl Value { } } + #[cfg(feature = "custom")] + (Value::CustomValue { val: lhs, span }, rhs) => { + lhs.operation(*span, Operator::Plus, op, rhs) + } + _ => Err(ShellError::OperatorMismatch { op_span: op, lhs_ty: self.get_type(), @@ -692,6 +794,11 @@ impl Value { } } + #[cfg(feature = "custom")] + (Value::CustomValue { val: lhs, span }, rhs) => { + lhs.operation(*span, Operator::Minus, op, rhs) + } + _ => Err(ShellError::OperatorMismatch { op_span: op, lhs_ty: self.get_type(), @@ -727,6 +834,10 @@ impl Value { val: lhs * rhs, span, }), + #[cfg(feature = "custom")] + (Value::CustomValue { val: lhs, span }, rhs) => { + lhs.operation(*span, Operator::Multiply, op, rhs) + } _ => Err(ShellError::OperatorMismatch { op_span: op, @@ -788,6 +899,10 @@ impl Value { Err(ShellError::DivisionByZero(op)) } } + #[cfg(feature = "custom")] + (Value::CustomValue { val: lhs, span }, rhs) => { + lhs.operation(*span, Operator::Divide, op, rhs) + } _ => Err(ShellError::OperatorMismatch { op_span: op, @@ -801,6 +916,11 @@ impl Value { pub fn lt(&self, op: Span, rhs: &Value) -> Result<Value, ShellError> { let span = span(&[self.span()?, rhs.span()?]); + #[cfg(feature = "custom")] + if let (Value::CustomValue { val: lhs, span }, rhs) = (self, rhs) { + return lhs.operation(*span, Operator::LessThan, op, rhs); + } + match self.partial_cmp(rhs) { Some(ordering) => Ok(Value::Bool {
val: matches!(ordering, Ordering::Less), @@ -818,6 +938,11 @@ impl Value { pub fn lte(&self, op: Span, rhs: &Value) -> Result<Value, ShellError> { let span = span(&[self.span()?, rhs.span()?]); + #[cfg(feature = "custom")] + if let (Value::CustomValue { val: lhs, span }, rhs) = (self, rhs) { + return lhs.operation(*span, Operator::LessThanOrEqual, op, rhs); + } + match self.partial_cmp(rhs) { Some(ordering) => Ok(Value::Bool { val: matches!(ordering, Ordering::Less | Ordering::Equal), @@ -835,6 +960,11 @@ impl Value { pub fn gt(&self, op: Span, rhs: &Value) -> Result<Value, ShellError> { let span = span(&[self.span()?, rhs.span()?]); + #[cfg(feature = "custom")] + if let (Value::CustomValue { val: lhs, span }, rhs) = (self, rhs) { + return lhs.operation(*span, Operator::GreaterThan, op, rhs); + } + match self.partial_cmp(rhs) { Some(ordering) => Ok(Value::Bool { val: matches!(ordering, Ordering::Greater), @@ -852,6 +982,11 @@ impl Value { pub fn gte(&self, op: Span, rhs: &Value) -> Result<Value, ShellError> { let span = span(&[self.span()?, rhs.span()?]); + #[cfg(feature = "custom")] + if let (Value::CustomValue { val: lhs, span }, rhs) = (self, rhs) { + return lhs.operation(*span, Operator::GreaterThanOrEqual, op, rhs); + } + match self.partial_cmp(rhs) { Some(ordering) => Ok(Value::Bool { val: matches!(ordering, Ordering::Greater | Ordering::Equal), @@ -869,6 +1004,11 @@ impl Value { pub fn eq(&self, op: Span, rhs: &Value) -> Result<Value, ShellError> { let span = span(&[self.span()?, rhs.span()?]); + #[cfg(feature = "custom")] + if let (Value::CustomValue { val: lhs, span }, rhs) = (self, rhs) { + return lhs.operation(*span, Operator::Equal, op, rhs); + } + match self.partial_cmp(rhs) { Some(ordering) => Ok(Value::Bool { val: matches!(ordering, Ordering::Equal), @@ -886,6 +1026,11 @@ impl Value { pub fn ne(&self, op: Span, rhs: &Value) -> Result<Value, ShellError> { let span = span(&[self.span()?, rhs.span()?]); + #[cfg(feature = "custom")] + if let (Value::CustomValue { val: lhs, span }, rhs) = (self, rhs) { + return lhs.operation(*span,
Operator::NotEqual, op, rhs); + } + match self.partial_cmp(rhs) { Some(ordering) => Ok(Value::Bool { val: !matches!(ordering, Ordering::Equal), @@ -921,6 +1066,10 @@ impl Value { val: rhs.contains(lhs), span, }), + #[cfg(feature = "custom")] + (Value::CustomValue { val: lhs, span }, rhs) => { + lhs.operation(*span, Operator::In, op, rhs) + } _ => Err(ShellError::OperatorMismatch { op_span: op, lhs_ty: self.get_type(), @@ -951,6 +1100,10 @@ impl Value { val: !rhs.contains(lhs), span, }), + #[cfg(feature = "custom")] + (Value::CustomValue { val: lhs, span }, rhs) => { + lhs.operation(*span, Operator::NotIn, op, rhs) + } _ => Err(ShellError::OperatorMismatch { op_span: op, lhs_ty: self.get_type(), @@ -969,6 +1122,10 @@ impl Value { val: lhs.contains(rhs), span, }), + #[cfg(feature = "custom")] + (Value::CustomValue { val: lhs, span }, rhs) => { + lhs.operation(*span, Operator::Contains, op, rhs) + } _ => Err(ShellError::OperatorMismatch { op_span: op, lhs_ty: self.get_type(), @@ -987,6 +1144,10 @@ impl Value { val: !lhs.contains(rhs), span, }), + #[cfg(feature = "custom")] + (Value::CustomValue { val: lhs, span }, rhs) => { + lhs.operation(*span, Operator::NotContains, op, rhs) + } _ => Err(ShellError::OperatorMismatch { op_span: op, lhs_ty: self.get_type(), @@ -1041,6 +1202,10 @@ impl Value { Err(ShellError::DivisionByZero(op)) } } + #[cfg(feature = "custom")] + (Value::CustomValue { val: lhs, span }, rhs) => { + lhs.operation(*span, Operator::Modulo, op, rhs) + } _ => Err(ShellError::OperatorMismatch { op_span: op, @@ -1060,6 +1225,10 @@ impl Value { val: *lhs && *rhs, span, }), + #[cfg(feature = "custom")] + (Value::CustomValue { val: lhs, span }, rhs) => { + lhs.operation(*span, Operator::And, op, rhs) + } _ => Err(ShellError::OperatorMismatch { op_span: op, lhs_ty: self.get_type(), @@ -1078,6 +1247,10 @@ impl Value { val: *lhs || *rhs, span, }), + #[cfg(feature = "custom")] + (Value::CustomValue { val: lhs, span }, rhs) => { + lhs.operation(*span, Operator::Or, 
op, rhs) + } _ => Err(ShellError::OperatorMismatch { op_span: op, lhs_ty: self.get_type(), @@ -1114,6 +1287,10 @@ impl Value { val: lhs.powf(*rhs), span, }), + #[cfg(feature = "custom")] + (Value::CustomValue { val: lhs, span }, rhs) => { + lhs.operation(*span, Operator::Pow, op, rhs) + } _ => Err(ShellError::OperatorMismatch { op_span: op,