Polars AWS S3 support (#14648)

# Description

Provides Amazon S3 support.

- Utilizes your existing AWS cli configuration. 
- Supports AWS SSO
- Supports
[gimme-aws-creds](https://github.com/Nike-Inc/gimme-aws-creds).
- Respects the AWS_PROFILE environment variable when
selecting the profile config
- Supports the AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, and AWS_REGION environment
variables for configuring without an AWS config

Usage:
```nushell
polars open s3://bucket/and/path.parquet
```

Supports:
- CSV
- Parquet
- NDJSON / json lines
- Arrow

Doesn't support:
- eager dataframes
-  Avro
- JSON
This commit is contained in:
Jack Wright 2024-12-25 04:15:50 -08:00 committed by GitHub
parent f2dcae570c
commit 23ba613b00
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 843 additions and 103 deletions

594
Cargo.lock generated
View File

@ -339,6 +339,303 @@ dependencies = [
"snap",
]
[[package]]
name = "aws-config"
version = "1.5.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b49afaa341e8dd8577e1a2200468f98956d6eda50bcf4a53246cc00174ba924"
dependencies = [
"aws-credential-types",
"aws-runtime",
"aws-sdk-sso",
"aws-sdk-ssooidc",
"aws-sdk-sts",
"aws-smithy-async",
"aws-smithy-http",
"aws-smithy-json",
"aws-smithy-runtime",
"aws-smithy-runtime-api",
"aws-smithy-types",
"aws-types",
"bytes",
"fastrand",
"hex",
"http 0.2.12",
"ring",
"time",
"tokio",
"tracing",
"url",
"zeroize",
]
[[package]]
name = "aws-credential-types"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60e8f6b615cb5fc60a98132268508ad104310f0cfb25a1c22eee76efdf9154da"
dependencies = [
"aws-smithy-async",
"aws-smithy-runtime-api",
"aws-smithy-types",
"zeroize",
]
[[package]]
name = "aws-runtime"
version = "1.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a10d5c055aa540164d9561a0e2e74ad30f0dcf7393c3a92f6733ddf9c5762468"
dependencies = [
"aws-credential-types",
"aws-sigv4",
"aws-smithy-async",
"aws-smithy-http",
"aws-smithy-runtime",
"aws-smithy-runtime-api",
"aws-smithy-types",
"aws-types",
"bytes",
"fastrand",
"http 0.2.12",
"http-body 0.4.6",
"once_cell",
"percent-encoding",
"pin-project-lite",
"tracing",
"uuid",
]
[[package]]
name = "aws-sdk-sso"
version = "1.49.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09677244a9da92172c8dc60109b4a9658597d4d298b188dd0018b6a66b410ca4"
dependencies = [
"aws-credential-types",
"aws-runtime",
"aws-smithy-async",
"aws-smithy-http",
"aws-smithy-json",
"aws-smithy-runtime",
"aws-smithy-runtime-api",
"aws-smithy-types",
"aws-types",
"bytes",
"http 0.2.12",
"once_cell",
"regex-lite",
"tracing",
]
[[package]]
name = "aws-sdk-ssooidc"
version = "1.50.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81fea2f3a8bb3bd10932ae7ad59cc59f65f270fc9183a7e91f501dc5efbef7ee"
dependencies = [
"aws-credential-types",
"aws-runtime",
"aws-smithy-async",
"aws-smithy-http",
"aws-smithy-json",
"aws-smithy-runtime",
"aws-smithy-runtime-api",
"aws-smithy-types",
"aws-types",
"bytes",
"http 0.2.12",
"once_cell",
"regex-lite",
"tracing",
]
[[package]]
name = "aws-sdk-sts"
version = "1.50.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ada54e5f26ac246dc79727def52f7f8ed38915cb47781e2a72213957dc3a7d5"
dependencies = [
"aws-credential-types",
"aws-runtime",
"aws-smithy-async",
"aws-smithy-http",
"aws-smithy-json",
"aws-smithy-query",
"aws-smithy-runtime",
"aws-smithy-runtime-api",
"aws-smithy-types",
"aws-smithy-xml",
"aws-types",
"http 0.2.12",
"once_cell",
"regex-lite",
"tracing",
]
[[package]]
name = "aws-sigv4"
version = "1.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5619742a0d8f253be760bfbb8e8e8368c69e3587e4637af5754e488a611499b1"
dependencies = [
"aws-credential-types",
"aws-smithy-http",
"aws-smithy-runtime-api",
"aws-smithy-types",
"bytes",
"form_urlencoded",
"hex",
"hmac",
"http 0.2.12",
"http 1.2.0",
"once_cell",
"percent-encoding",
"sha2",
"time",
"tracing",
]
[[package]]
name = "aws-smithy-async"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62220bc6e97f946ddd51b5f1361f78996e704677afc518a4ff66b7a72ea1378c"
dependencies = [
"futures-util",
"pin-project-lite",
"tokio",
]
[[package]]
name = "aws-smithy-http"
version = "0.60.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c8bc3e8fdc6b8d07d976e301c02fe553f72a39b7a9fea820e023268467d7ab6"
dependencies = [
"aws-smithy-runtime-api",
"aws-smithy-types",
"bytes",
"bytes-utils",
"futures-core",
"http 0.2.12",
"http-body 0.4.6",
"once_cell",
"percent-encoding",
"pin-project-lite",
"pin-utils",
"tracing",
]
[[package]]
name = "aws-smithy-json"
version = "0.60.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4683df9469ef09468dad3473d129960119a0d3593617542b7d52086c8486f2d6"
dependencies = [
"aws-smithy-types",
]
[[package]]
name = "aws-smithy-query"
version = "0.60.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2fbd61ceb3fe8a1cb7352e42689cec5335833cd9f94103a61e98f9bb61c64bb"
dependencies = [
"aws-smithy-types",
"urlencoding",
]
[[package]]
name = "aws-smithy-runtime"
version = "1.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be28bd063fa91fd871d131fc8b68d7cd4c5fa0869bea68daca50dcb1cbd76be2"
dependencies = [
"aws-smithy-async",
"aws-smithy-http",
"aws-smithy-runtime-api",
"aws-smithy-types",
"bytes",
"fastrand",
"h2 0.3.26",
"http 0.2.12",
"http-body 0.4.6",
"http-body 1.0.1",
"httparse",
"hyper 0.14.31",
"hyper-rustls 0.24.2",
"once_cell",
"pin-project-lite",
"pin-utils",
"rustls 0.21.12",
"tokio",
"tracing",
]
[[package]]
name = "aws-smithy-runtime-api"
version = "1.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92165296a47a812b267b4f41032ff8069ab7ff783696d217f0994a0d7ab585cd"
dependencies = [
"aws-smithy-async",
"aws-smithy-types",
"bytes",
"http 0.2.12",
"http 1.2.0",
"pin-project-lite",
"tokio",
"tracing",
"zeroize",
]
[[package]]
name = "aws-smithy-types"
version = "1.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fbd94a32b3a7d55d3806fe27d98d3ad393050439dd05eb53ece36ec5e3d3510"
dependencies = [
"base64-simd",
"bytes",
"bytes-utils",
"http 0.2.12",
"http 1.2.0",
"http-body 0.4.6",
"http-body 1.0.1",
"http-body-util",
"itoa",
"num-integer",
"pin-project-lite",
"pin-utils",
"ryu",
"serde",
"time",
]
[[package]]
name = "aws-smithy-xml"
version = "0.60.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab0b0166827aa700d3dc519f72f8b3a91c35d0b8d042dc5d643a91e6f80648fc"
dependencies = [
"xmlparser",
]
[[package]]
name = "aws-types"
version = "1.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5221b91b3e441e6675310829fd8984801b772cb1546ef6c0e54dec9f1ac13fef"
dependencies = [
"aws-credential-types",
"aws-smithy-async",
"aws-smithy-runtime-api",
"aws-smithy-types",
"rustc_version",
"tracing",
]
[[package]]
name = "backtrace"
version = "0.3.74"
@ -363,12 +660,28 @@ dependencies = [
"backtrace",
]
[[package]]
name = "base64"
version = "0.21.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567"
[[package]]
name = "base64"
version = "0.22.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
[[package]]
name = "base64-simd"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195"
dependencies = [
"outref",
"vsimd",
]
[[package]]
name = "bindgen"
version = "0.70.1"
@ -598,6 +911,16 @@ version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "325918d6fe32f23b19878fe4b34794ae41fc19ddbe53b10571a4874d44ffd39b"
[[package]]
name = "bytes-utils"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7dafe3a8757b027e2be6e4e5601ed563c55989fcf1546e933c66c8eb3a058d35"
dependencies = [
"bytes",
"either",
]
[[package]]
name = "bytesize"
version = "1.3.0"
@ -690,7 +1013,7 @@ version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1f927b07c74ba84c7e5fe4db2baeb3e996ab2688992e39ac68ce3220a677c7e"
dependencies = [
"base64",
"base64 0.22.1",
"encoding_rs",
]
@ -1307,6 +1630,7 @@ checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
dependencies = [
"block-buffer",
"crypto-common",
"subtle",
]
[[package]]
@ -1907,6 +2231,25 @@ dependencies = [
"scroll",
]
[[package]]
name = "h2"
version = "0.3.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8"
dependencies = [
"bytes",
"fnv",
"futures-core",
"futures-sink",
"futures-util",
"http 0.2.12",
"indexmap",
"slab",
"tokio",
"tokio-util",
"tracing",
]
[[package]]
name = "h2"
version = "0.4.7"
@ -1918,7 +2261,7 @@ dependencies = [
"fnv",
"futures-core",
"futures-sink",
"http",
"http 1.2.0",
"indexmap",
"slab",
"tokio",
@ -2026,6 +2369,15 @@ version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
[[package]]
name = "hmac"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e"
dependencies = [
"digest",
]
[[package]]
name = "home"
version = "0.5.9"
@ -2063,6 +2415,17 @@ dependencies = [
"syn 2.0.90",
]
[[package]]
name = "http"
version = "0.2.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1"
dependencies = [
"bytes",
"fnv",
"itoa",
]
[[package]]
name = "http"
version = "1.2.0"
@ -2074,6 +2437,17 @@ dependencies = [
"itoa",
]
[[package]]
name = "http-body"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2"
dependencies = [
"bytes",
"http 0.2.12",
"pin-project-lite",
]
[[package]]
name = "http-body"
version = "1.0.1"
@ -2081,7 +2455,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184"
dependencies = [
"bytes",
"http",
"http 1.2.0",
]
[[package]]
@ -2092,8 +2466,8 @@ checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f"
dependencies = [
"bytes",
"futures-util",
"http",
"http-body",
"http 1.2.0",
"http-body 1.0.1",
"pin-project-lite",
]
@ -2127,6 +2501,30 @@ version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
[[package]]
name = "hyper"
version = "0.14.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c08302e8fa335b151b788c775ff56e7a03ae64ff85c548ee820fecb70356e85"
dependencies = [
"bytes",
"futures-channel",
"futures-core",
"futures-util",
"h2 0.3.26",
"http 0.2.12",
"http-body 0.4.6",
"httparse",
"httpdate",
"itoa",
"pin-project-lite",
"socket2",
"tokio",
"tower-service",
"tracing",
"want",
]
[[package]]
name = "hyper"
version = "1.5.1"
@ -2136,9 +2534,9 @@ dependencies = [
"bytes",
"futures-channel",
"futures-util",
"h2",
"http",
"http-body",
"h2 0.4.7",
"http 1.2.0",
"http-body 1.0.1",
"httparse",
"httpdate",
"itoa",
@ -2148,6 +2546,22 @@ dependencies = [
"want",
]
[[package]]
name = "hyper-rustls"
version = "0.24.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590"
dependencies = [
"futures-util",
"http 0.2.12",
"hyper 0.14.31",
"log",
"rustls 0.21.12",
"rustls-native-certs 0.6.3",
"tokio",
"tokio-rustls 0.24.1",
]
[[package]]
name = "hyper-rustls"
version = "0.27.3"
@ -2155,14 +2569,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08afdbb5c31130e3034af566421053ab03787c640246a446327f550d11bcb333"
dependencies = [
"futures-util",
"http",
"hyper",
"http 1.2.0",
"hyper 1.5.1",
"hyper-util",
"rustls",
"rustls-native-certs",
"rustls 0.23.20",
"rustls-native-certs 0.8.1",
"rustls-pki-types",
"tokio",
"tokio-rustls",
"tokio-rustls 0.26.1",
"tower-service",
]
@ -2175,9 +2589,9 @@ dependencies = [
"bytes",
"futures-channel",
"futures-util",
"http",
"http-body",
"hyper",
"http 1.2.0",
"http-body 1.0.1",
"hyper 1.5.1",
"pin-project-lite",
"socket2",
"tokio",
@ -2995,10 +3409,10 @@ dependencies = [
"assert-json-diff",
"bytes",
"futures-util",
"http",
"http-body",
"http 1.2.0",
"http-body 1.0.1",
"http-body-util",
"hyper",
"hyper 1.5.1",
"hyper-util",
"log",
"rand",
@ -3318,7 +3732,7 @@ name = "nu-command"
version = "0.101.1"
dependencies = [
"alphanumeric-sort",
"base64",
"base64 0.22.1",
"bracoxide",
"brotli",
"byteorder",
@ -3803,6 +4217,8 @@ dependencies = [
name = "nu_plugin_polars"
version = "0.101.1"
dependencies = [
"aws-config",
"aws-credential-types",
"chrono",
"chrono-tz 0.10.0",
"env_logger 0.11.5",
@ -3821,6 +4237,7 @@ dependencies = [
"nu-protocol",
"nu-utils",
"num",
"object_store",
"polars",
"polars-arrow",
"polars-io",
@ -3830,7 +4247,9 @@ dependencies = [
"serde",
"sqlparser",
"tempfile",
"tokio",
"typetag",
"url",
"uuid",
]
@ -4089,12 +4508,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6da452820c715ce78221e8202ccc599b4a52f3e1eb3eedb487b680c81a8e3f3"
dependencies = [
"async-trait",
"base64",
"base64 0.22.1",
"bytes",
"chrono",
"futures",
"humantime",
"hyper",
"hyper 1.5.1",
"itertools 0.13.0",
"md-5",
"parking_lot",
@ -4236,6 +4655,12 @@ dependencies = [
"windows-sys 0.59.0",
]
[[package]]
name = "outref"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a"
[[package]]
name = "owo-colors"
version = "4.1.0"
@ -4497,7 +4922,7 @@ version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42cf17e9a1800f5f396bc67d193dc9411b59012a5876445ef450d449881e1016"
dependencies = [
"base64",
"base64 0.22.1",
"indexmap",
"quick-xml 0.32.0",
"serde",
@ -4675,6 +5100,7 @@ dependencies = [
"blake3",
"bytes",
"chrono",
"chrono-tz 0.8.6",
"fast-float",
"flate2",
"fs4",
@ -4720,6 +5146,7 @@ checksum = "d5c8c057ef04feaf34b6ce52096bdea3a766fa4725f50442078c8a4ee86397bf"
dependencies = [
"ahash 0.8.11",
"chrono",
"chrono-tz 0.8.6",
"fallible-streaming-iterator",
"hashbrown 0.15.2",
"indexmap",
@ -4741,6 +5168,7 @@ checksum = "4a8ca74f42e7b47cad241b36b98d991cc7fbb51b8d0695a055eb937588d1f310"
dependencies = [
"ahash 0.8.11",
"bitflags 2.6.0",
"futures",
"memchr",
"once_cell",
"polars-arrow",
@ -4756,6 +5184,7 @@ dependencies = [
"polars-time",
"polars-utils",
"rayon",
"tokio",
"version_check",
]
@ -4790,7 +5219,7 @@ checksum = "035c800fbe5bbd820afeb8313713ed345853bb014e0f821a4025d40cf0d60e1a"
dependencies = [
"ahash 0.8.11",
"argminmax",
"base64",
"base64 0.22.1",
"bytemuck",
"chrono",
"chrono-tz 0.8.6",
@ -4828,7 +5257,7 @@ checksum = "91dcf1d9f048079376949eaf2e24e240b313ff4a102fb83b57c9a5f807cdca52"
dependencies = [
"ahash 0.8.11",
"async-stream",
"base64",
"base64 0.22.1",
"brotli",
"bytemuck",
"ethnum",
@ -4881,6 +5310,7 @@ dependencies = [
"polars-row",
"polars-utils",
"rayon",
"tokio",
"uuid",
"version_check",
]
@ -5385,7 +5815,7 @@ dependencies = [
"quinn-proto",
"quinn-udp",
"rustc-hash 2.1.0",
"rustls",
"rustls 0.23.20",
"socket2",
"thiserror 2.0.6",
"tokio",
@ -5403,7 +5833,7 @@ dependencies = [
"rand",
"ring",
"rustc-hash 2.1.0",
"rustls",
"rustls 0.23.20",
"rustls-pki-types",
"slab",
"thiserror 2.0.6",
@ -5648,6 +6078,12 @@ dependencies = [
"regex-syntax",
]
[[package]]
name = "regex-lite"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53a49587ad06b26609c52e423de037e7f57f20d53535d66e08c695f347df952a"
[[package]]
name = "regex-syntax"
version = "0.8.5"
@ -5675,16 +6111,16 @@ version = "0.12.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a77c62af46e79de0a562e1a9849205ffcb7fc1238876e9bd743357570e04046f"
dependencies = [
"base64",
"base64 0.22.1",
"bytes",
"futures-core",
"futures-util",
"h2",
"http",
"http-body",
"h2 0.4.7",
"http 1.2.0",
"http-body 1.0.1",
"http-body-util",
"hyper",
"hyper-rustls",
"hyper 1.5.1",
"hyper-rustls 0.27.3",
"hyper-util",
"ipnet",
"js-sys",
@ -5694,16 +6130,16 @@ dependencies = [
"percent-encoding",
"pin-project-lite",
"quinn",
"rustls",
"rustls-native-certs",
"rustls-pemfile",
"rustls 0.23.20",
"rustls-native-certs 0.8.1",
"rustls-pemfile 2.2.0",
"rustls-pki-types",
"serde",
"serde_json",
"serde_urlencoded",
"sync_wrapper",
"tokio",
"tokio-rustls",
"tokio-rustls 0.26.1",
"tokio-util",
"tower-service",
"url",
@ -5720,7 +6156,7 @@ version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc36545d1021456a751b573517cb52e8c339b2f662e6b2778ef629282678de29"
dependencies = [
"base64",
"base64 0.22.1",
"charset",
"chumsky",
"memchr",
@ -5959,6 +6395,18 @@ dependencies = [
"windows-sys 0.59.0",
]
[[package]]
name = "rustls"
version = "0.21.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e"
dependencies = [
"log",
"ring",
"rustls-webpki 0.101.7",
"sct",
]
[[package]]
name = "rustls"
version = "0.23.20"
@ -5968,11 +6416,23 @@ dependencies = [
"once_cell",
"ring",
"rustls-pki-types",
"rustls-webpki",
"rustls-webpki 0.102.8",
"subtle",
"zeroize",
]
[[package]]
name = "rustls-native-certs"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00"
dependencies = [
"openssl-probe",
"rustls-pemfile 1.0.4",
"schannel",
"security-framework 2.11.1",
]
[[package]]
name = "rustls-native-certs"
version = "0.8.1"
@ -5985,6 +6445,15 @@ dependencies = [
"security-framework 3.0.1",
]
[[package]]
name = "rustls-pemfile"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c"
dependencies = [
"base64 0.21.7",
]
[[package]]
name = "rustls-pemfile"
version = "2.2.0"
@ -6003,6 +6472,16 @@ dependencies = [
"web-time",
]
[[package]]
name = "rustls-webpki"
version = "0.101.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765"
dependencies = [
"ring",
"untrusted",
]
[[package]]
name = "rustls-webpki"
version = "0.102.8"
@ -6099,6 +6578,16 @@ dependencies = [
"syn 2.0.90",
]
[[package]]
name = "sct"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414"
dependencies = [
"ring",
"untrusted",
]
[[package]]
name = "sdd"
version = "3.0.4"
@ -6976,6 +7465,7 @@ dependencies = [
"mio 1.0.3",
"parking_lot",
"pin-project-lite",
"signal-hook-registry",
"socket2",
"tokio-macros",
"windows-sys 0.52.0",
@ -6992,13 +7482,23 @@ dependencies = [
"syn 2.0.90",
]
[[package]]
name = "tokio-rustls"
version = "0.24.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081"
dependencies = [
"rustls 0.21.12",
"tokio",
]
[[package]]
name = "tokio-rustls"
version = "0.26.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f6d0975eaace0cf0fcadee4e4aaa5da15b5c079146f2cffb67c113be122bf37"
dependencies = [
"rustls",
"rustls 0.23.20",
"tokio",
]
@ -7273,7 +7773,7 @@ version = "2.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02d1a66277ed75f640d608235660df48c8e3c19f3b4edb6a263315626cc3c01d"
dependencies = [
"base64",
"base64 0.22.1",
"encoding_rs",
"flate2",
"log",
@ -7498,6 +7998,12 @@ version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
[[package]]
name = "vsimd"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64"
[[package]]
name = "vte"
version = "0.10.1"
@ -8222,6 +8728,12 @@ dependencies = [
"markup5ever 0.12.1",
]
[[package]]
name = "xmlparser"
version = "0.13.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4"
[[package]]
name = "xxhash-rust"
version = "0.8.12"

View File

@ -31,7 +31,7 @@ mimalloc = { version = "0.1.42" }
num = {version = "0.4"}
serde = { version = "1.0", features = ["derive"] }
sqlparser = { version = "0.49"}
polars-io = { version = "0.44", features = ["avro"]}
polars-io = { version = "0.44", features = ["avro", "cloud", "aws"]}
polars-arrow = { version = "0.44"}
polars-ops = { version = "0.44", features = ["pivot"]}
polars-plan = { version = "0.44", features = ["regex"]}
@ -44,10 +44,18 @@ uuid = { version = "1.11", features = ["v4", "serde"] }
# Due to a compile error with polars, this is included to force the raw dependency
hashbrown = { version = "0.14", features = ["rayon", "ahash", "serde", "raw"] }
# Cloud support
aws-config = { version = "1.5", features = ["sso"] }
aws-credential-types = "1.2"
tokio = { version = "1.41", features = ["full"] }
object_store = { version = "0.10", default-features = false }
url.workspace = true
[dependencies.polars]
features = [
"arg_where",
"checked_arithmetic",
"cloud",
"concat_str",
"cross_join",
"csv",
@ -77,6 +85,7 @@ features = [
"strings",
"string_to_integer",
"streaming",
"timezones",
"temporal",
"to_dummies",
]

View File

@ -89,7 +89,7 @@ mod test {
#[test]
fn test_remove() -> Result<(), ShellError> {
let plugin = PolarsPlugin::new_test_mode().into();
let plugin = PolarsPlugin::new_test_mode()?.into();
let pipeline_data = PluginTest::new("polars", plugin)?
.add_decl(Box::new(First))?
.add_decl(Box::new(Get))?

View File

@ -0,0 +1,67 @@
use std::error::Error;
use aws_config::{BehaviorVersion, SdkConfig};
use aws_credential_types::{provider::ProvideCredentials, Credentials};
use nu_protocol::ShellError;
use object_store::aws::AmazonS3ConfigKey;
use polars_io::cloud::CloudOptions;
use crate::PolarsPlugin;
/// Loads the default AWS SDK configuration using the latest SDK behavior version.
async fn load_aws_config() -> SdkConfig {
    let behavior = BehaviorVersion::latest();
    aws_config::load_defaults(behavior).await
}
/// Resolves credentials through the credentials provider attached to `aws_config`.
///
/// Returns `Ok(None)` when the configuration has no credentials provider.
///
/// # Errors
///
/// Returns a `ShellError::GenericError` when the provider fails to produce
/// credentials. The message contains the provider error followed by its
/// source error (or an empty string when there is no source).
async fn aws_creds(aws_config: &SdkConfig) -> Result<Option<Credentials>, ShellError> {
    let provider = match aws_config.credentials_provider() {
        Some(provider) => provider,
        None => return Ok(None),
    };

    match provider.provide_credentials().await {
        Ok(credentials) => Ok(Some(credentials)),
        Err(e) => {
            // Include the underlying cause, if any, after the top-level error.
            let cause = e.source().map(|src| src.to_string()).unwrap_or_default();
            Err(ShellError::GenericError {
                error: format!("Could not fetch AWS credentials: {} - {}", e, cause),
                msg: "".into(),
                span: None,
                help: None,
                inner: vec![],
            })
        }
    }
}
/// Builds the list of S3 configuration key/value pairs from the resolved AWS credentials.
///
/// The result always contains the access key id and the secret access key; the
/// session token is appended only when the credentials carry one.
///
/// # Errors
///
/// Propagates any error from [`aws_creds`], and returns a
/// `ShellError::GenericError` when no credentials could be determined.
async fn build_aws_cloud_configs() -> Result<Vec<(AmazonS3ConfigKey, String)>, ShellError> {
    let sdk_config = load_aws_config().await;
    let creds = aws_creds(&sdk_config)
        .await?
        // Build the error lazily so nothing is allocated on the success path.
        .ok_or_else(|| ShellError::GenericError {
            error: "Could not determine AWS credentials".into(),
            msg: "".into(),
            span: None,
            help: None,
            inner: vec![],
        })?;

    let mut configs = vec![
        (AmazonS3ConfigKey::AccessKeyId, creds.access_key_id().into()),
        (
            AmazonS3ConfigKey::SecretAccessKey,
            creds.secret_access_key().into(),
        ),
    ];

    if let Some(token) = creds.session_token() {
        configs.push((AmazonS3ConfigKey::Token, token.into()));
    }

    Ok(configs)
}
/// Builds polars `CloudOptions` configured for AWS.
///
/// Blocks on the plugin's runtime while the AWS configuration key/value pairs
/// are resolved, then applies them to a default `CloudOptions`.
///
/// # Errors
///
/// Returns whatever error `build_aws_cloud_configs` produces.
pub(crate) fn build_cloud_options(plugin: &PolarsPlugin) -> Result<CloudOptions, ShellError> {
    plugin
        .runtime
        .block_on(build_aws_cloud_configs())
        .map(|aws_configs| CloudOptions::default().with_aws(aws_configs))
}

View File

@ -0,0 +1,28 @@
use nu_protocol::ShellError;
use polars_io::cloud::CloudOptions;
use url::Url;
use crate::PolarsPlugin;
mod aws;
/// The cloud storage providers that a URL can be mapped to.
enum CloudType {
/// Amazon S3; selected for the `s3` and `s3a` URL schemes.
Aws,
}
/// Maps the scheme of `url` to a supported [`CloudType`].
///
/// Returns `Some(CloudType::Aws)` for the `s3` and `s3a` schemes and `None`
/// for every other scheme.
fn determine_cloud_type(url: &Url) -> Option<CloudType> {
    let is_s3 = matches!(url.scheme(), "s3" | "s3a");
    is_s3.then_some(CloudType::Aws)
}
/// Builds the `CloudOptions` needed to read `url`, if it points at a supported cloud.
///
/// Returns `Ok(None)` when the URL scheme does not map to a known cloud type.
///
/// # Errors
///
/// Propagates any error raised while building the provider-specific options.
pub(crate) fn build_cloud_options(
    plugin: &PolarsPlugin,
    url: &Url,
) -> Result<Option<CloudOptions>, ShellError> {
    // Deliberately exhaustive (no `_` arm): adding a `CloudType` variant must
    // fail to compile here instead of being silently treated as "no cloud".
    match determine_cloud_type(url) {
        Some(CloudType::Aws) => aws::build_cloud_options(plugin).map(Some),
        None => Ok(None),
    }
}

View File

@ -1,10 +1,13 @@
use crate::{
cloud::build_cloud_options,
dataframe::values::NuSchema,
values::{CustomValueSupport, NuDataFrame, NuLazyFrame, PolarsFileType},
EngineWrapper, PolarsPlugin,
};
use log::debug;
use nu_path::expand_path_with;
use nu_utils::perf;
use url::Url;
use nu_plugin::PluginCommand;
use nu_protocol::{
@ -12,13 +15,7 @@ use nu_protocol::{
SyntaxShape, Type, Value,
};
use std::{
fs::File,
io::BufReader,
num::NonZeroUsize,
path::{Path, PathBuf},
sync::Arc,
};
use std::{fmt::Debug, fs::File, io::BufReader, num::NonZeroUsize, path::PathBuf, sync::Arc};
use polars::{
lazy::frame::LazyJsonLineReader,
@ -28,7 +25,7 @@ use polars::{
},
};
use polars_io::{avro::AvroReader, csv::read::CsvReadOptions, HiveOptions};
use polars_io::{avro::AvroReader, cloud::CloudOptions, csv::read::CsvReadOptions, HiveOptions};
const DEFAULT_INFER_SCHEMA: usize = 100;
@ -50,8 +47,8 @@ impl PluginCommand for OpenDataFrame {
Signature::build(self.name())
.required(
"file",
SyntaxShape::Filepath,
"file path to load values from",
SyntaxShape::String,
"file path or cloud url to load values from",
)
.switch("eager", "Open dataframe as an eager dataframe", None)
.named(
@ -119,34 +116,98 @@ impl PluginCommand for OpenDataFrame {
}
}
/// A file to open, either on the local filesystem or at a cloud URL.
struct Resource {
// The original string for a cloud URL, or the expanded path for a local file.
path: String,
// File extension taken from the URL path or the expanded local path, if any.
extension: Option<String>,
// Set when the argument parsed as a URL of a supported cloud type; `None` for local files.
cloud_options: Option<CloudOptions>,
// Span of the path argument.
span: Span,
}
impl Debug for Resource {
    /// Formats the resource for debugging without exposing the cloud options.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // The cloud options may carry secrets, so they are never printed;
        // only report whether they were defined.
        let has_cloud_options = self.cloud_options.is_some();

        let mut out = f.debug_struct("Resource");
        out.field("path", &self.path);
        out.field("extension", &self.extension);
        out.field("has_cloud_options", &has_cloud_options);
        out.field("span", &self.span);
        out.finish()
    }
}
impl Resource {
    /// Resolves the user-supplied path into a [`Resource`].
    ///
    /// If the argument parses as a URL, cloud options are built for it and the
    /// original string is kept as the path. Otherwise the argument is treated
    /// as a local path and expanded relative to the engine's current directory.
    /// In both cases the extension is taken from the resulting path.
    ///
    /// # Errors
    ///
    /// Returns a `ShellError` when the argument is a URL whose scheme does not
    /// map to a supported cloud type, when building the cloud options fails,
    /// or when the engine's current directory cannot be obtained.
    fn new(
        plugin: &PolarsPlugin,
        engine: &nu_plugin::EngineInterface,
        spanned_path: &Spanned<String>,
    ) -> Result<Self, ShellError> {
        let raw = &spanned_path.item;

        // A Windows path such as `C:\data\file.csv` parses as a URL whose
        // scheme is the drive letter. Treat single-letter schemes as local
        // paths so they are not rejected as unsupported cloud URLs.
        let cloud_url = raw
            .parse::<Url>()
            .ok()
            .filter(|url| url.scheme().len() > 1);

        let (path, path_buf, cloud_options) = match cloud_url {
            Some(url) => {
                // Build the error lazily so the message is only formatted on failure.
                let cloud_options = build_cloud_options(plugin, &url)?.ok_or_else(|| {
                    ShellError::GenericError {
                        error: format!(
                            "Could not determine a supported cloud type from url: {url}"
                        ),
                        msg: "".into(),
                        span: None,
                        help: None,
                        inner: vec![],
                    }
                })?;
                (raw.clone(), PathBuf::from(url.path()), Some(cloud_options))
            }
            None => {
                let expanded = expand_path_with(raw, engine.get_current_dir()?, true);
                (expanded.to_string_lossy().into_owned(), expanded, None)
            }
        };

        let extension = path_buf
            .extension()
            .and_then(|ext| ext.to_str())
            .map(str::to_string);

        Ok(Self {
            path,
            extension,
            cloud_options,
            span: spanned_path.span,
        })
    }
}
fn command(
plugin: &PolarsPlugin,
engine: &nu_plugin::EngineInterface,
call: &nu_plugin::EvaluatedCall,
) -> Result<PipelineData, ShellError> {
let spanned_file: Spanned<PathBuf> = call.req(0)?;
let file_path = expand_path_with(&spanned_file.item, engine.get_current_dir()?, true);
let file_span = spanned_file.span;
let spanned_file: Spanned<String> = call.req(0)?;
debug!("file: {}", spanned_file.item);
let resource = Resource::new(plugin, engine, &spanned_file)?;
let type_option: Option<(String, Span)> = call
.get_flag("type")?
.map(|t: Spanned<String>| (t.item, t.span))
.or_else(|| {
file_path
.extension()
.map(|e| (e.to_string_lossy().into_owned(), spanned_file.span))
.or_else(|| resource.extension.clone().map(|e| (e, resource.span)));
debug!("resource: {resource:?}");
let is_eager = call.has_flag("eager")?;
if is_eager && resource.cloud_options.is_some() {
return Err(ShellError::GenericError {
error: "Cloud URLs are not supported with --eager".into(),
msg: "".into(),
span: call.get_flag_span("eager"),
help: Some("Remove flag".into()),
inner: vec![],
});
}
match type_option {
Some((ext, blamed)) => match PolarsFileType::from(ext.as_str()) {
PolarsFileType::Csv | PolarsFileType::Tsv => {
from_csv(plugin, engine, call, &file_path, file_span)
from_csv(plugin, engine, call, resource, is_eager)
}
PolarsFileType::Parquet => from_parquet(plugin, engine, call, &file_path, file_span),
PolarsFileType::Arrow => from_arrow(plugin, engine, call, &file_path, file_span),
PolarsFileType::Json => from_json(plugin, engine, call, &file_path, file_span),
PolarsFileType::NdJson => from_ndjson(plugin, engine, call, &file_path, file_span),
PolarsFileType::Avro => from_avro(plugin, engine, call, &file_path, file_span),
PolarsFileType::Parquet => from_parquet(plugin, engine, call, resource, is_eager),
PolarsFileType::Arrow => from_arrow(plugin, engine, call, resource, is_eager),
PolarsFileType::Json => from_json(plugin, engine, call, resource, is_eager),
PolarsFileType::NdJson => from_ndjson(plugin, engine, call, resource, is_eager),
PolarsFileType::Avro => from_avro(plugin, engine, call, resource, is_eager),
_ => Err(PolarsFileType::build_unsupported_error(
&ext,
&[
@ -172,13 +233,17 @@ fn from_parquet(
plugin: &PolarsPlugin,
engine: &nu_plugin::EngineInterface,
call: &nu_plugin::EvaluatedCall,
file_path: &Path,
file_span: Span,
resource: Resource,
is_eager: bool,
) -> Result<Value, ShellError> {
if !call.has_flag("eager")? {
let file: String = call.req(0)?;
let args = ScanArgsParquet::default();
let df: NuLazyFrame = LazyFrame::scan_parquet(file, args)
let file_path = resource.path;
let file_span = resource.span;
if !is_eager {
let args = ScanArgsParquet {
cloud_options: resource.cloud_options.clone(),
..Default::default()
};
let df: NuLazyFrame = LazyFrame::scan_parquet(file_path, args)
.map_err(|e| ShellError::GenericError {
error: "Parquet reader error".into(),
msg: format!("{e:?}"),
@ -225,11 +290,16 @@ fn from_avro(
plugin: &PolarsPlugin,
engine: &nu_plugin::EngineInterface,
call: &nu_plugin::EvaluatedCall,
file_path: &Path,
file_span: Span,
resource: Resource,
_is_eager: bool, // ignore, lazy frames are not currently supported
) -> Result<Value, ShellError> {
let columns: Option<Vec<String>> = call.get_flag("columns")?;
let file_path = resource.path;
let file_span = resource.span;
if resource.cloud_options.is_some() {
return Err(cloud_not_supported(PolarsFileType::Avro, file_span));
}
let columns: Option<Vec<String>> = call.get_flag("columns")?;
let r = File::open(file_path).map_err(|e| ShellError::GenericError {
error: "Error opening file".into(),
msg: e.to_string(),
@ -262,22 +332,23 @@ fn from_arrow(
plugin: &PolarsPlugin,
engine: &nu_plugin::EngineInterface,
call: &nu_plugin::EvaluatedCall,
file_path: &Path,
file_span: Span,
resource: Resource,
is_eager: bool,
) -> Result<Value, ShellError> {
if !call.has_flag("eager")? {
let file: String = call.req(0)?;
let file_path = resource.path;
let file_span = resource.span;
if !is_eager {
let args = ScanArgsIpc {
n_rows: None,
cache: true,
rechunk: false,
row_index: None,
cloud_options: None,
cloud_options: resource.cloud_options.clone(),
include_file_paths: None,
hive_options: HiveOptions::default(),
};
let df: NuLazyFrame = LazyFrame::scan_ipc(file, args)
let df: NuLazyFrame = LazyFrame::scan_ipc(file_path, args)
.map_err(|e| ShellError::GenericError {
error: "IPC reader error".into(),
msg: format!("{e:?}"),
@ -324,9 +395,14 @@ fn from_json(
plugin: &PolarsPlugin,
engine: &nu_plugin::EngineInterface,
call: &nu_plugin::EvaluatedCall,
file_path: &Path,
file_span: Span,
resource: Resource,
_is_eager: bool, // ignore = lazy frames not currently supported
) -> Result<Value, ShellError> {
let file_path = resource.path;
let file_span = resource.span;
if resource.cloud_options.is_some() {
return Err(cloud_not_supported(PolarsFileType::Json, file_span));
}
let file = File::open(file_path).map_err(|e| ShellError::GenericError {
error: "Error opening file".into(),
msg: e.to_string(),
@ -365,9 +441,11 @@ fn from_ndjson(
plugin: &PolarsPlugin,
engine: &nu_plugin::EngineInterface,
call: &nu_plugin::EvaluatedCall,
file_path: &Path,
file_span: Span,
resource: Resource,
is_eager: bool,
) -> Result<Value, ShellError> {
let file_path = resource.path;
let file_span = resource.span;
let infer_schema: NonZeroUsize = call
.get_flag("infer-schema")?
.and_then(NonZeroUsize::new)
@ -380,12 +458,13 @@ fn from_ndjson(
.map(|schema| NuSchema::try_from(&schema))
.transpose()?;
if !call.has_flag("eager")? {
if !is_eager {
let start_time = std::time::Instant::now();
let df = LazyJsonLineReader::new(file_path)
.with_infer_schema_length(Some(infer_schema))
.with_schema(maybe_schema.map(|s| s.into()))
.with_cloud_options(resource.cloud_options.clone())
.finish()
.map_err(|e| ShellError::GenericError {
error: format!("NDJSON reader error: {e}"),
@ -444,9 +523,11 @@ fn from_csv(
plugin: &PolarsPlugin,
engine: &nu_plugin::EngineInterface,
call: &nu_plugin::EvaluatedCall,
file_path: &Path,
file_span: Span,
resource: Resource,
is_eager: bool,
) -> Result<Value, ShellError> {
let file_path = resource.path;
let file_span = resource.span;
let delimiter: Option<Spanned<String>> = call.get_flag("delimiter")?;
let no_header: bool = call.has_flag("no-header")?;
let infer_schema: usize = call
@ -460,8 +541,9 @@ fn from_csv(
.transpose()?;
let truncate_ragged_lines: bool = call.has_flag("truncate-ragged-lines")?;
if !call.has_flag("eager")? {
let csv_reader = LazyCsvReader::new(file_path);
if !is_eager {
let csv_reader =
LazyCsvReader::new(file_path).with_cloud_options(resource.cloud_options.clone());
let csv_reader = match delimiter {
None => csv_reader,
@ -533,7 +615,7 @@ fn from_csv(
.with_encoding(CsvEncoding::LossyUtf8)
.with_truncate_ragged_lines(truncate_ragged_lines)
})
.try_into_reader_with_file_path(Some(file_path.to_path_buf()))
.try_into_reader_with_file_path(Some(file_path.into()))
.map_err(|e| ShellError::GenericError {
error: "Error creating CSV reader".into(),
msg: e.to_string(),
@ -556,3 +638,16 @@ fn from_csv(
df.cache_and_to_value(plugin, engine, call.head)
}
}
/// Builds the error reported when a cloud resource is requested for a file
/// type that has no cloud support.
///
/// The error title names the offending file type (via `file_type.to_str()`)
/// and the error is attached to `span`; the label message is left empty.
fn cloud_not_supported(file_type: PolarsFileType, span: Span) -> ShellError {
    let title = format!(
        "Cloud operations not supported for file type {}",
        file_type.to_str()
    );

    ShellError::GenericError {
        error: title,
        msg: String::new(),
        span: Some(span),
        help: None,
        inner: Vec::new(),
    }
}

View File

@ -248,7 +248,7 @@ pub(crate) mod test {
let tmp_file_str = tmp_file.to_str().expect("should be able to get file path");
let cmd = format!("{cmd} {tmp_file_str}");
let mut plugin_test = PluginTest::new("polars", PolarsPlugin::default().into())?;
let mut plugin_test = PluginTest::new("polars", PolarsPlugin::new()?.into())?;
plugin_test.engine_state_mut().add_env_var(
"PWD".to_string(),
Value::string(

View File

@ -79,7 +79,7 @@ mod tests {
#[test]
fn test_to_lazy() -> Result<(), ShellError> {
let plugin: Arc<PolarsPlugin> = PolarsPlugin::new_test_mode().into();
let plugin: Arc<PolarsPlugin> = PolarsPlugin::new_test_mode()?.into();
let mut plugin_test = PluginTest::new("polars", Arc::clone(&plugin))?;
let pipeline_data = plugin_test.eval("[[a b]; [6 2] [1 4] [4 1]] | polars into-lazy")?;
let value = pipeline_data.into_value(Span::test_data())?;

View File

@ -1,5 +1,6 @@
use nu_protocol::{ShellError, Span};
#[derive(Debug, Clone, PartialEq)]
pub enum PolarsFileType {
Csv,
Tsv,
@ -23,9 +24,12 @@ impl PolarsFileType {
.collect::<Vec<&'static str>>()
.join(", ");
ShellError::FileNotFoundCustom {
msg: format!("Unsupported type {extension} expected {type_string}"),
span,
ShellError::GenericError {
error: format!("Unsupported type {extension} expected {type_string}"),
msg: "".into(),
span: Some(span),
help: None,
inner: vec![],
}
}

View File

@ -14,9 +14,11 @@ use log::debug;
use nu_plugin::{EngineInterface, Plugin, PluginCommand};
mod cache;
mod cloud;
pub mod dataframe;
pub use dataframe::*;
use nu_protocol::{ast::Operator, CustomValue, LabeledError, ShellError, Span, Spanned, Value};
use tokio::runtime::Runtime;
use values::CustomValueType;
use crate::values::PolarsPluginCustomValue;
@ -52,11 +54,27 @@ impl EngineWrapper for &EngineInterface {
}
}
#[derive(Default)]
/// State held by the polars plugin: a cache, a test-only flag and a tokio runtime.
pub struct PolarsPlugin {
/// Cache owned by the plugin.
// NOTE(review): what the cache stores is defined in the `cache` module, not visible here.
pub(crate) cache: Cache,
/// For testing purposes only
pub(crate) disable_cache_drop: bool,
/// Tokio runtime owned by the plugin.
// NOTE(review): presumably used to drive async cloud operations — confirm against the `cloud` module.
pub(crate) runtime: Runtime,
}
impl PolarsPlugin {
    /// Creates a plugin with a default cache, `disable_cache_drop` set to
    /// `false`, and a newly created tokio runtime.
    ///
    /// # Errors
    ///
    /// Returns a `ShellError::GenericError` (without a span) when the tokio
    /// runtime cannot be instantiated.
    pub fn new() -> Result<Self, ShellError> {
        let cache = Cache::default();

        let runtime = match Runtime::new() {
            Ok(rt) => rt,
            Err(e) => {
                return Err(ShellError::GenericError {
                    error: format!("Could not instantiate tokio: {e}"),
                    msg: String::new(),
                    span: None,
                    help: None,
                    inner: Vec::new(),
                });
            }
        };

        Ok(Self {
            cache,
            disable_cache_drop: false,
            runtime,
        })
    }
}
impl Plugin for PolarsPlugin {
@ -237,11 +255,11 @@ pub mod test {
impl PolarsPlugin {
/// Creates a new polars plugin in test mode
pub fn new_test_mode() -> Self {
PolarsPlugin {
pub fn new_test_mode() -> Result<Self, ShellError> {
Ok(PolarsPlugin {
disable_cache_drop: true,
..PolarsPlugin::default()
}
..PolarsPlugin::new()?
})
}
}
@ -269,7 +287,7 @@ pub mod test {
command: &impl PluginCommand,
decls: Vec<Box<dyn Command>>,
) -> Result<(), ShellError> {
let plugin = PolarsPlugin::new_test_mode();
let plugin = PolarsPlugin::new_test_mode()?;
let examples = command.examples();
// we need to cache values in the examples

View File

@ -6,5 +6,12 @@ static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
fn main() {
env_logger::init();
serve_plugin(&PolarsPlugin::default(), MsgPackSerializer {})
match PolarsPlugin::new() {
Ok(ref plugin) => serve_plugin(plugin, MsgPackSerializer {}),
Err(e) => {
eprintln!("{}", e);
std::process::exit(1);
}
}
}