diff --git a/Cargo.lock b/Cargo.lock index 3d206f3966..d4bc893ffb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4262,7 +4262,7 @@ dependencies = [ "nu-protocol", "nu-utils", "num", - "object_store 0.11.1", + "object_store", "polars", "polars-arrow", "polars-io", @@ -4548,37 +4548,7 @@ dependencies = [ "ring", "serde", "serde_json", - "snafu 0.7.5", - "tokio", - "tracing", - "url", - "walkdir", -] - -[[package]] -name = "object_store" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6eb4c22c6154a1e759d7099f9ffad7cc5ef8245f9efbab4a41b92623079c82f3" -dependencies = [ - "async-trait", - "base64 0.22.1", - "bytes", - "chrono", - "futures", - "humantime", - "hyper 1.5.0", - "itertools 0.13.0", - "md-5", - "parking_lot", - "percent-encoding", - "quick-xml 0.36.2", - "rand", - "reqwest", - "ring", - "serde", - "serde_json", - "snafu 0.8.5", + "snafu", "tokio", "tracing", "url", @@ -5111,7 +5081,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4643898a644f30c83737db85f942f8c8956b0c11190b39afec745218eae1746b" dependencies = [ "avro-schema", - "object_store 0.10.2", + "object_store", "polars-arrow-format", "regex", "simdutf8", @@ -5165,7 +5135,7 @@ dependencies = [ "memchr", "memmap2", "num-traits", - "object_store 0.10.2", + "object_store", "once_cell", "percent-encoding", "polars-arrow", @@ -6970,16 +6940,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e4de37ad025c587a29e8f3f5605c00f70b98715ef90b9061a815b9e59e9042d6" dependencies = [ "doc-comment", - "snafu-derive 0.7.5", -] - -[[package]] -name = "snafu" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "223891c85e2a29c3fe8fb900c1fae5e69c2e42415e3177752e8718475efa5019" -dependencies = [ - "snafu-derive 0.8.5", + "snafu-derive", ] [[package]] @@ -6994,18 +6955,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "snafu-derive" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03c3c6b7927ffe7ecaa769ee0e3994da3b8cafc8f444578982c83ecb161af917" -dependencies = [ - "heck 0.5.0", - "proc-macro2", - "quote", - "syn 2.0.87", -] - [[package]] name = "snap" version = "1.1.1" diff --git a/crates/nu_plugin_polars/Cargo.toml b/crates/nu_plugin_polars/Cargo.toml index aac4f458f4..a03f634b3a 100644 --- a/crates/nu_plugin_polars/Cargo.toml +++ b/crates/nu_plugin_polars/Cargo.toml @@ -47,8 +47,8 @@ hashbrown = { version = "0.14", features = ["rayon", "ahash", "serde", "raw"] } # Cloud support aws-config = { version = "1.5", features = ["sso"] } aws-credential-types = "1.2" -tokio = { version = "1.41.1", features = ["full"] } -object_store = { version = "0.11.1", features = ["aws"] } +tokio = { version = "1.41", features = ["full"] } +object_store = { version = "0.10", default-features = false } [dependencies.polars] features = [ diff --git a/crates/nu_plugin_polars/src/cloud/aws.rs b/crates/nu_plugin_polars/src/cloud/aws.rs index 55a3c82e38..320acd820f 100644 --- a/crates/nu_plugin_polars/src/cloud/aws.rs +++ b/crates/nu_plugin_polars/src/cloud/aws.rs @@ -61,7 +61,7 @@ async fn build_aws_cloud_configs() -> Result, S Ok(configs) } -fn build_aws_cloud_options(plugin: &PolarsPlugin) -> Result { +pub(crate) fn build_cloud_options(plugin: &PolarsPlugin) -> Result { let configs = plugin.runtime.block_on(build_aws_cloud_configs())?; Ok(CloudOptions::default().with_aws(configs.into_iter())) } diff --git a/crates/nu_plugin_polars/src/cloud/mod.rs b/crates/nu_plugin_polars/src/cloud/mod.rs index 827da9e3e5..e630fe0c2d 100644 --- a/crates/nu_plugin_polars/src/cloud/mod.rs +++ b/crates/nu_plugin_polars/src/cloud/mod.rs @@ -1 +1,33 @@ +use nu_protocol::ShellError; +use polars_io::cloud::CloudOptions; + +use crate::PolarsPlugin; + mod aws; + +enum CloudType { + Aws, +} + +fn determine_cloud_type(path: &str) -> Option { + if path.starts_with("s3://") | path.starts_with("s3a://") { + Some(CloudType::Aws) + } else { + None + } +} + +/// Returns true if it is a supported cloud url +pub(crate) fn is_cloud_url(path: &str) ->bool { + determine_cloud_type(path).is_some() +} + +pub(crate) fn build_cloud_options( + plugin: &PolarsPlugin, + path: &str, +) -> Result, ShellError> { + match determine_cloud_type(path) { + Some(CloudType::Aws) => aws::build_cloud_options(plugin).map(|c| Some(c)), + _ => Ok(None), + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/command/core/open.rs b/crates/nu_plugin_polars/src/dataframe/command/core/open.rs index 856c037270..ede1f070bc 100644 --- a/crates/nu_plugin_polars/src/dataframe/command/core/open.rs +++ b/crates/nu_plugin_polars/src/dataframe/command/core/open.rs @@ -28,7 +28,7 @@ use polars::{ }, }; -use polars_io::{avro::AvroReader, csv::read::CsvReadOptions, HiveOptions}; +use polars_io::{avro::AvroReader, cloud::CloudOptions, csv::read::CsvReadOptions, HiveOptions}; const DEFAULT_INFER_SCHEMA: usize = 100; @@ -50,8 +50,8 @@ impl PluginCommand for OpenDataFrame { Signature::build(self.name()) .required( "file", - SyntaxShape::Filepath, - "file path to load values from", + SyntaxShape::OneOf(vec![SyntaxShape::Filepath, SyntaxShape::String]), + "file path or cloud url to load values from", ) .switch("eager", "Open dataframe as an eager dataframe", None) .named( @@ -119,12 +119,23 @@ impl PluginCommand for OpenDataFrame { } } +enum Resource { + File(PathBuf, Span), + CloudUrl(String, CloudOptions, Span) +} + fn command( plugin: &PolarsPlugin, engine: &nu_plugin::EngineInterface, call: &nu_plugin::EvaluatedCall, ) -> Result { - let spanned_file: Spanned = call.req(0)?; + let spanned_file: Spanned = call.req(0)?; + + let resources = if let Some(cloud_options) = crate::cloud::build_cloud_options(plugin, &spanned_file.item)? { + Resource::CloudUrl(spanned_file.item, cloud_options, spanned_file.span) + } else { + + } let file_path = expand_path_with(&spanned_file.item, engine.get_current_dir()?, true); let file_span = spanned_file.span; @@ -175,9 +186,11 @@ fn from_parquet( file_path: &Path, file_span: Span, ) -> Result { + + let cloud_options = crate::cloud::build_cloud_options(plugin, file_path) if !call.has_flag("eager")? { let file: String = call.req(0)?; - let args = ScanArgsParquet::default(); + let mut args = ScanArgsParquet::default(); let df: NuLazyFrame = LazyFrame::scan_parquet(file, args) .map_err(|e| ShellError::GenericError { error: "Parquet reader error".into(),