more s3 work.. savin

This commit is contained in:
Jack Wright 2024-12-06 17:43:01 -08:00
parent 6d51d23681
commit 8dc20d5fc8
5 changed files with 58 additions and 64 deletions

61
Cargo.lock generated
View File

@ -4262,7 +4262,7 @@ dependencies = [
"nu-protocol",
"nu-utils",
"num",
"object_store 0.11.1",
"object_store",
"polars",
"polars-arrow",
"polars-io",
@ -4548,37 +4548,7 @@ dependencies = [
"ring",
"serde",
"serde_json",
"snafu 0.7.5",
"tokio",
"tracing",
"url",
"walkdir",
]
[[package]]
name = "object_store"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6eb4c22c6154a1e759d7099f9ffad7cc5ef8245f9efbab4a41b92623079c82f3"
dependencies = [
"async-trait",
"base64 0.22.1",
"bytes",
"chrono",
"futures",
"humantime",
"hyper 1.5.0",
"itertools 0.13.0",
"md-5",
"parking_lot",
"percent-encoding",
"quick-xml 0.36.2",
"rand",
"reqwest",
"ring",
"serde",
"serde_json",
"snafu 0.8.5",
"snafu",
"tokio",
"tracing",
"url",
@ -5111,7 +5081,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4643898a644f30c83737db85f942f8c8956b0c11190b39afec745218eae1746b"
dependencies = [
"avro-schema",
"object_store 0.10.2",
"object_store",
"polars-arrow-format",
"regex",
"simdutf8",
@ -5165,7 +5135,7 @@ dependencies = [
"memchr",
"memmap2",
"num-traits",
"object_store 0.10.2",
"object_store",
"once_cell",
"percent-encoding",
"polars-arrow",
@ -6970,16 +6940,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e4de37ad025c587a29e8f3f5605c00f70b98715ef90b9061a815b9e59e9042d6"
dependencies = [
"doc-comment",
"snafu-derive 0.7.5",
]
[[package]]
name = "snafu"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "223891c85e2a29c3fe8fb900c1fae5e69c2e42415e3177752e8718475efa5019"
dependencies = [
"snafu-derive 0.8.5",
"snafu-derive",
]
[[package]]
@ -6994,18 +6955,6 @@ dependencies = [
"syn 1.0.109",
]
[[package]]
name = "snafu-derive"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03c3c6b7927ffe7ecaa769ee0e3994da3b8cafc8f444578982c83ecb161af917"
dependencies = [
"heck 0.5.0",
"proc-macro2",
"quote",
"syn 2.0.87",
]
[[package]]
name = "snap"
version = "1.1.1"

View File

@ -47,8 +47,8 @@ hashbrown = { version = "0.14", features = ["rayon", "ahash", "serde", "raw"] }
# Cloud support
aws-config = { version = "1.5", features = ["sso"] }
aws-credential-types = "1.2"
tokio = { version = "1.41.1", features = ["full"] }
object_store = { version = "0.11.1", features = ["aws"] }
tokio = { version = "1.41", features = ["full"] }
object_store = { version = "0.10", default-features = false }
[dependencies.polars]
features = [

View File

@ -61,7 +61,7 @@ async fn build_aws_cloud_configs() -> Result<Vec<(AmazonS3ConfigKey, String)>, S
Ok(configs)
}
fn build_aws_cloud_options(plugin: &PolarsPlugin) -> Result<CloudOptions, ShellError> {
pub(crate) fn build_cloud_options(plugin: &PolarsPlugin) -> Result<CloudOptions, ShellError> {
let configs = plugin.runtime.block_on(build_aws_cloud_configs())?;
Ok(CloudOptions::default().with_aws(configs.into_iter()))
}

View File

@ -1 +1,33 @@
use nu_protocol::ShellError;
use polars_io::cloud::CloudOptions;
use crate::PolarsPlugin;
mod aws;
enum CloudType {
Aws,
}
fn determine_cloud_type(path: &str) -> Option<CloudType> {
if path.starts_with("s3://") | path.starts_with("s3a://") {
Some(CloudType::Aws)
} else {
None
}
}
/// Returns true if it is a supported cloud url
pub(crate) fn is_cloud_url(path: &str) ->bool {
determine_cloud_type(path).is_some()
}
pub(crate) fn build_cloud_options(
plugin: &PolarsPlugin,
path: &str,
) -> Result<Option<CloudOptions>, ShellError> {
match determine_cloud_type(path) {
Some(CloudType::Aws) => aws::build_cloud_options(plugin).map(|c| Some(c)),
_ => Ok(None),
}
}

View File

@ -28,7 +28,7 @@ use polars::{
},
};
use polars_io::{avro::AvroReader, csv::read::CsvReadOptions, HiveOptions};
use polars_io::{avro::AvroReader, cloud::CloudOptions, csv::read::CsvReadOptions, HiveOptions};
const DEFAULT_INFER_SCHEMA: usize = 100;
@ -50,8 +50,8 @@ impl PluginCommand for OpenDataFrame {
Signature::build(self.name())
.required(
"file",
SyntaxShape::Filepath,
"file path to load values from",
SyntaxShape::OneOf(vec![SyntaxShape::Filepath, SyntaxShape::String]),
"file path or cloud url to load values from",
)
.switch("eager", "Open dataframe as an eager dataframe", None)
.named(
@ -119,12 +119,23 @@ impl PluginCommand for OpenDataFrame {
}
}
enum Resource {
File(PathBuf, Span),
CloudUrl(String, CloudOptions, Span)
}
fn command(
plugin: &PolarsPlugin,
engine: &nu_plugin::EngineInterface,
call: &nu_plugin::EvaluatedCall,
) -> Result<PipelineData, ShellError> {
let spanned_file: Spanned<PathBuf> = call.req(0)?;
let spanned_file: Spanned<String> = call.req(0)?;
let resources = if let Some(cloud_options) = crate::cloud::build_cloud_options(plugin, &spanned_file.item)? {
Resource::CloudUrl(spanned_file.item, cloud_options, spanned_file.span)
} else {
}
let file_path = expand_path_with(&spanned_file.item, engine.get_current_dir()?, true);
let file_span = spanned_file.span;
@ -175,9 +186,11 @@ fn from_parquet(
file_path: &Path,
file_span: Span,
) -> Result<Value, ShellError> {
let cloud_options = crate::cloud::build_cloud_options(plugin, file_path)
if !call.has_flag("eager")? {
let file: String = call.req(0)?;
let args = ScanArgsParquet::default();
let mut args = ScanArgsParquet::default();
let df: NuLazyFrame = LazyFrame::scan_parquet(file, args)
.map_err(|e| ShellError::GenericError {
error: "Parquet reader error".into(),