more s3 work.. savin

This commit is contained in:
Jack Wright
2024-12-06 17:43:01 -08:00
parent 6d51d23681
commit 8dc20d5fc8
5 changed files with 58 additions and 64 deletions

View File

@ -47,8 +47,8 @@ hashbrown = { version = "0.14", features = ["rayon", "ahash", "serde", "raw"] }
# Cloud support
aws-config = { version = "1.5", features = ["sso"] }
aws-credential-types = "1.2"
tokio = { version = "1.41.1", features = ["full"] }
object_store = { version = "0.11.1", features = ["aws"] }
tokio = { version = "1.41", features = ["full"] }
object_store = { version = "0.10", default-features = false }
[dependencies.polars]
features = [

View File

@ -61,7 +61,7 @@ async fn build_aws_cloud_configs() -> Result<Vec<(AmazonS3ConfigKey, String)>, S
Ok(configs)
}
fn build_aws_cloud_options(plugin: &PolarsPlugin) -> Result<CloudOptions, ShellError> {
pub(crate) fn build_cloud_options(plugin: &PolarsPlugin) -> Result<CloudOptions, ShellError> {
let configs = plugin.runtime.block_on(build_aws_cloud_configs())?;
Ok(CloudOptions::default().with_aws(configs.into_iter()))
}

View File

@ -1 +1,33 @@
use nu_protocol::ShellError;
use polars_io::cloud::CloudOptions;
use crate::PolarsPlugin;
mod aws;
enum CloudType {
Aws,
}
fn determine_cloud_type(path: &str) -> Option<CloudType> {
if path.starts_with("s3://") | path.starts_with("s3a://") {
Some(CloudType::Aws)
} else {
None
}
}
/// Returns true if it is a supported cloud url
pub(crate) fn is_cloud_url(path: &str) ->bool {
determine_cloud_type(path).is_some()
}
pub(crate) fn build_cloud_options(
plugin: &PolarsPlugin,
path: &str,
) -> Result<Option<CloudOptions>, ShellError> {
match determine_cloud_type(path) {
Some(CloudType::Aws) => aws::build_cloud_options(plugin).map(|c| Some(c)),
_ => Ok(None),
}
}

View File

@ -28,7 +28,7 @@ use polars::{
},
};
use polars_io::{avro::AvroReader, csv::read::CsvReadOptions, HiveOptions};
use polars_io::{avro::AvroReader, cloud::CloudOptions, csv::read::CsvReadOptions, HiveOptions};
const DEFAULT_INFER_SCHEMA: usize = 100;
@ -50,8 +50,8 @@ impl PluginCommand for OpenDataFrame {
Signature::build(self.name())
.required(
"file",
SyntaxShape::Filepath,
"file path to load values from",
SyntaxShape::OneOf(vec![SyntaxShape::Filepath, SyntaxShape::String]),
"file path or cloud url to load values from",
)
.switch("eager", "Open dataframe as an eager dataframe", None)
.named(
@ -119,12 +119,23 @@ impl PluginCommand for OpenDataFrame {
}
}
enum Resource {
File(PathBuf, Span),
CloudUrl(String, CloudOptions, Span)
}
fn command(
plugin: &PolarsPlugin,
engine: &nu_plugin::EngineInterface,
call: &nu_plugin::EvaluatedCall,
) -> Result<PipelineData, ShellError> {
let spanned_file: Spanned<PathBuf> = call.req(0)?;
let spanned_file: Spanned<String> = call.req(0)?;
let resources = if let Some(cloud_options) = crate::cloud::build_cloud_options(plugin, &spanned_file.item)? {
Resource::CloudUrl(spanned_file.item, cloud_options, spanned_file.span)
} else {
}
let file_path = expand_path_with(&spanned_file.item, engine.get_current_dir()?, true);
let file_span = spanned_file.span;
@ -175,9 +186,11 @@ fn from_parquet(
file_path: &Path,
file_span: Span,
) -> Result<Value, ShellError> {
let cloud_options = crate::cloud::build_cloud_options(plugin, file_path)
if !call.has_flag("eager")? {
let file: String = call.req(0)?;
let args = ScanArgsParquet::default();
let mut args = ScanArgsParquet::default();
let df: NuLazyFrame = LazyFrame::scan_parquet(file, args)
.map_err(|e| ShellError::GenericError {
error: "Parquet reader error".into(),