mirror of
https://github.com/nushell/nushell.git
synced 2025-08-09 14:36:08 +02:00
Working on S3/File resource
This commit is contained in:
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -4245,6 +4245,7 @@ dependencies = [
|
||||
"tempfile",
|
||||
"tokio",
|
||||
"typetag",
|
||||
"url",
|
||||
"uuid",
|
||||
]
|
||||
|
||||
|
@ -49,6 +49,7 @@ aws-config = { version = "1.5", features = ["sso"] }
|
||||
aws-credential-types = "1.2"
|
||||
tokio = { version = "1.41", features = ["full"] }
|
||||
object_store = { version = "0.10", default-features = false }
|
||||
url.workspace = true
|
||||
|
||||
[dependencies.polars]
|
||||
features = [
|
||||
|
@ -1,5 +1,6 @@
|
||||
use nu_protocol::ShellError;
|
||||
use polars_io::cloud::CloudOptions;
|
||||
use url::Url;
|
||||
|
||||
use crate::PolarsPlugin;
|
||||
|
||||
@ -9,24 +10,18 @@ enum CloudType {
|
||||
Aws,
|
||||
}
|
||||
|
||||
fn determine_cloud_type(path: &str) -> Option<CloudType> {
|
||||
if path.starts_with("s3://") | path.starts_with("s3a://") {
|
||||
Some(CloudType::Aws)
|
||||
} else {
|
||||
None
|
||||
fn determine_cloud_type(url: &Url) -> Option<CloudType> {
|
||||
match url.scheme() {
|
||||
"s3" | "s3a" => Some(CloudType::Aws),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if it is a supported cloud url
|
||||
pub(crate) fn is_cloud_url(path: &str) ->bool {
|
||||
determine_cloud_type(path).is_some()
|
||||
}
|
||||
|
||||
pub(crate) fn build_cloud_options(
|
||||
plugin: &PolarsPlugin,
|
||||
path: &str,
|
||||
url: &Url,
|
||||
) -> Result<Option<CloudOptions>, ShellError> {
|
||||
match determine_cloud_type(path) {
|
||||
match determine_cloud_type(url) {
|
||||
Some(CloudType::Aws) => aws::build_cloud_options(plugin).map(|c| Some(c)),
|
||||
_ => Ok(None),
|
||||
}
|
||||
|
@ -1,10 +1,8 @@
|
||||
use crate::{
|
||||
dataframe::values::NuSchema,
|
||||
values::{CustomValueSupport, NuDataFrame, NuLazyFrame, PolarsFileType},
|
||||
EngineWrapper, PolarsPlugin,
|
||||
cloud::build_cloud_options, dataframe::values::NuSchema, values::{CustomValueSupport, NuDataFrame, NuLazyFrame, PolarsFileType}, EngineWrapper, PolarsPlugin
|
||||
};
|
||||
use nu_path::expand_path_with;
|
||||
use nu_utils::perf;
|
||||
use url::Url;
|
||||
|
||||
use nu_plugin::PluginCommand;
|
||||
use nu_protocol::{
|
||||
@ -12,13 +10,7 @@ use nu_protocol::{
|
||||
SyntaxShape, Type, Value,
|
||||
};
|
||||
|
||||
use std::{
|
||||
fs::File,
|
||||
io::BufReader,
|
||||
num::NonZeroUsize,
|
||||
path::{Path, PathBuf},
|
||||
sync::Arc,
|
||||
};
|
||||
use std::{fs::File, io::BufReader, num::NonZeroUsize, path::PathBuf, sync::Arc};
|
||||
|
||||
use polars::{
|
||||
lazy::frame::LazyJsonLineReader,
|
||||
@ -119,21 +111,41 @@ impl PluginCommand for OpenDataFrame {
|
||||
}
|
||||
}
|
||||
|
||||
enum Resource {
|
||||
File(PathBuf, Span),
|
||||
CloudUrl(String, CloudOptions, Span)
|
||||
struct Resource {
|
||||
path: String,
|
||||
extension: Option<String>,
|
||||
cloud_options: Option<CloudOptions>,
|
||||
span: Span,
|
||||
}
|
||||
|
||||
impl Resource {
|
||||
fn file_type(&self) ->Option<PolarsFileType> {
|
||||
let extension = match self {
|
||||
Self::File(p, _) => p.extension().map(|s| s.to_string_lossy()),
|
||||
Self::CloudUrl(p, _, _) => p.ri
|
||||
fn new(plugin: &PolarsPlugin, spanned_path: &Spanned<String>) -> Result<Self, ShellError> {
|
||||
let (path_buf, cloud_options) = if let Ok(url) = spanned_path.item.parse::<Url>() {
|
||||
let cloud_options =
|
||||
build_cloud_options(plugin, &url)?.ok_or(ShellError::GenericError {
|
||||
error: format!("Could not determine a supported cloud type from url: {url}"),
|
||||
msg: "".into(),
|
||||
span: None,
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
let p: PathBuf = url.path().into();
|
||||
(p, Some(cloud_options))
|
||||
} else {
|
||||
(PathBuf::from(&spanned_path.item), None)
|
||||
};
|
||||
let extension = path_buf
|
||||
.extension()
|
||||
.and_then(|s| s.to_str().map(|s| s.to_string()));
|
||||
Ok(Self {
|
||||
path: spanned_path.item.clone(),
|
||||
extension,
|
||||
cloud_options,
|
||||
span: spanned_path.span,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &nu_plugin::EngineInterface,
|
||||
@ -141,32 +153,20 @@ fn command(
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let spanned_file: Spanned<String> = call.req(0)?;
|
||||
|
||||
let resource = if let Some(cloud_options) = crate::cloud::build_cloud_options(plugin, &spanned_file.item)? {
|
||||
Resource::CloudUrl(spanned_file.item, cloud_options, spanned_file.span)
|
||||
} else {
|
||||
let path = expand_path_with(&spanned_file.item, engine.get_current_dir()?, true);
|
||||
Resource::File(path, spanned_file.span)
|
||||
};
|
||||
|
||||
let resource = Resource::new(plugin, &spanned_file)?;
|
||||
let type_option: Option<(String, Span)> = call
|
||||
.get_flag("type")?
|
||||
.map(|t: Spanned<String>| (t.item, t.span))
|
||||
.or_else(|| {
|
||||
file_path
|
||||
.extension()
|
||||
.map(|e| (e.to_string_lossy().into_owned(), spanned_file.span))
|
||||
});
|
||||
.or_else(|| resource.extension.clone().map(|e| (e, resource.span)));
|
||||
|
||||
match type_option {
|
||||
Some((ext, blamed)) => match PolarsFileType::from(ext.as_str()) {
|
||||
PolarsFileType::Csv | PolarsFileType::Tsv => {
|
||||
from_csv(plugin, engine, call, &file_path, file_span)
|
||||
}
|
||||
PolarsFileType::Parquet => from_parquet(plugin, engine, call, &file_path, file_span),
|
||||
PolarsFileType::Arrow => from_arrow(plugin, engine, call, &file_path, file_span),
|
||||
PolarsFileType::Json => from_json(plugin, engine, call, &file_path, file_span),
|
||||
PolarsFileType::NdJson => from_ndjson(plugin, engine, call, &file_path, file_span),
|
||||
PolarsFileType::Avro => from_avro(plugin, engine, call, &file_path, file_span),
|
||||
PolarsFileType::Csv | PolarsFileType::Tsv => from_csv(plugin, engine, call, resource),
|
||||
PolarsFileType::Parquet => from_parquet(plugin, engine, call, resource),
|
||||
PolarsFileType::Arrow => from_arrow(plugin, engine, call, resource),
|
||||
PolarsFileType::Json => from_json(plugin, engine, call, resource),
|
||||
PolarsFileType::NdJson => from_ndjson(plugin, engine, call, resource),
|
||||
PolarsFileType::Avro => from_avro(plugin, engine, call, resource),
|
||||
_ => Err(PolarsFileType::build_unsupported_error(
|
||||
&ext,
|
||||
&[
|
||||
@ -192,14 +192,13 @@ fn from_parquet(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &nu_plugin::EngineInterface,
|
||||
call: &nu_plugin::EvaluatedCall,
|
||||
file_path: &Path,
|
||||
file_span: Span,
|
||||
resource: Resource,
|
||||
) -> Result<Value, ShellError> {
|
||||
|
||||
let cloud_options = crate::cloud::build_cloud_options(plugin, file_path)?;
|
||||
let file_path = resource.path;
|
||||
let file_span = resource.span;
|
||||
if !call.has_flag("eager")? {
|
||||
let file: String = call.req(0)?;
|
||||
let mut args = ScanArgsParquet::default();
|
||||
let args = ScanArgsParquet::default();
|
||||
let df: NuLazyFrame = LazyFrame::scan_parquet(file, args)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Parquet reader error".into(),
|
||||
@ -247,11 +246,11 @@ fn from_avro(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &nu_plugin::EngineInterface,
|
||||
call: &nu_plugin::EvaluatedCall,
|
||||
file_path: &Path,
|
||||
file_span: Span,
|
||||
resource: Resource,
|
||||
) -> Result<Value, ShellError> {
|
||||
let file_path = resource.path;
|
||||
let file_span = resource.span;
|
||||
let columns: Option<Vec<String>> = call.get_flag("columns")?;
|
||||
|
||||
let r = File::open(file_path).map_err(|e| ShellError::GenericError {
|
||||
error: "Error opening file".into(),
|
||||
msg: e.to_string(),
|
||||
@ -284,9 +283,10 @@ fn from_arrow(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &nu_plugin::EngineInterface,
|
||||
call: &nu_plugin::EvaluatedCall,
|
||||
file_path: &Path,
|
||||
file_span: Span,
|
||||
resource: Resource,
|
||||
) -> Result<Value, ShellError> {
|
||||
let file_path = resource.path;
|
||||
let file_span = resource.span;
|
||||
if !call.has_flag("eager")? {
|
||||
let file: String = call.req(0)?;
|
||||
let args = ScanArgsIpc {
|
||||
@ -346,9 +346,10 @@ fn from_json(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &nu_plugin::EngineInterface,
|
||||
call: &nu_plugin::EvaluatedCall,
|
||||
file_path: &Path,
|
||||
file_span: Span,
|
||||
resource: Resource,
|
||||
) -> Result<Value, ShellError> {
|
||||
let file_path = resource.path;
|
||||
let file_span = resource.span;
|
||||
let file = File::open(file_path).map_err(|e| ShellError::GenericError {
|
||||
error: "Error opening file".into(),
|
||||
msg: e.to_string(),
|
||||
@ -387,9 +388,10 @@ fn from_ndjson(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &nu_plugin::EngineInterface,
|
||||
call: &nu_plugin::EvaluatedCall,
|
||||
file_path: &Path,
|
||||
file_span: Span,
|
||||
resource: Resource,
|
||||
) -> Result<Value, ShellError> {
|
||||
let file_path = resource.path;
|
||||
let file_span = resource.span;
|
||||
let infer_schema: NonZeroUsize = call
|
||||
.get_flag("infer-schema")?
|
||||
.and_then(NonZeroUsize::new)
|
||||
@ -466,9 +468,10 @@ fn from_csv(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &nu_plugin::EngineInterface,
|
||||
call: &nu_plugin::EvaluatedCall,
|
||||
file_path: &Path,
|
||||
file_span: Span,
|
||||
resource: Resource,
|
||||
) -> Result<Value, ShellError> {
|
||||
let file_path = resource.path;
|
||||
let file_span = resource.span;
|
||||
let delimiter: Option<Spanned<String>> = call.get_flag("delimiter")?;
|
||||
let no_header: bool = call.has_flag("no-header")?;
|
||||
let infer_schema: usize = call
|
||||
@ -555,7 +558,7 @@ fn from_csv(
|
||||
.with_encoding(CsvEncoding::LossyUtf8)
|
||||
.with_truncate_ragged_lines(truncate_ragged_lines)
|
||||
})
|
||||
.try_into_reader_with_file_path(Some(file_path.to_path_buf()))
|
||||
.try_into_reader_with_file_path(Some(file_path.into()))
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error creating CSV reader".into(),
|
||||
msg: e.to_string(),
|
||||
|
@ -1,5 +1,6 @@
|
||||
use nu_protocol::{ShellError, Span};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum PolarsFileType {
|
||||
Csv,
|
||||
Tsv,
|
||||
|
Reference in New Issue
Block a user