mirror of
https://github.com/nushell/nushell.git
synced 2025-01-21 13:50:11 +01:00
Wiring up s3 support and debugging
This commit is contained in:
parent
96df4b9b92
commit
8d2ad29682
5
Cargo.lock
generated
5
Cargo.lock
generated
@ -5097,6 +5097,7 @@ dependencies = [
|
||||
"blake3",
|
||||
"bytes",
|
||||
"chrono",
|
||||
"chrono-tz 0.8.6",
|
||||
"fast-float",
|
||||
"flate2",
|
||||
"fs4",
|
||||
@ -5142,6 +5143,7 @@ checksum = "d5c8c057ef04feaf34b6ce52096bdea3a766fa4725f50442078c8a4ee86397bf"
|
||||
dependencies = [
|
||||
"ahash 0.8.11",
|
||||
"chrono",
|
||||
"chrono-tz 0.8.6",
|
||||
"fallible-streaming-iterator",
|
||||
"hashbrown 0.15.2",
|
||||
"indexmap",
|
||||
@ -5163,6 +5165,7 @@ checksum = "4a8ca74f42e7b47cad241b36b98d991cc7fbb51b8d0695a055eb937588d1f310"
|
||||
dependencies = [
|
||||
"ahash 0.8.11",
|
||||
"bitflags 2.6.0",
|
||||
"futures",
|
||||
"memchr",
|
||||
"once_cell",
|
||||
"polars-arrow",
|
||||
@ -5178,6 +5181,7 @@ dependencies = [
|
||||
"polars-time",
|
||||
"polars-utils",
|
||||
"rayon",
|
||||
"tokio",
|
||||
"version_check",
|
||||
]
|
||||
|
||||
@ -5303,6 +5307,7 @@ dependencies = [
|
||||
"polars-row",
|
||||
"polars-utils",
|
||||
"rayon",
|
||||
"tokio",
|
||||
"uuid",
|
||||
"version_check",
|
||||
]
|
||||
|
@ -55,6 +55,7 @@ url.workspace = true
|
||||
features = [
|
||||
"arg_where",
|
||||
"checked_arithmetic",
|
||||
"cloud",
|
||||
"concat_str",
|
||||
"cross_join",
|
||||
"csv",
|
||||
@ -84,6 +85,7 @@ features = [
|
||||
"strings",
|
||||
"string_to_integer",
|
||||
"streaming",
|
||||
"timezones",
|
||||
"temporal",
|
||||
"to_dummies",
|
||||
]
|
||||
|
@ -4,6 +4,8 @@ use crate::{
|
||||
values::{CustomValueSupport, NuDataFrame, NuLazyFrame, PolarsFileType},
|
||||
EngineWrapper, PolarsPlugin,
|
||||
};
|
||||
use log::debug;
|
||||
use nu_path::expand_path_with;
|
||||
use nu_utils::perf;
|
||||
use url::Url;
|
||||
|
||||
@ -13,7 +15,7 @@ use nu_protocol::{
|
||||
SyntaxShape, Type, Value,
|
||||
};
|
||||
|
||||
use std::{fs::File, io::BufReader, num::NonZeroUsize, path::PathBuf, sync::Arc};
|
||||
use std::{fmt::Debug, fs::File, io::BufReader, num::NonZeroUsize, path::PathBuf, sync::Arc};
|
||||
|
||||
use polars::{
|
||||
lazy::frame::LazyJsonLineReader,
|
||||
@ -45,7 +47,7 @@ impl PluginCommand for OpenDataFrame {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"file",
|
||||
SyntaxShape::OneOf(vec![SyntaxShape::Filepath, SyntaxShape::String]),
|
||||
SyntaxShape::String,
|
||||
"file path or cloud url to load values from",
|
||||
)
|
||||
.switch("eager", "Open dataframe as an eager dataframe", None)
|
||||
@ -121,9 +123,27 @@ struct Resource {
|
||||
span: Span,
|
||||
}
|
||||
|
||||
impl Debug for Resource {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
// We can't print out the cloud options as it might have
|
||||
// secrets in it.. So just print whether or not it was defined
|
||||
f.debug_struct("Resource")
|
||||
.field("path", &self.path)
|
||||
.field("extension", &self.extension)
|
||||
.field("has_cloud_options", &self.cloud_options.is_some())
|
||||
.field("span", &self.span)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl Resource {
|
||||
fn new(plugin: &PolarsPlugin, spanned_path: &Spanned<String>) -> Result<Self, ShellError> {
|
||||
let (path_buf, cloud_options) = if let Ok(url) = spanned_path.item.parse::<Url>() {
|
||||
fn new(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &nu_plugin::EngineInterface,
|
||||
spanned_path: &Spanned<String>,
|
||||
) -> Result<Self, ShellError> {
|
||||
let mut path = spanned_path.item.clone();
|
||||
let (path_buf, cloud_options) = if let Ok(url) = path.parse::<Url>() {
|
||||
let cloud_options =
|
||||
build_cloud_options(plugin, &url)?.ok_or(ShellError::GenericError {
|
||||
error: format!("Could not determine a supported cloud type from url: {url}"),
|
||||
@ -135,13 +155,15 @@ impl Resource {
|
||||
let p: PathBuf = url.path().into();
|
||||
(p, Some(cloud_options))
|
||||
} else {
|
||||
(PathBuf::from(&spanned_path.item), None)
|
||||
let new_path = expand_path_with(path, engine.get_current_dir()?, true);
|
||||
path = new_path.to_string_lossy().to_string();
|
||||
(new_path, None)
|
||||
};
|
||||
let extension = path_buf
|
||||
.extension()
|
||||
.and_then(|s| s.to_str().map(|s| s.to_string()));
|
||||
Ok(Self {
|
||||
path: spanned_path.item.clone(),
|
||||
path,
|
||||
extension,
|
||||
cloud_options,
|
||||
span: spanned_path.span,
|
||||
@ -155,12 +177,14 @@ fn command(
|
||||
call: &nu_plugin::EvaluatedCall,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let spanned_file: Spanned<String> = call.req(0)?;
|
||||
debug!("file: {}", spanned_file.item);
|
||||
|
||||
let resource = Resource::new(plugin, &spanned_file)?;
|
||||
let resource = Resource::new(plugin, engine, &spanned_file)?;
|
||||
let type_option: Option<(String, Span)> = call
|
||||
.get_flag("type")?
|
||||
.map(|t: Spanned<String>| (t.item, t.span))
|
||||
.or_else(|| resource.extension.clone().map(|e| (e, resource.span)));
|
||||
debug!("resource: {resource:?}");
|
||||
|
||||
let is_eager = call.has_flag("eager")?;
|
||||
|
||||
@ -215,12 +239,11 @@ fn from_parquet(
|
||||
let file_path = resource.path;
|
||||
let file_span = resource.span;
|
||||
if !is_eager {
|
||||
let file: String = call.req(0)?;
|
||||
let args = ScanArgsParquet {
|
||||
cloud_options: resource.cloud_options.clone(),
|
||||
..Default::default()
|
||||
};
|
||||
let df: NuLazyFrame = LazyFrame::scan_parquet(file, args)
|
||||
let df: NuLazyFrame = LazyFrame::scan_parquet(file_path, args)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Parquet reader error".into(),
|
||||
msg: format!("{e:?}"),
|
||||
@ -311,7 +334,6 @@ fn from_arrow(
|
||||
let file_path = resource.path;
|
||||
let file_span = resource.span;
|
||||
if !is_eager {
|
||||
let file: String = call.req(0)?;
|
||||
let args = ScanArgsIpc {
|
||||
n_rows: None,
|
||||
cache: true,
|
||||
@ -322,7 +344,7 @@ fn from_arrow(
|
||||
hive_options: HiveOptions::default(),
|
||||
};
|
||||
|
||||
let df: NuLazyFrame = LazyFrame::scan_ipc(file, args)
|
||||
let df: NuLazyFrame = LazyFrame::scan_ipc(file_path, args)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "IPC reader error".into(),
|
||||
msg: format!("{e:?}"),
|
||||
|
Loading…
Reference in New Issue
Block a user