From bcaef8959c012f60c1a462cf747514adfea3a526 Mon Sep 17 00:00:00 2001 From: Jack Wright <56345+ayax79@users.noreply.github.com> Date: Fri, 27 Sep 2024 04:54:46 -0700 Subject: [PATCH] Expose flag truncate-ragged-lines in `polars open` (#13939) # Description Introduces a new flag `--truncate-ragged-lines` for `polars open` that will truncate lines that are longer than the schema. # User-Facing Changes - Introduction of the flag `--truncate-ragged-lines` for `polars open` --- .../src/dataframe/command/core/open.rs | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/crates/nu_plugin_polars/src/dataframe/command/core/open.rs b/crates/nu_plugin_polars/src/dataframe/command/core/open.rs index a897114999..1aae5f26e2 100644 --- a/crates/nu_plugin_polars/src/dataframe/command/core/open.rs +++ b/crates/nu_plugin_polars/src/dataframe/command/core/open.rs @@ -97,6 +97,7 @@ impl PluginCommand for OpenDataFrame { r#"Polars Schema in format [{name: str}]. CSV, JSON, and JSONL files"#, Some('s') ) + .switch("truncate-ragged-lines", "Truncate lines that are longer than the schema. CSV file", None) .input_output_type(Type::Any, Type::Custom("dataframe".into())) .category(Category::Custom("dataframe".into())) } @@ -466,11 +467,11 @@ fn from_csv( .unwrap_or(DEFAULT_INFER_SCHEMA); let skip_rows: Option<usize> = call.get_flag("skip-rows")?; let columns: Option<Vec<String>> = call.get_flag("columns")?; - let maybe_schema = call .get_flag("schema")? .map(|schema| NuSchema::try_from(&schema)) .transpose()?; + let truncate_ragged_lines: bool = call.has_flag("truncate-ragged-lines")?; if !call.has_flag("eager")? 
{ let csv_reader = LazyCsvReader::new(file_path); @@ -496,14 +497,11 @@ fn from_csv( } }; - let csv_reader = csv_reader.with_has_header(!no_header); - - let csv_reader = match maybe_schema { - Some(schema) => csv_reader.with_schema(Some(schema.into())), - None => csv_reader, - }; - - let csv_reader = csv_reader.with_infer_schema_length(Some(infer_schema)); + let csv_reader = csv_reader + .with_has_header(!no_header) + .with_infer_schema_length(Some(infer_schema)) + .with_schema(maybe_schema.map(Into::into)) + .with_truncate_ragged_lines(truncate_ragged_lines); let csv_reader = match skip_rows { None => csv_reader, @@ -542,6 +540,7 @@ fn from_csv( .unwrap_or(b','), ) .with_encoding(CsvEncoding::LossyUtf8) + .with_truncate_ragged_lines(truncate_ragged_lines) }) .try_into_reader_with_file_path(Some(file_path.to_path_buf())) .map_err(|e| ShellError::GenericError {