Added the ability to open json lines dataframes with polars lazy json lines reader. (#13167)

The `--lazy` flag now uses Polars' `LazyJsonLineReader` when
opening a JSON lines file with `polars open`.
This commit is contained in:
Jack Wright 2024-06-20 10:55:49 -07:00 committed by GitHub
parent c09a8a5ec9
commit 7d2d573eb8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -19,9 +19,12 @@ use std::{
sync::Arc, sync::Arc,
}; };
use polars::prelude::{ use polars::{
CsvEncoding, IpcReader, JsonFormat, JsonReader, LazyCsvReader, LazyFileListReader, LazyFrame, lazy::frame::LazyJsonLineReader,
ParquetReader, ScanArgsIpc, ScanArgsParquet, SerReader, prelude::{
CsvEncoding, IpcReader, JsonFormat, JsonReader, LazyCsvReader, LazyFileListReader,
LazyFrame, ParquetReader, ScanArgsIpc, ScanArgsParquet, SerReader,
},
}; };
use polars_io::{ use polars_io::{
@ -375,36 +378,51 @@ fn from_jsonl(
.get_flag("schema")? .get_flag("schema")?
.map(|schema| NuSchema::try_from(&schema)) .map(|schema| NuSchema::try_from(&schema))
.transpose()?; .transpose()?;
let file = File::open(file_path).map_err(|e| ShellError::GenericError { if call.has_flag("lazy")? {
error: "Error opening file".into(), let df = LazyJsonLineReader::new(file_path)
msg: e.to_string(), .with_infer_schema_length(infer_schema)
span: Some(file_span), .with_schema(maybe_schema.map(|s| s.into()))
help: None, .finish()
inner: vec![], .map_err(|e| ShellError::GenericError {
})?; error: format!("Json lines reader error: {e}"),
msg: "".into(),
let buf_reader = BufReader::new(file); span: Some(call.head),
let reader = JsonReader::new(buf_reader) help: None,
.with_json_format(JsonFormat::JsonLines) inner: vec![],
.infer_schema_len(infer_schema); })?;
let df = NuLazyFrame::new(false, df);
let reader = match maybe_schema { df.cache_and_to_value(plugin, engine, call.head)
Some(schema) => reader.with_schema(schema.into()), } else {
None => reader, let file = File::open(file_path).map_err(|e| ShellError::GenericError {
}; error: "Error opening file".into(),
msg: e.to_string(),
let df: NuDataFrame = reader span: Some(file_span),
.finish()
.map_err(|e| ShellError::GenericError {
error: "Json lines reader error".into(),
msg: format!("{e:?}"),
span: Some(call.head),
help: None, help: None,
inner: vec![], inner: vec![],
})? })?;
.into(); let buf_reader = BufReader::new(file);
let reader = JsonReader::new(buf_reader)
.with_json_format(JsonFormat::JsonLines)
.infer_schema_len(infer_schema);
df.cache_and_to_value(plugin, engine, call.head) let reader = match maybe_schema {
Some(schema) => reader.with_schema(schema.into()),
None => reader,
};
let df: NuDataFrame = reader
.finish()
.map_err(|e| ShellError::GenericError {
error: "Json lines reader error".into(),
msg: format!("{e:?}"),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into();
df.cache_and_to_value(plugin, engine, call.head)
}
} }
fn from_csv( fn from_csv(