Added the ability to turn on performance debugging through and env var for the polars plugin (#13191)

This allows performance debugging to be turned on by setting:

```nushell
$env.POLARS_PLUGIN_PERF = "true"
```

Furthermore, this improves the other plugin debugging by allowing the
env variable for debugging to be set at any time versus having to be
available when nushell is launched:

```nushell
$env.POLARS_PLUGIN_DEBUG = "true"
```

This plugin introduces a `perf` function that will output timing
results. This works very similar to the perf function available in
nu_utils::utils::perf. This version prints everything to std error to
not break the plugin stream and uses the engine interface to see if the
env variable is configured.

This pull requests uses this `perf` function when:
* opening csv files as dataframes
* opening json lines files as dataframes

This will hopefully help provide some more fine grained information on
how long it takes polars to open different dataframes. The `perf` can
also be utilized later for other dataframes use cases.
This commit is contained in:
Jack Wright
2024-06-20 16:37:38 -07:00
committed by GitHub
parent 7d2d573eb8
commit 20834c9d47
4 changed files with 154 additions and 29 deletions

View File

@ -1,5 +1,6 @@
use crate::{
dataframe::values::NuSchema,
perf,
values::{CustomValueSupport, NuLazyFrame},
PolarsPlugin,
};
@ -378,7 +379,10 @@ fn from_jsonl(
.get_flag("schema")?
.map(|schema| NuSchema::try_from(&schema))
.transpose()?;
if call.has_flag("lazy")? {
let start_time = std::time::Instant::now();
let df = LazyJsonLineReader::new(file_path)
.with_infer_schema_length(infer_schema)
.with_schema(maybe_schema.map(|s| s.into()))
@ -390,6 +394,16 @@ fn from_jsonl(
help: None,
inner: vec![],
})?;
perf(
engine,
"Lazy json lines dataframe open",
start_time,
file!(),
line!(),
column!(),
);
let df = NuLazyFrame::new(false, df);
df.cache_and_to_value(plugin, engine, call.head)
} else {
@ -410,6 +424,8 @@ fn from_jsonl(
None => reader,
};
let start_time = std::time::Instant::now();
let df: NuDataFrame = reader
.finish()
.map_err(|e| ShellError::GenericError {
@ -421,6 +437,15 @@ fn from_jsonl(
})?
.into();
perf(
engine,
"Eager json lines dataframe open",
start_time,
file!(),
line!(),
column!(),
);
df.cache_and_to_value(plugin, engine, call.head)
}
}
@ -484,6 +509,7 @@ fn from_csv(
Some(r) => csv_reader.with_skip_rows(r),
};
let start_time = std::time::Instant::now();
let df: NuLazyFrame = csv_reader
.finish()
.map_err(|e| ShellError::GenericError {
@ -495,8 +521,18 @@ fn from_csv(
})?
.into();
perf(
engine,
"Lazy CSV dataframe open",
start_time,
file!(),
line!(),
column!(),
);
df.cache_and_to_value(plugin, engine, call.head)
} else {
let start_time = std::time::Instant::now();
let df = CsvReadOptions::default()
.with_has_header(!no_header)
.with_infer_schema_length(infer_schema)
@ -529,6 +565,16 @@ fn from_csv(
help: None,
inner: vec![],
})?;
perf(
engine,
"Eager CSV dataframe open",
start_time,
file!(),
line!(),
column!(),
);
let df = NuDataFrame::new(false, df);
df.cache_and_to_value(plugin, engine, call.head)
}