use std::path::PathBuf; use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; use nu_protocol::{ dataframe::NuDataFrame, Primitive, Signature, SyntaxShape, UntaggedValue, Value, }; use nu_source::Tagged; use polars::prelude::{CsvReader, JsonReader, ParquetReader, SerReader}; use std::fs::File; pub struct DataFrame; impl WholeStreamCommand for DataFrame { fn name(&self) -> &str { "dataframe open" } fn usage(&self) -> &str { "Opens csv, json or parquet file to create dataframe" } fn signature(&self) -> Signature { Signature::build("dataframe open") .required( "file", SyntaxShape::FilePath, "file path to load values from", ) .named( "delimiter", SyntaxShape::String, "file delimiter character. CSV file", Some('d'), ) .switch( "no_header", "Indicates if file doesn't have header. CSV file", None, ) .named( "infer_schema", SyntaxShape::Number, "Set number of rows to infer the schema of the file. CSV file", None, ) .named( "skip_rows", SyntaxShape::Number, "Number of rows to skip from file. CSV file", None, ) .named( "columns", SyntaxShape::Table, "Columns to be selected from csv file. CSV file", None, ) } fn run(&self, args: CommandArgs) -> Result { command(args) } fn examples(&self) -> Vec { vec![Example { description: "Takes a file name and creates a dataframe", example: "dataframe open test.csv", result: None, }] } } fn command(args: CommandArgs) -> Result { let tag = args.call_info.name_tag.clone(); let file: Tagged = args.req(0)?; let df = match file.item().extension() { Some(e) => match e.to_str() { Some("csv") => from_csv(args), Some("parquet") => from_parquet(args), Some("json") => from_json(args), _ => Err(ShellError::labeled_error( "Error with file", "Not a csv, parquet or json file", &file.tag, )), }, None => Err(ShellError::labeled_error( "Error with file", "File without extension", &file.tag, )), }?; let file_name = match file.item.into_os_string().into_string() { Ok(name) => name, Err(e) => { return Err(ShellError::labeled_error( "File Name Error", format!("{:?}", e), &file.tag, )) } }; let df_tag = Tag { anchor: Some(AnchorLocation::File(file_name)), span: tag.span, }; Ok(OutputStream::one(NuDataFrame::dataframe_to_value( df, df_tag, ))) } fn from_parquet(args: CommandArgs) -> Result { let file: Tagged = args.req(0)?; let r = File::open(&file.item) .map_err(|e| ShellError::labeled_error("Error with file", format!("{:?}", e), &file.tag))?; let reader = ParquetReader::new(r); reader .finish() .map_err(|e| parse_polars_error::<&str>(&e, &file.tag.span, None)) } fn from_json(args: CommandArgs) -> Result { let file: Tagged = args.req(0)?; let r = File::open(&file.item) .map_err(|e| ShellError::labeled_error("Error with file", format!("{:?}", e), &file.tag))?; let reader = JsonReader::new(r); reader .finish() .map_err(|e| parse_polars_error::<&str>(&e, &file.tag.span, None)) } fn from_csv(args: CommandArgs) -> Result { let file: Tagged = args.req(0)?; let delimiter: Option> = args.get_flag("delimiter")?; let no_header: bool = args.has_flag("no_header"); let infer_schema: Option> = args.get_flag("infer_schema")?; let skip_rows: Option> = args.get_flag("skip_rows")?; let columns: Option> = args.get_flag("columns")?; let csv_reader = CsvReader::from_path(&file.item) .map_err(|e| parse_polars_error::<&str>(&e, &file.tag.span, None))?; let csv_reader = match delimiter { None => csv_reader, Some(d) => { if d.item.len() != 1 { return Err(ShellError::labeled_error( "Incorrect delimiter", "Delimiter has to be one char", &d.tag, )); } else { let delimiter = match d.item.chars().next() { Some(d) => d as u8, None => unreachable!(), }; csv_reader.with_delimiter(delimiter) } } }; let csv_reader = csv_reader.has_header(!no_header); let csv_reader = match infer_schema { None => csv_reader, Some(r) => csv_reader.infer_schema(Some(r.item)), }; let csv_reader = match skip_rows { None => csv_reader, Some(r) => csv_reader.with_skip_rows(r.item), }; let csv_reader = match columns { None => csv_reader, Some(c) => { let columns = c .into_iter() .map(|value| match value.value { UntaggedValue::Primitive(Primitive::String(s)) => Ok(s), _ => Err(ShellError::labeled_error( "Incorrect type for column", "Only string as columns", &value.tag, )), }) .collect::, ShellError>>(); csv_reader.with_columns(Some(columns?)) } }; match csv_reader.finish() { Ok(df) => Ok(df), Err(e) => Err(parse_polars_error::<&str>(&e, &file.tag.span, None)), } }