fix #14398: truly flexible csv/tsv parsing

This commit is contained in:
Bahex 2024-11-20 14:29:51 +03:00
parent 1e7840c376
commit 9f6d6abdc4

View File

@ -39,12 +39,7 @@ fn from_delimited_stream(
.from_reader(input_reader); .from_reader(input_reader);
let headers = if noheaders { let headers = if noheaders {
(0..reader vec![]
.headers()
.map_err(|err| from_csv_error(err, span))?
.len())
.map(|i| format!("column{i}"))
.collect::<Vec<String>>()
} else { } else {
reader reader
.headers() .headers()
@ -54,32 +49,28 @@ fn from_delimited_stream(
.collect() .collect()
}; };
let n = headers.len();
let columns = headers
.into_iter()
.chain((n..).map(|i| format!("column{i}")));
let iter = reader.into_records().map(move |row| { let iter = reader.into_records().map(move |row| {
let row = match row { let row = match row {
Ok(row) => row, Ok(row) => row,
Err(err) => return Value::error(from_csv_error(err, span), span), Err(err) => return Value::error(from_csv_error(err, span), span),
}; };
let columns = headers.iter().cloned(); let columns = columns.clone();
let values = row let values = row.into_iter().map(|s| {
.into_iter() if no_infer {
.map(|s| { Value::string(s, span)
if no_infer { } else if let Ok(i) = s.parse() {
Value::string(s, span) Value::int(i, span)
} else if let Ok(i) = s.parse() { } else if let Ok(f) = s.parse() {
Value::int(i, span) Value::float(f, span)
} else if let Ok(f) = s.parse() { } else {
Value::float(f, span) Value::string(s, span)
} else { }
Value::string(s, span) });
}
})
.chain(std::iter::repeat(Value::nothing(span)));
// If there are more values than the number of headers,
// then the remaining values are ignored.
//
// Otherwise, if there are fewer values than headers,
// then `Value::nothing(span)` is used to fill the remaining columns.
Value::record(columns.zip(values).collect(), span) Value::record(columns.zip(values).collect(), span)
}); });