mirror of
https://github.com/nushell/nushell.git
synced 2025-01-23 06:39:17 +01:00
into sqlite: Fix insertion of null values (#12328)
# Description In #10232, the allowed input types were changed to be stricter, only allowing records with types that can easily map onto sqlite equivalents. Unfortunately, null was left out of the accepted input types, which makes inserting rows with null values impossible. This change fixes that by accepting null values as input. One caveat of this is that when the command is creating a new table, it uses the first row to infer an appropriate sqlite schema. If the first row contains a null value, then it is impossible to tell which type this column is supposed to have. Throwing a hard error seems undesirable from a UX perspective, but guessing can lead to a potentially useless database if we guess wrong. So as a compromise, for null columns, we will assume the sqlite type is TEXT and print a warning so the user knows. For the time being, if users can't avoid a first row with null values, but also wants the right schema, they are advised to create their table before running `into sqlite`. A future PR can add the ability to explicitly specify a schema. Fixes #12225 # Tests + Formatting * Tests added to cover expected behavior around insertion of null values
This commit is contained in:
parent
3857e368ff
commit
cf923fc44c
@ -115,31 +115,56 @@ impl Table {
|
||||
&mut self,
|
||||
record: &Record,
|
||||
) -> Result<rusqlite::Transaction, nu_protocol::ShellError> {
|
||||
let first_row_null = record.values().any(Value::is_nothing);
|
||||
let columns = get_columns_with_sqlite_types(record)?;
|
||||
|
||||
// create a string for sql table creation
|
||||
let create_statement = format!(
|
||||
"CREATE TABLE IF NOT EXISTS [{}] ({})",
|
||||
self.table_name,
|
||||
columns
|
||||
.into_iter()
|
||||
.map(|(col_name, sql_type)| format!("{col_name} {sql_type}"))
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ")
|
||||
let table_exists_query = format!(
|
||||
"SELECT count(*) FROM sqlite_master WHERE type='table' AND name='{}';",
|
||||
self.name(),
|
||||
);
|
||||
|
||||
// execute the statement
|
||||
self.conn.execute(&create_statement, []).map_err(|err| {
|
||||
eprintln!("{:?}", err);
|
||||
|
||||
ShellError::GenericError {
|
||||
error: "Failed to create table".into(),
|
||||
msg: err.to_string(),
|
||||
let table_count: u64 = self
|
||||
.conn
|
||||
.query_row(&table_exists_query, [], |row| row.get(0))
|
||||
.map_err(|err| ShellError::GenericError {
|
||||
error: format!("{:#?}", err),
|
||||
msg: format!("{:#?}", err),
|
||||
span: None,
|
||||
help: None,
|
||||
inner: Vec::new(),
|
||||
})?;
|
||||
|
||||
if table_count == 0 {
|
||||
if first_row_null {
|
||||
eprintln!(
|
||||
"Warning: The first row contains a null value, which has an \
|
||||
unknown SQL type. Null values will be assumed to be TEXT columns. \
|
||||
If this is undesirable, you can create the table first with your desired schema."
|
||||
);
|
||||
}
|
||||
})?;
|
||||
|
||||
// create a string for sql table creation
|
||||
let create_statement = format!(
|
||||
"CREATE TABLE [{}] ({})",
|
||||
self.table_name,
|
||||
columns
|
||||
.into_iter()
|
||||
.map(|(col_name, sql_type)| format!("{col_name} {sql_type}"))
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ")
|
||||
);
|
||||
|
||||
// execute the statement
|
||||
self.conn
|
||||
.execute(&create_statement, [])
|
||||
.map_err(|err| ShellError::GenericError {
|
||||
error: "Failed to create table".into(),
|
||||
msg: err.to_string(),
|
||||
span: None,
|
||||
help: None,
|
||||
inner: Vec::new(),
|
||||
})?;
|
||||
}
|
||||
|
||||
self.conn
|
||||
.transaction()
|
||||
@ -209,7 +234,7 @@ fn insert_in_transaction(
|
||||
let mut stream = stream.peekable();
|
||||
let first_val = match stream.peek() {
|
||||
None => return Ok(Value::nothing(span)),
|
||||
Some(val) => val.as_record()?,
|
||||
Some(val) => val.as_record()?.clone(),
|
||||
};
|
||||
|
||||
if first_val.is_empty() {
|
||||
@ -223,7 +248,7 @@ fn insert_in_transaction(
|
||||
}
|
||||
|
||||
let table_name = table.name().clone();
|
||||
let tx = table.try_init(first_val)?;
|
||||
let tx = table.try_init(&first_val)?;
|
||||
|
||||
for stream_value in stream {
|
||||
if let Some(ref ctrlc) = ctrl_c {
|
||||
@ -332,6 +357,11 @@ fn nu_value_to_sqlite_type(val: &Value) -> Result<&'static str, ShellError> {
|
||||
Type::Duration => Ok("BIGINT"),
|
||||
Type::Filesize => Ok("INTEGER"),
|
||||
|
||||
// [NOTE] On null values, we just assume TEXT. This could end up
|
||||
// creating a table where the column type is wrong in the table schema.
|
||||
// This means the table could end up with the wrong schema.
|
||||
Type::Nothing => Ok("TEXT"),
|
||||
|
||||
// intentionally enumerated so that any future types get handled
|
||||
Type::Any
|
||||
| Type::Block
|
||||
@ -341,7 +371,6 @@ fn nu_value_to_sqlite_type(val: &Value) -> Result<&'static str, ShellError> {
|
||||
| Type::Error
|
||||
| Type::List(_)
|
||||
| Type::ListStream
|
||||
| Type::Nothing
|
||||
| Type::Range
|
||||
| Type::Record(_)
|
||||
| Type::Signature
|
||||
|
@ -458,8 +458,7 @@ pub fn value_to_sql(value: Value) -> Result<Box<dyn rusqlite::ToSql>, ShellError
|
||||
Box::new(nu_utils::strip_ansi_unlikely(&val).into_owned())
|
||||
}
|
||||
Value::Binary { val, .. } => Box::new(val),
|
||||
Value::Nothing { .. } => Box::new(None::<String>),
|
||||
|
||||
Value::Nothing { .. } => Box::new(rusqlite::types::Null),
|
||||
val => {
|
||||
return Err(ShellError::OnlySupportsThisInputType {
|
||||
exp_input_type:
|
||||
|
@ -60,9 +60,9 @@ fn into_sqlite_values() {
|
||||
insert_test_rows(
|
||||
&dirs,
|
||||
r#"[
|
||||
[somebool, someint, somefloat, somefilesize, someduration, somedate, somestring, somebinary];
|
||||
[true, 1, 2.0, 1kb, 1sec, "2023-09-10T11:30:00-00:00", "foo", ("binary" | into binary)],
|
||||
[false, 2, 3.0, 2mb, 4wk, "2020-09-10T12:30:00-00:00", "bar", ("wut" | into binary)],
|
||||
[somebool, someint, somefloat, somefilesize, someduration, somedate, somestring, somebinary, somenull];
|
||||
[true, 1, 2.0, 1kb, 1sec, "2023-09-10T11:30:00-00:00", "foo", ("binary" | into binary), 1],
|
||||
[false, 2, 3.0, 2mb, 4wk, "2020-09-10T12:30:00-00:00", "bar", ("wut" | into binary), null],
|
||||
]"#,
|
||||
None,
|
||||
vec![
|
||||
@ -75,6 +75,7 @@ fn into_sqlite_values() {
|
||||
DateTime::parse_from_rfc3339("2023-09-10T11:30:00-00:00").unwrap(),
|
||||
"foo".into(),
|
||||
b"binary".to_vec(),
|
||||
rusqlite::types::Value::Integer(1),
|
||||
),
|
||||
TestRow(
|
||||
false,
|
||||
@ -85,6 +86,146 @@ fn into_sqlite_values() {
|
||||
DateTime::parse_from_rfc3339("2020-09-10T12:30:00-00:00").unwrap(),
|
||||
"bar".into(),
|
||||
b"wut".to_vec(),
|
||||
rusqlite::types::Value::Null,
|
||||
),
|
||||
],
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
/// When we create a new table, we use the first row to infer the schema of the
|
||||
/// table. In the event that a column is null, we can't know what type the row
|
||||
/// should be, so we just assume TEXT.
|
||||
#[test]
|
||||
fn into_sqlite_values_first_column_null() {
|
||||
Playground::setup("values", |dirs, _| {
|
||||
insert_test_rows(
|
||||
&dirs,
|
||||
r#"[
|
||||
[somebool, someint, somefloat, somefilesize, someduration, somedate, somestring, somebinary, somenull];
|
||||
[false, 2, 3.0, 2mb, 4wk, "2020-09-10T12:30:00-00:00", "bar", ("wut" | into binary), null],
|
||||
[true, 1, 2.0, 1kb, 1sec, "2023-09-10T11:30:00-00:00", "foo", ("binary" | into binary), 1],
|
||||
]"#,
|
||||
None,
|
||||
vec![
|
||||
TestRow(
|
||||
false,
|
||||
2,
|
||||
3.0,
|
||||
2000000,
|
||||
2419200000000000,
|
||||
DateTime::parse_from_rfc3339("2020-09-10T12:30:00-00:00").unwrap(),
|
||||
"bar".into(),
|
||||
b"wut".to_vec(),
|
||||
rusqlite::types::Value::Null,
|
||||
),
|
||||
TestRow(
|
||||
true,
|
||||
1,
|
||||
2.0,
|
||||
1000,
|
||||
1000000000,
|
||||
DateTime::parse_from_rfc3339("2023-09-10T11:30:00-00:00").unwrap(),
|
||||
"foo".into(),
|
||||
b"binary".to_vec(),
|
||||
rusqlite::types::Value::Text("1".into()),
|
||||
),
|
||||
],
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
/// If the DB / table already exist, then the insert should end up with the
|
||||
/// right data types no matter if the first row is null or not.
|
||||
#[test]
|
||||
fn into_sqlite_values_first_column_null_preexisting_db() {
|
||||
Playground::setup("values", |dirs, _| {
|
||||
insert_test_rows(
|
||||
&dirs,
|
||||
r#"[
|
||||
[somebool, someint, somefloat, somefilesize, someduration, somedate, somestring, somebinary, somenull];
|
||||
[true, 1, 2.0, 1kb, 1sec, "2023-09-10T11:30:00-00:00", "foo", ("binary" | into binary), 1],
|
||||
[false, 2, 3.0, 2mb, 4wk, "2020-09-10T12:30:00-00:00", "bar", ("wut" | into binary), null],
|
||||
]"#,
|
||||
None,
|
||||
vec![
|
||||
TestRow(
|
||||
true,
|
||||
1,
|
||||
2.0,
|
||||
1000,
|
||||
1000000000,
|
||||
DateTime::parse_from_rfc3339("2023-09-10T11:30:00-00:00").unwrap(),
|
||||
"foo".into(),
|
||||
b"binary".to_vec(),
|
||||
rusqlite::types::Value::Integer(1),
|
||||
),
|
||||
TestRow(
|
||||
false,
|
||||
2,
|
||||
3.0,
|
||||
2000000,
|
||||
2419200000000000,
|
||||
DateTime::parse_from_rfc3339("2020-09-10T12:30:00-00:00").unwrap(),
|
||||
"bar".into(),
|
||||
b"wut".to_vec(),
|
||||
rusqlite::types::Value::Null,
|
||||
),
|
||||
],
|
||||
);
|
||||
|
||||
insert_test_rows(
|
||||
&dirs,
|
||||
r#"[
|
||||
[somebool, someint, somefloat, somefilesize, someduration, somedate, somestring, somebinary, somenull];
|
||||
[true, 3, 5.0, 3.1mb, 1wk, "2020-09-10T12:30:00-00:00", "baz", ("huh" | into binary), null],
|
||||
[true, 3, 5.0, 3.1mb, 1wk, "2020-09-10T12:30:00-00:00", "baz", ("huh" | into binary), 3],
|
||||
]"#,
|
||||
None,
|
||||
vec![
|
||||
TestRow(
|
||||
true,
|
||||
1,
|
||||
2.0,
|
||||
1000,
|
||||
1000000000,
|
||||
DateTime::parse_from_rfc3339("2023-09-10T11:30:00-00:00").unwrap(),
|
||||
"foo".into(),
|
||||
b"binary".to_vec(),
|
||||
rusqlite::types::Value::Integer(1),
|
||||
),
|
||||
TestRow(
|
||||
false,
|
||||
2,
|
||||
3.0,
|
||||
2000000,
|
||||
2419200000000000,
|
||||
DateTime::parse_from_rfc3339("2020-09-10T12:30:00-00:00").unwrap(),
|
||||
"bar".into(),
|
||||
b"wut".to_vec(),
|
||||
rusqlite::types::Value::Null,
|
||||
),
|
||||
TestRow(
|
||||
true,
|
||||
3,
|
||||
5.0,
|
||||
3100000,
|
||||
604800000000000,
|
||||
DateTime::parse_from_rfc3339("2020-09-10T12:30:00-00:00").unwrap(),
|
||||
"baz".into(),
|
||||
b"huh".to_vec(),
|
||||
rusqlite::types::Value::Null,
|
||||
),
|
||||
TestRow(
|
||||
true,
|
||||
3,
|
||||
5.0,
|
||||
3100000,
|
||||
604800000000000,
|
||||
DateTime::parse_from_rfc3339("2020-09-10T12:30:00-00:00").unwrap(),
|
||||
"baz".into(),
|
||||
b"huh".to_vec(),
|
||||
rusqlite::types::Value::Integer(3),
|
||||
),
|
||||
],
|
||||
);
|
||||
@ -99,8 +240,8 @@ fn into_sqlite_existing_db_append() {
|
||||
insert_test_rows(
|
||||
&dirs,
|
||||
r#"[
|
||||
[somebool, someint, somefloat, somefilesize, someduration, somedate, somestring, somebinary];
|
||||
[true, 1, 2.0, 1kb, 1sec, "2023-09-10T11:30:00-00:00", "foo", ("binary" | into binary)],
|
||||
[somebool, someint, somefloat, somefilesize, someduration, somedate, somestring, somebinary, somenull];
|
||||
[true, 1, 2.0, 1kb, 1sec, "2023-09-10T11:30:00-00:00", "foo", ("binary" | into binary), null],
|
||||
]"#,
|
||||
None,
|
||||
vec![TestRow(
|
||||
@ -112,6 +253,7 @@ fn into_sqlite_existing_db_append() {
|
||||
DateTime::parse_from_rfc3339("2023-09-10T11:30:00-00:00").unwrap(),
|
||||
"foo".into(),
|
||||
b"binary".to_vec(),
|
||||
rusqlite::types::Value::Null,
|
||||
)],
|
||||
);
|
||||
|
||||
@ -119,8 +261,8 @@ fn into_sqlite_existing_db_append() {
|
||||
insert_test_rows(
|
||||
&dirs,
|
||||
r#"[
|
||||
[somebool, someint, somefloat, somefilesize, someduration, somedate, somestring, somebinary];
|
||||
[false, 2, 3.0, 2mb, 4wk, "2020-09-10T12:30:00-00:00", "bar", ("wut" | into binary)],
|
||||
[somebool, someint, somefloat, somefilesize, someduration, somedate, somestring, somebinary, somenull];
|
||||
[false, 2, 3.0, 2mb, 4wk, "2020-09-10T12:30:00-00:00", "bar", ("wut" | into binary), null],
|
||||
]"#,
|
||||
None,
|
||||
// it should have both rows
|
||||
@ -134,6 +276,7 @@ fn into_sqlite_existing_db_append() {
|
||||
DateTime::parse_from_rfc3339("2023-09-10T11:30:00-00:00").unwrap(),
|
||||
"foo".into(),
|
||||
b"binary".to_vec(),
|
||||
rusqlite::types::Value::Null,
|
||||
),
|
||||
TestRow(
|
||||
false,
|
||||
@ -144,6 +287,7 @@ fn into_sqlite_existing_db_append() {
|
||||
DateTime::parse_from_rfc3339("2020-09-10T12:30:00-00:00").unwrap(),
|
||||
"bar".into(),
|
||||
b"wut".to_vec(),
|
||||
rusqlite::types::Value::Null,
|
||||
),
|
||||
],
|
||||
);
|
||||
@ -223,6 +367,7 @@ struct TestRow(
|
||||
chrono::DateTime<chrono::FixedOffset>,
|
||||
std::string::String,
|
||||
std::vec::Vec<u8>,
|
||||
rusqlite::types::Value,
|
||||
);
|
||||
|
||||
impl TestRow {
|
||||
@ -243,6 +388,7 @@ impl From<TestRow> for Value {
|
||||
"somedate" => Value::date(row.5, Span::unknown()),
|
||||
"somestring" => Value::string(row.6, Span::unknown()),
|
||||
"somebinary" => Value::binary(row.7, Span::unknown()),
|
||||
"somenull" => Value::nothing(Span::unknown()),
|
||||
},
|
||||
Span::unknown(),
|
||||
)
|
||||
@ -261,6 +407,7 @@ impl<'r> TryFrom<&rusqlite::Row<'r>> for TestRow {
|
||||
let somedate: DateTime<FixedOffset> = row.get("somedate").unwrap();
|
||||
let somestring: String = row.get("somestring").unwrap();
|
||||
let somebinary: Vec<u8> = row.get("somebinary").unwrap();
|
||||
let somenull: rusqlite::types::Value = row.get("somenull").unwrap();
|
||||
|
||||
Ok(TestRow(
|
||||
somebool,
|
||||
@ -271,6 +418,7 @@ impl<'r> TryFrom<&rusqlite::Row<'r>> for TestRow {
|
||||
somedate,
|
||||
somestring,
|
||||
somebinary,
|
||||
somenull,
|
||||
))
|
||||
}
|
||||
}
|
||||
@ -300,6 +448,7 @@ impl Distribution<TestRow> for Standard {
|
||||
dt,
|
||||
rand_string,
|
||||
rng.gen::<u64>().to_be_bytes().to_vec(),
|
||||
rusqlite::types::Value::Null,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user