From cf923fc44c202d965e2a1436b6552eb5a9bf7631 Mon Sep 17 00:00:00 2001 From: Skyler Hawthorne Date: Fri, 29 Mar 2024 07:41:16 -0400 Subject: [PATCH] into sqlite: Fix insertion of null values (#12328) # Description In #10232, the allowed input types were changed to be stricter, only allowing records with types that can easily map onto sqlite equivalents. Unfortunately, null was left out of the accepted input types, which makes inserting rows with null values impossible. This change fixes that by accepting null values as input. One caveat of this is that when the command is creating a new table, it uses the first row to infer an appropriate sqlite schema. If the first row contains a null value, then it is impossible to tell which type this column is supposed to have. Throwing a hard error seems undesirable from a UX perspective, but guessing can lead to a potentially useless database if we guess wrong. So as a compromise, for null columns, we will assume the sqlite type is TEXT and print a warning so the user knows. For the time being, if users can't avoid a first row with null values, but also wants the right schema, they are advised to create their table before running `into sqlite`. A future PR can add the ability to explicitly specify a schema. Fixes #12225 # Tests + Formatting * Tests added to cover expected behavior around insertion of null values --- .../src/database/commands/into_sqlite.rs | 69 +++++--- .../nu-command/src/database/values/sqlite.rs | 3 +- .../tests/commands/database/into_sqlite.rs | 163 +++++++++++++++++- 3 files changed, 206 insertions(+), 29 deletions(-) diff --git a/crates/nu-command/src/database/commands/into_sqlite.rs b/crates/nu-command/src/database/commands/into_sqlite.rs index bd72993919..730bf92b78 100644 --- a/crates/nu-command/src/database/commands/into_sqlite.rs +++ b/crates/nu-command/src/database/commands/into_sqlite.rs @@ -115,31 +115,56 @@ impl Table { &mut self, record: &Record, ) -> Result { + let first_row_null = record.values().any(Value::is_nothing); let columns = get_columns_with_sqlite_types(record)?; - // create a string for sql table creation - let create_statement = format!( - "CREATE TABLE IF NOT EXISTS [{}] ({})", - self.table_name, - columns - .into_iter() - .map(|(col_name, sql_type)| format!("{col_name} {sql_type}")) - .collect::>() - .join(", ") + let table_exists_query = format!( + "SELECT count(*) FROM sqlite_master WHERE type='table' AND name='{}';", + self.name(), ); - // execute the statement - self.conn.execute(&create_statement, []).map_err(|err| { - eprintln!("{:?}", err); - - ShellError::GenericError { - error: "Failed to create table".into(), - msg: err.to_string(), + let table_count: u64 = self + .conn + .query_row(&table_exists_query, [], |row| row.get(0)) + .map_err(|err| ShellError::GenericError { + error: format!("{:#?}", err), + msg: format!("{:#?}", err), span: None, help: None, inner: Vec::new(), + })?; + + if table_count == 0 { + if first_row_null { + eprintln!( + "Warning: The first row contains a null value, which has an \ +unknown SQL type. Null values will be assumed to be TEXT columns. \ +If this is undesirable, you can create the table first with your desired schema." + ); } - })?; + + // create a string for sql table creation + let create_statement = format!( + "CREATE TABLE [{}] ({})", + self.table_name, + columns + .into_iter() + .map(|(col_name, sql_type)| format!("{col_name} {sql_type}")) + .collect::>() + .join(", ") + ); + + // execute the statement + self.conn + .execute(&create_statement, []) + .map_err(|err| ShellError::GenericError { + error: "Failed to create table".into(), + msg: err.to_string(), + span: None, + help: None, + inner: Vec::new(), + })?; + } self.conn .transaction() @@ -209,7 +234,7 @@ fn insert_in_transaction( let mut stream = stream.peekable(); let first_val = match stream.peek() { None => return Ok(Value::nothing(span)), - Some(val) => val.as_record()?, + Some(val) => val.as_record()?.clone(), }; if first_val.is_empty() { @@ -223,7 +248,7 @@ fn insert_in_transaction( } let table_name = table.name().clone(); - let tx = table.try_init(first_val)?; + let tx = table.try_init(&first_val)?; for stream_value in stream { if let Some(ref ctrlc) = ctrl_c { @@ -332,6 +357,11 @@ fn nu_value_to_sqlite_type(val: &Value) -> Result<&'static str, ShellError> { Type::Duration => Ok("BIGINT"), Type::Filesize => Ok("INTEGER"), + // [NOTE] On null values, we just assume TEXT. This could end up + // creating a table where the column type is wrong in the table schema. + // This means the table could end up with the wrong schema. + Type::Nothing => Ok("TEXT"), + // intentionally enumerated so that any future types get handled Type::Any | Type::Block @@ -341,7 +371,6 @@ fn nu_value_to_sqlite_type(val: &Value) -> Result<&'static str, ShellError> { | Type::Error | Type::List(_) | Type::ListStream - | Type::Nothing | Type::Range | Type::Record(_) | Type::Signature diff --git a/crates/nu-command/src/database/values/sqlite.rs b/crates/nu-command/src/database/values/sqlite.rs index a25cc22140..d865f4a26a 100644 --- a/crates/nu-command/src/database/values/sqlite.rs +++ b/crates/nu-command/src/database/values/sqlite.rs @@ -458,8 +458,7 @@ pub fn value_to_sql(value: Value) -> Result, ShellError Box::new(nu_utils::strip_ansi_unlikely(&val).into_owned()) } Value::Binary { val, .. } => Box::new(val), - Value::Nothing { .. } => Box::new(None::), - + Value::Nothing { .. } => Box::new(rusqlite::types::Null), val => { return Err(ShellError::OnlySupportsThisInputType { exp_input_type: diff --git a/crates/nu-command/tests/commands/database/into_sqlite.rs b/crates/nu-command/tests/commands/database/into_sqlite.rs index 7b041c947a..e70e5b5528 100644 --- a/crates/nu-command/tests/commands/database/into_sqlite.rs +++ b/crates/nu-command/tests/commands/database/into_sqlite.rs @@ -60,9 +60,9 @@ fn into_sqlite_values() { insert_test_rows( &dirs, r#"[ - [somebool, someint, somefloat, somefilesize, someduration, somedate, somestring, somebinary]; - [true, 1, 2.0, 1kb, 1sec, "2023-09-10T11:30:00-00:00", "foo", ("binary" | into binary)], - [false, 2, 3.0, 2mb, 4wk, "2020-09-10T12:30:00-00:00", "bar", ("wut" | into binary)], + [somebool, someint, somefloat, somefilesize, someduration, somedate, somestring, somebinary, somenull]; + [true, 1, 2.0, 1kb, 1sec, "2023-09-10T11:30:00-00:00", "foo", ("binary" | into binary), 1], + [false, 2, 3.0, 2mb, 4wk, "2020-09-10T12:30:00-00:00", "bar", ("wut" | into binary), null], ]"#, None, vec![ @@ -75,6 +75,7 @@ fn into_sqlite_values() { DateTime::parse_from_rfc3339("2023-09-10T11:30:00-00:00").unwrap(), "foo".into(), b"binary".to_vec(), + rusqlite::types::Value::Integer(1), ), TestRow( false, @@ -85,6 +86,146 @@ fn into_sqlite_values() { DateTime::parse_from_rfc3339("2020-09-10T12:30:00-00:00").unwrap(), "bar".into(), b"wut".to_vec(), + rusqlite::types::Value::Null, + ), + ], + ); + }); +} + +/// When we create a new table, we use the first row to infer the schema of the +/// table. In the event that a column is null, we can't know what type the row +/// should be, so we just assume TEXT. +#[test] +fn into_sqlite_values_first_column_null() { + Playground::setup("values", |dirs, _| { + insert_test_rows( + &dirs, + r#"[ + [somebool, someint, somefloat, somefilesize, someduration, somedate, somestring, somebinary, somenull]; + [false, 2, 3.0, 2mb, 4wk, "2020-09-10T12:30:00-00:00", "bar", ("wut" | into binary), null], + [true, 1, 2.0, 1kb, 1sec, "2023-09-10T11:30:00-00:00", "foo", ("binary" | into binary), 1], + ]"#, + None, + vec![ + TestRow( + false, + 2, + 3.0, + 2000000, + 2419200000000000, + DateTime::parse_from_rfc3339("2020-09-10T12:30:00-00:00").unwrap(), + "bar".into(), + b"wut".to_vec(), + rusqlite::types::Value::Null, + ), + TestRow( + true, + 1, + 2.0, + 1000, + 1000000000, + DateTime::parse_from_rfc3339("2023-09-10T11:30:00-00:00").unwrap(), + "foo".into(), + b"binary".to_vec(), + rusqlite::types::Value::Text("1".into()), + ), + ], + ); + }); +} + +/// If the DB / table already exist, then the insert should end up with the +/// right data types no matter if the first row is null or not. +#[test] +fn into_sqlite_values_first_column_null_preexisting_db() { + Playground::setup("values", |dirs, _| { + insert_test_rows( + &dirs, + r#"[ + [somebool, someint, somefloat, somefilesize, someduration, somedate, somestring, somebinary, somenull]; + [true, 1, 2.0, 1kb, 1sec, "2023-09-10T11:30:00-00:00", "foo", ("binary" | into binary), 1], + [false, 2, 3.0, 2mb, 4wk, "2020-09-10T12:30:00-00:00", "bar", ("wut" | into binary), null], + ]"#, + None, + vec![ + TestRow( + true, + 1, + 2.0, + 1000, + 1000000000, + DateTime::parse_from_rfc3339("2023-09-10T11:30:00-00:00").unwrap(), + "foo".into(), + b"binary".to_vec(), + rusqlite::types::Value::Integer(1), + ), + TestRow( + false, + 2, + 3.0, + 2000000, + 2419200000000000, + DateTime::parse_from_rfc3339("2020-09-10T12:30:00-00:00").unwrap(), + "bar".into(), + b"wut".to_vec(), + rusqlite::types::Value::Null, + ), + ], + ); + + insert_test_rows( + &dirs, + r#"[ + [somebool, someint, somefloat, somefilesize, someduration, somedate, somestring, somebinary, somenull]; + [true, 3, 5.0, 3.1mb, 1wk, "2020-09-10T12:30:00-00:00", "baz", ("huh" | into binary), null], + [true, 3, 5.0, 3.1mb, 1wk, "2020-09-10T12:30:00-00:00", "baz", ("huh" | into binary), 3], + ]"#, + None, + vec![ + TestRow( + true, + 1, + 2.0, + 1000, + 1000000000, + DateTime::parse_from_rfc3339("2023-09-10T11:30:00-00:00").unwrap(), + "foo".into(), + b"binary".to_vec(), + rusqlite::types::Value::Integer(1), + ), + TestRow( + false, + 2, + 3.0, + 2000000, + 2419200000000000, + DateTime::parse_from_rfc3339("2020-09-10T12:30:00-00:00").unwrap(), + "bar".into(), + b"wut".to_vec(), + rusqlite::types::Value::Null, + ), + TestRow( + true, + 3, + 5.0, + 3100000, + 604800000000000, + DateTime::parse_from_rfc3339("2020-09-10T12:30:00-00:00").unwrap(), + "baz".into(), + b"huh".to_vec(), + rusqlite::types::Value::Null, + ), + TestRow( + true, + 3, + 5.0, + 3100000, + 604800000000000, + DateTime::parse_from_rfc3339("2020-09-10T12:30:00-00:00").unwrap(), + "baz".into(), + b"huh".to_vec(), + rusqlite::types::Value::Integer(3), ), ], ); @@ -99,8 +240,8 @@ fn into_sqlite_existing_db_append() { insert_test_rows( &dirs, r#"[ - [somebool, someint, somefloat, somefilesize, someduration, somedate, somestring, somebinary]; - [true, 1, 2.0, 1kb, 1sec, "2023-09-10T11:30:00-00:00", "foo", ("binary" | into binary)], + [somebool, someint, somefloat, somefilesize, someduration, somedate, somestring, somebinary, somenull]; + [true, 1, 2.0, 1kb, 1sec, "2023-09-10T11:30:00-00:00", "foo", ("binary" | into binary), null], ]"#, None, vec![TestRow( @@ -112,6 +253,7 @@ fn into_sqlite_existing_db_append() { DateTime::parse_from_rfc3339("2023-09-10T11:30:00-00:00").unwrap(), "foo".into(), b"binary".to_vec(), + rusqlite::types::Value::Null, )], ); @@ -119,8 +261,8 @@ fn into_sqlite_existing_db_append() { insert_test_rows( &dirs, r#"[ - [somebool, someint, somefloat, somefilesize, someduration, somedate, somestring, somebinary]; - [false, 2, 3.0, 2mb, 4wk, "2020-09-10T12:30:00-00:00", "bar", ("wut" | into binary)], + [somebool, someint, somefloat, somefilesize, someduration, somedate, somestring, somebinary, somenull]; + [false, 2, 3.0, 2mb, 4wk, "2020-09-10T12:30:00-00:00", "bar", ("wut" | into binary), null], ]"#, None, // it should have both rows @@ -134,6 +276,7 @@ fn into_sqlite_existing_db_append() { DateTime::parse_from_rfc3339("2023-09-10T11:30:00-00:00").unwrap(), "foo".into(), b"binary".to_vec(), + rusqlite::types::Value::Null, ), TestRow( false, @@ -144,6 +287,7 @@ fn into_sqlite_existing_db_append() { DateTime::parse_from_rfc3339("2020-09-10T12:30:00-00:00").unwrap(), "bar".into(), b"wut".to_vec(), + rusqlite::types::Value::Null, ), ], ); @@ -223,6 +367,7 @@ struct TestRow( chrono::DateTime, std::string::String, std::vec::Vec, + rusqlite::types::Value, ); impl TestRow { @@ -243,6 +388,7 @@ impl From for Value { "somedate" => Value::date(row.5, Span::unknown()), "somestring" => Value::string(row.6, Span::unknown()), "somebinary" => Value::binary(row.7, Span::unknown()), + "somenull" => Value::nothing(Span::unknown()), }, Span::unknown(), ) @@ -261,6 +407,7 @@ impl<'r> TryFrom<&rusqlite::Row<'r>> for TestRow { let somedate: DateTime = row.get("somedate").unwrap(); let somestring: String = row.get("somestring").unwrap(); let somebinary: Vec = row.get("somebinary").unwrap(); + let somenull: rusqlite::types::Value = row.get("somenull").unwrap(); Ok(TestRow( somebool, @@ -271,6 +418,7 @@ impl<'r> TryFrom<&rusqlite::Row<'r>> for TestRow { somedate, somestring, somebinary, + somenull, )) } } @@ -300,6 +448,7 @@ impl Distribution for Standard { dt, rand_string, rng.gen::().to_be_bytes().to_vec(), + rusqlite::types::Value::Null, ) } }