into sqlite: Fix insertion of null values (#12328)

# Description

In #10232, the allowed input types were changed to be stricter, only
allowing records with types that can easily map onto sqlite equivalents.
Unfortunately, null was left out of the accepted input types, which
makes inserting rows with null values impossible.

This change fixes that by accepting null values as input.

One caveat of this is that when the command is creating a new table, it
uses the first row to infer an appropriate sqlite schema. If the first
row contains a null value, then it is impossible to tell which type this
column is supposed to have.

Throwing a hard error seems undesirable from a UX perspective, but
guessing can lead to a potentially useless database if we guess wrong.

So as a compromise, for null columns, we will assume the sqlite type is
TEXT and print a warning so the user knows. For the time being, if users
can't avoid a first row with null values, but also wants the right
schema, they are advised to create their table before running `into
sqlite`.

A future PR can add the ability to explicitly specify a schema.

Fixes #12225

# Tests + Formatting

* Tests added to cover expected behavior around insertion of null values
This commit is contained in:
Skyler Hawthorne 2024-03-29 07:41:16 -04:00 committed by GitHub
parent 3857e368ff
commit cf923fc44c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 206 additions and 29 deletions

View File

@ -115,31 +115,56 @@ impl Table {
&mut self, &mut self,
record: &Record, record: &Record,
) -> Result<rusqlite::Transaction, nu_protocol::ShellError> { ) -> Result<rusqlite::Transaction, nu_protocol::ShellError> {
let first_row_null = record.values().any(Value::is_nothing);
let columns = get_columns_with_sqlite_types(record)?; let columns = get_columns_with_sqlite_types(record)?;
// create a string for sql table creation let table_exists_query = format!(
let create_statement = format!( "SELECT count(*) FROM sqlite_master WHERE type='table' AND name='{}';",
"CREATE TABLE IF NOT EXISTS [{}] ({})", self.name(),
self.table_name,
columns
.into_iter()
.map(|(col_name, sql_type)| format!("{col_name} {sql_type}"))
.collect::<Vec<_>>()
.join(", ")
); );
// execute the statement let table_count: u64 = self
self.conn.execute(&create_statement, []).map_err(|err| { .conn
eprintln!("{:?}", err); .query_row(&table_exists_query, [], |row| row.get(0))
.map_err(|err| ShellError::GenericError {
ShellError::GenericError { error: format!("{:#?}", err),
error: "Failed to create table".into(), msg: format!("{:#?}", err),
msg: err.to_string(),
span: None, span: None,
help: None, help: None,
inner: Vec::new(), inner: Vec::new(),
})?;
if table_count == 0 {
if first_row_null {
eprintln!(
"Warning: The first row contains a null value, which has an \
unknown SQL type. Null values will be assumed to be TEXT columns. \
If this is undesirable, you can create the table first with your desired schema."
);
} }
})?;
// create a string for sql table creation
let create_statement = format!(
"CREATE TABLE [{}] ({})",
self.table_name,
columns
.into_iter()
.map(|(col_name, sql_type)| format!("{col_name} {sql_type}"))
.collect::<Vec<_>>()
.join(", ")
);
// execute the statement
self.conn
.execute(&create_statement, [])
.map_err(|err| ShellError::GenericError {
error: "Failed to create table".into(),
msg: err.to_string(),
span: None,
help: None,
inner: Vec::new(),
})?;
}
self.conn self.conn
.transaction() .transaction()
@ -209,7 +234,7 @@ fn insert_in_transaction(
let mut stream = stream.peekable(); let mut stream = stream.peekable();
let first_val = match stream.peek() { let first_val = match stream.peek() {
None => return Ok(Value::nothing(span)), None => return Ok(Value::nothing(span)),
Some(val) => val.as_record()?, Some(val) => val.as_record()?.clone(),
}; };
if first_val.is_empty() { if first_val.is_empty() {
@ -223,7 +248,7 @@ fn insert_in_transaction(
} }
let table_name = table.name().clone(); let table_name = table.name().clone();
let tx = table.try_init(first_val)?; let tx = table.try_init(&first_val)?;
for stream_value in stream { for stream_value in stream {
if let Some(ref ctrlc) = ctrl_c { if let Some(ref ctrlc) = ctrl_c {
@ -332,6 +357,11 @@ fn nu_value_to_sqlite_type(val: &Value) -> Result<&'static str, ShellError> {
Type::Duration => Ok("BIGINT"), Type::Duration => Ok("BIGINT"),
Type::Filesize => Ok("INTEGER"), Type::Filesize => Ok("INTEGER"),
// [NOTE] On null values, we just assume TEXT. This could end up
// creating a table where the column type is wrong in the table schema.
// This means the table could end up with the wrong schema.
Type::Nothing => Ok("TEXT"),
// intentionally enumerated so that any future types get handled // intentionally enumerated so that any future types get handled
Type::Any Type::Any
| Type::Block | Type::Block
@ -341,7 +371,6 @@ fn nu_value_to_sqlite_type(val: &Value) -> Result<&'static str, ShellError> {
| Type::Error | Type::Error
| Type::List(_) | Type::List(_)
| Type::ListStream | Type::ListStream
| Type::Nothing
| Type::Range | Type::Range
| Type::Record(_) | Type::Record(_)
| Type::Signature | Type::Signature

View File

@ -458,8 +458,7 @@ pub fn value_to_sql(value: Value) -> Result<Box<dyn rusqlite::ToSql>, ShellError
Box::new(nu_utils::strip_ansi_unlikely(&val).into_owned()) Box::new(nu_utils::strip_ansi_unlikely(&val).into_owned())
} }
Value::Binary { val, .. } => Box::new(val), Value::Binary { val, .. } => Box::new(val),
Value::Nothing { .. } => Box::new(None::<String>), Value::Nothing { .. } => Box::new(rusqlite::types::Null),
val => { val => {
return Err(ShellError::OnlySupportsThisInputType { return Err(ShellError::OnlySupportsThisInputType {
exp_input_type: exp_input_type:

View File

@ -60,9 +60,9 @@ fn into_sqlite_values() {
insert_test_rows( insert_test_rows(
&dirs, &dirs,
r#"[ r#"[
[somebool, someint, somefloat, somefilesize, someduration, somedate, somestring, somebinary]; [somebool, someint, somefloat, somefilesize, someduration, somedate, somestring, somebinary, somenull];
[true, 1, 2.0, 1kb, 1sec, "2023-09-10T11:30:00-00:00", "foo", ("binary" | into binary)], [true, 1, 2.0, 1kb, 1sec, "2023-09-10T11:30:00-00:00", "foo", ("binary" | into binary), 1],
[false, 2, 3.0, 2mb, 4wk, "2020-09-10T12:30:00-00:00", "bar", ("wut" | into binary)], [false, 2, 3.0, 2mb, 4wk, "2020-09-10T12:30:00-00:00", "bar", ("wut" | into binary), null],
]"#, ]"#,
None, None,
vec![ vec![
@ -75,6 +75,7 @@ fn into_sqlite_values() {
DateTime::parse_from_rfc3339("2023-09-10T11:30:00-00:00").unwrap(), DateTime::parse_from_rfc3339("2023-09-10T11:30:00-00:00").unwrap(),
"foo".into(), "foo".into(),
b"binary".to_vec(), b"binary".to_vec(),
rusqlite::types::Value::Integer(1),
), ),
TestRow( TestRow(
false, false,
@ -85,6 +86,146 @@ fn into_sqlite_values() {
DateTime::parse_from_rfc3339("2020-09-10T12:30:00-00:00").unwrap(), DateTime::parse_from_rfc3339("2020-09-10T12:30:00-00:00").unwrap(),
"bar".into(), "bar".into(),
b"wut".to_vec(), b"wut".to_vec(),
rusqlite::types::Value::Null,
),
],
);
});
}
/// When we create a new table, we use the first row to infer the schema of the
/// table. In the event that a column is null, we can't know what type the row
/// should be, so we just assume TEXT.
#[test]
fn into_sqlite_values_first_column_null() {
Playground::setup("values", |dirs, _| {
insert_test_rows(
&dirs,
r#"[
[somebool, someint, somefloat, somefilesize, someduration, somedate, somestring, somebinary, somenull];
[false, 2, 3.0, 2mb, 4wk, "2020-09-10T12:30:00-00:00", "bar", ("wut" | into binary), null],
[true, 1, 2.0, 1kb, 1sec, "2023-09-10T11:30:00-00:00", "foo", ("binary" | into binary), 1],
]"#,
None,
vec![
TestRow(
false,
2,
3.0,
2000000,
2419200000000000,
DateTime::parse_from_rfc3339("2020-09-10T12:30:00-00:00").unwrap(),
"bar".into(),
b"wut".to_vec(),
rusqlite::types::Value::Null,
),
TestRow(
true,
1,
2.0,
1000,
1000000000,
DateTime::parse_from_rfc3339("2023-09-10T11:30:00-00:00").unwrap(),
"foo".into(),
b"binary".to_vec(),
rusqlite::types::Value::Text("1".into()),
),
],
);
});
}
/// If the DB / table already exist, then the insert should end up with the
/// right data types no matter if the first row is null or not.
#[test]
fn into_sqlite_values_first_column_null_preexisting_db() {
Playground::setup("values", |dirs, _| {
insert_test_rows(
&dirs,
r#"[
[somebool, someint, somefloat, somefilesize, someduration, somedate, somestring, somebinary, somenull];
[true, 1, 2.0, 1kb, 1sec, "2023-09-10T11:30:00-00:00", "foo", ("binary" | into binary), 1],
[false, 2, 3.0, 2mb, 4wk, "2020-09-10T12:30:00-00:00", "bar", ("wut" | into binary), null],
]"#,
None,
vec![
TestRow(
true,
1,
2.0,
1000,
1000000000,
DateTime::parse_from_rfc3339("2023-09-10T11:30:00-00:00").unwrap(),
"foo".into(),
b"binary".to_vec(),
rusqlite::types::Value::Integer(1),
),
TestRow(
false,
2,
3.0,
2000000,
2419200000000000,
DateTime::parse_from_rfc3339("2020-09-10T12:30:00-00:00").unwrap(),
"bar".into(),
b"wut".to_vec(),
rusqlite::types::Value::Null,
),
],
);
insert_test_rows(
&dirs,
r#"[
[somebool, someint, somefloat, somefilesize, someduration, somedate, somestring, somebinary, somenull];
[true, 3, 5.0, 3.1mb, 1wk, "2020-09-10T12:30:00-00:00", "baz", ("huh" | into binary), null],
[true, 3, 5.0, 3.1mb, 1wk, "2020-09-10T12:30:00-00:00", "baz", ("huh" | into binary), 3],
]"#,
None,
vec![
TestRow(
true,
1,
2.0,
1000,
1000000000,
DateTime::parse_from_rfc3339("2023-09-10T11:30:00-00:00").unwrap(),
"foo".into(),
b"binary".to_vec(),
rusqlite::types::Value::Integer(1),
),
TestRow(
false,
2,
3.0,
2000000,
2419200000000000,
DateTime::parse_from_rfc3339("2020-09-10T12:30:00-00:00").unwrap(),
"bar".into(),
b"wut".to_vec(),
rusqlite::types::Value::Null,
),
TestRow(
true,
3,
5.0,
3100000,
604800000000000,
DateTime::parse_from_rfc3339("2020-09-10T12:30:00-00:00").unwrap(),
"baz".into(),
b"huh".to_vec(),
rusqlite::types::Value::Null,
),
TestRow(
true,
3,
5.0,
3100000,
604800000000000,
DateTime::parse_from_rfc3339("2020-09-10T12:30:00-00:00").unwrap(),
"baz".into(),
b"huh".to_vec(),
rusqlite::types::Value::Integer(3),
), ),
], ],
); );
@ -99,8 +240,8 @@ fn into_sqlite_existing_db_append() {
insert_test_rows( insert_test_rows(
&dirs, &dirs,
r#"[ r#"[
[somebool, someint, somefloat, somefilesize, someduration, somedate, somestring, somebinary]; [somebool, someint, somefloat, somefilesize, someduration, somedate, somestring, somebinary, somenull];
[true, 1, 2.0, 1kb, 1sec, "2023-09-10T11:30:00-00:00", "foo", ("binary" | into binary)], [true, 1, 2.0, 1kb, 1sec, "2023-09-10T11:30:00-00:00", "foo", ("binary" | into binary), null],
]"#, ]"#,
None, None,
vec![TestRow( vec![TestRow(
@ -112,6 +253,7 @@ fn into_sqlite_existing_db_append() {
DateTime::parse_from_rfc3339("2023-09-10T11:30:00-00:00").unwrap(), DateTime::parse_from_rfc3339("2023-09-10T11:30:00-00:00").unwrap(),
"foo".into(), "foo".into(),
b"binary".to_vec(), b"binary".to_vec(),
rusqlite::types::Value::Null,
)], )],
); );
@ -119,8 +261,8 @@ fn into_sqlite_existing_db_append() {
insert_test_rows( insert_test_rows(
&dirs, &dirs,
r#"[ r#"[
[somebool, someint, somefloat, somefilesize, someduration, somedate, somestring, somebinary]; [somebool, someint, somefloat, somefilesize, someduration, somedate, somestring, somebinary, somenull];
[false, 2, 3.0, 2mb, 4wk, "2020-09-10T12:30:00-00:00", "bar", ("wut" | into binary)], [false, 2, 3.0, 2mb, 4wk, "2020-09-10T12:30:00-00:00", "bar", ("wut" | into binary), null],
]"#, ]"#,
None, None,
// it should have both rows // it should have both rows
@ -134,6 +276,7 @@ fn into_sqlite_existing_db_append() {
DateTime::parse_from_rfc3339("2023-09-10T11:30:00-00:00").unwrap(), DateTime::parse_from_rfc3339("2023-09-10T11:30:00-00:00").unwrap(),
"foo".into(), "foo".into(),
b"binary".to_vec(), b"binary".to_vec(),
rusqlite::types::Value::Null,
), ),
TestRow( TestRow(
false, false,
@ -144,6 +287,7 @@ fn into_sqlite_existing_db_append() {
DateTime::parse_from_rfc3339("2020-09-10T12:30:00-00:00").unwrap(), DateTime::parse_from_rfc3339("2020-09-10T12:30:00-00:00").unwrap(),
"bar".into(), "bar".into(),
b"wut".to_vec(), b"wut".to_vec(),
rusqlite::types::Value::Null,
), ),
], ],
); );
@ -223,6 +367,7 @@ struct TestRow(
chrono::DateTime<chrono::FixedOffset>, chrono::DateTime<chrono::FixedOffset>,
std::string::String, std::string::String,
std::vec::Vec<u8>, std::vec::Vec<u8>,
rusqlite::types::Value,
); );
impl TestRow { impl TestRow {
@ -243,6 +388,7 @@ impl From<TestRow> for Value {
"somedate" => Value::date(row.5, Span::unknown()), "somedate" => Value::date(row.5, Span::unknown()),
"somestring" => Value::string(row.6, Span::unknown()), "somestring" => Value::string(row.6, Span::unknown()),
"somebinary" => Value::binary(row.7, Span::unknown()), "somebinary" => Value::binary(row.7, Span::unknown()),
"somenull" => Value::nothing(Span::unknown()),
}, },
Span::unknown(), Span::unknown(),
) )
@ -261,6 +407,7 @@ impl<'r> TryFrom<&rusqlite::Row<'r>> for TestRow {
let somedate: DateTime<FixedOffset> = row.get("somedate").unwrap(); let somedate: DateTime<FixedOffset> = row.get("somedate").unwrap();
let somestring: String = row.get("somestring").unwrap(); let somestring: String = row.get("somestring").unwrap();
let somebinary: Vec<u8> = row.get("somebinary").unwrap(); let somebinary: Vec<u8> = row.get("somebinary").unwrap();
let somenull: rusqlite::types::Value = row.get("somenull").unwrap();
Ok(TestRow( Ok(TestRow(
somebool, somebool,
@ -271,6 +418,7 @@ impl<'r> TryFrom<&rusqlite::Row<'r>> for TestRow {
somedate, somedate,
somestring, somestring,
somebinary, somebinary,
somenull,
)) ))
} }
} }
@ -300,6 +448,7 @@ impl Distribution<TestRow> for Standard {
dt, dt,
rand_string, rand_string,
rng.gen::<u64>().to_be_bytes().to_vec(), rng.gen::<u64>().to_be_bytes().to_vec(),
rusqlite::types::Value::Null,
) )
} }
} }