From c150e11cb4a51ca89b7e52fbbf5571b446122487 Mon Sep 17 00:00:00 2001 From: Reilly Wood <26268125+rgwood@users.noreply.github.com> Date: Wed, 13 Apr 2022 20:15:02 -0700 Subject: [PATCH] Initial SQLite functionality (#5182) * Add SQLite functionality to open * Add in-memory SQLite tests * clippy fixes * Fix up old SQLite-related tests --- Cargo.lock | 45 +++ crates/nu-command/Cargo.toml | 1 + .../nu-command/src/core_commands/version.rs | 5 - crates/nu-command/src/filesystem/open.rs | 257 ++++++++++++++++-- crates/nu-command/tests/commands/hash_/mod.rs | 4 +- crates/nu-command/tests/commands/open.rs | 63 ++--- crates/nu-command/tests/commands/where_.rs | 51 +--- .../tests/format_conversions/mod.rs | 1 - .../tests/format_conversions/sqlite.rs | 36 --- 9 files changed, 312 insertions(+), 151 deletions(-) delete mode 100644 crates/nu-command/tests/format_conversions/sqlite.rs diff --git a/Cargo.lock b/Cargo.lock index 192f433e39..0b835bc500 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1009,6 +1009,12 @@ dependencies = [ "libc", ] +[[package]] +name = "fallible-iterator" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" + [[package]] name = "fallible-streaming-iterator" version = "0.1.9" @@ -1392,6 +1398,9 @@ name = "hashbrown" version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" +dependencies = [ + "ahash", +] [[package]] name = "hashbrown" @@ -1403,6 +1412,15 @@ dependencies = [ "rayon", ] +[[package]] +name = "hashlink" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7249a3129cbc1ffccd74857f81464a323a152173cdb134e0fd81bc803b29facf" +dependencies = [ + "hashbrown 0.11.2", +] + [[package]] name = "heapless" version = "0.5.6" @@ -1815,6 +1833,17 @@ dependencies = [ "libc", ] +[[package]] +name = "libsqlite3-sys" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "898745e570c7d0453cc1fbc4a701eb6c662ed54e8fec8b7d14be137ebeeb9d14" +dependencies = [ + "cc", + "pkg-config", + "vcpkg", +] + [[package]] name = "libssh2-sys" version = "0.2.23" @@ -2342,6 +2371,7 @@ dependencies = [ "regex", "reqwest", "roxmltree", + "rusqlite", "rust-embed", "serde", "serde_ini", @@ -3607,6 +3637,21 @@ dependencies = [ "syn", ] +[[package]] +name = "rusqlite" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85127183a999f7db96d1a976a309eebbfb6ea3b0b400ddd8340190129de6eb7a" +dependencies = [ + "bitflags", + "fallible-iterator", + "fallible-streaming-iterator", + "hashlink", + "libsqlite3-sys", + "memchr", + "smallvec", +] + [[package]] name = "rust-embed" version = "6.3.0" diff --git a/crates/nu-command/Cargo.toml b/crates/nu-command/Cargo.toml index c999af37d0..2d6bf8ed46 100644 --- a/crates/nu-command/Cargo.toml +++ b/crates/nu-command/Cargo.toml @@ -24,6 +24,7 @@ nu-term-grid = { path = "../nu-term-grid", version = "0.61.0" } nu-test-support = { path = "../nu-test-support", version = "0.61.0" } nu-utils = { path = "../nu-utils", version = "0.61.0" } nu-ansi-term = "0.45.1" +rusqlite = { version = "0.27.0", features = ["bundled"] } # Potential dependencies for extras base64 = "0.13.0" diff --git a/crates/nu-command/src/core_commands/version.rs b/crates/nu-command/src/core_commands/version.rs index 297d907285..7365fb7fb2 100644 --- a/crates/nu-command/src/core_commands/version.rs +++ b/crates/nu-command/src/core_commands/version.rs @@ -270,11 +270,6 @@ fn features_enabled() -> Vec { // names.push("bson".to_string()); // } - // #[cfg(feature = "sqlite")] - // { - // names.push("sqlite".to_string()); - // } - // #[cfg(feature = "s3")] // { // names.push("s3".to_string()); diff --git a/crates/nu-command/src/filesystem/open.rs b/crates/nu-command/src/filesystem/open.rs index fc9c4fd392..333e32e2f6 100644 --- a/crates/nu-command/src/filesystem/open.rs +++ b/crates/nu-command/src/filesystem/open.rs @@ -2,10 +2,12 @@ use nu_engine::{eval_block, get_full_help, CallExt}; use nu_protocol::ast::Call; use nu_protocol::engine::{Command, EngineState, Stack}; use nu_protocol::{ - Category, Example, IntoPipelineData, PipelineData, RawStream, ShellError, Signature, Spanned, - SyntaxShape, Value, + Category, Example, IntoPipelineData, PipelineData, RawStream, ShellError, Signature, Span, + Spanned, SyntaxShape, Value, }; -use std::io::{BufRead, BufReader, Read}; +use rusqlite::types::ValueRef; +use rusqlite::{Connection, Row}; +use std::io::{BufRead, BufReader, Read, Seek}; #[cfg(unix)] use std::os::unix::fs::PermissionsExt; @@ -91,33 +93,38 @@ impl Command for Open { ); #[cfg(not(unix))] let error_msg = String::from("Permission denied"); - Ok(PipelineData::Value( - Value::Error { - error: ShellError::SpannedLabeledError( - "Permission denied".into(), - error_msg, - arg_span, - ), - }, - None, + Err(ShellError::SpannedLabeledError( + "Permission denied".into(), + error_msg, + arg_span, )) } else { - let file = match std::fs::File::open(path) { + let mut file = match std::fs::File::open(path) { Ok(file) => file, Err(err) => { - return Ok(PipelineData::Value( - Value::Error { - error: ShellError::SpannedLabeledError( - "Permission denied".into(), - err.to_string(), - arg_span, - ), - }, - None, + return Err(ShellError::SpannedLabeledError( + "Permission denied".into(), + err.to_string(), + arg_span, )); } }; + // Peek at the file to see if we can detect a SQLite database + if !raw { + let sqlite_magic_bytes = "SQLite format 3\0".as_bytes(); + let mut buf: [u8; 16] = [0; 16]; + + if file.read_exact(&mut buf).is_ok() && buf == sqlite_magic_bytes { + return open_and_read_sqlite_db(path, call_span) + .map(|val| PipelineData::Value(val, None)); + } + + if file.rewind().is_err() { + return Err(ShellError::IOError("Failed to rewind file".into())); + }; + } + let buf_reader = BufReader::new(file); let output = PipelineData::ExternalStream { @@ -161,7 +168,7 @@ impl Command for Open { fn examples(&self) -> Vec { vec![ Example { - description: "Open a file, with structure (based on file extension)", + description: "Open a file, with structure (based on file extension or SQLite database header)", example: "open myfile.json", result: None, }, @@ -184,6 +191,104 @@ impl Command for Open { } } +fn open_and_read_sqlite_db(path: &Path, call_span: Span) -> Result { + let path = path.to_string_lossy().to_string(); + + match Connection::open(path) { + Ok(conn) => match read_sqlite_db(conn, call_span) { + Ok(data) => Ok(data), + Err(err) => Err(ShellError::SpannedLabeledError( + "Failed to read from SQLite database".into(), + err.to_string(), + call_span, + )), + }, + Err(err) => Err(ShellError::SpannedLabeledError( + "Failed to open SQLite database".into(), + err.to_string(), + call_span, + )), + } +} + +fn read_sqlite_db(conn: Connection, call_span: Span) -> Result { + let mut table_names: Vec = Vec::new(); + let mut tables: Vec = Vec::new(); + + let mut get_table_names = + conn.prepare("SELECT name from sqlite_master where type = 'table'")?; + let rows = get_table_names.query_map([], |row| row.get(0))?; + + for row in rows { + let table_name: String = row?; + table_names.push(table_name.clone()); + + let mut rows = Vec::new(); + let mut table_stmt = conn.prepare(&format!("select * from [{}]", table_name))?; + let mut table_rows = table_stmt.query([])?; + while let Some(table_row) = table_rows.next()? { + rows.push(convert_sqlite_row_to_nu_value(table_row, call_span)) + } + + let table_record = Value::List { + vals: rows, + span: call_span, + }; + + tables.push(table_record); + } + + Ok(Value::Record { + cols: table_names, + vals: tables, + span: call_span, + }) +} + +fn convert_sqlite_row_to_nu_value(row: &Row, span: Span) -> Value { + let mut vals = Vec::new(); + let colnamestr = row.as_ref().column_names().to_vec(); + let colnames = colnamestr.iter().map(|s| s.to_string()).collect(); + + for (i, c) in row.as_ref().column_names().iter().enumerate() { + let _column = c.to_string(); + let val = convert_sqlite_value_to_nu_value(row.get_ref_unwrap(i), span); + vals.push(val); + } + + Value::Record { + cols: colnames, + vals, + span, + } +} + +fn convert_sqlite_value_to_nu_value(value: ValueRef, span: Span) -> Value { + match value { + ValueRef::Null => Value::Nothing { span }, + ValueRef::Integer(i) => Value::Int { val: i, span }, + ValueRef::Real(f) => Value::Float { val: f, span }, + ValueRef::Text(buf) => { + let s = match std::str::from_utf8(buf) { + Ok(v) => v, + Err(_) => { + return Value::Error { + error: ShellError::NonUtf8(span), + } + } + }; + Value::String { + val: s.to_string(), + span, + } + } + ValueRef::Blob(u) => Value::Binary { + val: u.to_vec(), + span, + }, + } +} + fn permission_denied(dir: impl AsRef) -> bool { match dir.as_ref().read_dir() { Err(e) => matches!(e.kind(), std::io::ErrorKind::PermissionDenied), @@ -224,3 +329,109 @@ impl Iterator for BufferedReader { } } } + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn can_read_empty_db() { + let db = Connection::open_in_memory().unwrap(); + let converted_db = read_sqlite_db(db, Span::test_data()).unwrap(); + + let expected = Value::Record { + cols: vec![], + vals: vec![], + span: Span::test_data(), + }; + + assert_eq!(converted_db, expected); + } + + #[test] + fn can_read_empty_table() { + let db = Connection::open_in_memory().unwrap(); + + db.execute( + "CREATE TABLE person ( + id INTEGER PRIMARY KEY, + name TEXT NOT NULL, + data BLOB + )", + [], + ) + .unwrap(); + let converted_db = read_sqlite_db(db, Span::test_data()).unwrap(); + + let expected = Value::Record { + cols: vec!["person".to_string()], + vals: vec![Value::List { + vals: vec![], + span: Span::test_data(), + }], + span: Span::test_data(), + }; + + assert_eq!(converted_db, expected); + } + + #[test] + fn can_read_null_and_non_null_data() { + let span = Span::test_data(); + let db = Connection::open_in_memory().unwrap(); + + db.execute( + "CREATE TABLE item ( + id INTEGER PRIMARY KEY, + name TEXT + )", + [], + ) + .unwrap(); + + db.execute("INSERT INTO item (id, name) VALUES (123, NULL)", []) + .unwrap(); + + db.execute("INSERT INTO item (id, name) VALUES (456, 'foo bar')", []) + .unwrap(); + + let converted_db = read_sqlite_db(db, span).unwrap(); + + let expected = Value::Record { + cols: vec!["item".to_string()], + vals: vec![Value::List { + vals: vec![ + Value::Record { + cols: vec!["id".to_string(), "name".to_string()], + vals: vec![ + Value::Int { + val: 123, + span: span, + }, + Value::Nothing { span: span }, + ], + span: span, + }, + Value::Record { + cols: vec!["id".to_string(), "name".to_string()], + vals: vec![ + Value::Int { + val: 456, + span: span, + }, + Value::String { + val: "foo bar".to_string(), + span: span, + }, + ], + span: span, + }, + ], + span: span, + }], + span, + }; + + assert_eq!(converted_db, expected); + } +} diff --git a/crates/nu-command/tests/commands/hash_/mod.rs b/crates/nu-command/tests/commands/hash_/mod.rs index 02ea0603cb..230b93ce73 100644 --- a/crates/nu-command/tests/commands/hash_/mod.rs +++ b/crates/nu-command/tests/commands/hash_/mod.rs @@ -93,7 +93,7 @@ fn md5_works_with_file() { let actual = nu!( cwd: "tests/fixtures/formats", pipeline( r#" - open sample.db | hash md5 + open sample.db --raw | hash md5 "# ) ); @@ -106,7 +106,7 @@ fn sha256_works_with_file() { let actual = nu!( cwd: "tests/fixtures/formats", pipeline( r#" - open sample.db | hash sha256 + open sample.db --raw | hash sha256 "# ) ); diff --git a/crates/nu-command/tests/commands/open.rs b/crates/nu-command/tests/commands/open.rs index 2240f55866..d796addf89 100644 --- a/crates/nu-command/tests/commands/open.rs +++ b/crates/nu-command/tests/commands/open.rs @@ -84,63 +84,38 @@ fn parses_more_bson_complexity() { // sample.db has the following format: // -// ━━━┯━━━━━━━━━━━━┯━━━━━━━━━━━━━━ -// # │ table_name │ table_values -// ───┼────────────┼────────────── -// 0 │ strings │ [6 items] -// 1 │ ints │ [5 items] -// 2 │ floats │ [4 items] -// ━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━━━ +// ╭─────────┬────────────────╮ +// │ strings │ [table 6 rows] │ +// │ ints │ [table 5 rows] │ +// │ floats │ [table 4 rows] │ +// ╰─────────┴────────────────╯ // // In this case, this represents a sqlite database // with three tables named `strings`, `ints`, and `floats`. -// The table_values represent the values for the tables: // -// ━━━━┯━━━━━━━┯━━━━━━━━━━┯━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ -// # │ x │ y │ z │ f -// ────┼───────┼──────────┼──────┼────────────────────────────────────────────────────────────────────── -// 0 │ hello │ │ │ -// 1 │ hello │ │ │ -// 2 │ hello │ │ │ -// 3 │ hello │ │ │ -// 4 │ world │ │ │ -// 5 │ world │ │ │ -// 6 │ │ │ 1 │ -// 7 │ │ │ 42 │ -// 8 │ │ │ 425 │ -// 9 │ │ │ 4253 │ -// 10 │ │ │ │ -// 11 │ │ │ │ 3.400000000000000 -// 12 │ │ │ │ 3.141592650000000 -// 13 │ │ │ │ 23.00000000000000 -// 14 │ │ │ │ this string that doesn't really belong here but sqlite is what it is -// ━━━━┷━━━━━━━┷━━━━━━━━━━┷━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ -// -// We can see here that each table has different columns. `strings` has `x` and `y`, while -// `ints` has just `z`, and `floats` has only the column `f`. This means, in general, when working +// Each table has different columns. `strings` has `x` and `y`, while +// `ints` has just `z`, and `floats` has only the column `f`. In general, when working // with sqlite, one will want to select a single table, e.g.: // -// open sample.db | select 1 | get table_values -// ━━━┯━━━━━━ -// # │ z -// ───┼────── -// 0 │ 1 -// 1 │ 42 -// 2 │ 425 -// 3 │ 4253 -// 4 │ -// ━━━┷━━━━━━ +// open sample.db | get ints +// ╭───┬──────╮ +// │ # │ z │ +// ├───┼──────┤ +// │ 0 │ 1 │ +// │ 1 │ 42 │ +// │ 2 │ 425 │ +// │ 3 │ 4253 │ +// │ 4 │ │ +// ╰───┴──────╯ -#[cfg(feature = "sqlite")] #[test] fn parses_sqlite() { let actual = nu!( cwd: "tests/fixtures/formats", pipeline( r#" open sample.db - | get table_values - | select 2 - | get x + | get strings + | get x.0 "# )); diff --git a/crates/nu-command/tests/commands/where_.rs b/crates/nu-command/tests/commands/where_.rs index c4eb69c639..4575dc7887 100644 --- a/crates/nu-command/tests/commands/where_.rs +++ b/crates/nu-command/tests/commands/where_.rs @@ -1,6 +1,4 @@ use nu_test_support::nu; - -#[cfg(feature = "sqlite")] use nu_test_support::pipeline; #[test] @@ -43,36 +41,16 @@ fn where_not_in_table() { assert_eq!(actual.out, "4"); } -#[cfg(feature = "sqlite")] -#[test] -fn explicit_block_condition() { - let actual = nu!( - cwd: "tests/fixtures/formats", pipeline( - r#" - open sample.db - | where table_name == ints - | get table_values - | first 4 - | where {= $it.z > 4200} - | get z - "# - )); - - assert_eq!(actual.out, "4253"); -} - -#[cfg(feature = "sqlite")] #[test] fn binary_operator_comparisons() { let actual = nu!( cwd: "tests/fixtures/formats", pipeline( r#" open sample.db - | where table_name == ints - | get table_values + | get ints | first 4 | where z > 4200 - | get z + | get z.0 "# )); @@ -82,11 +60,10 @@ fn binary_operator_comparisons() { cwd: "tests/fixtures/formats", pipeline( r#" open sample.db - | where table_name == ints - | get table_values + | get ints | first 4 | where z >= 4253 - | get z + | get z.0 "# )); @@ -96,11 +73,10 @@ fn binary_operator_comparisons() { cwd: "tests/fixtures/formats", pipeline( r#" open sample.db - | where table_name == ints - | get table_values + | get ints | first 4 | where z < 10 - | get z + | get z.0 "# )); @@ -110,11 +86,10 @@ fn binary_operator_comparisons() { cwd: "tests/fixtures/formats", pipeline( r#" open sample.db - | where table_name == ints - | get table_values + | get ints | first 4 | where z <= 1 - | get z + | get z.0 "# )); @@ -124,8 +99,7 @@ fn binary_operator_comparisons() { cwd: "tests/fixtures/formats", pipeline( r#" open sample.db - | where table_name == ints - | get table_values + | get ints | where z != 1 | first 1 | get z @@ -135,15 +109,13 @@ fn binary_operator_comparisons() { assert_eq!(actual.out, "42"); } -#[cfg(feature = "sqlite")] #[test] fn contains_operator() { let actual = nu!( cwd: "tests/fixtures/formats", pipeline( r#" open sample.db - | where table_name == strings - | get table_values + | get strings | where x =~ ell | length "# @@ -155,8 +127,7 @@ fn contains_operator() { cwd: "tests/fixtures/formats", pipeline( r#" open sample.db - | where table_name == strings - | get table_values + | get strings | where x !~ ell | length "# diff --git a/crates/nu-command/tests/format_conversions/mod.rs b/crates/nu-command/tests/format_conversions/mod.rs index cc89e0275f..c9f7a20b1b 100644 --- a/crates/nu-command/tests/format_conversions/mod.rs +++ b/crates/nu-command/tests/format_conversions/mod.rs @@ -7,7 +7,6 @@ mod json; mod markdown; mod nuon; mod ods; -mod sqlite; mod ssv; mod toml; mod tsv; diff --git a/crates/nu-command/tests/format_conversions/sqlite.rs b/crates/nu-command/tests/format_conversions/sqlite.rs deleted file mode 100644 index 338d876c61..0000000000 --- a/crates/nu-command/tests/format_conversions/sqlite.rs +++ /dev/null @@ -1,36 +0,0 @@ -#[cfg(feature = "sqlite")] -use nu_test_support::{nu, pipeline}; - -#[cfg(feature = "sqlite")] -#[test] -fn table_to_sqlite_and_back_into_table() { - let actual = nu!( - cwd: "tests/fixtures/formats", pipeline( - r#" - open sample.db - | to sqlite - | from sqlite - | get table_values - | select 2 - | get x - "# - )); - - assert_eq!(actual.out, "hello"); -} - -#[cfg(feature = "sqlite")] -#[test] -fn table_to_sqlite_and_back_into_table_select_table() { - let actual = nu!( - cwd: "tests/fixtures/formats", pipeline( - r#" - open sample.db - | to sqlite - | from sqlite -t [strings] - | get table_names - "# - )); - - assert_eq!(actual.out, "strings"); -}