Initial SQLite functionality (#5182)

* Add SQLite functionality to open

* Add in-memory SQLite tests

* clippy fixes

* Fix up old SQLite-related tests
This commit is contained in:
Reilly Wood 2022-04-13 20:15:02 -07:00 committed by GitHub
parent 87c684c7da
commit c150e11cb4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 312 additions and 151 deletions

45
Cargo.lock generated
View File

@ -1009,6 +1009,12 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "fallible-iterator"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7"
[[package]] [[package]]
name = "fallible-streaming-iterator" name = "fallible-streaming-iterator"
version = "0.1.9" version = "0.1.9"
@ -1392,6 +1398,9 @@ name = "hashbrown"
version = "0.11.2" version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e"
dependencies = [
"ahash",
]
[[package]] [[package]]
name = "hashbrown" name = "hashbrown"
@ -1403,6 +1412,15 @@ dependencies = [
"rayon", "rayon",
] ]
[[package]]
name = "hashlink"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7249a3129cbc1ffccd74857f81464a323a152173cdb134e0fd81bc803b29facf"
dependencies = [
"hashbrown 0.11.2",
]
[[package]] [[package]]
name = "heapless" name = "heapless"
version = "0.5.6" version = "0.5.6"
@ -1815,6 +1833,17 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "libsqlite3-sys"
version = "0.24.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "898745e570c7d0453cc1fbc4a701eb6c662ed54e8fec8b7d14be137ebeeb9d14"
dependencies = [
"cc",
"pkg-config",
"vcpkg",
]
[[package]] [[package]]
name = "libssh2-sys" name = "libssh2-sys"
version = "0.2.23" version = "0.2.23"
@ -2342,6 +2371,7 @@ dependencies = [
"regex", "regex",
"reqwest", "reqwest",
"roxmltree", "roxmltree",
"rusqlite",
"rust-embed", "rust-embed",
"serde", "serde",
"serde_ini", "serde_ini",
@ -3607,6 +3637,21 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "rusqlite"
version = "0.27.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85127183a999f7db96d1a976a309eebbfb6ea3b0b400ddd8340190129de6eb7a"
dependencies = [
"bitflags",
"fallible-iterator",
"fallible-streaming-iterator",
"hashlink",
"libsqlite3-sys",
"memchr",
"smallvec",
]
[[package]] [[package]]
name = "rust-embed" name = "rust-embed"
version = "6.3.0" version = "6.3.0"

View File

@ -24,6 +24,7 @@ nu-term-grid = { path = "../nu-term-grid", version = "0.61.0" }
nu-test-support = { path = "../nu-test-support", version = "0.61.0" } nu-test-support = { path = "../nu-test-support", version = "0.61.0" }
nu-utils = { path = "../nu-utils", version = "0.61.0" } nu-utils = { path = "../nu-utils", version = "0.61.0" }
nu-ansi-term = "0.45.1" nu-ansi-term = "0.45.1"
rusqlite = { version = "0.27.0", features = ["bundled"] }
# Potential dependencies for extras # Potential dependencies for extras
base64 = "0.13.0" base64 = "0.13.0"

View File

@ -270,11 +270,6 @@ fn features_enabled() -> Vec<String> {
// names.push("bson".to_string()); // names.push("bson".to_string());
// } // }
// #[cfg(feature = "sqlite")]
// {
// names.push("sqlite".to_string());
// }
// #[cfg(feature = "s3")] // #[cfg(feature = "s3")]
// { // {
// names.push("s3".to_string()); // names.push("s3".to_string());

View File

@ -2,10 +2,12 @@ use nu_engine::{eval_block, get_full_help, CallExt};
use nu_protocol::ast::Call; use nu_protocol::ast::Call;
use nu_protocol::engine::{Command, EngineState, Stack}; use nu_protocol::engine::{Command, EngineState, Stack};
use nu_protocol::{ use nu_protocol::{
Category, Example, IntoPipelineData, PipelineData, RawStream, ShellError, Signature, Spanned, Category, Example, IntoPipelineData, PipelineData, RawStream, ShellError, Signature, Span,
SyntaxShape, Value, Spanned, SyntaxShape, Value,
}; };
use std::io::{BufRead, BufReader, Read}; use rusqlite::types::ValueRef;
use rusqlite::{Connection, Row};
use std::io::{BufRead, BufReader, Read, Seek};
#[cfg(unix)] #[cfg(unix)]
use std::os::unix::fs::PermissionsExt; use std::os::unix::fs::PermissionsExt;
@ -91,33 +93,38 @@ impl Command for Open {
); );
#[cfg(not(unix))] #[cfg(not(unix))]
let error_msg = String::from("Permission denied"); let error_msg = String::from("Permission denied");
Ok(PipelineData::Value( Err(ShellError::SpannedLabeledError(
Value::Error { "Permission denied".into(),
error: ShellError::SpannedLabeledError( error_msg,
"Permission denied".into(), arg_span,
error_msg,
arg_span,
),
},
None,
)) ))
} else { } else {
let file = match std::fs::File::open(path) { let mut file = match std::fs::File::open(path) {
Ok(file) => file, Ok(file) => file,
Err(err) => { Err(err) => {
return Ok(PipelineData::Value( return Err(ShellError::SpannedLabeledError(
Value::Error { "Permission denied".into(),
error: ShellError::SpannedLabeledError( err.to_string(),
"Permission denied".into(), arg_span,
err.to_string(),
arg_span,
),
},
None,
)); ));
} }
}; };
// Peek at the file to see if we can detect a SQLite database
if !raw {
let sqlite_magic_bytes = "SQLite format 3\0".as_bytes();
let mut buf: [u8; 16] = [0; 16];
if file.read_exact(&mut buf).is_ok() && buf == sqlite_magic_bytes {
return open_and_read_sqlite_db(path, call_span)
.map(|val| PipelineData::Value(val, None));
}
if file.rewind().is_err() {
return Err(ShellError::IOError("Failed to rewind file".into()));
};
}
let buf_reader = BufReader::new(file); let buf_reader = BufReader::new(file);
let output = PipelineData::ExternalStream { let output = PipelineData::ExternalStream {
@ -161,7 +168,7 @@ impl Command for Open {
fn examples(&self) -> Vec<nu_protocol::Example> { fn examples(&self) -> Vec<nu_protocol::Example> {
vec![ vec![
Example { Example {
description: "Open a file, with structure (based on file extension)", description: "Open a file, with structure (based on file extension or SQLite database header)",
example: "open myfile.json", example: "open myfile.json",
result: None, result: None,
}, },
@ -184,6 +191,104 @@ impl Command for Open {
} }
} }
/// Opens the SQLite database file at `path` and converts its contents into a
/// single nu `Value` via `read_sqlite_db`.
///
/// # Errors
///
/// Returns `ShellError::SpannedLabeledError` pointing at `call_span` when the
/// database cannot be opened ("Failed to open SQLite database") or when reading
/// from it fails ("Failed to read from SQLite database"). The underlying
/// rusqlite error text is used as the label message.
fn open_and_read_sqlite_db(path: &Path, call_span: Span) -> Result<Value, nu_protocol::ShellError> {
    // Hand the path to rusqlite untouched. A lossy string conversion would
    // replace non-UTF-8 bytes with U+FFFD and could make us open (or fail to
    // open) a different file than the one the caller asked for.
    let conn = Connection::open(path).map_err(|err| {
        ShellError::SpannedLabeledError(
            "Failed to open SQLite database".into(),
            err.to_string(),
            call_span,
        )
    })?;

    read_sqlite_db(conn, call_span).map_err(|err| {
        ShellError::SpannedLabeledError(
            "Failed to read from SQLite database".into(),
            err.to_string(),
            call_span,
        )
    })
}
/// Reads every table listed in `sqlite_master` from `conn` and returns them as
/// one record: each column is a table name, each value is a list holding one
/// record per row of that table.
///
/// All produced values carry `call_span`.
///
/// # Errors
///
/// Propagates any `rusqlite::Error` raised while preparing or stepping the
/// table-listing query or any of the per-table `select *` queries.
fn read_sqlite_db(conn: Connection, call_span: Span) -> Result<Value, rusqlite::Error> {
    let mut table_names: Vec<String> = Vec::new();
    let mut tables: Vec<Value> = Vec::new();

    let mut get_table_names =
        conn.prepare("SELECT name from sqlite_master where type = 'table'")?;
    let rows = get_table_names.query_map([], |row| row.get(0))?;

    for row in rows {
        let table_name: String = row?;

        // Quote the identifier with double quotes and double any embedded
        // quote characters. The bracket form (`[name]`) has no escape for `]`,
        // so a table whose name contains `]` could not be selected from.
        let quoted_name = table_name.replace('"', "\"\"");
        let mut table_stmt = conn.prepare(&format!("select * from \"{}\"", quoted_name))?;
        let mut table_rows = table_stmt.query([])?;

        let mut rows = Vec::new();
        while let Some(table_row) = table_rows.next()? {
            rows.push(convert_sqlite_row_to_nu_value(table_row, call_span));
        }

        // Name and contents are pushed together so both vectors stay aligned.
        table_names.push(table_name);
        tables.push(Value::List {
            vals: rows,
            span: call_span,
        });
    }

    Ok(Value::Record {
        cols: table_names,
        vals: tables,
        span: call_span,
    })
}
/// Converts one SQLite result row into a nu record whose columns are the
/// statement's column names and whose values are the converted cells, all
/// tagged with `span`.
fn convert_sqlite_row_to_nu_value(row: &Row, span: Span) -> Value {
    // Fetch the column names once; they supply both the record's column
    // labels and the number of cells to read.
    let column_names = row.as_ref().column_names();

    // Every index is below the statement's column count, so the unwrapping
    // accessor is used exactly as before.
    let vals = (0..column_names.len())
        .map(|i| convert_sqlite_value_to_nu_value(row.get_ref_unwrap(i), span))
        .collect();
    let cols = column_names.iter().map(|name| name.to_string()).collect();

    Value::Record { cols, vals, span }
}
fn convert_sqlite_value_to_nu_value(value: ValueRef, span: Span) -> Value {
match value {
ValueRef::Null => Value::Nothing { span },
ValueRef::Integer(i) => Value::Int { val: i, span },
ValueRef::Real(f) => Value::Float { val: f, span },
ValueRef::Text(buf) => {
let s = match std::str::from_utf8(buf) {
Ok(v) => v,
Err(_) => {
return Value::Error {
error: ShellError::NonUtf8(span),
}
}
};
Value::String {
val: s.to_string(),
span,
}
}
ValueRef::Blob(u) => Value::Binary {
val: u.to_vec(),
span,
},
}
}
fn permission_denied(dir: impl AsRef<Path>) -> bool { fn permission_denied(dir: impl AsRef<Path>) -> bool {
match dir.as_ref().read_dir() { match dir.as_ref().read_dir() {
Err(e) => matches!(e.kind(), std::io::ErrorKind::PermissionDenied), Err(e) => matches!(e.kind(), std::io::ErrorKind::PermissionDenied),
@ -224,3 +329,109 @@ impl<R: Read> Iterator for BufferedReader<R> {
} }
} }
} }
// Unit tests for `read_sqlite_db`. Each test builds an in-memory SQLite
// connection, so nothing is read from or written to disk.
#[cfg(test)]
mod test {
use super::*;
// A database with no tables converts to a record with no columns.
#[test]
fn can_read_empty_db() {
let db = Connection::open_in_memory().unwrap();
let converted_db = read_sqlite_db(db, Span::test_data()).unwrap();
let expected = Value::Record {
cols: vec![],
vals: vec![],
span: Span::test_data(),
};
assert_eq!(converted_db, expected);
}
// A table with no rows still appears as a column, holding an empty list.
#[test]
fn can_read_empty_table() {
let db = Connection::open_in_memory().unwrap();
db.execute(
"CREATE TABLE person (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL,
data BLOB
)",
[],
)
.unwrap();
let converted_db = read_sqlite_db(db, Span::test_data()).unwrap();
let expected = Value::Record {
cols: vec!["person".to_string()],
vals: vec![Value::List {
vals: vec![],
span: Span::test_data(),
}],
span: Span::test_data(),
};
assert_eq!(converted_db, expected);
}
// Rows convert to records in insertion order; a NULL cell becomes
// `Value::Nothing` and a text cell becomes `Value::String`.
#[test]
fn can_read_null_and_non_null_data() {
let span = Span::test_data();
let db = Connection::open_in_memory().unwrap();
db.execute(
"CREATE TABLE item (
id INTEGER PRIMARY KEY,
name TEXT
)",
[],
)
.unwrap();
// First row carries a NULL name, second row a regular string.
db.execute("INSERT INTO item (id, name) VALUES (123, NULL)", [])
.unwrap();
db.execute("INSERT INTO item (id, name) VALUES (456, 'foo bar')", [])
.unwrap();
let converted_db = read_sqlite_db(db, span).unwrap();
let expected = Value::Record {
cols: vec!["item".to_string()],
vals: vec![Value::List {
vals: vec![
Value::Record {
cols: vec!["id".to_string(), "name".to_string()],
vals: vec![
Value::Int {
val: 123,
span: span,
},
Value::Nothing { span: span },
],
span: span,
},
Value::Record {
cols: vec!["id".to_string(), "name".to_string()],
vals: vec![
Value::Int {
val: 456,
span: span,
},
Value::String {
val: "foo bar".to_string(),
span: span,
},
],
span: span,
},
],
span: span,
}],
span,
};
assert_eq!(converted_db, expected);
}
}

View File

@ -93,7 +93,7 @@ fn md5_works_with_file() {
let actual = nu!( let actual = nu!(
cwd: "tests/fixtures/formats", pipeline( cwd: "tests/fixtures/formats", pipeline(
r#" r#"
open sample.db | hash md5 open sample.db --raw | hash md5
"# "#
) )
); );
@ -106,7 +106,7 @@ fn sha256_works_with_file() {
let actual = nu!( let actual = nu!(
cwd: "tests/fixtures/formats", pipeline( cwd: "tests/fixtures/formats", pipeline(
r#" r#"
open sample.db | hash sha256 open sample.db --raw | hash sha256
"# "#
) )
); );

View File

@ -84,63 +84,38 @@ fn parses_more_bson_complexity() {
// sample.db has the following format: // sample.db has the following format:
// //
// ━━━┯━━━━━━━━━━━━┯━━━━━━━━━━━━━━ // ╭─────────┬────────────────╮
// # │ table_name │ table_values // │ strings │ [table 6 rows] │
// ───┼────────────┼────────────── // │ ints │ [table 5 rows] │
// 0 │ strings │ [6 items] // │ floats │ [table 4 rows] │
// 1 │ ints │ [5 items] // ╰─────────┴────────────────╯
// 2 │ floats │ [4 items]
// ━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━━━
// //
// In this case, this represents a sqlite database // In this case, this represents a sqlite database
// with three tables named `strings`, `ints`, and `floats`. // with three tables named `strings`, `ints`, and `floats`.
// The table_values represent the values for the tables:
// //
// ━━━━┯━━━━━━━┯━━━━━━━━━━┯━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ // Each table has different columns. `strings` has `x` and `y`, while
// # │ x │ y │ z │ f // `ints` has just `z`, and `floats` has only the column `f`. In general, when working
// ────┼───────┼──────────┼──────┼──────────────────────────────────────────────────────────────────────
// 0 │ hello │ <binary> │ │
// 1 │ hello │ <binary> │ │
// 2 │ hello │ <binary> │ │
// 3 │ hello │ <binary> │ │
// 4 │ world │ <binary> │ │
// 5 │ world │ <binary> │ │
// 6 │ │ │ 1 │
// 7 │ │ │ 42 │
// 8 │ │ │ 425 │
// 9 │ │ │ 4253 │
// 10 │ │ │ │
// 11 │ │ │ │ 3.400000000000000
// 12 │ │ │ │ 3.141592650000000
// 13 │ │ │ │ 23.00000000000000
// 14 │ │ │ │ this string that doesn't really belong here but sqlite is what it is
// ━━━━┷━━━━━━━┷━━━━━━━━━━┷━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
//
// We can see here that each table has different columns. `strings` has `x` and `y`, while
// `ints` has just `z`, and `floats` has only the column `f`. This means, in general, when working
// with sqlite, one will want to select a single table, e.g.: // with sqlite, one will want to select a single table, e.g.:
// //
// open sample.db | select 1 | get table_values // open sample.db | get ints
// ━━━┯━━━━━━ // ╭───┬──────╮
// # │ z // │ # │ z │
// ───┼────── // ├───┼──────┤
// 0 │ 1 // │ 0 │ 1 │
// 1 │ 42 // │ 1 │ 42 │
// 2 │ 425 // │ 2 │ 425 │
// 3 │ 4253 // │ 3 │ 4253 │
// 4 │ // │ 4 │ │
// ━━━┷━━━━━━ // ╰───┴──────╯
#[cfg(feature = "sqlite")]
#[test] #[test]
fn parses_sqlite() { fn parses_sqlite() {
let actual = nu!( let actual = nu!(
cwd: "tests/fixtures/formats", pipeline( cwd: "tests/fixtures/formats", pipeline(
r#" r#"
open sample.db open sample.db
| get table_values | get strings
| select 2 | get x.0
| get x
"# "#
)); ));

View File

@ -1,6 +1,4 @@
use nu_test_support::nu; use nu_test_support::nu;
#[cfg(feature = "sqlite")]
use nu_test_support::pipeline; use nu_test_support::pipeline;
#[test] #[test]
@ -43,36 +41,16 @@ fn where_not_in_table() {
assert_eq!(actual.out, "4"); assert_eq!(actual.out, "4");
} }
#[cfg(feature = "sqlite")]
#[test]
fn explicit_block_condition() {
let actual = nu!(
cwd: "tests/fixtures/formats", pipeline(
r#"
open sample.db
| where table_name == ints
| get table_values
| first 4
| where {= $it.z > 4200}
| get z
"#
));
assert_eq!(actual.out, "4253");
}
#[cfg(feature = "sqlite")]
#[test] #[test]
fn binary_operator_comparisons() { fn binary_operator_comparisons() {
let actual = nu!( let actual = nu!(
cwd: "tests/fixtures/formats", pipeline( cwd: "tests/fixtures/formats", pipeline(
r#" r#"
open sample.db open sample.db
| where table_name == ints | get ints
| get table_values
| first 4 | first 4
| where z > 4200 | where z > 4200
| get z | get z.0
"# "#
)); ));
@ -82,11 +60,10 @@ fn binary_operator_comparisons() {
cwd: "tests/fixtures/formats", pipeline( cwd: "tests/fixtures/formats", pipeline(
r#" r#"
open sample.db open sample.db
| where table_name == ints | get ints
| get table_values
| first 4 | first 4
| where z >= 4253 | where z >= 4253
| get z | get z.0
"# "#
)); ));
@ -96,11 +73,10 @@ fn binary_operator_comparisons() {
cwd: "tests/fixtures/formats", pipeline( cwd: "tests/fixtures/formats", pipeline(
r#" r#"
open sample.db open sample.db
| where table_name == ints | get ints
| get table_values
| first 4 | first 4
| where z < 10 | where z < 10
| get z | get z.0
"# "#
)); ));
@ -110,11 +86,10 @@ fn binary_operator_comparisons() {
cwd: "tests/fixtures/formats", pipeline( cwd: "tests/fixtures/formats", pipeline(
r#" r#"
open sample.db open sample.db
| where table_name == ints | get ints
| get table_values
| first 4 | first 4
| where z <= 1 | where z <= 1
| get z | get z.0
"# "#
)); ));
@ -124,8 +99,7 @@ fn binary_operator_comparisons() {
cwd: "tests/fixtures/formats", pipeline( cwd: "tests/fixtures/formats", pipeline(
r#" r#"
open sample.db open sample.db
| where table_name == ints | get ints
| get table_values
| where z != 1 | where z != 1
| first 1 | first 1
| get z | get z
@ -135,15 +109,13 @@ fn binary_operator_comparisons() {
assert_eq!(actual.out, "42"); assert_eq!(actual.out, "42");
} }
#[cfg(feature = "sqlite")]
#[test] #[test]
fn contains_operator() { fn contains_operator() {
let actual = nu!( let actual = nu!(
cwd: "tests/fixtures/formats", pipeline( cwd: "tests/fixtures/formats", pipeline(
r#" r#"
open sample.db open sample.db
| where table_name == strings | get strings
| get table_values
| where x =~ ell | where x =~ ell
| length | length
"# "#
@ -155,8 +127,7 @@ fn contains_operator() {
cwd: "tests/fixtures/formats", pipeline( cwd: "tests/fixtures/formats", pipeline(
r#" r#"
open sample.db open sample.db
| where table_name == strings | get strings
| get table_values
| where x !~ ell | where x !~ ell
| length | length
"# "#

View File

@ -7,7 +7,6 @@ mod json;
mod markdown; mod markdown;
mod nuon; mod nuon;
mod ods; mod ods;
mod sqlite;
mod ssv; mod ssv;
mod toml; mod toml;
mod tsv; mod tsv;

View File

@ -1,36 +0,0 @@
#[cfg(feature = "sqlite")]
use nu_test_support::{nu, pipeline};
#[cfg(feature = "sqlite")]
#[test]
fn table_to_sqlite_and_back_into_table() {
let actual = nu!(
cwd: "tests/fixtures/formats", pipeline(
r#"
open sample.db
| to sqlite
| from sqlite
| get table_values
| select 2
| get x
"#
));
assert_eq!(actual.out, "hello");
}
#[cfg(feature = "sqlite")]
#[test]
fn table_to_sqlite_and_back_into_table_select_table() {
let actual = nu!(
cwd: "tests/fixtures/formats", pipeline(
r#"
open sample.db
| to sqlite
| from sqlite -t [strings]
| get table_names
"#
));
assert_eq!(actual.out, "strings");
}