Infer types from regular delimited plain text unstructured files. (#1494)

* Infer types from regular delimited plain text unstructured files.
* Nothing resolves to an empty string.
2025-01-12 01:09:05 +01:00 · 2020-03-16 15:50:45 -05:00 · 2020-03-16 15:50:45 -05:00 · b36d21e76f
commit b36d21e76f
parent d8c4565413
20 changed files with 751 additions and 315 deletions
--- a/.cargo/config
+++ b/.cargo/config
@ -1,3 +1,3 @@
 [build]

-#rustflags = ["--cfg", "coloring_in_tokens"]
+#rustflags = ["--cfg", "data_processing_primitives"]
--- a/crates/nu-cli/src/commands/from_delimited_data.rs
+++ b/crates/nu-cli/src/commands/from_delimited_data.rs
@ -1,42 +1,12 @@
 use crate::prelude::*;
-use csv::{ErrorKind, ReaderBuilder};
 use nu_errors::ShellError;
-use nu_protocol::{Primitive, ReturnSuccess, TaggedDictBuilder, UntaggedValue, Value};
+use nu_parser::hir::syntax_shape::{ExpandContext, SignatureRegistry};
+use nu_parser::utils::{parse_line_with_separator as parse, LineSeparatedShape};
+use nu_parser::TokensIterator;
+use nu_protocol::{ReturnSuccess, Signature, TaggedDictBuilder, UntaggedValue, Value};
+use nu_source::nom_input;

-fn from_delimited_string_to_value(
-    s: String,
-    headerless: bool,
-    separator: char,
-    tag: impl Into<Tag>,
-) -> Result<Value, csv::Error> {
-    let mut reader = ReaderBuilder::new()
-        .has_headers(!headerless)
-        .delimiter(separator as u8)
-        .from_reader(s.as_bytes());
-    let tag = tag.into();
-
-    let headers = if headerless {
-        (1..=reader.headers()?.len())
-            .map(|i| format!("Column{}", i))
-            .collect::<Vec<String>>()
-    } else {
-        reader.headers()?.iter().map(String::from).collect()
-    };
-
-    let mut rows = vec![];
-    for row in reader.records() {
-        let mut tagged_row = TaggedDictBuilder::new(&tag);
-        for (value, header) in row?.iter().zip(headers.iter()) {
-            tagged_row.insert_value(
-                header,
-                UntaggedValue::Primitive(Primitive::String(String::from(value))).into_value(&tag),
-            )
-        }
-        rows.push(tagged_row.into_value());
-    }
-
-    Ok(UntaggedValue::Table(rows).into_value(&tag))
-}
+use derive_new::new;

 pub fn from_delimited_data(
    headerless: bool,
@ -50,19 +20,20 @@ pub fn from_delimited_data(
        let concat_string = input.collect_string(name_tag.clone()).await?;

        match from_delimited_string_to_value(concat_string.item, headerless, sep, name_tag.clone()) {
-            Ok(x) => match x {
-                Value { value: UntaggedValue::Table(list), .. } => {
-                    for l in list {
-                        yield ReturnSuccess::value(l);
+            Ok(rows) => {
+                for row in rows {
+                    match row {
+                        Value { value: UntaggedValue::Table(list), .. } => {
+                            for l in list {
+                                yield ReturnSuccess::value(l);
+                            }
+                        }
+                        x => yield ReturnSuccess::value(x),
                    }
                }
-                x => yield ReturnSuccess::value(x),
            },
            Err(err) => {
-                let line_one = match pretty_csv_error(err) {
-                    Some(pretty) => format!("Could not parse as {} ({})", format_name,pretty),
-                    None => format!("Could not parse as {}", format_name),
-                };
+                let line_one = format!("Could not parse as {}", format_name);
                let line_two = format!("input cannot be parsed as {}", format_name);
                yield Err(ShellError::labeled_error_with_secondary(
                    line_one,
@ -78,25 +49,121 @@ pub fn from_delimited_data(
    Ok(stream.to_output_stream())
 }

-fn pretty_csv_error(err: csv::Error) -> Option<String> {
-    match err.kind() {
-        ErrorKind::UnequalLengths {
-            pos,
-            expected_len,
-            len,
-        } => {
-            if let Some(pos) = pos {
-                Some(format!(
-                    "Line {}: expected {} fields, found {}",
-                    pos.line(),
-                    expected_len,
-                    len
-                ))
-            } else {
-                Some(format!("Expected {} fields, found {}", expected_len, len))
-            }
-        }
-        ErrorKind::Seek => Some("Internal error while parsing csv".to_string()),
-        _ => None,
+#[derive(Debug, Clone, new)]
+pub struct EmptyRegistry {
+    #[new(default)]
+    signatures: indexmap::IndexMap<String, Signature>,
+}
+
+impl EmptyRegistry {}
+
+impl SignatureRegistry for EmptyRegistry {
+    fn has(&self, _name: &str) -> bool {
+        false
+    }
+    fn get(&self, _name: &str) -> Option<Signature> {
+        None
+    }
+    fn clone_box(&self) -> Box<dyn SignatureRegistry> {
+        Box::new(self.clone())
    }
 }
+
+fn from_delimited_string_to_value(
+    s: String,
+    headerless: bool,
+    sep: char,
+    tag: impl Into<Tag>,
+) -> Result<Vec<Value>, ShellError> {
+    let tag = tag.into();
+
+    let mut entries = s.lines();
+
+    let mut fields = vec![];
+    let mut out = vec![];
+
+    if let Some(first_entry) = entries.next() {
+        let tokens = match parse(&sep.to_string(), nom_input(first_entry)) {
+            Ok((_, tokens)) => tokens,
+            Err(err) => return Err(ShellError::parse_error(err)),
+        };
+
+        let tokens_span = tokens.span;
+        let source: nu_source::Text = tokens_span.slice(&first_entry).into();
+
+        if !headerless {
+            fields = tokens
+                .item
+                .iter()
+                .filter(|token| !token.is_separator())
+                .map(|field| field.source(&source).to_string())
+                .collect::<Vec<_>>();
+        }
+
+        let registry = Box::new(EmptyRegistry::new());
+        let ctx = ExpandContext::new(registry, &source, None);
+
+        let mut iterator = TokensIterator::new(&tokens.item, ctx, tokens_span);
+        let (results, tokens_identified) = iterator.expand(LineSeparatedShape);
+        let results = results?;
+
+        let mut row = TaggedDictBuilder::new(&tag);
+
+        if headerless {
+            let fallback_columns = (1..=tokens_identified)
+                .map(|i| format!("Column{}", i))
+                .collect::<Vec<String>>();
+
+            for (idx, field) in results.into_iter().enumerate() {
+                let key = if headerless {
+                    &fallback_columns[idx]
+                } else {
+                    &fields[idx]
+                };
+
+                row.insert_value(key, field.into_value(&tag));
+            }
+
+            out.push(row.into_value())
+        }
+    }
+
+    for entry in entries {
+        let tokens = match parse(&sep.to_string(), nom_input(entry)) {
+            Ok((_, tokens)) => tokens,
+            Err(err) => return Err(ShellError::parse_error(err)),
+        };
+        let tokens_span = tokens.span;
+
+        let source: nu_source::Text = tokens_span.slice(&entry).into();
+        let registry = Box::new(EmptyRegistry::new());
+        let ctx = ExpandContext::new(registry, &source, None);
+
+        let mut iterator = TokensIterator::new(&tokens.item, ctx, tokens_span);
+        let (results, tokens_identified) = iterator.expand(LineSeparatedShape);
+        let results = results?;
+
+        let mut row = TaggedDictBuilder::new(&tag);
+
+        let fallback_columns = (1..=tokens_identified)
+            .map(|i| format!("Column{}", i))
+            .collect::<Vec<String>>();
+
+        for (idx, field) in results.into_iter().enumerate() {
+            let key = if headerless {
+                &fallback_columns[idx]
+            } else {
+                match fields.get(idx) {
+                    Some(key) => key,
+                    None => &fallback_columns[idx],
+                }
+            };
+
+            row.insert_value(key, field.into_value(&tag));
+        }
+
+        out.push(row.into_value())
+    }
+
+    Ok(out)
+}
--- a/crates/nu-cli/src/commands/to_delimited_data.rs
+++ b/crates/nu-cli/src/commands/to_delimited_data.rs
@ -140,6 +140,7 @@ fn to_string_tagged_value(v: &Value) -> Result<String, ShellError> {
        | UntaggedValue::Primitive(Primitive::Path(_))
        | UntaggedValue::Primitive(Primitive::Int(_)) => as_string(v),
        UntaggedValue::Primitive(Primitive::Date(d)) => Ok(d.to_string()),
+        UntaggedValue::Primitive(Primitive::Nothing) => Ok(String::new()),
        UntaggedValue::Table(_) => Ok(String::from("[Table]")),
        UntaggedValue::Row(_) => Ok(String::from("[Row]")),
        _ => Err(ShellError::labeled_error(
--- a/crates/nu-cli/tests/format_conversions/csv.rs
+++ b/crates/nu-cli/tests/format_conversions/csv.rs
@ -73,8 +73,36 @@ fn table_to_csv_text_skipping_headers_after_conversion() {
 }

 #[test]
-fn from_csv_text_to_table() {
+fn infers_types() {
    Playground::setup("filter_from_csv_test_1", |dirs, sandbox| {
+        sandbox.with_files(vec![FileWithContentToBeTrimmed(
+            "los_cuatro_mosqueteros.csv",
+            r#"
+                first_name,last_name,rusty_luck
+                Andrés,Robalino,1,d
+                Jonathan,Turner,1,d
+                Yehuda,Katz,1,d
+                Jason,Gedge,1,d
+            "#,
+        )]);
+
+        let actual = nu!(
+            cwd: dirs.test(), pipeline(
+            r#"
+                open los_cuatro_mosqueteros.csv
+                | where rusty_luck > 0
+                | count
+                | echo $it
+            "#
+        ));
+
+        assert_eq!(actual, "4");
+    })
+}
+
+#[test]
+fn from_csv_text_to_table() {
+    Playground::setup("filter_from_csv_test_2", |dirs, sandbox| {
        sandbox.with_files(vec![FileWithContentToBeTrimmed(
            "los_tres_caballeros.txt",
            r#"
@ -102,7 +130,7 @@ fn from_csv_text_to_table() {

 #[test]
 fn from_csv_text_with_separator_to_table() {
-    Playground::setup("filter_from_csv_test_2", |dirs, sandbox| {
+    Playground::setup("filter_from_csv_test_3", |dirs, sandbox| {
        sandbox.with_files(vec![FileWithContentToBeTrimmed(
            "los_tres_caballeros.txt",
            r#"
@ -130,7 +158,7 @@ fn from_csv_text_with_separator_to_table() {

 #[test]
 fn from_csv_text_with_tab_separator_to_table() {
-    Playground::setup("filter_from_csv_test_3", |dirs, sandbox| {
+    Playground::setup("filter_from_csv_test_4", |dirs, sandbox| {
        sandbox.with_files(vec![FileWithContentToBeTrimmed(
            "los_tres_caballeros.txt",
            r#"
@ -158,7 +186,7 @@ fn from_csv_text_with_tab_separator_to_table() {

 #[test]
 fn from_csv_text_skipping_headers_to_table() {
-    Playground::setup("filter_from_csv_test_4", |dirs, sandbox| {
+    Playground::setup("filter_from_csv_test_5", |dirs, sandbox| {
        sandbox.with_files(vec![FileWithContentToBeTrimmed(
            "los_tres_amigos.txt",
            r#"
--- a/crates/nu-parser/src/hir/signature.rs
+++ b/crates/nu-parser/src/hir/signature.rs
@ -1,6 +1,6 @@
 use crate::hir;
 use crate::hir::syntax_shape::{
-    expand_atom, expand_syntax, BareShape, ExpandContext, ExpandSyntax, ExpansionRule,
+    ExpandSyntax, expand_atom, expand_syntax, BareShape, ExpandContext, ExpandSyntax, ExpansionRule,
    UnspannedAtomicToken, WhitespaceShape,
 };
 use crate::hir::tokens_iterator::TokensIterator;
--- a/crates/nu-parser/src/hir/syntax_shape/expression/variable_path.rs
+++ b/crates/nu-parser/src/hir/syntax_shape/expression/variable_path.rs
@ -477,18 +477,6 @@ impl ExpandSyntax for MemberShape {
            return Ok(Member::Bare(bare.span()));
        }

-        /* KATZ */
-        /* let number = NumberShape.test(token_nodes, context);
-
-        if let Some(peeked) = number {
-            let node = peeked.not_eof("column")?.commit();
-            let (n, span) = node.as_number().ok_or_else(|| {
-                ParseError::internal_error("can't convert node to number".spanned(node.span()))
-            })?;
-
-            return Ok(Member::Number(n, span))
-        }*/
-
        let string = token_nodes.expand_syntax(StringShape);

        if let Ok(syntax) = string {
--- a/crates/nu-parser/src/hir/tokens_iterator.rs
+++ b/crates/nu-parser/src/hir/tokens_iterator.rs
@ -3,9 +3,6 @@ pub(crate) mod into_shapes;
 pub(crate) mod pattern;
 pub(crate) mod state;

-#[cfg(test)]
-mod tests;
-
 use self::debug::ExpandTracer;
 use self::into_shapes::IntoShapes;
 use self::state::{Peeked, TokensIteratorState};
@ -510,7 +507,7 @@ impl<'content> TokensIterator<'content> {
    /// The purpose of `expand_infallible` is to clearly mark the infallible path through
    /// and entire list of tokens that produces a fully colored version of the source.
    ///
-    /// If the `ExpandSyntax` can poroduce a `Result`, make sure to use `expand_syntax`,
+    /// If the `ExpandSyntax` can produce a `Result`, make sure to use `expand_syntax`,
    /// which will correctly show the error in the trace.
    pub fn expand_infallible<U>(&mut self, shape: impl ExpandSyntax<Output = U>) -> U
    where
@ -536,7 +533,7 @@ impl<'content> TokensIterator<'content> {
        })
    }

-    fn expand<U>(&mut self, shape: impl ExpandSyntax<Output = U>) -> (U, usize)
+    pub fn expand<U>(&mut self, shape: impl ExpandSyntax<Output = U>) -> (U, usize)
    where
        U: std::fmt::Debug + Clone + 'static,
    {
--- a/crates/nu-parser/src/hir/tokens_iterator/tests.rs
+++ b/crates/nu-parser/src/hir/tokens_iterator/tests.rs
@ -1,46 +0,0 @@
-use crate::hir::{syntax_shape::ExpandContext, syntax_shape::SignatureRegistry, TokensIterator};
-use crate::parse::token_tree_builder::TokenTreeBuilder as b;
-use nu_protocol::Signature;
-use nu_source::{Span, Text};
-
-use derive_new::new;
-
-#[derive(Debug, Clone, new)]
-struct TestRegistry {
-    #[new(default)]
-    signatures: indexmap::IndexMap<String, Signature>,
-}
-
-impl TestRegistry {}
-
-impl SignatureRegistry for TestRegistry {
-    fn has(&self, name: &str) -> bool {
-        self.signatures.contains_key(name)
-    }
-    fn get(&self, name: &str) -> Option<Signature> {
-        self.signatures.get(name).cloned()
-    }
-    fn clone_box(&self) -> Box<dyn SignatureRegistry> {
-        Box::new(self.clone())
-    }
-}
-
-#[test]
-fn supplies_tokens() {
-    let token = b::it_var();
-
-    let (tokens, source) = b::build(token);
-
-    let tokens = vec![tokens];
-    let source = Text::from(&source);
-
-    let mut iterator = TokensIterator::new(
-        &tokens,
-        ExpandContext::new(Box::new(TestRegistry::new()), &source, None),
-        Span::unknown(),
-    );
-
-    let token = iterator.next().expect("Token expected.");
-
-    token.expect_var();
-}
--- a/crates/nu-parser/src/lib.rs
+++ b/crates/nu-parser/src/lib.rs
@ -6,6 +6,9 @@ pub mod hir;
 pub mod parse;
 pub mod parse_command;

+#[cfg(test)]
+pub mod test_support;
+
 pub use crate::commands::classified::{
    external::ExternalCommand, internal::InternalCommand, ClassifiedCommand, ClassifiedPipeline,
 };
@ -20,6 +23,11 @@ pub use crate::parse::parser::{module, pipeline};
 pub use crate::parse::token_tree::{Delimiter, SpannedToken, Token};
 pub use crate::parse::token_tree_builder::TokenTreeBuilder;

+pub mod utils {
+    pub use crate::parse::util::parse_line_with_separator;
+    pub use crate::parse::util::LineSeparatedShape;
+}
+
 use log::log_enabled;
 use nu_errors::ShellError;
 use nu_protocol::{errln, outln};
--- a/crates/nu-parser/src/macros.rs
+++ b/crates/nu-parser/src/macros.rs
@ -7,3 +7,49 @@ macro_rules! return_ok {
        }
    };
 }
+
+#[cfg(test)]
+macro_rules! equal_tokens {
+    ($source:tt -> $tokens:expr) => {
+        let result = apply(pipeline, "pipeline", $source);
+        let (expected_tree, expected_source) = TokenTreeBuilder::build($tokens);
+
+        if result != expected_tree {
+            let debug_result = format!("{}", result.debug($source));
+            let debug_expected = format!("{}", expected_tree.debug(&expected_source));
+
+            if debug_result == debug_expected {
+                assert_eq!(
+                    result, expected_tree,
+                    "NOTE: actual and expected had equivalent debug serializations, source={:?}, debug_expected={:?}",
+                    $source,
+                    debug_expected
+                )
+            } else {
+                assert_eq!(debug_result, debug_expected)
+            }
+        }
+    };
+
+    (<$parser:tt> $source:tt -> $tokens:expr) => {
+        let result = apply($parser, stringify!($parser), $source);
+
+        let (expected_tree, expected_source) = TokenTreeBuilder::build($tokens);
+
+        if result != expected_tree {
+            let debug_result = format!("{}", result.debug($source));
+            let debug_expected = format!("{}", expected_tree.debug(&expected_source));
+
+            if debug_result == debug_expected {
+                assert_eq!(
+                    result, expected_tree,
+                    "NOTE: actual and expected had equivalent debug serializations, source={:?}, debug_expected={:?}",
+                    $source,
+                    debug_expected
+                )
+            } else {
+                assert_eq!(debug_result, debug_expected)
+            }
+        }
+    };
+}
--- a/crates/nu-parser/src/parse/parser.rs
+++ b/crates/nu-parser/src/parse/parser.rs
@ -1,5 +1,4 @@
 #![allow(unused)]
-
 use crate::parse::{
    call_node::*, flag::*, number::*, operator::*, pipeline::*, token_tree::*,
    token_tree_builder::*, unit::*,
@ -318,6 +317,7 @@ pub fn dq_string(input: NomSpan) -> IResult<NomSpan, SpannedToken> {
    let (input, _) = char('"')(input)?;
    let start1 = input.offset;
    let (input, _) = many0(none_of("\""))(input)?;
+
    let end1 = input.offset;
    let (input, _) = char('"')(input)?;
    let end = input.offset;
@ -939,7 +939,7 @@ pub fn tight_node(input: NomSpan) -> IResult<NomSpan, Vec<SpannedToken>> {
    ))(input)
 }

-fn to_list(
+pub fn to_list(
    parser: impl Fn(NomSpan) -> IResult<NomSpan, SpannedToken>,
 ) -> impl Fn(NomSpan) -> IResult<NomSpan, Vec<SpannedToken>> {
    move |input| {
@ -1017,7 +1017,7 @@ fn parse_int<T>(frag: &str, neg: Option<T>) -> i64 {
    }
 }

-fn is_boundary(c: Option<char>) -> bool {
+pub fn is_boundary(c: Option<char>) -> bool {
    match c {
        None => true,
        Some(')') | Some(']') | Some('}') | Some('(') => true,
@ -1140,59 +1140,13 @@ fn is_member_start(c: char) -> bool {

 #[cfg(test)]
 mod tests {
-    use super::*;
-    use crate::parse::token_tree_builder::TokenTreeBuilder as b;
-    use crate::parse::token_tree_builder::{CurriedToken, TokenTreeBuilder};
+    use crate::parse::parser::{module, nodes, pipeline};
+    use crate::parse::token_tree_builder::TokenTreeBuilder::{self, self as b};
+    use crate::test_support::apply;
+    use nu_source::PrettyDebugWithSource;
+
    use pretty_assertions::assert_eq;

-    pub type CurriedNode<T> = Box<dyn FnOnce(&mut TokenTreeBuilder) -> T + 'static>;
-
-    macro_rules! equal_tokens {
-        ($source:tt -> $tokens:expr) => {
-            let result = apply(pipeline, "pipeline", $source);
-            let (expected_tree, expected_source) = TokenTreeBuilder::build($tokens);
-
-            if result != expected_tree {
-                let debug_result = format!("{}", result.debug($source));
-                let debug_expected = format!("{}", expected_tree.debug(&expected_source));
-
-                if debug_result == debug_expected {
-                    assert_eq!(
-                        result, expected_tree,
-                        "NOTE: actual and expected had equivalent debug serializations, source={:?}, debug_expected={:?}",
-                        $source,
-                        debug_expected
-                    )
-                } else {
-                    assert_eq!(debug_result, debug_expected)
-                }
-            }
-        };
-
-        (<$parser:tt> $source:tt -> $tokens:expr) => {
-            let result = apply($parser, stringify!($parser), $source);
-
-            let (expected_tree, expected_source) = TokenTreeBuilder::build($tokens);
-
-            if result != expected_tree {
-                let debug_result = format!("{}", result.debug($source));
-                let debug_expected = format!("{}", expected_tree.debug(&expected_source));
-
-                if debug_result == debug_expected {
-                    assert_eq!(
-                        result, expected_tree,
-                        "NOTE: actual and expected had equivalent debug serializations, source={:?}, debug_expected={:?}",
-                        $source,
-                        debug_expected
-                    )
-                } else {
-                    assert_eq!(debug_result, debug_expected)
-                }
-            }
-        };
-
-    }
-
    #[test]
    fn test_integer() {
        equal_tokens! {
@ -1339,7 +1293,7 @@ mod tests {
    fn test_flag() {
        equal_tokens! {
            <nodes>
-            "--amigos" -> b::token_list(vec![b::flag("arepas")])
+            "--amigos" -> b::token_list(vec![b::flag("amigos")])
        }

        equal_tokens! {
@ -1721,119 +1675,4 @@ mod tests {
            ])
        );
    }
-
-    // #[test]
-    // fn test_smoke_pipeline() {
-    //     let _ = pretty_env_logger::try_init();
-
-    //     assert_eq!(
-    //         apply(
-    //             pipeline,
-    //             "pipeline",
-    //             r#"git branch --merged | split-row "`n" | where $it != "* master""#
-    //         ),
-    //         build_token(b::pipeline(vec![
-    //             (
-    //                 None,
-    //                 b::call(
-    //                     b::bare("git"),
-    //                     vec![b::sp(), b::bare("branch"), b::sp(), b::flag("merged")]
-    //                 ),
-    //                 Some(" ")
-    //             ),
-    //             (
-    //                 Some(" "),
-    //                 b::call(b::bare("split-row"), vec![b::sp(), b::string("`n")]),
-    //                 Some(" ")
-    //             ),
-    //             (
-    //                 Some(" "),
-    //                 b::call(
-    //                     b::bare("where"),
-    //                     vec![
-    //                         b::sp(),
-    //                         b::it_var(),
-    //                         b::sp(),
-    //                         b::op("!="),
-    //                         b::sp(),
-    //                         b::string("* master")
-    //                     ]
-    //                 ),
-    //                 None
-    //             )
-    //         ]))
-    //     );
-
-    //     assert_eq!(
-    //         apply(pipeline, "pipeline", "ls | where { $it.size > 100 }"),
-    //         build_token(b::pipeline(vec![
-    //             (None, b::call(b::bare("ls"), vec![]), Some(" ")),
-    //             (
-    //                 Some(" "),
-    //                 b::call(
-    //                     b::bare("where"),
-    //                     vec![
-    //                         b::sp(),
-    //                         b::braced(vec![
-    //                             b::path(b::it_var(), vec![b::member("size")]),
-    //                             b::sp(),
-    //                             b::op(">"),
-    //                             b::sp(),
-    //                             b::int(100)
-    //                         ])
-    //                     ]
-    //                 ),
-    //                 None
-    //             )
-    //         ]))
-    //     )
-    // }
-
-    fn apply(
-        f: impl Fn(
-            NomSpan,
-        )
-            -> Result<(NomSpan, SpannedToken), nom::Err<(NomSpan, nom::error::ErrorKind)>>,
-        desc: &str,
-        string: &str,
-    ) -> SpannedToken {
-        let result = f(nom_input(string));
-
-        match result {
-            Ok(value) => value.1,
-            Err(err) => {
-                let err = nu_errors::ShellError::parse_error(err);
-
-                println!("{:?}", string);
-                crate::hir::baseline_parse::tests::print_err(err, &nu_source::Text::from(string));
-                panic!("test failed")
-            }
-        }
-    }
-
-    fn span((left, right): (usize, usize)) -> Span {
-        Span::new(left, right)
-    }
-
-    fn delimited(
-        delimiter: Spanned<Delimiter>,
-        children: Vec<SpannedToken>,
-        left: usize,
-        right: usize,
-    ) -> SpannedToken {
-        let start = Span::for_char(left);
-        let end = Span::for_char(right);
-
-        let node = DelimitedNode::new(delimiter.item, (start, end), children);
-        Token::Delimited(node).into_spanned((left, right))
-    }
-
-    fn build<T>(block: CurriedNode<T>) -> T {
-        let mut builder = TokenTreeBuilder::new();
-        block(&mut builder)
-    }
-
-    fn build_token(block: CurriedToken) -> SpannedToken {
-        TokenTreeBuilder::build(block).0
-    }
 }
--- a/crates/nu-parser/src/parse/token_tree.rs
+++ b/crates/nu-parser/src/parse/token_tree.rs
@ -306,6 +306,13 @@ impl SpannedToken {
        }
    }

+    pub fn is_int(&self) -> bool {
+        match self.unspanned() {
+            Token::Number(RawNumber::Int(_)) => true,
+            _ => false,
+        }
+    }
+
    pub fn as_string(&self) -> Option<(Span, Span)> {
        match self.unspanned() {
            Token::String(inner_span) => Some((self.span(), *inner_span)),
@ -327,16 +334,16 @@ impl SpannedToken {
        }
    }

-    pub fn is_int(&self) -> bool {
+    pub fn is_dot(&self) -> bool {
        match self.unspanned() {
-            Token::Number(RawNumber::Int(_)) => true,
+            Token::EvaluationOperator(EvaluationOperator::Dot) => true,
            _ => false,
        }
    }

-    pub fn is_dot(&self) -> bool {
+    pub fn is_separator(&self) -> bool {
        match self.unspanned() {
-            Token::EvaluationOperator(EvaluationOperator::Dot) => true,
+            Token::Separator => true,
            _ => false,
        }
    }
@ -479,6 +486,13 @@ impl SpannedToken {
        }
    }

+    pub fn expect_number(&self) -> RawNumber {
+        match self.unspanned() {
+            Token::Number(raw_number) => *raw_number,
+            other => panic!("Expected number, found {:?}", other),
+        }
+    }
+
    pub fn expect_string(&self) -> (Span, Span) {
        match self.unspanned() {
            Token::String(inner_span) => (self.span(), *inner_span),
--- a/crates/nu-parser/src/parse/util.rs
+++ b/crates/nu-parser/src/parse/util.rs
@ -1 +0,0 @@
-
--- a/crates/nu-parser/src/parse/util/line_delimited_parser.rs
+++ b/crates/nu-parser/src/parse/util/line_delimited_parser.rs
@ -0,0 +1,2 @@
+pub(crate) mod parser;
+pub(crate) mod shape;
--- a/crates/nu-parser/src/parse/util/line_delimited_parser/parser.rs
+++ b/crates/nu-parser/src/parse/util/line_delimited_parser/parser.rs
@ -0,0 +1,272 @@
+use crate::parse::number::RawNumber;
+use crate::parse::parser::{is_boundary, to_list};
+use crate::parse::token_tree::SpannedToken;
+use crate::parse::token_tree_builder::TokenTreeBuilder;
+use nu_source::{HasSpan, NomSpan, Span, Spanned, SpannedItem};
+
+use nom::branch::alt;
+use nom::bytes::complete::{escaped, tag};
+use nom::character::complete::*;
+use nom::combinator::*;
+use nom::multi::*;
+use nom::IResult;
+use nom_tracable::tracable_parser;
+
+#[tracable_parser]
+pub fn parse_line_with_separator<'a, 'b>(
+    separator: &'b str,
+    input: NomSpan<'a>,
+) -> IResult<NomSpan<'a>, Spanned<Vec<SpannedToken>>> {
+    let start = input.offset;
+    let mut nodes = vec![];
+    let mut next_input = input;
+
+    loop {
+        let node_result = to_list(leaf(separator))(next_input);
+
+        let (after_node_input, next_nodes) = match node_result {
+            Err(_) => break,
+            Ok((after_node_input, next_node)) => (after_node_input, next_node),
+        };
+
+        nodes.extend(next_nodes);
+
+        match separated_by(separator)(after_node_input) {
+            Err(_) => {
+                next_input = after_node_input;
+                break;
+            }
+            Ok((input, s)) => {
+                nodes.push(s);
+                next_input = input;
+            }
+        }
+    }
+
+    let end = next_input.offset;
+
+    Ok((next_input, nodes.spanned(Span::new(start, end))))
+}
+
+#[tracable_parser]
+pub fn fallback_number_without(c: char) -> impl Fn(NomSpan) -> IResult<NomSpan, SpannedToken> {
+    move |input| {
+        let (input, number) = fallback_raw_number_without(c)(input)?;
+
+        Ok((
+            input,
+            TokenTreeBuilder::spanned_number(number, number.span()),
+        ))
+    }
+}
+
+#[tracable_parser]
+pub fn fallback_raw_number_without(c: char) -> impl Fn(NomSpan) -> IResult<NomSpan, RawNumber> {
+    move |input| {
+        let _anchoral = input;
+        let start = input.offset;
+        let (input, _neg) = opt(tag("-"))(input)?;
+        let (input, _head) = digit1(input)?;
+        let after_int_head = input;
+
+        match input.fragment.chars().next() {
+            None => return Ok((input, RawNumber::int(Span::new(start, input.offset)))),
+            Some('.') => (),
+            other if is_boundary(other) || other == Some(c) => {
+                return Ok((input, RawNumber::int(Span::new(start, input.offset))))
+            }
+            _ => {
+                return Err(nom::Err::Error(nom::error::make_error(
+                    input,
+                    nom::error::ErrorKind::Tag,
+                )))
+            }
+        }
+
+        let dot: IResult<NomSpan, NomSpan, (NomSpan, nom::error::ErrorKind)> = tag(".")(input);
+
+        let input = match dot {
+            Ok((input, _dot)) => input,
+
+            // it's just an integer
+            Err(_) => return Ok((input, RawNumber::int(Span::new(start, input.offset)))),
+        };
+
+        let tail_digits_result: IResult<NomSpan, _> = digit1(input);
+
+        let (input, _tail) = match tail_digits_result {
+            Ok((input, tail)) => (input, tail),
+            Err(_) => {
+                return Ok((
+                    after_int_head,
+                    RawNumber::int((start, after_int_head.offset)),
+                ))
+            }
+        };
+
+        let end = input.offset;
+
+        let next = input.fragment.chars().next();
+
+        if is_boundary(next) || next == Some(c) {
+            Ok((input, RawNumber::decimal(Span::new(start, end))))
+        } else {
+            Err(nom::Err::Error(nom::error::make_error(
+                input,
+                nom::error::ErrorKind::Tag,
+            )))
+        }
+    }
+}
+
+#[tracable_parser]
+pub fn leaf(c: &str) -> impl Fn(NomSpan) -> IResult<NomSpan, SpannedToken> + '_ {
+    move |input| {
+        let separator = c.chars().next().unwrap_or_else(|| ',');
+
+        let (input, node) = alt((
+            fallback_number_without(separator),
+            string,
+            fallback_string_without(c),
+        ))(input)?;
+
+        Ok((input, node))
+    }
+}
+
+#[tracable_parser]
+pub fn separated_by(c: &str) -> impl Fn(NomSpan) -> IResult<NomSpan, SpannedToken> + '_ {
+    move |input| {
+        let left = input.offset;
+        let (input, _) = tag(c)(input)?;
+        let right = input.offset;
+
+        Ok((input, TokenTreeBuilder::spanned_sep(Span::new(left, right))))
+    }
+}
+
+#[tracable_parser]
+pub fn dq_string(input: NomSpan) -> IResult<NomSpan, SpannedToken> {
+    let start = input.offset;
+    let (input, _) = char('"')(input)?;
+    let start1 = input.offset;
+    let (input, _) = escaped(
+        none_of(r#"\""#),
+        '\\',
+        nom::character::complete::one_of(r#"\"rnt"#),
+    )(input)?;
+
+    let end1 = input.offset;
+    let (input, _) = char('"')(input)?;
+    let end = input.offset;
+    Ok((
+        input,
+        TokenTreeBuilder::spanned_string(Span::new(start1, end1), Span::new(start, end)),
+    ))
+}
+
+#[tracable_parser]
+pub fn sq_string(input: NomSpan) -> IResult<NomSpan, SpannedToken> {
+    let start = input.offset;
+    let (input, _) = char('\'')(input)?;
+    let start1 = input.offset;
+    let (input, _) = many0(none_of("\'"))(input)?;
+    let end1 = input.offset;
+    let (input, _) = char('\'')(input)?;
+    let end = input.offset;
+
+    Ok((
+        input,
+        TokenTreeBuilder::spanned_string(Span::new(start1, end1), Span::new(start, end)),
+    ))
+}
+
+#[tracable_parser]
+pub fn string(input: NomSpan) -> IResult<NomSpan, SpannedToken> {
+    alt((sq_string, dq_string))(input)
+}
+
+#[tracable_parser]
+pub fn fallback_string_without(c: &str) -> impl Fn(NomSpan) -> IResult<NomSpan, SpannedToken> + '_ {
+    move |input| {
+        let start = input.offset;
+        let (input, _) = many0(none_of(c))(input)?;
+        let end = input.offset;
+
+        Ok((
+            input,
+            TokenTreeBuilder::spanned_string(Span::new(start, end), Span::new(start, end)),
+        ))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::parse::token_tree_builder::TokenTreeBuilder::{self, self as b};
+    use crate::parse::util::parse_line_with_separator;
+    use crate::test_support::apply;
+    use nom::IResult;
+
+    use crate::parse::pipeline::PipelineElement;
+    use crate::parse::token_tree::SpannedToken;
+    use nu_source::NomSpan;
+    use nu_source::PrettyDebugWithSource;
+
+    use pretty_assertions::assert_eq;
+
+    pub fn nodes(input: NomSpan) -> IResult<NomSpan, SpannedToken> {
+        let (input, tokens) = parse_line_with_separator(",", input)?;
+        let span = tokens.span;
+
+        Ok((
+            input,
+            TokenTreeBuilder::spanned_pipeline(vec![PipelineElement::new(None, tokens)], span),
+        ))
+    }
+
+    #[test]
+    fn separators() {
+        equal_tokens! {
+            <nodes>
+            r#""name","lastname","age""# ->  b::token_list(vec![
+                b::string("name"),
+                b::sep(","),
+                b::string("lastname"),
+                b::sep(","),
+                b::string("age")
+            ])
+        }
+
+        equal_tokens! {
+            <nodes>
+            r#""Andrés","Robalino",12"# ->  b::token_list(vec![
+                b::string("Andrés"),
+                b::sep(","),
+                b::string("Robalino"),
+                b::sep(","),
+                b::int(12)
+            ])
+        }
+    }
+
+    #[test]
+    fn strings() {
+        equal_tokens! {
+            <nodes>
+            r#""andres""# ->  b::token_list(vec![b::string("andres")])
+        }
+    }
+
+    #[test]
+    fn numbers() {
+        equal_tokens! {
+            <nodes>
+            "123" -> b::token_list(vec![b::int(123)])
+        }
+
+        equal_tokens! {
+            <nodes>
+            "-123" -> b::token_list(vec![b::int(-123)])
+        }
+    }
+}
--- a/crates/nu-parser/src/parse/util/line_delimited_parser/shape.rs
+++ b/crates/nu-parser/src/parse/util/line_delimited_parser/shape.rs
@ -0,0 +1,91 @@
+use crate::hir::{
+    self, syntax_shape::ExpandSyntax, syntax_shape::FlatShape, syntax_shape::NumberExpressionShape,
+    syntax_shape::StringShape,
+};
+use crate::hir::{Expression, TokensIterator};
+use crate::parse::token_tree::SeparatorType;
+
+use nu_errors::ParseError;
+use nu_protocol::UntaggedValue;
+use nu_source::Span;
+
+/// Syntax shape that turns one separator-delimited line of tokens into a list of values.
+#[derive(Debug, Copy, Clone)]
+pub struct LineSeparatedShape;
+
+impl ExpandSyntax for LineSeparatedShape {
+    type Output = Result<Vec<UntaggedValue>, ParseError>;
+
+    fn name(&self) -> &'static str {
+        "any string line separated by"
+    }
+
+    /// Reads fields until the iterator is exhausted and returns them in order.
+    ///
+    /// Each field is tried as a number first and as a string second. Integers and
+    /// decimals keep their numeric type, a string whose span is empty becomes
+    /// `nothing`, and any other string is sliced out of the source text. A field that
+    /// is neither a number nor a string contributes no value.
+    ///
+    /// # Errors
+    ///
+    /// Returns the separator error when a separator cannot be expanded and tokens
+    /// still remain.
+    fn expand<'a, 'b>(
+        &self,
+        token_nodes: &mut TokensIterator<'_>,
+    ) -> Result<Vec<UntaggedValue>, ParseError> {
+        let source = token_nodes.source();
+        let mut values = Vec::new();
+
+        while !token_nodes.at_end() {
+            // Number first; fall back to a string only when that fails.
+            let parsed = match token_nodes.expand_syntax(NumberExpressionShape) {
+                Ok(number) => Ok(number),
+                Err(_) => token_nodes
+                    .expand_syntax(StringShape)
+                    .map(|text| Expression::string(text.inner).into_expr(text.span)),
+            };
+
+            if let Ok(field) = parsed {
+                let value = match &field.expr {
+                    Expression::Literal(hir::Literal::Number(crate::Number::Int(int))) => {
+                        Some(UntaggedValue::int(int.clone()))
+                    }
+                    Expression::Literal(hir::Literal::Number(crate::Number::Decimal(dec))) => {
+                        Some(UntaggedValue::decimal(dec.clone()))
+                    }
+                    // An empty string span stands for "no value".
+                    Expression::Literal(hir::Literal::String(inner)) if inner.is_closed() => {
+                        Some(UntaggedValue::nothing())
+                    }
+                    Expression::Literal(hir::Literal::String(inner)) => {
+                        Some(UntaggedValue::string(inner.slice(&source)))
+                    }
+                    _ => None,
+                };
+
+                if let Some(value) = value {
+                    values.push(value);
+                }
+            }
+
+            // A missing separator is only a failure while input remains.
+            if let Err(err) = token_nodes.expand_infallible(SeparatorShape) {
+                if !token_nodes.at_end() {
+                    return Err(err);
+                }
+            }
+        }
+
+        Ok(values)
+    }
+}
+
+/// Syntax shape for a single separator token.
+#[derive(Debug, Copy, Clone)]
+pub struct SeparatorShape;
+
+impl ExpandSyntax for SeparatorShape {
+    type Output = Result<Span, ParseError>;
+
+    fn name(&self) -> &'static str {
+        "separated"
+    }
+
+    /// Expands one `SeparatorType` token and marks it as `FlatShape::Separator`.
+    ///
+    /// NOTE(review): the returned `Span` is presumably the one handed to the closure;
+    /// confirm against `TokensIterator::expand_token`.
+    fn expand<'a, 'b>(&self, token_nodes: &'b mut TokensIterator<'a>) -> Result<Span, ParseError> {
+        token_nodes.expand_token(SeparatorType, |span| Ok((FlatShape::Separator, span)))
+    }
+}
--- a/crates/nu-parser/src/parse/util/mod.rs
+++ b/crates/nu-parser/src/parse/util/mod.rs
@ -0,0 +1,4 @@
+mod line_delimited_parser;
+
+pub use line_delimited_parser::parser::parse_line_with_separator;
+pub use line_delimited_parser::shape::LineSeparatedShape;
--- a/crates/nu-parser/src/test_support/mod.rs
+++ b/crates/nu-parser/src/test_support/mod.rs
@ -0,0 +1,104 @@
+use crate::hir::{syntax_shape::ExpandContext, syntax_shape::SignatureRegistry};
+
+use crate::parse::files::Files;
+use crate::parse::token_tree::{DelimitedNode, Delimiter, SpannedToken, Token};
+use crate::parse::token_tree_builder::{CurriedToken, TokenTreeBuilder};
+
+use nu_errors::ShellError;
+use nu_protocol::Signature;
+use nu_source::{nom_input, NomSpan, Span, Spanned, Text};
+
+pub use nu_source::PrettyDebug;
+
+use derive_new::new;
+
+/// Boxed one-shot closure that is handed a mutable `TokenTreeBuilder` and produces a `T`.
+pub type CurriedNode<T> = Box<dyn FnOnce(&mut TokenTreeBuilder) -> T + 'static>;
+
+/// Signature registry for tests, backed by a name-to-signature map.
+///
+/// The derived `new()` takes no arguments because the only field is defaulted, so a
+/// fresh registry starts out with no signatures.
+#[derive(Debug, Clone, new)]
+pub struct TestRegistry {
+    #[new(default)]
+    signatures: indexmap::IndexMap<String, Signature>,
+}
+
+impl SignatureRegistry for TestRegistry {
+    /// Reports whether a signature is stored under `name`.
+    fn has(&self, name: &str) -> bool {
+        self.signatures.contains_key(name)
+    }
+
+    /// Returns a copy of the signature stored under `name`, if there is one.
+    fn get(&self, name: &str) -> Option<Signature> {
+        self.signatures.get(name).cloned()
+    }
+
+    /// Clones the registry into a boxed trait object.
+    fn clone_box(&self) -> Box<dyn SignatureRegistry> {
+        Box::new(self.clone())
+    }
+}
+
+/// Invokes `callback` with an `ExpandContext` over `source` whose registry is a fresh
+/// `TestRegistry`.
+pub fn with_empty_context(source: &Text, callback: impl FnOnce(ExpandContext)) {
+    let context = ExpandContext::new(Box::new(TestRegistry::new()), source, None);
+    callback(context)
+}
+
+/// Shrinks `span` by one position on each side.
+///
+/// NOTE(review): presumably used to drop the surrounding quotes of a string token;
+/// a span shorter than two positions is not handled.
+pub fn inner_string_span(span: Span) -> Span {
+    let inner_start = span.start() + 1;
+    let inner_end = span.end() - 1;
+
+    Span::new(inner_start, inner_end)
+}
+
+/// Renders `err` as a diagnostic against `source` and writes it to stderr.
+///
+/// The source text gets one trailing space appended before it is handed to the
+/// reporter. Any failure while emitting is deliberately ignored.
+pub fn print_err(err: ShellError, source: &Text) {
+    let diagnostic = err.into_diagnostic();
+    let stderr = termcolor::StandardStream::stderr(termcolor::ColorChoice::Auto);
+
+    let padded_source = format!("{} ", source);
+    let files = Files::new(padded_source);
+
+    let _ = language_reporting::emit(
+        &mut stderr.lock(),
+        &files,
+        &diagnostic,
+        &language_reporting::DefaultConfig,
+    );
+}
+
+/// Runs the parser `f` over `string` and returns the token it produced.
+///
+/// `_desc` is accepted for the caller's benefit and is not used here.
+///
+/// # Panics
+///
+/// Panics with "test failed" when `f` returns an error, after printing the input and
+/// a rendered diagnostic.
+pub fn apply(
+    f: impl Fn(NomSpan) -> Result<(NomSpan, SpannedToken), nom::Err<(NomSpan, nom::error::ErrorKind)>>,
+    _desc: &str,
+    string: &str,
+) -> SpannedToken {
+    match f(nom_input(string)) {
+        Ok((_rest, token)) => token,
+        Err(err) => {
+            let err = ShellError::parse_error(err);
+
+            println!("{:?}", string);
+            // Use this module's own `print_err` rather than reaching into another
+            // module's `tests` submodule for a helper with the same signature.
+            print_err(err, &Text::from(string));
+            panic!("test failed")
+        }
+    }
+}
+
+/// Builds a `Span` from a `(start, end)` pair.
+pub fn span(bounds: (usize, usize)) -> Span {
+    let (left, right) = bounds;
+    Span::new(left, right)
+}
+
+/// Builds a delimited token from `children`, spanned as `(left, right)`.
+///
+/// The node's own pair of spans is made with `Span::for_char(left)` and
+/// `Span::for_char(right)`.
+pub fn delimited(
+    delimiter: Spanned<Delimiter>,
+    children: Vec<SpannedToken>,
+    left: usize,
+    right: usize,
+) -> SpannedToken {
+    let delimiter_spans = (Span::for_char(left), Span::for_char(right));
+    let token = Token::Delimited(DelimitedNode::new(
+        delimiter.item,
+        delimiter_spans,
+        children,
+    ));
+
+    token.into_spanned((left, right))
+}
+
+/// Runs `block` against a brand-new `TokenTreeBuilder` and returns its result.
+pub fn build<T>(block: CurriedNode<T>) -> T {
+    let mut fresh_builder = TokenTreeBuilder::new();
+    let built = block(&mut fresh_builder);
+    built
+}
+
+/// Builds `block` with `TokenTreeBuilder::build` and keeps only the first element of
+/// what it returns.
+pub fn build_token(block: CurriedToken) -> SpannedToken {
+    let built = TokenTreeBuilder::build(block);
+    built.0
+}
--- a/crates/nu-source/src/meta.rs
+++ b/crates/nu-source/src/meta.rs
@ -659,6 +659,27 @@ impl Span {
        self.start == 0 && self.end == 0
    }

+    /// Returns `true` if the span is empty, i.e. its start and end are the same position.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// # use nu_source::Span;
+    /// //  make clean
+    /// //  ----         (0,4) covers "make"
+    /// //       ^       (5,5) covers nothing
+    ///
+    /// let make_span = Span::new(0, 4);
+    /// let clean_span = Span::new(5, 5);
+    ///
+    /// assert_eq!(make_span.is_closed(), false);
+    /// assert_eq!(clean_span.is_closed(), true);
+    /// ```
+    pub fn is_closed(&self) -> bool {
+        self.start == self.end
+    }
+
    /// Returns a slice of the input that covers the start and end of the current Span.
    pub fn slice<'a>(&self, source: &'a str) -> &'a str {
        &source[self.start..self.end]
--- a/tests/plugins/core_str.rs
+++ b/tests/plugins/core_str.rs
@ -78,16 +78,17 @@ fn converts_to_int() {
    let actual = nu!(
        cwd: "tests/fixtures/formats", pipeline(
        r#"
-            open caco3_plastics.csv
-            | first 1
-            | str tariff_item --to-int
-            | where tariff_item == 2509000000
-            | get tariff_item
+            echo '{number_as_string: "1"}'
+            | from-json
+            | str number_as_string --to-int
+            | rename number
+            | where number == 1
+            | get number
            | echo $it
        "#
    ));

-    assert_eq!(actual, "2509000000");
+    assert_eq!(actual, "1");
 }

 #[test]