From 5e42b140263e9fd63c1c3a8bf6e8a2f73ec2e66f Mon Sep 17 00:00:00 2001 From: JT Date: Wed, 3 Nov 2021 08:53:48 +1300 Subject: [PATCH] Documenting some code and doing cleanups --- crates/nu-command/src/default_context.rs | 4 +- crates/nu-command/src/example_test.rs | 4 +- crates/nu-protocol/src/engine/engine_state.rs | 73 +++++++++++++++++-- crates/nu-protocol/src/engine/stack.rs | 40 +++++----- crates/nu-protocol/src/pipeline_data.rs | 18 +++++ src/main.rs | 4 +- 6 files changed, 105 insertions(+), 38 deletions(-) diff --git a/crates/nu-command/src/default_context.rs b/crates/nu-command/src/default_context.rs index 3d41f27355..52dc3f309b 100644 --- a/crates/nu-command/src/default_context.rs +++ b/crates/nu-command/src/default_context.rs @@ -103,9 +103,7 @@ pub fn create_default_context() -> EngineState { working_set.render() }; - { - EngineState::merge_delta(&mut engine_state, delta); - } + engine_state.merge_delta(delta); engine_state } diff --git a/crates/nu-command/src/example_test.rs b/crates/nu-command/src/example_test.rs index f842dc2db8..bc3c819a75 100644 --- a/crates/nu-command/src/example_test.rs +++ b/crates/nu-command/src/example_test.rs @@ -30,7 +30,7 @@ pub fn test_examples(cmd: impl Command + 'static) { working_set.render() }; - EngineState::merge_delta(&mut *engine_state, delta); + engine_state.merge_delta(delta); for example in examples { // Skip tests that don't have results to compare to @@ -50,7 +50,7 @@ pub fn test_examples(cmd: impl Command + 'static) { (output, working_set.render()) }; - EngineState::merge_delta(&mut engine_state, delta); + engine_state.merge_delta(delta); let mut stack = Stack::new(); diff --git a/crates/nu-protocol/src/engine/engine_state.rs b/crates/nu-protocol/src/engine/engine_state.rs index 6e072ddc29..7464354ea2 100644 --- a/crates/nu-protocol/src/engine/engine_state.rs +++ b/crates/nu-protocol/src/engine/engine_state.rs @@ -80,6 +80,48 @@ impl Default for ScopeFrame { } } +/// The core global engine state. This includes all global definitions as well as any global state that +/// will persist for the whole session. +/// +/// Declarations, variables, blocks, and other forms of data are held in the global state and referenced +/// elsewhere using their IDs. These IDs are simply their index into the global state. This allows us to +/// more easily handle creating blocks, binding variables and callsites, and more, because each of these +/// will refer to the corresponding IDs rather than their definitions directly. At runtime, this means +/// less copying and smaller structures. +/// +/// Note that the runtime stack is not part of this global state. Runtime stacks are handled differently, +/// but they also rely on using IDs rather than full definitions. +/// +/// A note on implementation: +/// +/// Much of the global definitions are built on the Bodil's 'im' crate. This gives us a way of working with +/// lists of definitions in a way that is very cheap to access, while also allowing us to update them at +/// key points in time (often, the transition between parsing and evaluation). +/// +/// Over the last two years we tried a few different approaches to global state like this. I'll list them +/// here for posterity, so we can more easily know how we got here: +/// +/// * `Rc` - Rc is cheap, but not thread-safe. The moment we wanted to work with external processes, we +/// needed a way send to stdin/stdout. In Rust, the current practice is to spawn a thread to handle both. +/// These threads would need access to the global state, as they'll need to process data as it streams out +/// of the data pipeline. Because Rc isn't thread-safe, this breaks. +/// +/// * `Arc` - Arc is the thread-safe version of the above. Often Arc is used in combination with a Mutex or +/// RwLock, but you can use Arc by itself. We did this a few places in the original Nushell. This *can* work +/// but because of Arc's nature of not allowing mutation if there's a second copy of the Arc around, this +/// ultimately becomes limiting. +/// +/// * `Arc` + `Mutex/RwLock` - the standard practice for thread-safe containers. Unfortunately, this would +/// have meant we would incur a lock penalty every time we needed to access any declaration or block. As we +/// would be reading far more often than writing, it made sense to explore solutions that favor large amounts +/// of reads. +/// +/// * `im` - the `im` crate was ultimately chosen because it has some very nice properties: it gives the +/// ability to cheaply clone these structures, which is nice as EngineState may need to be cloned a fair bit +/// to follow ownership rules for closures and iterators. It also is cheap to access. Favoring reads here fits +/// more closely to what we need with Nushell. And, of course, it's still thread-safe, so we get the same +/// benefits as above. +/// #[derive(Clone)] pub struct EngineState { files: im::Vector<(String, usize, usize)>, @@ -107,15 +149,22 @@ impl EngineState { } } - pub fn merge_delta(this: &mut EngineState, mut delta: StateDelta) { + /// Merges a `StateDelta` onto the current state. These deltas come from a system, like the parser, that + /// creates a new set of definitions and visible symbols in the current scope. We make this transactional + /// as there are times when we want to run the parser and immediately throw away the results (namely: + /// syntax highlighting and completions). + /// + /// When we want to preserve what the parser has created, we can take its output (the `StateDelta`) and + /// use this function to merge it into the global state. + pub fn merge_delta(&mut self, mut delta: StateDelta) { // Take the mutable reference and extend the permanent state from the working set - this.files.extend(delta.files); - this.file_contents.extend(delta.file_contents); - this.decls.extend(delta.decls); - this.vars.extend(delta.vars); - this.blocks.extend(delta.blocks); + self.files.extend(delta.files); + self.file_contents.extend(delta.file_contents); + self.decls.extend(delta.decls); + self.vars.extend(delta.vars); + self.blocks.extend(delta.blocks); - if let Some(last) = this.scope.back_mut() { + if let Some(last) = self.scope.back_mut() { let first = delta.scope.remove(0); for item in first.decls.into_iter() { last.decls.insert(item.0, item.1); @@ -322,11 +371,19 @@ impl Default for EngineState { } } +/// A temporary extension to the global state. This handles bridging between the global state and the +/// additional declarations and scope changes that are not yet part of the global scope. +/// +/// This working set is created by the parser as a way of handling declarations and scope changes that +/// may later be merged or dropped (and not merged) depending on the needs of the code calling the parser. pub struct StateWorkingSet<'a> { pub permanent_state: &'a EngineState, pub delta: StateDelta, } +/// A delta (or change set) between the current global state and a possible future global state. Deltas +/// can be applied to the global state to update it to contain both previous state and the state held +/// within the delta. pub struct StateDelta { files: Vec<(String, usize, usize)>, pub(crate) file_contents: Vec<(Vec, usize, usize)>, @@ -921,7 +978,7 @@ mod engine_state_tests { working_set.render() }; - EngineState::merge_delta(&mut engine_state, delta); + engine_state.merge_delta(delta); assert_eq!(engine_state.num_files(), 2); assert_eq!(&engine_state.files[0].0, "test.nu"); diff --git a/crates/nu-protocol/src/engine/stack.rs b/crates/nu-protocol/src/engine/stack.rs index 3a0e46a1de..45b013de59 100644 --- a/crates/nu-protocol/src/engine/stack.rs +++ b/crates/nu-protocol/src/engine/stack.rs @@ -2,6 +2,23 @@ use std::collections::HashMap; use crate::{ShellError, Value, VarId}; +/// A runtime value stack used during evaluation +/// +/// A note on implementation: +/// +/// We previously set up the stack in a traditional way, where stack frames had parents which would +/// represent other frames that you might return to when exiting a function. +/// +/// While experimenting with blocks, we found that we needed to have closure captures of variables +/// seen outside of the blocks, so that they blocks could be run in a way that was both thread-safe +/// and followed the restrictions for closures applied to iterators. The end result left us with +/// closure-captured single stack frames that blocks could see. +/// +/// Blocks make up the only scope and stack definition abstraction in Nushell. As a result, we were +/// creating closure captures at any point we wanted to have a Block value we could safely evaluate +/// in any context. This meant that the parents were going largely unused, with captured variables +/// taking their place. The end result is this, where we no longer have separate frames, but instead +/// use the Stack as a way of representing the local and closure-captured state. #[derive(Debug, Clone)] pub struct Stack { pub vars: HashMap, @@ -50,40 +67,18 @@ impl Stack { output } - // pub fn enter_scope(&self) -> Stack { - // // FIXME: VERY EXPENSIVE to clone entire stack - // let mut output = self.clone(); - // output.0.push(StackFrame { - // vars: HashMap::new(), - // env_vars: HashMap::new(), - // }); - - // output - // } - pub fn get_env_vars(&self) -> HashMap { - // let mut output = HashMap::new(); - - // for frame in &self.0 { - // output.extend(frame.env_vars.clone().into_iter()); - // } - - // output self.env_vars.clone() } pub fn get_env_var(&self, name: &str) -> Option { - // for frame in self.0.iter().rev() { if let Some(v) = self.env_vars.get(name) { return Some(v.to_string()); } - // } None } pub fn print_stack(&self) { - // for frame in self.0.iter().rev() { - // println!("===frame==="); println!("vars:"); for (var, val) in &self.vars { println!(" {}: {:?}", var, val); @@ -92,6 +87,5 @@ impl Stack { for (var, val) in &self.env_vars { println!(" {}: {:?}", var, val); } - // } } } diff --git a/crates/nu-protocol/src/pipeline_data.rs b/crates/nu-protocol/src/pipeline_data.rs index fbe8862a98..47b10b40db 100644 --- a/crates/nu-protocol/src/pipeline_data.rs +++ b/crates/nu-protocol/src/pipeline_data.rs @@ -2,6 +2,24 @@ use std::sync::{atomic::AtomicBool, Arc}; use crate::{ast::PathMember, ShellError, Span, Value, ValueStream}; +/// The foundational abstraction for input and output to commands +/// +/// This represents either a single Value or a stream of values coming into the command or leaving a command. +/// +/// A note on implementation: +/// +/// We've tried a few variations of this structure. Listing these below so we have a record. +/// +/// * We tried always assuming a stream in Nushell. This was a great 80% solution, but it had some rough edges. +/// Namely, how do you know the difference between a single string and a list of one string. How do you know +/// when to flatten the data given to you from a data source into the stream or to keep it as an unflattened +/// list? +/// * We tried putting the stream into Value. This had some interesting properties as now commands "just worked +/// on values", but the inability to pass Value to threads as-is meant a lot of workarounds for dealing with +/// Value's stream case +/// * A balance of the two approaches is what we've landed on: Values are thread-safe to pass, and we can stream +/// them into any sources. Streams are still available to model the infinite streams approach of original +/// Nushell. pub enum PipelineData { Value(Value), Stream(ValueStream), diff --git a/src/main.rs b/src/main.rs index 9dba312575..9d8f28ff85 100644 --- a/src/main.rs +++ b/src/main.rs @@ -118,7 +118,7 @@ fn main() -> Result<()> { (output, working_set.render()) }; - EngineState::merge_delta(&mut engine_state, delta); + engine_state.merge_delta(delta); let mut stack = nu_protocol::engine::Stack::new(); @@ -347,7 +347,7 @@ fn eval_source( (output, working_set.render()) }; - EngineState::merge_delta(engine_state, delta); + engine_state.merge_delta(delta); match eval_block(engine_state, stack, &block, PipelineData::new()) { Ok(pipeline_data) => {