Merge remote-tracking branch 'origin' into porting-format

This commit is contained in:
Antonio Natilla 2021-11-02 21:23:50 +01:00
commit d134774f4b
6 changed files with 105 additions and 38 deletions

View File

@ -104,9 +104,7 @@ pub fn create_default_context() -> EngineState {
working_set.render() working_set.render()
}; };
{ engine_state.merge_delta(delta);
EngineState::merge_delta(&mut engine_state, delta);
}
engine_state engine_state
} }

View File

@ -30,7 +30,7 @@ pub fn test_examples(cmd: impl Command + 'static) {
working_set.render() working_set.render()
}; };
EngineState::merge_delta(&mut *engine_state, delta); engine_state.merge_delta(delta);
for example in examples { for example in examples {
// Skip tests that don't have results to compare to // Skip tests that don't have results to compare to
@ -50,7 +50,7 @@ pub fn test_examples(cmd: impl Command + 'static) {
(output, working_set.render()) (output, working_set.render())
}; };
EngineState::merge_delta(&mut engine_state, delta); engine_state.merge_delta(delta);
let mut stack = Stack::new(); let mut stack = Stack::new();

View File

@ -80,6 +80,48 @@ impl Default for ScopeFrame {
} }
} }
/// The core global engine state. This includes all global definitions as well as any global state that
/// will persist for the whole session.
///
/// Declarations, variables, blocks, and other forms of data are held in the global state and referenced
/// elsewhere using their IDs. These IDs are simply their index into the global state. This allows us to
/// more easily handle creating blocks, binding variables and callsites, and more, because each of these
/// will refer to the corresponding IDs rather than their definitions directly. At runtime, this means
/// less copying and smaller structures.
///
/// Note that the runtime stack is not part of this global state. Runtime stacks are handled differently,
/// but they also rely on using IDs rather than full definitions.
///
/// A note on implementation:
///
/// Much of the global definitions are built on the Bodil's 'im' crate. This gives us a way of working with
/// lists of definitions in a way that is very cheap to access, while also allowing us to update them at
/// key points in time (often, the transition between parsing and evaluation).
///
/// Over the last two years we tried a few different approaches to global state like this. I'll list them
/// here for posterity, so we can more easily know how we got here:
///
/// * `Rc` - Rc is cheap, but not thread-safe. The moment we wanted to work with external processes, we
/// needed a way send to stdin/stdout. In Rust, the current practice is to spawn a thread to handle both.
/// These threads would need access to the global state, as they'll need to process data as it streams out
/// of the data pipeline. Because Rc isn't thread-safe, this breaks.
///
/// * `Arc` - Arc is the thread-safe version of the above. Often Arc is used in combination with a Mutex or
/// RwLock, but you can use Arc by itself. We did this a few places in the original Nushell. This *can* work
/// but because of Arc's nature of not allowing mutation if there's a second copy of the Arc around, this
/// ultimately becomes limiting.
///
/// * `Arc` + `Mutex/RwLock` - the standard practice for thread-safe containers. Unfortunately, this would
/// have meant we would incur a lock penalty every time we needed to access any declaration or block. As we
/// would be reading far more often than writing, it made sense to explore solutions that favor large amounts
/// of reads.
///
/// * `im` - the `im` crate was ultimately chosen because it has some very nice properties: it gives the
/// ability to cheaply clone these structures, which is nice as EngineState may need to be cloned a fair bit
/// to follow ownership rules for closures and iterators. It also is cheap to access. Favoring reads here fits
/// more closely to what we need with Nushell. And, of course, it's still thread-safe, so we get the same
/// benefits as above.
///
#[derive(Clone)] #[derive(Clone)]
pub struct EngineState { pub struct EngineState {
files: im::Vector<(String, usize, usize)>, files: im::Vector<(String, usize, usize)>,
@ -107,15 +149,22 @@ impl EngineState {
} }
} }
pub fn merge_delta(this: &mut EngineState, mut delta: StateDelta) { /// Merges a `StateDelta` onto the current state. These deltas come from a system, like the parser, that
/// creates a new set of definitions and visible symbols in the current scope. We make this transactional
/// as there are times when we want to run the parser and immediately throw away the results (namely:
/// syntax highlighting and completions).
///
/// When we want to preserve what the parser has created, we can take its output (the `StateDelta`) and
/// use this function to merge it into the global state.
pub fn merge_delta(&mut self, mut delta: StateDelta) {
// Take the mutable reference and extend the permanent state from the working set // Take the mutable reference and extend the permanent state from the working set
this.files.extend(delta.files); self.files.extend(delta.files);
this.file_contents.extend(delta.file_contents); self.file_contents.extend(delta.file_contents);
this.decls.extend(delta.decls); self.decls.extend(delta.decls);
this.vars.extend(delta.vars); self.vars.extend(delta.vars);
this.blocks.extend(delta.blocks); self.blocks.extend(delta.blocks);
if let Some(last) = this.scope.back_mut() { if let Some(last) = self.scope.back_mut() {
let first = delta.scope.remove(0); let first = delta.scope.remove(0);
for item in first.decls.into_iter() { for item in first.decls.into_iter() {
last.decls.insert(item.0, item.1); last.decls.insert(item.0, item.1);
@ -322,11 +371,19 @@ impl Default for EngineState {
} }
} }
/// A temporary extension to the global state. This handles bridging between the global state and the
/// additional declarations and scope changes that are not yet part of the global scope.
///
/// This working set is created by the parser as a way of handling declarations and scope changes that
/// may later be merged or dropped (and not merged) depending on the needs of the code calling the parser.
pub struct StateWorkingSet<'a> { pub struct StateWorkingSet<'a> {
pub permanent_state: &'a EngineState, pub permanent_state: &'a EngineState,
pub delta: StateDelta, pub delta: StateDelta,
} }
/// A delta (or change set) between the current global state and a possible future global state. Deltas
/// can be applied to the global state to update it to contain both previous state and the state held
/// within the delta.
pub struct StateDelta { pub struct StateDelta {
files: Vec<(String, usize, usize)>, files: Vec<(String, usize, usize)>,
pub(crate) file_contents: Vec<(Vec<u8>, usize, usize)>, pub(crate) file_contents: Vec<(Vec<u8>, usize, usize)>,
@ -921,7 +978,7 @@ mod engine_state_tests {
working_set.render() working_set.render()
}; };
EngineState::merge_delta(&mut engine_state, delta); engine_state.merge_delta(delta);
assert_eq!(engine_state.num_files(), 2); assert_eq!(engine_state.num_files(), 2);
assert_eq!(&engine_state.files[0].0, "test.nu"); assert_eq!(&engine_state.files[0].0, "test.nu");

View File

@ -2,6 +2,23 @@ use std::collections::HashMap;
use crate::{ShellError, Value, VarId}; use crate::{ShellError, Value, VarId};
/// A runtime value stack used during evaluation
///
/// A note on implementation:
///
/// We previously set up the stack in a traditional way, where stack frames had parents which would
/// represent other frames that you might return to when exiting a function.
///
/// While experimenting with blocks, we found that we needed to have closure captures of variables
/// seen outside of the blocks, so that they blocks could be run in a way that was both thread-safe
/// and followed the restrictions for closures applied to iterators. The end result left us with
/// closure-captured single stack frames that blocks could see.
///
/// Blocks make up the only scope and stack definition abstraction in Nushell. As a result, we were
/// creating closure captures at any point we wanted to have a Block value we could safely evaluate
/// in any context. This meant that the parents were going largely unused, with captured variables
/// taking their place. The end result is this, where we no longer have separate frames, but instead
/// use the Stack as a way of representing the local and closure-captured state.
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct Stack { pub struct Stack {
pub vars: HashMap<VarId, Value>, pub vars: HashMap<VarId, Value>,
@ -50,40 +67,18 @@ impl Stack {
output output
} }
// pub fn enter_scope(&self) -> Stack {
// // FIXME: VERY EXPENSIVE to clone entire stack
// let mut output = self.clone();
// output.0.push(StackFrame {
// vars: HashMap::new(),
// env_vars: HashMap::new(),
// });
// output
// }
pub fn get_env_vars(&self) -> HashMap<String, String> { pub fn get_env_vars(&self) -> HashMap<String, String> {
// let mut output = HashMap::new();
// for frame in &self.0 {
// output.extend(frame.env_vars.clone().into_iter());
// }
// output
self.env_vars.clone() self.env_vars.clone()
} }
pub fn get_env_var(&self, name: &str) -> Option<String> { pub fn get_env_var(&self, name: &str) -> Option<String> {
// for frame in self.0.iter().rev() {
if let Some(v) = self.env_vars.get(name) { if let Some(v) = self.env_vars.get(name) {
return Some(v.to_string()); return Some(v.to_string());
} }
// }
None None
} }
pub fn print_stack(&self) { pub fn print_stack(&self) {
// for frame in self.0.iter().rev() {
// println!("===frame===");
println!("vars:"); println!("vars:");
for (var, val) in &self.vars { for (var, val) in &self.vars {
println!(" {}: {:?}", var, val); println!(" {}: {:?}", var, val);
@ -92,6 +87,5 @@ impl Stack {
for (var, val) in &self.env_vars { for (var, val) in &self.env_vars {
println!(" {}: {:?}", var, val); println!(" {}: {:?}", var, val);
} }
// }
} }
} }

View File

@ -2,6 +2,24 @@ use std::sync::{atomic::AtomicBool, Arc};
use crate::{ast::PathMember, ShellError, Span, Value, ValueStream}; use crate::{ast::PathMember, ShellError, Span, Value, ValueStream};
/// The foundational abstraction for input and output to commands
///
/// This represents either a single Value or a stream of values coming into the command or leaving a command.
///
/// A note on implementation:
///
/// We've tried a few variations of this structure. Listing these below so we have a record.
///
/// * We tried always assuming a stream in Nushell. This was a great 80% solution, but it had some rough edges.
/// Namely, how do you know the difference between a single string and a list of one string. How do you know
/// when to flatten the data given to you from a data source into the stream or to keep it as an unflattened
/// list?
/// * We tried putting the stream into Value. This had some interesting properties as now commands "just worked
/// on values", but the inability to pass Value to threads as-is meant a lot of workarounds for dealing with
/// Value's stream case
/// * A balance of the two approaches is what we've landed on: Values are thread-safe to pass, and we can stream
/// them into any sources. Streams are still available to model the infinite streams approach of original
/// Nushell.
pub enum PipelineData { pub enum PipelineData {
Value(Value), Value(Value),
Stream(ValueStream), Stream(ValueStream),

View File

@ -118,7 +118,7 @@ fn main() -> Result<()> {
(output, working_set.render()) (output, working_set.render())
}; };
EngineState::merge_delta(&mut engine_state, delta); engine_state.merge_delta(delta);
let mut stack = nu_protocol::engine::Stack::new(); let mut stack = nu_protocol::engine::Stack::new();
@ -347,7 +347,7 @@ fn eval_source(
(output, working_set.render()) (output, working_set.render())
}; };
EngineState::merge_delta(engine_state, delta); engine_state.merge_delta(delta);
match eval_block(engine_state, stack, &block, PipelineData::new()) { match eval_block(engine_state, stack, &block, PipelineData::new()) {
Ok(pipeline_data) => { Ok(pipeline_data) => {