Inter-Job direct messaging (#15253)

# Description

This PR implements an experimental inter-job communication model,
through direct message passing, aka "mail"ing or "dm"ing:



- `job send <id>`: Sends a message to the job with the given id; the root
job has id 0. Messages are stored in the recipient's "mailbox".
- `job recv`: Returns a stored message; blocks if the mailbox is empty.
- `job flush`: Clears all messages from the mailbox.

Additionally, messages can be sent with a numeric tag, which can then be
filtered with `job recv --tag`.
This is useful for spawning jobs and receiving messages specifically
from those jobs.

This PR is mostly a proof of concept for what inter-job communication
could look like, so people can provide feedback and suggestions.

Closes  #15199

May close #15220 since now jobs can access their own id.

# User-Facing Changes

Adds the `job id`, `job send`, `job recv`, and `job flush` commands.

# Tests + Formatting

[X] TODO:  Implement tests
[X] Consider rewriting some of the job-related tests to use this, to
make them a bit less fragile.

# After Submitting
This commit is contained in:
Renan Ribeiro
2025-04-26 12:24:35 -03:00
committed by GitHub
parent 0389815137
commit 2d868323b6
14 changed files with 853 additions and 47 deletions

View File

@@ -8,9 +8,9 @@ use crate::{
},
eval_const::create_nu_constant,
shell_error::io::IoError,
BlockId, Category, Config, DeclId, FileId, GetSpan, Handlers, HistoryConfig, Module, ModuleId,
OverlayId, ShellError, SignalAction, Signals, Signature, Span, SpanId, Type, Value, VarId,
VirtualPathId,
BlockId, Category, Config, DeclId, FileId, GetSpan, Handlers, HistoryConfig, JobId, Module,
ModuleId, OverlayId, ShellError, SignalAction, Signals, Signature, Span, SpanId, Type, Value,
VarId, VirtualPathId,
};
use fancy_regex::Regex;
use lru::LruCache;
@@ -22,6 +22,8 @@ use std::{
path::PathBuf,
sync::{
atomic::{AtomicBool, AtomicU32, Ordering},
mpsc::channel,
mpsc::Sender,
Arc, Mutex, MutexGuard, PoisonError,
},
};
@@ -31,7 +33,7 @@ type PoisonDebuggerError<'a> = PoisonError<MutexGuard<'a, Box<dyn Debugger>>>;
#[cfg(feature = "plugin")]
use crate::{PluginRegistryFile, PluginRegistryItem, RegisteredPlugin};
use super::{Jobs, ThreadJob};
use super::{CurrentJob, Jobs, Mail, Mailbox, ThreadJob};
#[derive(Clone, Debug)]
pub enum VirtualPath {
@@ -117,7 +119,9 @@ pub struct EngineState {
pub jobs: Arc<Mutex<Jobs>>,
// The job being executed with this engine state, or None if main thread
pub current_thread_job: Option<ThreadJob>,
pub current_job: CurrentJob,
pub root_job_sender: Sender<Mail>,
// When there are background jobs running, the interactive behavior of `exit` changes depending on
// the value of this flag:
@@ -141,6 +145,8 @@ pub const UNKNOWN_SPAN_ID: SpanId = SpanId::new(0);
impl EngineState {
pub fn new() -> Self {
let (send, recv) = channel::<Mail>();
Self {
files: vec![],
virtual_paths: vec![],
@@ -196,7 +202,12 @@ impl EngineState {
is_debugging: IsDebugging::new(false),
debugger: Arc::new(Mutex::new(Box::new(NoopDebugger))),
jobs: Arc::new(Mutex::new(Jobs::default())),
current_thread_job: None,
current_job: CurrentJob {
id: JobId::new(0),
background_thread_job: None,
mailbox: Arc::new(Mutex::new(Mailbox::new(recv))),
},
root_job_sender: send,
exit_warning_given: Arc::new(AtomicBool::new(false)),
}
}
@@ -1081,7 +1092,12 @@ impl EngineState {
// Determines whether the current state is being held by a background job
pub fn is_background_job(&self) -> bool {
self.current_thread_job.is_some()
self.current_job.background_thread_job.is_some()
}
/// Gets the thread job entry.
///
/// Returns a shared reference to `current_job.background_thread_job`, or `None`
/// when that field is unset (the same condition under which
/// `is_background_job` reports `false`).
pub fn current_thread_job(&self) -> Option<&ThreadJob> {
// `as_ref` borrows the stored job instead of cloning it; callers that need an
// owned copy can call `.cloned()` on the result.
self.current_job.background_thread_job.as_ref()
}
}

View File

@@ -1,11 +1,17 @@
use std::{
collections::{HashMap, HashSet},
sync::{Arc, Mutex},
collections::{BTreeMap, BTreeSet, HashMap, HashSet},
sync::{
mpsc::{Receiver, RecvTimeoutError, Sender, TryRecvError},
Arc, Mutex,
},
};
#[cfg(not(target_family = "wasm"))]
use std::time::{Duration, Instant};
use nu_system::{kill_by_pid, UnfreezeHandle};
use crate::Signals;
use crate::{PipelineData, Signals};
use crate::JobId;
@@ -139,13 +145,15 @@ pub struct ThreadJob {
signals: Signals,
pids: Arc<Mutex<HashSet<u32>>>,
tag: Option<String>,
pub sender: Sender<Mail>,
}
impl ThreadJob {
pub fn new(signals: Signals, tag: Option<String>) -> Self {
pub fn new(signals: Signals, tag: Option<String>, sender: Sender<Mail>) -> Self {
ThreadJob {
signals,
pids: Arc::new(Mutex::new(HashSet::default())),
sender,
tag,
}
}
@@ -238,3 +246,160 @@ impl FrozenJob {
}
}
}
/// Stores the information about the background job currently being executed by this thread, if any
///
/// Bundles the job's id, its optional background thread job entry and the
/// mailbox from which it reads incoming messages.
#[derive(Clone)]
pub struct CurrentJob {
/// Id of the job this state belongs to.
pub id: JobId,
/// The background thread job associated with this thread.
/// If `None`, it indicates this thread is currently the main job.
pub background_thread_job: Option<ThreadJob>,
/// Mailbox holding the messages sent to this job.
///
/// Note: although the mailbox is wrapped in a `Mutex`, it is only ever
/// accessed by the current job's threads.
pub mailbox: Arc<Mutex<Mailbox>>,
}
/// The storage for a job's unread messages.
///
/// Messages first arrive over the receiving end of an mpsc channel. A message
/// that is taken off the channel but rejected by the tag filter of the read in
/// progress is parked in an `IgnoredMail` store so that a later read can still
/// return it.
pub struct Mailbox {
    receiver: Receiver<Mail>,
    ignored_mail: IgnoredMail,
}

impl Mailbox {
    /// Creates a mailbox fed by `receiver`, with no parked messages.
    pub fn new(receiver: Receiver<Mail>) -> Self {
        Self {
            ignored_mail: IgnoredMail::default(),
            receiver,
        }
    }

    /// Returns the next message accepted by `filter_tag`, waiting for at most
    /// roughly `timeout`.
    ///
    /// Parked messages are consulted first. Otherwise messages are read from
    /// the channel; with `filter_tag` set, a message is accepted only if its
    /// tag equals the filter, and every rejected message is parked. With no
    /// filter, the first message is accepted.
    ///
    /// # Errors
    ///
    /// Propagates the error of the underlying `Receiver::recv_timeout`, and
    /// returns `RecvTimeoutError::Timeout` once the time budget is used up.
    /// Note that a zero `timeout` never polls the channel: unless a parked
    /// message matches, the result is `Timeout`.
    #[cfg(not(target_family = "wasm"))]
    pub fn recv_timeout(
        &mut self,
        filter_tag: Option<FilterTag>,
        timeout: Duration,
    ) -> Result<PipelineData, RecvTimeoutError> {
        if let Some(parked) = self.ignored_mail.pop(filter_tag) {
            return Ok(parked);
        }

        let mut elapsed = Duration::ZERO;
        let mut last_checkpoint = Instant::now();

        while elapsed < timeout {
            // The loop condition guarantees this subtraction cannot underflow.
            let remaining = timeout - elapsed;
            let (tag, payload) = self.receiver.recv_timeout(remaining)?;

            if filter_tag.is_some() && filter_tag != tag {
                // Not what the caller asked for: keep it for a later read and
                // charge the time spent so far against the budget.
                self.ignored_mail.add((tag, payload));
                let checkpoint = Instant::now();
                elapsed += checkpoint - last_checkpoint;
                last_checkpoint = checkpoint;
            } else {
                return Ok(payload);
            }
        }

        Err(RecvTimeoutError::Timeout)
    }

    /// Returns the next message accepted by `filter_tag` without blocking.
    ///
    /// Parked messages are consulted first; after that the channel is polled
    /// until a message is accepted, parking every rejected one.
    ///
    /// # Errors
    ///
    /// Propagates the error of the underlying `Receiver::try_recv` as soon as
    /// the channel has nothing more to hand out.
    #[cfg(not(target_family = "wasm"))]
    pub fn try_recv(
        &mut self,
        filter_tag: Option<FilterTag>,
    ) -> Result<PipelineData, TryRecvError> {
        if let Some(parked) = self.ignored_mail.pop(filter_tag) {
            return Ok(parked);
        }

        loop {
            let (tag, payload) = self.receiver.try_recv()?;

            if filter_tag.is_some() && filter_tag != tag {
                self.ignored_mail.add((tag, payload));
            } else {
                return Ok(payload);
            }
        }
    }

    /// Discards every parked message and everything currently queued in the
    /// channel.
    pub fn clear(&mut self) {
        self.ignored_mail.clear();
        // `try_iter` stops at the first failed `try_recv`, i.e. once the
        // channel has no more pending messages to yield.
        self.receiver.try_iter().for_each(drop);
    }
}
/// Holding area for messages that were received but skipped by the tag filter
/// in effect at the time.
///
/// Every stored message gets an increasing sequence number, so messages are
/// handed back in a first-in-first-out manner, either overall or among the
/// messages carrying one particular tag.
#[derive(Default)]
struct IgnoredMail {
    /// Sequence number the next stored message will receive.
    next_id: usize,
    /// All stored messages, keyed by sequence number.
    messages: BTreeMap<usize, Mail>,
    /// For each tag, the sequence numbers of the stored messages carrying it.
    by_tag: HashMap<FilterTag, BTreeSet<usize>>,
}

/// Numeric tag that can be attached to a message and used to filter reads.
pub type FilterTag = u64;

/// A message: its optional tag together with its payload.
pub type Mail = (Option<FilterTag>, PipelineData);

impl IgnoredMail {
    /// Stores `mail` behind every message stored before it.
    ///
    /// Tagged messages are additionally recorded in the per-tag index.
    pub fn add(&mut self, mail: Mail) {
        let seq = self.next_id;
        self.next_id += 1;

        if let Some(tag) = mail.0 {
            self.by_tag.entry(tag).or_default().insert(seq);
        }
        self.messages.insert(seq, mail);
    }

    /// Removes and returns the oldest stored payload.
    ///
    /// With `Some(tag)` only messages carrying exactly that tag are
    /// considered; with `None` any stored message qualifies.
    pub fn pop(&mut self, tag: Option<FilterTag>) -> Option<PipelineData> {
        match tag {
            Some(wanted) => self.pop_oldest_with_tag(wanted),
            None => self.pop_oldest(),
        }
    }

    /// Drops every stored message. The sequence counter keeps counting.
    pub fn clear(&mut self) {
        self.by_tag.clear();
        self.messages.clear();
    }

    /// Removes the oldest message regardless of tag and keeps the per-tag
    /// index in sync with the removal.
    fn pop_oldest(&mut self) -> Option<PipelineData> {
        let (seq, (tag, payload)) = self.messages.pop_first()?;

        if let Some(tag) = tag {
            let tag_exhausted = match self.by_tag.get_mut(&tag) {
                Some(seqs) => {
                    seqs.remove(&seq);
                    seqs.is_empty()
                }
                None => false,
            };
            // Do not leave empty sets behind in the index.
            if tag_exhausted {
                self.by_tag.remove(&tag);
            }
        }

        Some(payload)
    }

    /// Removes the oldest message carrying `tag`, if there is one.
    fn pop_oldest_with_tag(&mut self, tag: FilterTag) -> Option<PipelineData> {
        let seqs = self.by_tag.get_mut(&tag)?;
        let oldest = seqs.pop_first()?;
        if seqs.is_empty() {
            self.by_tag.remove(&tag);
        }

        self.messages.remove(&oldest).map(|(_, payload)| payload)
    }
}

View File

@@ -1370,7 +1370,7 @@ On Windows, this would be %USERPROFILE%\AppData\Roaming"#
#[error("Job {id} is not frozen")]
#[diagnostic(
code(nu::shell::os_disabled),
code(nu::shell::job_not_frozen),
help("You tried to unfreeze a job which is not frozen")
)]
JobNotFrozen {
@@ -1379,6 +1379,27 @@ On Windows, this would be %USERPROFILE%\AppData\Roaming"#
span: Span,
},
#[error("The job {id} is frozen")]
#[diagnostic(
code(nu::shell::job_is_frozen),
help("This operation cannot be performed because the job is frozen")
)]
JobIsFrozen {
id: usize,
#[label = "This job is frozen"]
span: Span,
},
#[error("No message was received in the requested time interval")]
#[diagnostic(
code(nu::shell::recv_timeout),
help("No message arrived within the specified time limit")
)]
RecvTimeout {
#[label = "timeout"]
span: Span,
},
#[error(transparent)]
#[diagnostic(transparent)]
ChainedError(ChainedError),

View File

@@ -194,7 +194,7 @@ impl PostWaitCallback {
child_pid: Option<u32>,
tag: Option<String>,
) -> Self {
let this_job = engine_state.current_thread_job.clone();
let this_job = engine_state.current_thread_job().cloned();
let jobs = engine_state.jobs.clone();
let is_interactive = engine_state.is_interactive;