ls now collects metadata in a separate thread (#14627)

Closes #6174

# Description

This PR aims to improve the performance of `ls` within large
directories. `ls` now delegates the metadata collection to
a thread in its thread pool.

Before:

![image](https://github.com/user-attachments/assets/1967ab78-177c-485f-9b2f-f9d625678171)

Now:

![image](https://github.com/user-attachments/assets/fc215d0a-4b26-4791-a3a1-77cecff133e2)

# User-Facing Changes

If an error occurs while file metadata is being collected in another
thread, the `ls` command now notifies the user about this error by
sending an error value through a channel (which then gets collected into
an iterator and shown to the user later on).

However, if an error occurs _while_ sending this error value to the
channel (i.e the resulting value iterator has been dropped), then the
user is not notified of this error. I think this behavior is acceptable,
since behavior only occurs when the `ls` pipeline has been dropped and
the user is no longer interested in output from `ls`.

# Tests + Formatting

I do not know if it is a good idea to test this performance with
`timeit`, since it can be unreliable.
This commit is contained in:
Renan Ribeiro 2024-12-25 10:36:02 -03:00 committed by GitHub
parent 6ebc0fc3ff
commit 81baf53814
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -316,6 +316,7 @@ fn ls_for_one_pattern(
}; };
let hidden_dir_specified = is_hidden_dir(pattern_arg.as_ref()); let hidden_dir_specified = is_hidden_dir(pattern_arg.as_ref());
let path = pattern_arg.into_spanned(p_tag); let path = pattern_arg.into_spanned(p_tag);
let (prefix, paths) = if just_read_dir { let (prefix, paths) = if just_read_dir {
let expanded = nu_path::expand_path_with(path.item.as_ref(), &cwd, path.item.is_expand()); let expanded = nu_path::expand_path_with(path.item.as_ref(), &cwd, path.item.is_expand());
@ -358,40 +359,42 @@ fn ls_for_one_pattern(
}; };
pool.install(|| { pool.install(|| {
paths_peek rayon::spawn(move || {
.par_bridge() let result = paths_peek
.filter_map(move |x| match x { .par_bridge()
Ok(path) => { .filter_map(move |x| match x {
let metadata = match std::fs::symlink_metadata(&path) { Ok(path) => {
Ok(metadata) => Some(metadata), let metadata = match std::fs::symlink_metadata(&path) {
Err(_) => None, Ok(metadata) => Some(metadata),
}; Err(_) => None,
let hidden_dir_clone = Arc::clone(&hidden_dirs); };
let mut hidden_dir_mutex = hidden_dir_clone let hidden_dir_clone = Arc::clone(&hidden_dirs);
.lock() let mut hidden_dir_mutex = hidden_dir_clone
.expect("Unable to acquire lock for hidden_dirs"); .lock()
if path_contains_hidden_folder(&path, &hidden_dir_mutex) { .expect("Unable to acquire lock for hidden_dirs");
return None; if path_contains_hidden_folder(&path, &hidden_dir_mutex) {
} return None;
if !all && !hidden_dir_specified && is_hidden_dir(&path) {
if path.is_dir() {
hidden_dir_mutex.push(path);
drop(hidden_dir_mutex);
} }
return None;
}
let display_name = if short_names { if !all && !hidden_dir_specified && is_hidden_dir(&path) {
path.file_name().map(|os| os.to_string_lossy().to_string()) if path.is_dir() {
} else if full_paths || absolute_path { hidden_dir_mutex.push(path);
Some(path.to_string_lossy().to_string()) drop(hidden_dir_mutex);
} else if let Some(prefix) = &prefix { }
if let Ok(remainder) = path.strip_prefix(prefix) { return None;
if directory { }
// When the path is the same as the cwd, path_diff should be "."
let path_diff = let display_name = if short_names {
if let Some(path_diff_not_dot) = diff_paths(&path, &cwd) { path.file_name().map(|os| os.to_string_lossy().to_string())
} else if full_paths || absolute_path {
Some(path.to_string_lossy().to_string())
} else if let Some(prefix) = &prefix {
if let Ok(remainder) = path.strip_prefix(prefix) {
if directory {
// When the path is the same as the cwd, path_diff should be "."
let path_diff = if let Some(path_diff_not_dot) =
diff_paths(&path, &cwd)
{
let path_diff_not_dot = path_diff_not_dot.to_string_lossy(); let path_diff_not_dot = path_diff_not_dot.to_string_lossy();
if path_diff_not_dot.is_empty() { if path_diff_not_dot.is_empty() {
".".to_string() ".".to_string()
@ -402,70 +405,75 @@ fn ls_for_one_pattern(
path.to_string_lossy().to_string() path.to_string_lossy().to_string()
}; };
Some(path_diff) Some(path_diff)
} else {
let new_prefix = if let Some(pfx) = diff_paths(prefix, &cwd) {
pfx
} else { } else {
prefix.to_path_buf() let new_prefix = if let Some(pfx) = diff_paths(prefix, &cwd) {
}; pfx
} else {
prefix.to_path_buf()
};
Some(new_prefix.join(remainder).to_string_lossy().to_string()) Some(new_prefix.join(remainder).to_string_lossy().to_string())
}
} else {
Some(path.to_string_lossy().to_string())
} }
} else { } else {
Some(path.to_string_lossy().to_string()) Some(path.to_string_lossy().to_string())
} }
} else { .ok_or_else(|| ShellError::GenericError {
Some(path.to_string_lossy().to_string()) error: format!("Invalid file name: {:}", path.to_string_lossy()),
msg: "invalid file name".into(),
span: Some(call_span),
help: None,
inner: vec![],
});
match display_name {
Ok(name) => {
let entry = dir_entry_dict(
&path,
&name,
metadata.as_ref(),
call_span,
long,
du,
&signals_clone,
use_mime_type,
args.full_paths,
);
match entry {
Ok(value) => Some(value),
Err(err) => Some(Value::error(err, call_span)),
}
}
Err(err) => Some(Value::error(err, call_span)),
}
} }
.ok_or_else(|| ShellError::GenericError { Err(err) => Some(Value::error(err, call_span)),
error: format!("Invalid file name: {:}", path.to_string_lossy()), })
msg: "invalid file name".into(), .try_for_each(|stream| {
tx.send(stream).map_err(|e| ShellError::GenericError {
error: "Error streaming data".into(),
msg: e.to_string(),
span: Some(call_span), span: Some(call_span),
help: None, help: None,
inner: vec![], inner: vec![],
}); })
})
match display_name { .map_err(|err| ShellError::GenericError {
Ok(name) => { error: "Unable to create a rayon pool".into(),
let entry = dir_entry_dict( msg: err.to_string(),
&path,
&name,
metadata.as_ref(),
call_span,
long,
du,
&signals_clone,
use_mime_type,
args.full_paths,
);
match entry {
Ok(value) => Some(value),
Err(err) => Some(Value::error(err, call_span)),
}
}
Err(err) => Some(Value::error(err, call_span)),
}
}
Err(err) => Some(Value::error(err, call_span)),
})
.try_for_each(|stream| {
tx.send(stream).map_err(|e| ShellError::GenericError {
error: "Error streaming data".into(),
msg: e.to_string(),
span: Some(call_span), span: Some(call_span),
help: None, help: None,
inner: vec![], inner: vec![],
}) });
})
}) if let Err(error) = result {
.map_err(|err| ShellError::GenericError { let _ = tx.send(Value::error(error, call_span));
error: "Unable to create a rayon pool".into(), }
msg: err.to_string(), });
span: Some(call_span), });
help: None,
inner: vec![],
})?;
Ok(rx Ok(rx
.into_iter() .into_iter()