From 81baf53814f586cd236f87a44b55483a39c640ad Mon Sep 17 00:00:00 2001 From: Renan Ribeiro <55855728+cosineblast@users.noreply.github.com> Date: Wed, 25 Dec 2024 10:36:02 -0300 Subject: [PATCH] `ls` now collects metadata in a separate thread (#14627) Closes #6174 # Description This PR aims to improve the performance of `ls` within large directories. `ls` now delegates the metadata collection to a thread in its thread pool. Before: ![image](https://github.com/user-attachments/assets/1967ab78-177c-485f-9b2f-f9d625678171) Now: ![image](https://github.com/user-attachments/assets/fc215d0a-4b26-4791-a3a1-77cecff133e2) # User-Facing Changes If an error occurs while file metadata is being collected in another thread, the `ls` command now notifies the user about this error by sending an error value through a channel (which then gets collected into an iterator and shown to the user later on). However, if an error occurs _while_ sending this error value to the channel (i.e the resulting value iterator has been dropped), then the user is not notified of this error. I think this behavior is acceptable, since behavior only occurs when the `ls` pipeline has been dropped and the user is no longer interested in output from `ls`. # Tests + Formatting I do not know if it is a good idea to test this performance with `timeit`, since it can be unreliable. --- crates/nu-command/src/filesystem/ls.rs | 174 +++++++++++++------------ 1 file changed, 91 insertions(+), 83 deletions(-) diff --git a/crates/nu-command/src/filesystem/ls.rs b/crates/nu-command/src/filesystem/ls.rs index ef16199958..2df34af855 100644 --- a/crates/nu-command/src/filesystem/ls.rs +++ b/crates/nu-command/src/filesystem/ls.rs @@ -316,6 +316,7 @@ fn ls_for_one_pattern( }; let hidden_dir_specified = is_hidden_dir(pattern_arg.as_ref()); + let path = pattern_arg.into_spanned(p_tag); let (prefix, paths) = if just_read_dir { let expanded = nu_path::expand_path_with(path.item.as_ref(), &cwd, path.item.is_expand()); @@ -358,40 +359,42 @@ fn ls_for_one_pattern( }; pool.install(|| { - paths_peek - .par_bridge() - .filter_map(move |x| match x { - Ok(path) => { - let metadata = match std::fs::symlink_metadata(&path) { - Ok(metadata) => Some(metadata), - Err(_) => None, - }; - let hidden_dir_clone = Arc::clone(&hidden_dirs); - let mut hidden_dir_mutex = hidden_dir_clone - .lock() - .expect("Unable to acquire lock for hidden_dirs"); - if path_contains_hidden_folder(&path, &hidden_dir_mutex) { - return None; - } - - if !all && !hidden_dir_specified && is_hidden_dir(&path) { - if path.is_dir() { - hidden_dir_mutex.push(path); - drop(hidden_dir_mutex); + rayon::spawn(move || { + let result = paths_peek + .par_bridge() + .filter_map(move |x| match x { + Ok(path) => { + let metadata = match std::fs::symlink_metadata(&path) { + Ok(metadata) => Some(metadata), + Err(_) => None, + }; + let hidden_dir_clone = Arc::clone(&hidden_dirs); + let mut hidden_dir_mutex = hidden_dir_clone + .lock() + .expect("Unable to acquire lock for hidden_dirs"); + if path_contains_hidden_folder(&path, &hidden_dir_mutex) { + return None; } - return None; - } - let display_name = if short_names { - path.file_name().map(|os| os.to_string_lossy().to_string()) - } else if full_paths || absolute_path { - Some(path.to_string_lossy().to_string()) - } else if let Some(prefix) = &prefix { - if let Ok(remainder) = path.strip_prefix(prefix) { - if directory { - // When the path is the same as the cwd, path_diff should be "." - let path_diff = - if let Some(path_diff_not_dot) = diff_paths(&path, &cwd) { + if !all && !hidden_dir_specified && is_hidden_dir(&path) { + if path.is_dir() { + hidden_dir_mutex.push(path); + drop(hidden_dir_mutex); + } + return None; + } + + let display_name = if short_names { + path.file_name().map(|os| os.to_string_lossy().to_string()) + } else if full_paths || absolute_path { + Some(path.to_string_lossy().to_string()) + } else if let Some(prefix) = &prefix { + if let Ok(remainder) = path.strip_prefix(prefix) { + if directory { + // When the path is the same as the cwd, path_diff should be "." + let path_diff = if let Some(path_diff_not_dot) = + diff_paths(&path, &cwd) + { let path_diff_not_dot = path_diff_not_dot.to_string_lossy(); if path_diff_not_dot.is_empty() { ".".to_string() @@ -402,70 +405,75 @@ fn ls_for_one_pattern( path.to_string_lossy().to_string() }; - Some(path_diff) - } else { - let new_prefix = if let Some(pfx) = diff_paths(prefix, &cwd) { - pfx + Some(path_diff) } else { - prefix.to_path_buf() - }; + let new_prefix = if let Some(pfx) = diff_paths(prefix, &cwd) { + pfx + } else { + prefix.to_path_buf() + }; - Some(new_prefix.join(remainder).to_string_lossy().to_string()) + Some(new_prefix.join(remainder).to_string_lossy().to_string()) + } + } else { + Some(path.to_string_lossy().to_string()) } } else { Some(path.to_string_lossy().to_string()) } - } else { - Some(path.to_string_lossy().to_string()) + .ok_or_else(|| ShellError::GenericError { + error: format!("Invalid file name: {:}", path.to_string_lossy()), + msg: "invalid file name".into(), + span: Some(call_span), + help: None, + inner: vec![], + }); + + match display_name { + Ok(name) => { + let entry = dir_entry_dict( + &path, + &name, + metadata.as_ref(), + call_span, + long, + du, + &signals_clone, + use_mime_type, + args.full_paths, + ); + match entry { + Ok(value) => Some(value), + Err(err) => Some(Value::error(err, call_span)), + } + } + Err(err) => Some(Value::error(err, call_span)), + } } - .ok_or_else(|| ShellError::GenericError { - error: format!("Invalid file name: {:}", path.to_string_lossy()), - msg: "invalid file name".into(), + Err(err) => Some(Value::error(err, call_span)), + }) + .try_for_each(|stream| { + tx.send(stream).map_err(|e| ShellError::GenericError { + error: "Error streaming data".into(), + msg: e.to_string(), span: Some(call_span), help: None, inner: vec![], - }); - - match display_name { - Ok(name) => { - let entry = dir_entry_dict( - &path, - &name, - metadata.as_ref(), - call_span, - long, - du, - &signals_clone, - use_mime_type, - args.full_paths, - ); - match entry { - Ok(value) => Some(value), - Err(err) => Some(Value::error(err, call_span)), - } - } - Err(err) => Some(Value::error(err, call_span)), - } - } - Err(err) => Some(Value::error(err, call_span)), - }) - .try_for_each(|stream| { - tx.send(stream).map_err(|e| ShellError::GenericError { - error: "Error streaming data".into(), - msg: e.to_string(), + }) + }) + .map_err(|err| ShellError::GenericError { + error: "Unable to create a rayon pool".into(), + msg: err.to_string(), span: Some(call_span), help: None, inner: vec![], - }) - }) - }) - .map_err(|err| ShellError::GenericError { - error: "Unable to create a rayon pool".into(), - msg: err.to_string(), - span: Some(call_span), - help: None, - inner: vec![], - })?; + }); + + if let Err(error) = result { + let _ = tx.send(Value::error(error, call_span)); + } + }); + }); Ok(rx .into_iter()