du command (#916)

* wip on `du` command

* working
This commit is contained in:
Darren Schroeder 2022-02-03 11:35:06 -06:00 committed by GitHub
parent 0043b9da74
commit 2f0bbf5adb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 535 additions and 45 deletions

10
Cargo.lock generated
View File

@ -979,6 +979,15 @@ version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
[[package]]
name = "filesize"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "12d741e2415d4e2e5bd1c1d00409d1a8865a57892c2d689b504365655d237d43"
dependencies = [
"winapi",
]
[[package]]
name = "flatbuffers"
version = "2.0.0"
@ -2135,6 +2144,7 @@ dependencies = [
"dtparse",
"eml-parser",
"encoding_rs",
"filesize",
"glob",
"hamcrest2",
"htmlescape",

View File

@ -8,73 +8,73 @@ build = "build.rs"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
nu-ansi-term = "0.42.0"
nu-color-config = { path = "../nu-color-config" }
nu-engine = { path = "../nu-engine" }
nu-json = { path = "../nu-json" }
nu-parser = { path = "../nu-parser" }
nu-path = { path = "../nu-path" }
nu-pretty-hex = { path = "../nu-pretty-hex" }
nu-protocol = { path = "../nu-protocol" }
nu-system = { path = "../nu-system" }
nu-table = { path = "../nu-table" }
nu-term-grid = { path = "../nu-term-grid" }
nu-test-support = { path = "../nu-test-support" }
nu-parser = { path = "../nu-parser" }
nu-system = { path = "../nu-system" }
# nu-ansi-term = { path = "../nu-ansi-term" }
nu-ansi-term = "0.42.0"
nu-color-config = { path = "../nu-color-config" }
# Potential dependencies for extras
url = "2.2.1"
csv = "1.1.3"
glob = "0.3.0"
pathdiff = "0.2.1"
Inflector = "0.11"
thiserror = "1.0.29"
sysinfo = "0.22.2"
base64 = "0.13.0"
bytesize = "1.1.0"
calamine = "0.18.0"
chrono = { version = "0.4.19", features = ["serde"] }
chrono-humanize = "0.2.1"
chrono-tz = "0.6.0"
dtparse = "1.2.0"
terminal_size = "0.1.17"
indexmap = { version="1.7", features=["serde-1"] }
lscolors = { version = "0.8.0", features = ["crossterm"] }
bytesize = "1.1.0"
crossterm = "0.22.1"
csv = "1.1.3"
dialoguer = "0.9.0"
digest = "0.10.0"
dtparse = "1.2.0"
eml-parser = "0.1.0"
encoding_rs = "0.8.30"
filesize = "0.2.0"
glob = "0.3.0"
htmlescape = "0.3.1"
ical = "0.7.0"
indexmap = { version="1.7", features=["serde-1"] }
Inflector = "0.11"
itertools = "0.10.0"
lazy_static = "1.4.0"
log = "0.4.14"
lscolors = { version = "0.8.0", features = ["crossterm"] }
md5 = { package = "md-5", version = "0.10.0" }
meval = "0.2.0"
mime = "0.3.16"
num = { version = "0.4.0", optional = true }
pathdiff = "0.2.1"
quick-xml = "0.22"
rand = "0.8"
rayon = "1.5.1"
regex = "1.5.4"
titlecase = "1.1.0"
meval = "0.2.0"
serde = { version="1.0.123", features=["derive"] }
serde_yaml = "0.8.16"
serde_urlencoded = "0.7.0"
serde_ini = "0.2.0"
eml-parser = "0.1.0"
toml = "0.5.8"
itertools = "0.10.0"
ical = "0.7.0"
calamine = "0.18.0"
reqwest = {version = "0.11", features = ["blocking"] }
roxmltree = "0.14.0"
rand = "0.8"
rust-embed = "6.3.0"
serde = { version="1.0.123", features=["derive"] }
serde_ini = "0.2.0"
serde_urlencoded = "0.7.0"
serde_yaml = "0.8.16"
sha2 = "0.10.0"
shadow-rs = "0.8.1"
strip-ansi-escapes = "0.1.1"
sysinfo = "0.22.2"
terminal_size = "0.1.17"
thiserror = "1.0.29"
titlecase = "1.1.0"
toml = "0.5.8"
trash = { version = "2.0.2", optional = true }
unicode-segmentation = "1.8.0"
url = "2.2.1"
uuid = { version = "0.8.2", features = ["v4"] }
htmlescape = "0.3.1"
zip = { version="0.5.9", optional=true }
lazy_static = "1.4.0"
strip-ansi-escapes = "0.1.1"
crossterm = "0.22.1"
shadow-rs = "0.8.1"
quick-xml = "0.22"
digest = "0.10.0"
md5 = { package = "md-5", version = "0.10.0" }
sha2 = "0.10.0"
base64 = "0.13.0"
encoding_rs = "0.8.30"
num = { version = "0.4.0", optional = true }
reqwest = {version = "0.11", features = ["blocking"] }
mime = "0.3.16"
log = "0.4.14"
which = { version = "4.2.2", optional = true }
zip = { version="0.5.9", optional=true }
[target.'cfg(unix)'.dependencies]
umask = "1.0.0"

View File

@ -31,6 +31,7 @@ pub fn create_default_context(cwd: impl AsRef<Path>) -> EngineState {
DefEnv,
Describe,
Do,
Du,
Echo,
ExportCommand,
ExportDef,

View File

@ -0,0 +1,292 @@
use filesize::file_real_size_fast;
use glob::Pattern;
use nu_protocol::{ShellError, Span, Value};
use std::path::PathBuf;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
/// Options that control how a directory tree is measured.
#[derive(Debug, Clone)]
pub struct DirBuilder {
/// Span attached to every entry produced by the scan.
pub tag: Span,
/// When set, files whose apparent size is below this value are not counted.
pub min: Option<u64>,
/// Measure files through symlinks (`metadata`) instead of the link itself (`symlink_metadata`).
pub deref: bool,
/// Files whose path matches this glob pattern are skipped.
pub exclude: Option<Pattern>,
/// Record individual files in addition to adding them to the directory totals.
pub all: bool,
}
impl DirBuilder {
pub fn new(
tag: Span,
min: Option<u64>,
deref: bool,
exclude: Option<Pattern>,
all: bool,
) -> DirBuilder {
DirBuilder {
tag,
min,
deref,
exclude,
all,
}
}
}
/// Accumulated size information for one directory and the entries scanned beneath it.
#[derive(Debug, Clone)]
pub struct DirInfo {
// Subdirectories that were scanned.
dirs: Vec<DirInfo>,
// Files recorded individually; only filled when `DirBuilder::all` is set.
files: Vec<FileInfo>,
// Errors hit while scanning; collected here but not included in the `Value` conversion.
errors: Vec<ShellError>,
// Apparent size: the directory entry's own length plus every counted child.
size: u64,
// Physical size from `file_real_size_fast`, summed over the directory and counted children.
blocks: u64,
// Path of this directory.
path: PathBuf,
// Span used for the values produced from this entry.
tag: Span,
}
/// Size information for a single file.
#[derive(Debug, Clone)]
pub struct FileInfo {
// Path of the file.
path: PathBuf,
// Apparent size as reported by the file's metadata (`len()`).
size: u64,
// Physical size from `file_real_size_fast`; `None` when that call failed.
blocks: Option<u64>,
// Span used for the values produced from this entry.
tag: Span,
}
impl FileInfo {
    /// Reads the metadata of `path` and records its apparent and physical size.
    ///
    /// With `deref` set, the metadata of a symlink's target is used; otherwise
    /// the link itself is measured. The physical size is stored as `None` when
    /// `file_real_size_fast` fails.
    ///
    /// # Errors
    ///
    /// Returns the I/O error from the metadata lookup, converted into a
    /// `ShellError`.
    pub fn new(path: impl Into<PathBuf>, deref: bool, tag: Span) -> Result<Self, ShellError> {
        let path = path.into();

        let lookup = if deref {
            std::fs::metadata(&path)
        } else {
            std::fs::symlink_metadata(&path)
        };
        let metadata = lookup?;

        let blocks = file_real_size_fast(&path, &metadata).ok();

        Ok(Self {
            path,
            blocks,
            size: metadata.len(),
            tag,
        })
    }
}
impl DirInfo {
    /// Scans the directory at `path` and accumulates the sizes of its contents.
    ///
    /// * `params` - scan options (symlink handling, exclusion pattern, minimum
    ///   file size, whether to record individual files).
    /// * `depth` - remaining recursion budget; `None` means unlimited. With
    ///   `Some(0)` subdirectories are neither entered nor counted.
    /// * `ctrl_c` - optional interrupt flag. When it is set the scan stops
    ///   early and the totals gathered so far are returned. When no flag is
    ///   supplied the whole directory is scanned.
    ///
    /// I/O failures never abort the scan; they are collected in the returned
    /// value's error list.
    pub fn new(
        path: impl Into<PathBuf>,
        params: &DirBuilder,
        depth: Option<u64>,
        ctrl_c: Option<Arc<AtomicBool>>,
    ) -> Self {
        let path = path.into();

        let mut s = Self {
            dirs: Vec::new(),
            errors: Vec::new(),
            files: Vec::new(),
            size: 0,
            blocks: 0,
            tag: params.tag,
            path,
        };

        match std::fs::metadata(&s.path) {
            Ok(d) => {
                s.size = d.len(); // dir entry size
                s.blocks = file_real_size_fast(&s.path, &d).unwrap_or(0);
            }
            Err(e) => s = s.add_error(e.into()),
        };

        match std::fs::read_dir(&s.path) {
            Ok(d) => {
                for f in d {
                    // Only an interrupt flag that is present AND set stops the
                    // scan. A missing flag must not skip the entry.
                    let interrupted = ctrl_c
                        .as_ref()
                        .map_or(false, |cc| cc.load(Ordering::SeqCst));
                    if interrupted {
                        break;
                    }

                    match f {
                        Ok(i) => match i.file_type() {
                            Ok(t) if t.is_dir() => {
                                s = s.add_dir(i.path(), depth, params, ctrl_c.clone())
                            }
                            Ok(_t) => s = s.add_file(i.path(), params),
                            Err(e) => s = s.add_error(e.into()),
                        },
                        Err(e) => s = s.add_error(e.into()),
                    }
                }
            }
            Err(e) => s = s.add_error(e.into()),
        }

        s
    }

    /// Recurses into the subdirectory `path` and folds its totals into `self`.
    ///
    /// Each level consumes one unit of `depth`. Once the budget is exhausted
    /// the subdirectory is left out entirely.
    fn add_dir(
        mut self,
        path: impl Into<PathBuf>,
        mut depth: Option<u64>,
        params: &DirBuilder,
        ctrl_c: Option<Arc<AtomicBool>>,
    ) -> Self {
        if let Some(current) = depth {
            if let Some(new) = current.checked_sub(1) {
                depth = Some(new);
            } else {
                return self;
            }
        }

        let d = DirInfo::new(path, params, depth, ctrl_c);
        self.size += d.size;
        self.blocks += d.blocks;
        self.dirs.push(d);
        self
    }

    /// Adds the file `f` to the totals unless it matches the exclusion pattern
    /// or is smaller than the configured minimum size.
    ///
    /// The file itself is only recorded when `params.all` is set. A failure to
    /// read its metadata is stored as an error.
    fn add_file(mut self, f: impl Into<PathBuf>, params: &DirBuilder) -> Self {
        let f = f.into();
        let include = params
            .exclude
            .as_ref()
            .map_or(true, |x| !x.matches_path(&f));
        if include {
            match FileInfo::new(f, params.deref, self.tag) {
                Ok(file) => {
                    let inc = params.min.map_or(true, |s| file.size >= s);
                    if inc {
                        self.size += file.size;
                        self.blocks += file.blocks.unwrap_or(0);
                        if params.all {
                            self.files.push(file);
                        }
                    }
                }
                Err(e) => self = self.add_error(e),
            }
        }
        self
    }

    /// Records an error encountered during the scan.
    fn add_error(mut self, e: ShellError) -> Self {
        self.errors.push(e);
        self
    }

    /// Returns the accumulated apparent size.
    pub fn get_size(&self) -> u64 {
        self.size
    }
}
impl From<DirInfo> for Value {
    /// Converts the directory summary into a record with the columns `path`,
    /// `apparent`, `physical`, `directories` and `files`.
    ///
    /// Errors collected during the scan are not part of the record.
    fn from(d: DirInfo) -> Self {
        let span = d.tag;

        let cols = vec![
            "path".into(),
            "apparent".into(),
            "physical".into(),
            "directories".into(),
            "files".into(),
        ];

        let vals = vec![
            Value::string(d.path.display().to_string(), span),
            Value::Filesize {
                val: d.size as i64,
                span,
            },
            Value::Filesize {
                val: d.blocks as i64,
                span,
            },
            value_from_vec(d.dirs, &span),
            value_from_vec(d.files, &span),
        ];

        Value::Record { cols, vals, span }
    }
}
impl From<FileInfo> for Value {
    /// Converts the file summary into a record with the same columns as a
    /// directory record.
    ///
    /// `directories` and `files` are always nothing, and a missing physical
    /// size is reported as 0.
    fn from(f: FileInfo) -> Self {
        let span = f.tag;
        let physical = f.blocks.map_or(0i64, |b| b as i64);

        let cols = vec![
            "path".into(),
            "apparent".into(),
            "physical".into(),
            "directories".into(),
            "files".into(),
        ];

        let vals = vec![
            Value::string(f.path.display().to_string(), span),
            Value::Filesize {
                val: f.size as i64,
                span,
            },
            Value::Filesize {
                val: physical,
                span,
            },
            Value::nothing(Span::test_data()),
            Value::nothing(Span::test_data()),
        ];

        Value::Record { cols, vals, span }
    }
}
/// Converts `vec` into a list value carrying the span `tag`.
///
/// An empty vector becomes a nothing value instead of an empty list.
fn value_from_vec<V>(vec: Vec<V>, tag: &Span) -> Value
where
    V: Into<Value>,
{
    let span = *tag;

    if vec.is_empty() {
        return Value::nothing(span);
    }

    let vals: Vec<Value> = vec.into_iter().map(Into::into).collect();
    Value::List { vals, span }
}

View File

@ -0,0 +1,183 @@
use crate::{DirBuilder, DirInfo, FileInfo};
use glob::{GlobError, MatchOptions, Pattern};
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, IntoInterruptiblePipelineData, PipelineData, ShellError, Signature, Spanned,
SyntaxShape, Value,
};
use serde::Deserialize;
use std::path::PathBuf;
/// Match options used when expanding the `path` argument of `du` with `glob_with`.
const GLOB_PARAMS: MatchOptions = MatchOptions {
case_sensitive: true,
// Per the glob crate docs: wildcards do not match a path separator.
require_literal_separator: true,
// Per the glob crate docs: wildcards may match names that start with a dot.
require_literal_leading_dot: false,
};
/// The `du` command: reports disk usage of the given paths.
#[derive(Clone)]
pub struct Du;
/// Arguments of the `du` command as collected from the call.
#[derive(Deserialize, Clone, Debug)]
pub struct DuArgs {
// Optional glob pattern selecting the starting paths.
path: Option<Spanned<PathBuf>>,
// `--all`: output file sizes as well as directory sizes.
all: bool,
// `--deref`: dereference symlinks to their targets for size.
deref: bool,
// `--exclude`: glob pattern of file names to leave out.
exclude: Option<Spanned<String>>,
// `--max-depth`: directory recursion limit.
#[serde(rename = "max-depth")]
max_depth: Option<i64>,
// `--min-size`: exclude files below this size.
#[serde(rename = "min-size")]
min_size: Option<i64>,
}
impl Command for Du {
    /// Name under which the command is registered.
    fn name(&self) -> &str {
        "du"
    }

    /// One-line description shown in the help output.
    fn usage(&self) -> &str {
        "Find disk usage sizes of specified items."
    }

    /// Declares the optional `path` glob and the `all`, `deref`, `exclude`,
    /// `max-depth` and `min-size` flags.
    fn signature(&self) -> Signature {
        Signature::build("du")
            .optional("path", SyntaxShape::GlobPattern, "starting directory")
            .switch(
                "all",
                "Output file sizes as well as directory sizes",
                Some('a'),
            )
            .switch(
                "deref",
                "Dereference symlinks to their targets for size",
                Some('r'),
            )
            .named(
                "exclude",
                SyntaxShape::GlobPattern,
                "Exclude these file names",
                Some('x'),
            )
            .named(
                "max-depth",
                SyntaxShape::Int,
                "Directory recursion limit",
                Some('d'),
            )
            .named(
                "min-size",
                SyntaxShape::Int,
                "Exclude files below this size",
                Some('m'),
            )
            .category(Category::Core)
    }

    /// Expands the `path` glob (default `*`) and emits one record per match.
    ///
    /// Directories are scanned recursively with `DirInfo`. Plain files are
    /// only listed when `--all` is given. Entries that cannot be read are
    /// emitted as error values instead of aborting the command.
    ///
    /// # Errors
    ///
    /// Returns a `ShellError` when an argument cannot be evaluated, when the
    /// `path` or `--exclude` glob is malformed, when `path` is not valid
    /// UTF-8, or when `--max-depth` / `--min-size` is negative.
    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        _input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        let tag = call.head;

        let args = DuArgs {
            path: call.opt(engine_state, stack, 0)?,
            all: call.has_flag("all"),
            deref: call.has_flag("deref"),
            exclude: call.get_flag(engine_state, stack, "exclude")?,
            max_depth: call.get_flag(engine_state, stack, "max-depth")?,
            // Must match the name declared in `signature` ("min-size").
            min_size: call.get_flag(engine_state, stack, "min-size")?,
        };

        // Negative limits are rejected instead of panicking or silently
        // wrapping around to a huge unsigned value.
        let to_unsigned = |value: Option<i64>, flag: &str| -> Result<Option<u64>, ShellError> {
            value
                .map(|n| {
                    u64::try_from(n).map_err(|_| {
                        ShellError::SpannedLabeledError(
                            format!("--{} must not be negative", flag),
                            "negative value".to_string(),
                            tag,
                        )
                    })
                })
                .transpose()
        };
        let max_depth = to_unsigned(args.max_depth, "max-depth")?;
        let min_size = to_unsigned(args.min_size, "min-size")?;

        let exclude = args
            .exclude
            .map(|x| {
                Pattern::new(&x.item).map_err(|e| {
                    ShellError::SpannedLabeledError(
                        e.msg.to_string(),
                        "glob error".to_string(),
                        x.span,
                    )
                })
            })
            .transpose()?;

        let include_files = args.all;
        let paths = match args.path {
            Some(p) => {
                // A non-UTF-8 path cannot be handed to the glob parser; report
                // it instead of panicking.
                let pattern = p.item.to_str().ok_or_else(|| {
                    ShellError::SpannedLabeledError(
                        "path is not valid UTF-8".to_string(),
                        "invalid path".to_string(),
                        p.span,
                    )
                })?;
                glob::glob_with(pattern, GLOB_PARAMS)
            }
            None => glob::glob_with("*", GLOB_PARAMS),
        }
        .map_err(|e| {
            ShellError::SpannedLabeledError(e.msg.to_string(), "glob error".to_string(), tag)
        })?
        .filter(move |p| {
            // Without `--all` only directories (readable or not) are kept.
            include_files
                || match p {
                    Ok(f) => f.is_dir(),
                    Err(e) => e.path().is_dir(),
                }
        })
        .map(|v| v.map_err(glob_err_into));

        let deref = args.deref;
        let params = DirBuilder {
            tag,
            min: min_size,
            deref,
            exclude,
            all: args.all,
        };

        let mut output: Vec<Value> = vec![];
        for p in paths {
            let entry = match p {
                Ok(a) if a.is_dir() => {
                    DirInfo::new(a, &params, max_depth, engine_state.ctrlc.clone()).into()
                }
                // Report unreadable files the same way as glob errors rather
                // than dropping them silently.
                Ok(a) => match FileInfo::new(a, deref, tag) {
                    Ok(v) => v.into(),
                    Err(e) => Value::Error { error: e },
                },
                Err(e) => Value::Error { error: e },
            };
            output.push(entry);
        }

        Ok(output.into_pipeline_data(engine_state.ctrlc.clone()))
    }

    /// Usage examples shown in the help output.
    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "Disk usage of the current directory",
            example: "du",
            result: None,
        }]
    }
}
fn glob_err_into(e: GlobError) -> ShellError {
let e = e.into_error();
ShellError::from(e)
}
#[cfg(test)]
mod tests {
    use super::Du;
    use crate::test_examples;

    /// Runs the examples declared by the `du` command through the shared example checker.
    #[test]
    fn examples_work_as_expected() {
        test_examples(Du)
    }
}

View File

@ -1,5 +1,7 @@
mod ansi;
mod clear;
mod dir_info;
mod du;
mod input;
mod input_keys;
mod kill;
@ -8,6 +10,8 @@ mod term_size;
pub use ansi::{Ansi, AnsiGradient, AnsiStrip};
pub use clear::Clear;
pub use dir_info::{DirBuilder, DirInfo, FileInfo};
pub use du::Du;
pub use input::Input;
pub use input_keys::InputKeys;
pub use kill::Kill;