mirror of
https://github.com/nushell/nushell.git
synced 2025-08-09 19:17:44 +02:00
[nu-std] std-rfc/random: add random choice
(#16270)
# Description Adds `random choice` suggested in #16241. # User-Facing Changes New `random` module in `std-rfc` with the `choice` subcommand. # Tests + Formatting Unsure how to do tests. Sampling and a histogram should be enough, but they'll be non-deterministic. # After Submitting <!-- If your PR had any user-facing changes, update [the documentation](https://github.com/nushell/nushell.github.io) after the PR is merged, if necessary. This will help us keep the docs up to date. --> --------- Co-authored-by: sholderbach <sholderbach@users.noreply.github.com>
This commit is contained in:
@ -132,6 +132,11 @@ pub fn load_standard_library(
|
||||
"std-rfc/iter",
|
||||
include_str!("../std-rfc/iter/mod.nu"),
|
||||
),
|
||||
(
|
||||
"mod.nu",
|
||||
"std-rfc/random",
|
||||
include_str!("../std-rfc/random/mod.nu"),
|
||||
),
|
||||
];
|
||||
|
||||
for (filename, std_rfc_subdir_name, content) in std_rfc_submodules.drain(..) {
|
||||
|
@ -4,6 +4,7 @@ export use path *
|
||||
export module clip
|
||||
export module str
|
||||
export module iter
|
||||
export module random
|
||||
|
||||
# kv module depends on sqlite feature, which may not be available in some builds
|
||||
const kv_module = if ("sqlite" in (version).features) { "std-rfc/kv" } else { null }
|
||||
|
72
crates/nu-std/std-rfc/random/mod.nu
Normal file
72
crates/nu-std/std-rfc/random/mod.nu
Normal file
@ -0,0 +1,72 @@
|
||||
# Lets the `@example` blocks below invoke the command under the name
# `random choice` (presumably the name users see once the `random` module is
# imported), even though it is defined here as plain `choice`.
alias "random choice" = choice

# Sample `n` elements from a list
#
# This function will pick a simple random sample from input without replacement
# (each element from the input can only be picked once).
#
# The sample is treated as a set. This means that the combined probability of
# `[1 2 3 4] | random choice 2` returning `[3, 4]` or `[4, 3]` equals that of
# `[1, 2]`; the order of the returned elements is not uniformly random. If the
# order matters, post-process the result: `shuffle` makes all permutations
# equally probable, `sort` yields a canonical order.
#
# Sampling 0 elements returns an empty list. Asking for a negative number of
# elements, or for more elements than the input contains, is an error.
#
# The current implementation collects the input stream. This might change in
# the future.
@example "Pick 2 random items" {
    [1 2 3 4 5] | random choice 2
}
@example "Verify that the elements are picked uniformly" {
    0..100_000
    | each {
        [1 2 3 4 5] | random choice 2 | sort | to nuon
    }
    | histogram
}
export def choice [
    n: int = 1 # number of items to sample
]: list -> list {
    # XXX: this collects the stream
    let input = $in
    let len = $input | length

    if $n < 0 {
        error make {
            msg: "Can't sample a negative number of elements"
            label: {
                text: $"Tried to sample ($n)"
                span: (metadata $n).span
            }
        }
    }

    if $n > $len {
        error make {
            msg: "Can't sample more elements than there are in input"
            label: {
                text: $"Tried to sample ($n) out of ($len)"
                span: (metadata $n).span
            }
        }
    }

    # An empty sample needs no randomness; returning early also keeps the
    # `1 / $n` exponent below from dividing by zero.
    if $n == 0 {
        return []
    }

    # Seed the reservoir with the first `n` elements. `first` without a count
    # is wrapped back into a list so the result is always a list.
    mut output = if $n == 1 {
        [($input | first)]
    } else {
        $input | first $n
    }

    # reservoir sampling, algorithm L
    # https://doi.org/10.1145/198429.198435

    mut w = (random float) ** (1 / $n)
    # index of the last element examined so far (the reservoir holds 0..<n)
    mut i = $n - 1

    loop {
        # number of input elements to skip before the next replacement
        let gap = ((random float | math ln) / ((1.0 - $w) | math ln)) | math floor
        $i += $gap + 1

        if $i >= $len {
            break
        }

        # overwrite a uniformly chosen reservoir slot with the selected element
        let el = $input | get $i
        $output = $output | update (random int 0..<$n) $el

        $w *= (random float) ** (1 / $n)
    }

    $output
}
|
Reference in New Issue
Block a user