From b5e09b8a30836e4b2bbe6ec119ef235286d5aa6c Mon Sep 17 00:00:00 2001 From: Christopher Durham Date: Mon, 23 Oct 2023 08:21:27 -0400 Subject: [PATCH] Improve `registry value` return types (#10806) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit r? @fdncred Last one, I hope. At least short of completely redesigning `registry query`'s interface. (Which I wouldn't implement without asking around first.) # Description User-Facing Changes has the general overview. Inline comments provide a lot of justification on specific choices. Most of the type conversions should be reasonably noncontroversial, but expanding `REG_EXPAND_SZ` needs some justification. First, an example of the behavior there: ```shell > # release nushell: > version | select version commit_hash | to md --pretty | version | commit_hash | | ------- | ---------------------------------------- | | 0.85.0 | a6f62e05ae5b4e9ba4027fbfffd21025a898783e | > registry query --hkcu Environment TEMP | get value %USERPROFILE%\AppData\Local\Temp > # with this patch: > version | select version commit_hash | to md --pretty | version | commit_hash | | ------- | ---------------------------------------- | | 0.86.1 | 0c5a4c991f1a77bcbe5a86bc8f4469ecf1218fe9 | > registry query --hkcu Environment TEMP | get value C:\Users\CAD\AppData\Local\Temp > # Microsoft CLI tooling behavior: > ^pwsh -c `(Get-ItemProperty HKCU:\Environment).TEMP` C:\Users\CAD\AppData\Local\Temp > ^reg query HKCU\Environment /v TEMP HKEY_CURRENT_USER\Environment TEMP REG_EXPAND_SZ %USERPROFILE%\AppData\Local\Temp ``` As noted in the inline comments, I'm arguing that it makes more sense to eagerly expand the %EnvironmentString% placeholders, as none of Nushell's path functionality will interpret these placeholders. This makes the behavior of `registry query` match the behavior of pwsh's `Get-ItemProperty` registry access, and means that paths (the most common use of `REG_EXPAND_SZ`) are actually usable. This does *not* break nu_script's [`update-path`](https://github.com/nushell/nu_scripts/blob/main/sourced/update-path.nu); it will just be slightly inefficient as it will not find any `%Placeholder%`s to manually expand anymore. But also, note that `update-path` is currently *wrong*, as a path including `%LocalAppData%Low` is perfectly valid and sometimes used (to go to `Appdata\LocalLow`); expansion isn't done solely on a path segment basis, as is implemented by `update-path`. I believe that the type conversions implemented by this patch are essentially always desired. But if we want to keep `registry query` "pure", we could easily introduce a `registry get`[^get] which does the more complete interpretation of registry types, and leave `registry query` alone as doing the bare minimum. Or we could teach `path expand` to do `ExpandEnvironmentStringsW`. But REG_EXPAND_SZ being the odd one out of not getting its registry type semantics decoded by `registry query` seems wrong. [^get]: This is the potential redesign I alluded to at the top. One potential change could be to make `registry get Environment` produce `record` instead of `registry query`'s `table`, the idea being to make it feel as native as possible. We could even translate between Nu's cell-path and registry paths -- cell paths with spaces do actually work, if a bit awkwardly -- or even introduce lazy records so the registry can be traversed with normal data manipulation ... but that all seems a bit much. # User-Facing Changes - `registry query`'s produced `value` has changed. Specifically: - ❗ Rows `where type == REG_EXPAND_SZ` now expand `%EnvironmentVarable%` placeholders for you. For example, `registry query --hkcu Environment TEMP | get value` returns `C:\Users\CAD\AppData\Local\Temp` instead of `%USERPROFILE%\AppData\Local\Temp`. - You can restore the old behavior and preserve the placeholders by passing a new `--no-expand` switch. - Rows `where type == REG_MULTI_SZ` now provide a `list` value. They previously had that same list, but `| str join "\n"`. - Rows `where type == REG_DWORD_BIG_ENDIAN` now provide the correct numeric value instead of a byte-swapped value. - Rows `where type == REG_QWORD` now provide the correct numeric value[^sign] instead of the value modulo 232. - Rows `where type == REG_LINK` now provide a string value of the link target registry path instead of an internal debug string representation. (This should never be visible, as links should be transparently followed.) - Rows `where type =~ RESOURCE` now provide a binary value instead of an internal debug string representation. [^sign]: Nu's `int` is a signed 64-bit integer. As such, values >= 263 will be reported as their negative two's compliment value. This might sometimes be the correct interpretation -- the registry does not distinguish between signed and unsigned integer values -- but regedit and pwsh display all values as unsigned. --- crates/nu-command/Cargo.toml | 1 + .../nu-command/src/system/registry_query.rs | 205 +++++++++++++----- 2 files changed, 148 insertions(+), 58 deletions(-) diff --git a/crates/nu-command/Cargo.toml b/crates/nu-command/Cargo.toml index cfbc33eff..4a5444e75 100644 --- a/crates/nu-command/Cargo.toml +++ b/crates/nu-command/Cargo.toml @@ -110,6 +110,7 @@ version = "3.1" features = [ "Win32_Foundation", "Win32_Storage_FileSystem", + "Win32_System_Environment", "Win32_System_SystemServices", "Win32_Security", "Win32_System_Threading", diff --git a/crates/nu-command/src/system/registry_query.rs b/crates/nu-command/src/system/registry_query.rs index b9e3f9de7..5cb5f9d29 100644 --- a/crates/nu-command/src/system/registry_query.rs +++ b/crates/nu-command/src/system/registry_query.rs @@ -5,7 +5,8 @@ use nu_protocol::{ record, Category, Example, IntoInterruptiblePipelineData, IntoPipelineData, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Type, Value, }; -use winreg::{enums::*, RegKey}; +use windows::{core::PCWSTR, Win32::System::Environment::ExpandEnvironmentStringsW}; +use winreg::{enums::*, types::FromRegValue, RegKey}; #[derive(Clone)] pub struct RegistryQuery; @@ -32,6 +33,11 @@ impl Command for RegistryQuery { "query the hkey_current_user_local_settings hive", None, ) + .switch( + "no-expand", + "do not expand %ENV% placeholders in REG_EXPAND_SZ", + Some('u'), + ) .required("key", SyntaxShape::String, "registry key to query") .optional( "value", @@ -82,6 +88,8 @@ fn registry_query( ) -> Result { let call_span = call.head; + let skip_expand = call.has_flag("no-expand"); + let registry_key: Spanned = call.req(engine_state, stack, 0)?; let registry_key_span = ®istry_key.clone().span; let registry_value: Option> = call.opt(engine_state, stack, 1)?; @@ -92,12 +100,13 @@ fn registry_query( if registry_value.is_none() { let mut reg_values = vec![]; for (name, val) in reg_key.enum_values().flatten() { - let (nu_value, reg_type) = reg_value_to_nu_value(val, call_span); + let reg_type = format!("{:?}", val.vtype); + let nu_value = reg_value_to_nu_value(val, call_span, skip_expand); reg_values.push(Value::record( record! { "name" => Value::string(name, call_span), "value" => nu_value, - "type" => Value::string(format!("{:?}", reg_type), call_span), + "type" => Value::string(reg_type, call_span), }, *registry_key_span, )) @@ -109,12 +118,13 @@ fn registry_query( let reg_value = reg_key.get_raw_value(value.item.as_str()); match reg_value { Ok(val) => { - let (nu_value, reg_type) = reg_value_to_nu_value(val, call_span); + let reg_type = format!("{:?}", val.vtype); + let nu_value = reg_value_to_nu_value(val, call_span, skip_expand); Ok(Value::record( record! { "name" => Value::string(value.item, call_span), "value" => nu_value, - "type" => Value::string(format!("{:?}", reg_type), call_span), + "type" => Value::string(reg_type, call_span), }, value.span, ) @@ -175,60 +185,139 @@ fn get_reg_hive(call: &Call) -> Result { } fn reg_value_to_nu_value( - reg_value: winreg::RegValue, + mut reg_value: winreg::RegValue, call_span: Span, -) -> (nu_protocol::Value, winreg::enums::RegType) { + skip_expand: bool, +) -> nu_protocol::Value { match reg_value.vtype { - REG_NONE => (Value::nothing(call_span), reg_value.vtype), - REG_SZ => ( - Value::string(reg_value.to_string(), call_span), - reg_value.vtype, - ), - REG_EXPAND_SZ => ( - Value::string(reg_value.to_string(), call_span), - reg_value.vtype, - ), - REG_BINARY => (Value::binary(reg_value.bytes, call_span), reg_value.vtype), - REG_DWORD => ( - Value::int( - unsafe { *(reg_value.bytes.as_ptr() as *const u32) } as i64, - call_span, - ), - reg_value.vtype, - ), - REG_DWORD_BIG_ENDIAN => ( - Value::int( - unsafe { *(reg_value.bytes.as_ptr() as *const u32) } as i64, - call_span, - ), - reg_value.vtype, - ), - REG_LINK => ( - Value::string(reg_value.to_string(), call_span), - reg_value.vtype, - ), - REG_MULTI_SZ => ( - Value::string(reg_value.to_string(), call_span), - reg_value.vtype, - ), - REG_RESOURCE_LIST => ( - Value::string(reg_value.to_string(), call_span), - reg_value.vtype, - ), - REG_FULL_RESOURCE_DESCRIPTOR => ( - Value::string(reg_value.to_string(), call_span), - reg_value.vtype, - ), - REG_RESOURCE_REQUIREMENTS_LIST => ( - Value::string(reg_value.to_string(), call_span), - reg_value.vtype, - ), - REG_QWORD => ( - Value::int( - unsafe { *(reg_value.bytes.as_ptr() as *const u32) } as i64, - call_span, - ), - reg_value.vtype, - ), + REG_NONE => Value::nothing(call_span), + REG_BINARY => Value::binary(reg_value.bytes, call_span), + REG_MULTI_SZ => reg_value_to_nu_list_string(reg_value, call_span), + REG_SZ | REG_EXPAND_SZ => reg_value_to_nu_string(reg_value, call_span, skip_expand), + REG_DWORD | REG_DWORD_BIG_ENDIAN | REG_QWORD => reg_value_to_nu_int(reg_value, call_span), + + // This should be impossible, as registry symlinks should be automatically transparent + // to the registry API as it's used by winreg, since it never uses REG_OPTION_OPEN_LINK. + // If it happens, decode as if the link is a string; it should be a registry path string. + REG_LINK => { + reg_value.vtype = REG_SZ; + reg_value_to_nu_string(reg_value, call_span, skip_expand) + } + + // Decode these as binary; that seems to be the least bad option available to us. + // REG_RESOURCE_LIST is a struct CM_RESOURCE_LIST. + // REG_FULL_RESOURCE_DESCRIPTOR is a struct CM_FULL_RESOURCE_DESCRIPTOR. + // REG_RESOURCE_REQUIREMENTS_LIST is a struct IO_RESOURCE_REQUIREMENTS_LIST. + REG_RESOURCE_LIST | REG_FULL_RESOURCE_DESCRIPTOR | REG_RESOURCE_REQUIREMENTS_LIST => { + reg_value.vtype = REG_BINARY; + Value::binary(reg_value.bytes, call_span) + } } } + +fn reg_value_to_nu_string( + reg_value: winreg::RegValue, + call_span: Span, + skip_expand: bool, +) -> nu_protocol::Value { + let value = String::from_reg_value(®_value) + .expect("registry value type should be REG_SZ or REG_EXPAND_SZ"); + + // REG_EXPAND_SZ contains unexpanded references to environment variables, for example, %PATH%. + // winreg not expanding these is arguably correct, as it's just wrapping raw registry access. + // These placeholder-having strings work in *some* Windows contexts, but Rust's fs/path APIs + // don't handle them, so they won't work in Nu unless we expand them here. Eagerly expanding the + // strings here seems to be the least bad option. This is what PowerShell does, for example, + // although reg.exe does not. We could do the substitution with our env, but the officially + // correct way to expand these strings is to call Win32's ExpandEnvironmentStrings function. + // ref: + + // We can skip the dance if the string doesn't actually have any unexpanded placeholders. + if skip_expand || reg_value.vtype != REG_EXPAND_SZ || !value.contains('%') { + return Value::string(value, call_span); + } + + // The encoding dance is unfortunate since we read "Windows Unicode" from the registry, but + // it's the most resilient option and avoids making potentially wrong alignment assumptions. + let value_utf16 = value.encode_utf16().chain([0]).collect::>(); + + // Like most Win32 string functions, the return value is the number of TCHAR written, + // or the required buffer size (in TCHAR) if the buffer is too small, or 0 for error. + // Since we already checked for the case where no expansion is done, we can start with + // an empty output buffer, since we expect to require at least one resize loop anyway. + let mut out_buffer = vec![]; + loop { + match unsafe { + ExpandEnvironmentStringsW(PCWSTR(value_utf16.as_ptr()), Some(&mut *out_buffer)) + } { + 0 => { + // 0 means error, but we don't know what the error is. We could try to get + // the error code with GetLastError, but that's a whole other can of worms. + // Instead, we'll just return the original string and hope for the best. + // Presumably, registry strings shouldn't ever cause this to error anyway. + return Value::string(value, call_span); + } + size if size as usize <= out_buffer.len() => { + // The buffer was large enough, so we're done. Remember to remove the trailing nul! + let out_value_utf16 = &out_buffer[..size as usize - 1]; + let out_value = String::from_utf16_lossy(out_value_utf16); + return Value::string(out_value, call_span); + } + size => { + // The buffer was too small, so we need to resize and try again. + // Clear first to indicate we don't care about the old contents. + out_buffer.clear(); + out_buffer.resize(size as usize, 0); + continue; + } + } + } +} + +#[test] +fn no_expand_does_not_expand() { + let unexpanded = "%AppData%"; + let reg_val = || winreg::RegValue { + bytes: unexpanded + .encode_utf16() + .chain([0]) + .flat_map(u16::to_ne_bytes) + .collect(), + vtype: REG_EXPAND_SZ, + }; + + // normally we do expand + let nu_val_expanded = reg_value_to_nu_string(reg_val(), Span::unknown(), false); + assert!(nu_val_expanded.as_string().is_ok()); + assert_ne!(nu_val_expanded.as_string().unwrap(), unexpanded); + + // unless we skip expansion + let nu_val_skip_expand = reg_value_to_nu_string(reg_val(), Span::unknown(), true); + assert!(nu_val_skip_expand.as_string().is_ok()); + assert_eq!(nu_val_skip_expand.as_string().unwrap(), unexpanded); +} + +fn reg_value_to_nu_list_string(reg_value: winreg::RegValue, call_span: Span) -> nu_protocol::Value { + let values = >::from_reg_value(®_value) + .expect("registry value type should be REG_MULTI_SZ") + .into_iter() + .map(|s| Value::string(s, call_span)); + + // There's no REG_MULTI_EXPAND_SZ, so no need to do placeholder expansion here. + Value::list(values.collect(), call_span) +} + +fn reg_value_to_nu_int(reg_value: winreg::RegValue, call_span: Span) -> nu_protocol::Value { + let value = match reg_value.vtype { + REG_DWORD => u32::from_reg_value(®_value).unwrap() as i64, + REG_DWORD_BIG_ENDIAN => { + // winreg (v0.51.0) doesn't natively decode REG_DWORD_BIG_ENDIAN + u32::from_be_bytes(unsafe { *reg_value.bytes.as_ptr().cast() }) as i64 + } + REG_QWORD => u64::from_reg_value(®_value).unwrap() as i64, + _ => unreachable!( + "registry value type should be REG_DWORD, REG_DWORD_BIG_ENDIAN, or REG_QWORD" + ), + }; + Value::int(value, call_span) +}