From 66ad83c15ce0289a9901b5f618e9ea95ea763974 Mon Sep 17 00:00:00 2001
From: Christian Friedow <christian@friedow.com>
Date: Wed, 22 Mar 2023 23:54:18 +0100
Subject: [PATCH] from ssv --aligned-columns should separate lines by character
 index instead of byte index (#8558)

# Description

## Symptom
Lines which are input into `from ssv --aligned-columns` are split
incorrectly if they contain UTF-8 characters which are multiple bytes
long. Notice how the values of the `Bars` column bleed into
the `Security` column in the following output (the big grey areas are
censored data ;) ):

![before-patch](https://user-images.githubusercontent.com/17351844/226757737-be7ca493-5c64-4a91-9153-984df515bb8c.png)

## Problem
The function behind `from ssv --aligned-columns` splits lines into
fields by byte index (which is the default behavior of `str.get(...)` in
Rust) instead of character index. If the header row has a different length
in bytes than the remaining table rows, the split is executed incorrectly.

## Solution
The function behind `from ssv --aligned-columns` now separates lines by
character index instead of byte index. This produces the following
(correct) output (the big grey areas are censored data ;) ):

![after-patch](https://user-images.githubusercontent.com/17351844/226757850-7acaebf3-2d40-4f85-b76e-64e465254bda.png)
---
 crates/nu-command/src/formats/from/ssv.rs | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/crates/nu-command/src/formats/from/ssv.rs b/crates/nu-command/src/formats/from/ssv.rs
index 15e931074..2b9490a79 100644
--- a/crates/nu-command/src/formats/from/ssv.rs
+++ b/crates/nu-command/src/formats/from/ssv.rs
@@ -85,15 +85,23 @@ fn parse_aligned_columns<'a>(
                     .iter()
                     .enumerate()
                     .map(|(i, (header_name, start_position))| {
+                        let char_index_start = match l.char_indices().nth(*start_position) {
+                            Some(idx) => idx.0,
+                            None => *start_position,
+                        };
                         let val = match headers.get(i + 1) {
                             Some((_, end)) => {
                                 if *end < l.len() {
-                                    l.get(*start_position..*end)
+                                    let char_index_end = match l.char_indices().nth(*end) {
+                                        Some(idx) => idx.0,
+                                        None => *end,
+                                    };
+                                    l.get(char_index_start..char_index_end)
                                 } else {
-                                    l.get(*start_position..)
+                                    l.get(char_index_start..)
                                 }
                             }
-                            None => l.get(*start_position..),
+                            None => l.get(char_index_start..),
                         }
                         .unwrap_or("")
                         .trim()