mirror of
https://github.com/ascii-boxes/boxes.git
synced 2025-01-19 04:18:12 +01:00
Support Unicode in expand_tabs_into() of tools.c
Declare ucs4_t character constants in boxes.h. Start Unicode support in boxes.c. #1
This commit is contained in:
parent
f536d45f7a
commit
f2ddb6d6df
56
src/boxes.c
56
src/boxes.c
@ -5,12 +5,12 @@
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License, version 2, as published
|
||||
* by the Free Software Foundation.
|
||||
*
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* for more details.
|
||||
*
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
@ -27,7 +27,15 @@
|
||||
#include <strings.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include <uniconv.h>
|
||||
#include <unistdio.h>
|
||||
#include <unistr.h>
|
||||
#include <unitypes.h>
|
||||
#include <uniwidth.h>
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
#include "shape.h"
|
||||
#include "boxes.h"
|
||||
#include "tools.h"
|
||||
@ -84,6 +92,14 @@ int anz_designs = 0; /* no of designs after parsing */
|
||||
|
||||
opt_t opt; /* command line options */
|
||||
|
||||
char *encoding; /* the character encoding that we use */
|
||||
|
||||
ucs4_t char_tab = 0x00000009; /* ucs4_t character '\t' (tab) */
|
||||
ucs4_t char_space = 0x00000020; /* ucs4_t character ' ' (space) */
|
||||
ucs4_t char_cr = 0x0000000d; /* ucs4_t character '\r' (carriage return) */
|
||||
ucs4_t char_newline = 0x0000000a; /* ucs4_t character '\n' (newline) */
|
||||
ucs4_t char_nul = 0x00000000; /* ucs4_t character '\0' (zero) */
|
||||
|
||||
input_t input = INPUT_INITIALIZER; /* input lines */
|
||||
|
||||
|
||||
@ -1353,16 +1369,18 @@ static int read_all_input (const int use_stdin)
|
||||
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
|
||||
*/
|
||||
{
|
||||
char buf[LINE_MAX_BYTES+2]; /* input buffer */
|
||||
char c;
|
||||
size_t invis; /* counts invisible characters */
|
||||
int ansipos; /* progression of ansi sequence */
|
||||
size_t input_size = 0; /* number of elements allocated */
|
||||
line_t *tmp = NULL;
|
||||
char *temp = NULL; /* string resulting from tab exp. */
|
||||
size_t newlen; /* line length after tab expansion */
|
||||
size_t i;
|
||||
int rc;
|
||||
char buf[LINE_MAX_BYTES + 2]; /* input buffer */
|
||||
size_t len_bytes;
|
||||
char c;
|
||||
size_t invis; /* counts invisible characters */
|
||||
int ansipos; /* progression of ansi sequence */
|
||||
size_t input_size = 0; /* number of elements allocated */
|
||||
line_t *tmp = NULL;
|
||||
char *temp = NULL; /* string resulting from tab exp. */
|
||||
uint8_t *mbtemp = NULL; /* temp string for preparing the multi-byte input */
|
||||
size_t newlen; /* line length after tab expansion */
|
||||
size_t i;
|
||||
int rc;
|
||||
|
||||
input.indent = LINE_MAX_BYTES;
|
||||
input.maxline = 0;
|
||||
@ -1386,12 +1404,14 @@ static int read_all_input (const int use_stdin)
|
||||
input.lines = tmp;
|
||||
}
|
||||
|
||||
input.lines[input.anz_lines].len = strlen (buf);
|
||||
len_bytes = strlen(buf);
|
||||
mbtemp = u8_strconv_from_locale(buf);
|
||||
input.lines[input.anz_lines].len = u8_strwidth(mbtemp, encoding);
|
||||
input.lines[input.anz_lines].num_leading_blanks = 0;
|
||||
input.final_newline = has_linebreak(buf, input.lines[input.anz_lines].len);
|
||||
input.final_newline = has_linebreak(buf, len_bytes);
|
||||
|
||||
if (opt.r) {
|
||||
input.lines[input.anz_lines].len -= 1;
|
||||
input.lines[input.anz_lines].len -= 1; /* TODO: len is now a column width from u8_strwidth(), but it is still used below as a byte index into buf */
|
||||
if (buf[input.lines[input.anz_lines].len] == '\n')
|
||||
buf[input.lines[input.anz_lines].len] = '\0';
|
||||
}
|
||||
@ -1582,6 +1602,12 @@ int main (int argc, char *argv[])
|
||||
if (rc)
|
||||
exit (EXIT_FAILURE);
|
||||
|
||||
/*
|
||||
* Store system character encoding
|
||||
*/
|
||||
setlocale(LC_ALL, ""); /* switch from default "C" encoding to system encoding */
|
||||
encoding = locale_charset();
|
||||
|
||||
/*
|
||||
* Parse config file, then reset design pointer
|
||||
*/
|
||||
|
@ -30,7 +30,8 @@
|
||||
/* #define PARSER_DEBUG */
|
||||
/* #define LEXER_DEBUG */
|
||||
|
||||
#include "regexp.h"
|
||||
#include <unitypes.h>
|
||||
#include "regexp/regexp.h"
|
||||
|
||||
|
||||
|
||||
@ -144,23 +145,33 @@ typedef struct { /* Command line options: */
|
||||
extern opt_t opt;
|
||||
|
||||
|
||||
extern char *encoding; /* the character encoding that we use */
|
||||
|
||||
extern ucs4_t char_tab; /* ucs4_t character '\t' (tab) */
|
||||
extern ucs4_t char_space; /* ucs4_t character ' ' (space) */
|
||||
extern ucs4_t char_cr; /* ucs4_t character '\r' (carriage return) */
|
||||
extern ucs4_t char_newline; /* ucs4_t character '\n' (newline) */
|
||||
extern ucs4_t char_nul; /* ucs4_t character '\0' (zero) */
|
||||
|
||||
|
||||
typedef struct {
|
||||
size_t len; /* length of text in characters */
|
||||
char *text; /* line content, tabs expanded */
|
||||
size_t invis; /* number of characters part of an ansi sequence */
|
||||
size_t vischar; /* number of normal printable characters */
|
||||
size_t *tabpos; /* tab positions in expanded work strings */
|
||||
size_t tabpos_len; /* number of tabs in a line */
|
||||
size_t num_leading_blanks; /* number of spaces at the start of the line after justification */
|
||||
size_t len; /* length of text in columns (character positions in a text terminal) */
|
||||
char *text; /* ASCII line content, tabs expanded, multi-byte chars replaced with 'x' */
|
||||
uint8_t *mbtext; /* multi-byte (original) line content, tabs expanded. We use UTF-8 so that our old regex code can find ASCII characters in it. */
|
||||
size_t invis; /* number of characters part of an ansi sequence */
|
||||
size_t vischar; /* number of normal printable characters */
|
||||
size_t *tabpos; /* tab positions in expanded work strings */
|
||||
size_t tabpos_len; /* number of tabs in a line */
|
||||
size_t num_leading_blanks; /* number of spaces at the start of the line after justification */
|
||||
} line_t;
|
||||
|
||||
#ifndef FILE_LEXER_L
|
||||
typedef struct {
|
||||
line_t *lines;
|
||||
size_t anz_lines; /* number of entries in input */
|
||||
size_t maxline; /* length of longest input line */
|
||||
size_t indent; /* number of leading spaces found */
|
||||
int final_newline; /* true if the last line of input ends with newline */
|
||||
size_t anz_lines; /* number of entries in input */
|
||||
size_t maxline; /* length of longest input line */
|
||||
size_t indent; /* number of leading spaces found */
|
||||
int final_newline; /* true if the last line of input ends with newline */
|
||||
} input_t;
|
||||
|
||||
#define INPUT_INITIALIZER {NULL, 0, 0, LINE_MAX_BYTES, 0}
|
||||
|
51
src/tools.c
51
src/tools.c
@ -29,6 +29,10 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <strings.h>
|
||||
|
||||
#include <unistr.h>
|
||||
#include <unitypes.h>
|
||||
|
||||
#include "shape.h"
|
||||
#include "boxes.h"
|
||||
#include "tools.h"
|
||||
@ -235,13 +239,13 @@ int empty_line(const line_t *line)
|
||||
|
||||
|
||||
|
||||
size_t expand_tabs_into(const char *input_buffer, const size_t in_len,
|
||||
const int tabstop, char **text, size_t **tabpos, size_t *tabpos_len)
|
||||
size_t expand_tabs_into(const uint32_t *input_buffer, const size_t in_len,
|
||||
const int tabstop, uint32_t **text, size_t **tabpos, size_t *tabpos_len)
|
||||
/*
|
||||
* Expand tab chars in input_buffer and store result in text.
|
||||
*
|
||||
* input_buffer Line of text with tab chars
|
||||
* in_len length of the string in input_buffer
|
||||
* in_len length of the string in input_buffer in characters
|
||||
* tabstop tab stop distance
|
||||
* text address of the pointer that will take the result
|
||||
* tabpos array of ints giving the positions of the first
|
||||
@ -257,22 +261,24 @@ size_t expand_tabs_into(const char *input_buffer, const size_t in_len,
|
||||
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
|
||||
*/
|
||||
{
|
||||
static char temp[LINE_MAX_BYTES * MAX_TABSTOP + 1]; /* work string */
|
||||
size_t ii; /* position in input string */
|
||||
size_t io; /* position in work string */
|
||||
size_t jp; /* tab expansion jump point */
|
||||
size_t tabnum; /* number of tabs in input */
|
||||
static uint32_t temp[LINE_MAX_BYTES * MAX_TABSTOP + 1]; /* work string */
|
||||
size_t io; /* character position in work string */
|
||||
size_t tabnum; /* index of the current tab */
|
||||
|
||||
*text = NULL;
|
||||
|
||||
for (ii = 0, *tabpos_len = 0; ii < in_len; ++ii) {
|
||||
if (input_buffer[ii] == '\t') {
|
||||
(*tabpos_len)++;
|
||||
}
|
||||
}
|
||||
if (opt.tabexp != 'k') {
|
||||
*tabpos_len = 0;
|
||||
} else {
|
||||
ucs4_t puc;
|
||||
const uint32_t *rest = input_buffer;
|
||||
while (rest = u32_next(&puc, rest)) {
|
||||
if (u32_cmp(&char_tab, &puc, 1) == 0) {
|
||||
(*tabpos_len)++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (*tabpos_len > 0) {
|
||||
*tabpos = (size_t *) calloc((*tabpos_len) + 1, sizeof(size_t));
|
||||
if (*tabpos == NULL) {
|
||||
@ -280,23 +286,26 @@ size_t expand_tabs_into(const char *input_buffer, const size_t in_len,
|
||||
}
|
||||
}
|
||||
|
||||
for (ii = 0, io = 0, tabnum = 0; ii < in_len && ((int) io) < (LINE_MAX_BYTES * tabstop - 1); ++ii) {
|
||||
if (input_buffer[ii] == '\t') {
|
||||
ucs4_t puc;
|
||||
const uint32_t *rest = input_buffer;
|
||||
io = 0;
|
||||
while (rest = u32_next(&puc, rest)) {
|
||||
if (u32_cmp(&char_tab, &puc, 1) == 0) { /* Is it a tab char? */
|
||||
if (*tabpos_len > 0) {
|
||||
(*tabpos)[tabnum++] = io;
|
||||
}
|
||||
for (jp = io + tabstop - (io % tabstop); io < jp; ++io) {
|
||||
temp[io] = ' ';
|
||||
}
|
||||
size_t num_spc = tabstop - (io % tabstop);
|
||||
u32_set(temp + io, char_space, num_spc);
|
||||
io += num_spc;
|
||||
}
|
||||
else {
|
||||
temp[io] = input_buffer[ii];
|
||||
u32_set(temp + io, puc, 1);
|
||||
++io;
|
||||
}
|
||||
}
|
||||
temp[io] = '\0';
|
||||
temp[io] = 0;
|
||||
|
||||
*text = (char *) strdup(temp);
|
||||
*text = u32_strdup(temp);
|
||||
if (*text == NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
@ -43,8 +43,8 @@ void regerror(char *msg);
|
||||
|
||||
int empty_line(const line_t *line);
|
||||
|
||||
size_t expand_tabs_into(const char *input_buffer, const size_t in_len,
|
||||
const int tabstop, char **text, size_t **tabpos, size_t *tabpos_len);
|
||||
size_t expand_tabs_into(const uint32_t *input_buffer, const size_t in_len,
|
||||
const int tabstop, uint32_t **text, size_t **tabpos, size_t *tabpos_len);
|
||||
|
||||
void btrim(char *text, size_t *len);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user