mirror of
https://github.com/ascii-boxes/boxes.git
synced 2025-06-20 09:48:12 +02:00
Add command line option -n to set character encoding of input text #1
This commit is contained in:
parent
4c656727ec
commit
44c2c526af
13
.gitattributes
vendored
Normal file
13
.gitattributes
vendored
Normal file
@ -0,0 +1,13 @@
|
||||
* text=auto
|
||||
|
||||
# shell scripts
|
||||
*.sh text eol=lf
|
||||
|
||||
# Windows batch files
|
||||
*.bat text eol=crlf
|
||||
|
||||
# the test cases
|
||||
/test/*.txt text eol=lf
|
||||
|
||||
# special test case for testing ISO encoding
|
||||
/test/111_manual_encoding_iso.txt text working-tree-encoding=ISO_8859-15
|
@ -10,7 +10,7 @@ boxes \- text mode box and comment drawing filter
|
||||
.SH SYNOPSIS
|
||||
.B boxes
|
||||
[\-hlmrv] [\-a\ format] [\-d\ design] [\-f\ file] [\-i\ indent] [\-k\ bool]
|
||||
[\-p\ pad] [\-s\ size] [\-t\ tabopts] [infile [outfile]]
|
||||
[\-n\ encoding] [\-p\ pad] [\-s\ size] [\-t\ tabopts] [infile [outfile]]
|
||||
.SH DESCRIPTION
|
||||
.I Boxes
|
||||
is a text filter which can draw any kind of box around its input text. Box
|
||||
@ -185,6 +185,14 @@ padding, indentation, etc. for the mended box. Implies
|
||||
false.
|
||||
.\" - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||||
.TP 0.6i
|
||||
.B \-n \fIencoding\fP
|
||||
Character encoding. Overrides the character encoding of the input and output
|
||||
text. Choose from the list shown by \fIiconv -l\fP. If an invalid character
|
||||
encoding is specified here, the system encoding is used as a fallback. The default
|
||||
is to use the system encoding, which is normally the best course of action.
|
||||
So don't specify this option unless you have to.
|
||||
.\" - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||||
.TP 0.6i
|
||||
.B \-p \fIstring\fP
|
||||
Padding. Specify padding in spaces around the input text block for all
|
||||
sides of the box. The argument string may not contain whitespace and must
|
||||
@ -357,4 +365,5 @@ configuration file (takes precedence over system-wide configuration file)
|
||||
system\-wide configuration file
|
||||
.\" =======================================================================
|
||||
.SH "SEE ALSO"
|
||||
.I figlet(6)
|
||||
.BR figlet (6),
|
||||
.BR iconv (1)
|
||||
|
@ -79,7 +79,7 @@ lex.yy.c: lexer.l boxes.h
|
||||
cat lexer.tmp.c >> lex.yy.c
|
||||
rm lexer.tmp.c
|
||||
|
||||
|
||||
# TODO In the end, check declared dependencies again
|
||||
boxes.o: boxes.c boxes.h regulex.h shape.h tools.h unicode.h generate.h remove.h config.h
|
||||
tools.o: tools.c tools.h boxes.h shape.h config.h
|
||||
unicode.o: unicode.c unicode.h config.h
|
||||
|
33
src/boxes.c
33
src/boxes.c
@ -29,7 +29,6 @@
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include <uniconv.h>
|
||||
#include <unictype.h>
|
||||
#include <unistdio.h>
|
||||
#include <unistr.h>
|
||||
@ -107,6 +106,7 @@ static void usage(FILE *st)
|
||||
fprintf(st, " -k bool leading/trailing blank line retention on removal\n");
|
||||
fprintf(st, " -l list available box designs w/ samples\n");
|
||||
fprintf(st, " -m mend box, i.e. remove it and redraw it afterwards\n");
|
||||
fprintf(st, " -n enc Character encoding of input and output\n");
|
||||
fprintf(st, " -p fmt padding [default: none]\n");
|
||||
/* fprintf(st, " -q modify command for needs of the web UI (undocumented)\n"); */
|
||||
fprintf(st, " -r remove box\n");
|
||||
@ -370,6 +370,7 @@ static int process_commandline(int argc, char *argv[])
|
||||
opt.tabstop = DEF_TABSTOP;
|
||||
opt.tabexp = 'e';
|
||||
opt.killblank = -1;
|
||||
opt.encoding = NULL;
|
||||
for (idummy = 0; idummy < ANZ_SIDES; ++idummy) {
|
||||
opt.padding[idummy] = -1;
|
||||
}
|
||||
@ -388,7 +389,7 @@ static int process_commandline(int argc, char *argv[])
|
||||
* Parse Command Line
|
||||
*/
|
||||
do {
|
||||
oc = getopt(argc, argv, "a:c:d:f:hi:k:lmp:qrs:t:v");
|
||||
oc = getopt(argc, argv, "a:c:d:f:hi:k:lmn:p:qrs:t:v");
|
||||
|
||||
switch (oc) {
|
||||
|
||||
@ -580,6 +581,17 @@ static int process_commandline(int argc, char *argv[])
|
||||
opt.killblank = 0;
|
||||
break;
|
||||
|
||||
case 'n':
|
||||
/*
|
||||
* Character encoding
|
||||
*/
|
||||
opt.encoding = (char *) strdup(optarg);
|
||||
if (opt.encoding == NULL) {
|
||||
perror(PROJECT);
|
||||
return 1;
|
||||
}
|
||||
break;
|
||||
|
||||
case 'p':
|
||||
/*
|
||||
* Padding. format is ([ahvtrbl]n)+
|
||||
@ -1257,6 +1269,9 @@ static int apply_substitutions(const int mode)
|
||||
/*
|
||||
* Compile regular expressions
|
||||
*/
|
||||
#ifdef REGEXP_DEBUG
|
||||
fprintf(stderr, "Compiling %d %s rule patterns\n", (int) anz_rules, mode ? "reversion" : "replacement");
|
||||
#endif
|
||||
errno = 0;
|
||||
opt.design->current_rule = rules;
|
||||
for (j = 0; j < anz_rules; ++j, ++(opt.design->current_rule)) {
|
||||
@ -1278,13 +1293,13 @@ static int apply_substitutions(const int mode)
|
||||
for (j = 0; j < anz_rules; ++j, ++(opt.design->current_rule)) {
|
||||
#ifdef REGEXP_DEBUG
|
||||
fprintf (stderr, "regex_replace(0x%p, \"%s\", \"%s\", %d, \'%c\') == ",
|
||||
rules[j].prog, rules[j].repstr, u32_strconv_to_locale(input.lines[k].mbtext),
|
||||
rules[j].prog, rules[j].repstr, u32_strconv_to_output(input.lines[k].mbtext),
|
||||
(int) input.lines[k].num_chars, rules[j].mode);
|
||||
#endif
|
||||
uint32_t *newtext = regex_replace(rules[j].prog, rules[j].repstr,
|
||||
input.lines[k].mbtext, input.lines[k].num_chars, rules[j].mode == 'g');
|
||||
#ifdef REGEXP_DEBUG
|
||||
fprintf (stderr, "\"%s\"\n", newtext ? u32_strconv_to_locale(newtext) : "NULL");
|
||||
fprintf (stderr, "\"%s\"\n", newtext ? u32_strconv_to_output(newtext) : "NULL");
|
||||
#endif
|
||||
if (newtext == NULL) {
|
||||
return 1;
|
||||
@ -1298,7 +1313,7 @@ static int apply_substitutions(const int mode)
|
||||
|
||||
#ifdef REGEXP_DEBUG
|
||||
fprintf (stderr, "input.lines[%d] == {%d, \"%s\"}\n", (int) k,
|
||||
(int) input.lines[k].num_chars, u32_strconv_to_locale(input.lines[k].mbtext));
|
||||
(int) input.lines[k].num_chars, u32_strconv_to_output(input.lines[k].mbtext));
|
||||
#endif
|
||||
}
|
||||
opt.design->current_rule = NULL;
|
||||
@ -1393,7 +1408,7 @@ static int read_all_input(const int use_stdin)
|
||||
input.lines = tmp;
|
||||
}
|
||||
|
||||
mbtemp = u32_strconv_from_locale(buf);
|
||||
mbtemp = u32_strconv_from_input(buf);
|
||||
len_chars = u32_strlen(mbtemp);
|
||||
input.final_newline = has_linebreak(mbtemp, len_chars);
|
||||
input.lines[input.anz_lines].posmap = NULL;
|
||||
@ -1484,7 +1499,7 @@ static int read_all_input(const int use_stdin)
|
||||
for (i = 0; i < input.anz_lines; ++i) {
|
||||
#ifdef DEBUG
|
||||
fprintf(stderr, "%2d: mbtext = \"%s\" (%d chars)\n", (int) i,
|
||||
u32_strconv_to_locale(input.lines[i].mbtext), (int) input.lines[i].num_chars);
|
||||
u32_strconv_to_output(input.lines[i].mbtext), (int) input.lines[i].num_chars);
|
||||
#endif
|
||||
if (input.lines[i].num_chars >= input.indent) {
|
||||
memmove(input.lines[i].text, input.lines[i].text + input.indent,
|
||||
@ -1496,7 +1511,7 @@ static int read_all_input(const int use_stdin)
|
||||
}
|
||||
#ifdef DEBUG
|
||||
fprintf(stderr, "%2d: mbtext = \"%s\" (%d chars)\n", (int) i,
|
||||
u32_strconv_to_locale(input.lines[i].mbtext), (int) input.lines[i].num_chars);
|
||||
u32_strconv_to_output(input.lines[i].mbtext), (int) input.lines[i].num_chars);
|
||||
#endif
|
||||
}
|
||||
input.maxline -= input.indent;
|
||||
@ -1557,7 +1572,7 @@ int main(int argc, char *argv[])
|
||||
* Store system character encoding
|
||||
*/
|
||||
setlocale(LC_ALL, ""); /* switch from default "C" encoding to system encoding */
|
||||
encoding = locale_charset();
|
||||
encoding = check_encoding(opt.encoding, locale_charset());
|
||||
#ifdef DEBUG
|
||||
fprintf (stderr, "Character Encoding = %s\n", encoding);
|
||||
#endif
|
||||
|
@ -139,6 +139,7 @@ typedef struct { /* Command line options: */
|
||||
char indentmode; /* 'b', 't', 'n', or '\0' */
|
||||
char justify; /* 'l', 'c', 'r', or '\0' */
|
||||
int killblank; /* -1 if not set */
|
||||
char *encoding; /* character encoding override for input and output text */
|
||||
FILE *infile; /* where we get our input */
|
||||
FILE *outfile; /* where we put our output */
|
||||
} opt_t;
|
||||
|
@ -28,7 +28,6 @@
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <uniconv.h>
|
||||
#include <unistr.h>
|
||||
|
||||
#include "shape.h"
|
||||
@ -729,7 +728,7 @@ static int justify_line(line_t *line, int skew)
|
||||
|
||||
#if defined(DEBUG) || 0
|
||||
fprintf (stderr, "justify_line(%c): Input: real: (%02d) \"%s\", text: (%02d) \"%s\", invisible=%d, skew=%d",
|
||||
opt.justify ? opt.justify : '0', (int) line->num_chars, u32_strconv_to_locale(line->mbtext),
|
||||
opt.justify ? opt.justify : '0', (int) line->num_chars, u32_strconv_to_output(line->mbtext),
|
||||
(int) line->len, line->text, (int) line->invis, skew);
|
||||
#endif
|
||||
|
||||
@ -998,7 +997,7 @@ int output_box(const sentry_t *thebox)
|
||||
concat_strings(obuf, LINE_MAX_BYTES + 1, 8, restored_indent,
|
||||
skip_left ? "" : thebox[BLEF].chars[j], hfill1,
|
||||
ti >= 0 && shift > 0 ? nspaces(shift) : "",
|
||||
ti >= 0 ? u32_strconv_to_locale(mbtext_shifted) : "",
|
||||
ti >= 0 ? u32_strconv_to_output(mbtext_shifted) : "",
|
||||
hfill2, nspaces(input.maxline - input.lines[ti].len - shift),
|
||||
thebox[BRIG].chars[j]);
|
||||
}
|
||||
|
@ -26,9 +26,10 @@
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <uniconv.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "tools.h"
|
||||
#include "unicode.h"
|
||||
#include "regulex.h"
|
||||
|
||||
|
||||
@ -37,7 +38,10 @@ pcre2_code *compile_pattern(char *pattern)
|
||||
{
|
||||
int errornumber;
|
||||
PCRE2_SIZE erroroffset;
|
||||
PCRE2_SPTR pattern32 = u32_strconv_from_locale(pattern);
|
||||
PCRE2_SPTR pattern32 = u32_strconv_from_arg(pattern, config_encoding);
|
||||
if (pattern32 == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
pcre2_code *re = pcre2_compile(
|
||||
pattern32, /* the pattern */
|
||||
@ -51,7 +55,7 @@ pcre2_code *compile_pattern(char *pattern)
|
||||
PCRE2_UCHAR buffer[256];
|
||||
pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
|
||||
fprintf(stderr, "Regular expression pattern \"%s\" failed to compile at offset %d: %s\n",
|
||||
pattern, (int) erroroffset, u32_strconv_to_locale(buffer));
|
||||
pattern, (int) erroroffset, u32_strconv_to_output(buffer));
|
||||
}
|
||||
return re;
|
||||
}
|
||||
@ -60,7 +64,10 @@ pcre2_code *compile_pattern(char *pattern)
|
||||
|
||||
uint32_t *regex_replace(pcre2_code *search, char *replace, uint32_t *input, const size_t input_len, const int global)
|
||||
{
|
||||
PCRE2_SPTR replacement = u32_strconv_from_locale(replace);
|
||||
PCRE2_SPTR replacement = u32_strconv_from_arg(replace, config_encoding);
|
||||
if (replacement == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
uint32_t options = PCRE2_SUBSTITUTE_OVERFLOW_LENGTH | PCRE2_SUBSTITUTE_EXTENDED
|
||||
| (global ? PCRE2_SUBSTITUTE_GLOBAL : 0);
|
||||
PCRE2_SIZE outlen = input_len * 2; /* estimated length of output buffer in characters, fine if too small */
|
||||
@ -101,7 +108,7 @@ uint32_t *regex_replace(pcre2_code *search, char *replace, uint32_t *input, cons
|
||||
PCRE2_UCHAR buffer[256];
|
||||
pcre2_get_error_message(pcre2_rc, buffer, sizeof(buffer));
|
||||
/* buffer will normally contain "invalid replacement string" */
|
||||
fprintf(stderr, "Error substituting \"%s\": %s\n", replace, u32_strconv_to_locale(buffer));
|
||||
fprintf(stderr, "Error substituting \"%s\": %s\n", replace, u32_strconv_to_output(buffer));
|
||||
BFREE(output);
|
||||
return NULL;
|
||||
}
|
||||
|
@ -26,7 +26,6 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <uniconv.h>
|
||||
#include <unistr.h>
|
||||
|
||||
#include "shape.h"
|
||||
@ -1098,7 +1097,7 @@ void output_input(const int trim_only)
|
||||
indent = 0;
|
||||
}
|
||||
|
||||
fprintf(opt.outfile, "%s%s%s", indentspc, u32_strconv_to_locale(advance32(input.lines[j].mbtext, indent)),
|
||||
fprintf(opt.outfile, "%s%s%s", indentspc, u32_strconv_to_output(advance32(input.lines[j].mbtext, indent)),
|
||||
(input.final_newline || j < input.anz_lines - 1 ? "\n" : ""));
|
||||
BFREE (indentspc);
|
||||
}
|
||||
|
@ -30,7 +30,6 @@
|
||||
#include <string.h>
|
||||
#include <strings.h>
|
||||
|
||||
#include <uniconv.h>
|
||||
#include <unictype.h>
|
||||
#include <unistr.h>
|
||||
#include <unitypes.h>
|
||||
@ -514,7 +513,7 @@ void print_input_lines(const char *heading)
|
||||
fprintf(stderr, " [num_chars] \"real text\" [num_cols] \"ascii_text\"\n");
|
||||
for (size_t i = 0; i < input.anz_lines; ++i) {
|
||||
fprintf(stderr, "%4d [%02d] \"%s\" [%02d] \"%s\"", (int) i,
|
||||
(int) input.lines[i].num_chars, u32_strconv_to_locale(input.lines[i].mbtext),
|
||||
(int) input.lines[i].num_chars, u32_strconv_to_output(input.lines[i].mbtext),
|
||||
(int) input.lines[i].len, input.lines[i].text);
|
||||
fprintf(stderr, "\tTabs: [");
|
||||
if (input.lines[i].tabpos != NULL) {
|
||||
|
108
src/unicode.c
108
src/unicode.c
@ -24,22 +24,42 @@
|
||||
|
||||
#include "config.h"
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <uniconv.h>
|
||||
#include <unictype.h>
|
||||
#include <unistr.h>
|
||||
|
||||
#include "boxes.h"
|
||||
#include "tools.h"
|
||||
#include "unicode.h"
|
||||
|
||||
|
||||
const char *encoding; /* the character encoding that we use */
|
||||
|
||||
const ucs4_t char_tab = 0x00000009; /* ucs4_t character '\t' (tab) */
|
||||
const ucs4_t char_space = 0x00000020; /* ucs4_t character ' ' (space) */
|
||||
const ucs4_t char_cr = 0x0000000d; /* ucs4_t character '\r' (carriage return) */
|
||||
const ucs4_t char_newline = 0x0000000a; /* ucs4_t character '\n' (newline) */
|
||||
const ucs4_t char_esc = 0x0000001b; /* ucs4_t character 0x1b (escape) */
|
||||
const ucs4_t char_nul = 0x00000000; /* ucs4_t character '\0' (zero) */
|
||||
const char *config_encoding = "ISO_8859-15";
|
||||
|
||||
/* effective character encoding of input and output text */
|
||||
const char *encoding;
|
||||
|
||||
/* ucs4_t character '\t' (tab) */
|
||||
const ucs4_t char_tab = 0x00000009;
|
||||
|
||||
/* ucs4_t character ' ' (space) */
|
||||
const ucs4_t char_space = 0x00000020;
|
||||
|
||||
/* ucs4_t character '\r' (carriage return) */
|
||||
const ucs4_t char_cr = 0x0000000d;
|
||||
|
||||
/* ucs4_t character '\n' (newline) */
|
||||
const ucs4_t char_newline = 0x0000000a;
|
||||
|
||||
/* ucs4_t character 0x1b (escape) */
|
||||
const ucs4_t char_esc = 0x0000001b;
|
||||
|
||||
/* ucs4_t character '\0' (zero) */
|
||||
const ucs4_t char_nul = 0x00000000;
|
||||
|
||||
|
||||
|
||||
@ -176,4 +196,78 @@ uint32_t *advance32(uint32_t *s, const size_t offset)
|
||||
}
|
||||
|
||||
|
||||
|
||||
uint32_t *u32_strconv_from_input(const char *src)
|
||||
{
|
||||
return u32_strconv_from_arg(src, encoding);
|
||||
}
|
||||
|
||||
|
||||
|
||||
uint32_t *u32_strconv_from_arg(const char *src, const char *sourceEncoding)
|
||||
{
|
||||
if (src == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
if (src[0] == '\0') {
|
||||
return new_empty_string32();
|
||||
}
|
||||
|
||||
uint32_t *result = u32_strconv_from_encoding(
|
||||
src, /* the source string to convert */
|
||||
sourceEncoding, /* the character encoding from which to convert */
|
||||
iconveh_question_mark); /* produce one question mark '?' per unconvertible character */
|
||||
|
||||
if (result == NULL) {
|
||||
fprintf(stderr, "%s: failed to convert from '%s' to UTF-32: %s\n", PROJECT, sourceEncoding, strerror(errno));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
|
||||
char *u32_strconv_to_output(const uint32_t *src)
|
||||
{
|
||||
return u32_strconv_to_arg(src, encoding);
|
||||
}
|
||||
|
||||
|
||||
|
||||
char *u32_strconv_to_arg(const uint32_t *src, const char *targetEncoding)
|
||||
{
|
||||
if (src == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
if (is_empty(src)) {
|
||||
return strdup("");
|
||||
}
|
||||
|
||||
char *result = u32_strconv_to_encoding(
|
||||
src, /* the source string to convert */
|
||||
targetEncoding, /* the character encoding to which to convert */
|
||||
iconveh_question_mark); /* produce one question mark '?' per unconvertible character */
|
||||
|
||||
if (result == NULL) {
|
||||
fprintf(stderr, "%s: failed to convert from UTF-32 to '%s': %s\n", PROJECT, targetEncoding, strerror(errno));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
|
||||
const char *check_encoding(const char *manual_encoding, const char *system_encoding)
|
||||
{
|
||||
if (manual_encoding != NULL) {
|
||||
uint32_t *unicode = u32_strconv_from_encoding(" ", manual_encoding, iconveh_error);
|
||||
if (unicode != NULL) {
|
||||
BFREE(unicode);
|
||||
return manual_encoding;
|
||||
}
|
||||
fprintf(stderr, "%s: Invalid character encoding: %s - falling back to %s\n",
|
||||
PROJECT, manual_encoding, system_encoding);
|
||||
}
|
||||
return system_encoding;
|
||||
}
|
||||
|
||||
|
||||
/*EOF*/ /* vim: set sw=4: */
|
||||
|
@ -28,16 +28,31 @@
|
||||
#include <unitypes.h>
|
||||
|
||||
|
||||
extern const char *encoding; /* the character encoding that we use */
|
||||
|
||||
extern const ucs4_t char_tab; /* ucs4_t character '\t' (tab) */
|
||||
extern const ucs4_t char_space; /* ucs4_t character ' ' (space) */
|
||||
extern const ucs4_t char_cr; /* ucs4_t character '\r' (carriage return) */
|
||||
extern const ucs4_t char_newline; /* ucs4_t character '\n' (newline) */
|
||||
extern const ucs4_t char_esc; /* ucs4_t character 0x1b (escape) */
|
||||
extern const ucs4_t char_nul; /* ucs4_t character '\0' (zero) */
|
||||
/** The boxes config file is still encoded with a single-byte character set. Officially, it is ASCII!
|
||||
* However, people might not conform to this, so we use ISO_8859-15 as a reasonable superset. */
|
||||
extern const char *config_encoding;
|
||||
|
||||
/** the character encoding of input (and output) text */
|
||||
extern const char *encoding;
|
||||
|
||||
/** ucs4_t character '\t' (tab) */
|
||||
extern const ucs4_t char_tab;
|
||||
|
||||
/** ucs4_t character ' ' (space) */
|
||||
extern const ucs4_t char_space;
|
||||
|
||||
/** ucs4_t character '\r' (carriage return) */
|
||||
extern const ucs4_t char_cr;
|
||||
|
||||
/** ucs4_t character '\n' (newline) */
|
||||
extern const ucs4_t char_newline;
|
||||
|
||||
/** ucs4_t character 0x1b (escape) */
|
||||
extern const ucs4_t char_esc;
|
||||
|
||||
/** ucs4_t character '\0' (zero) */
|
||||
extern const ucs4_t char_nul;
|
||||
|
||||
int is_char_at(const uint32_t *text, const size_t idx, const ucs4_t expected_char);
|
||||
|
||||
@ -75,6 +90,55 @@ uint32_t *advance_next32(const uint32_t *s, size_t *invis);
|
||||
*/
|
||||
uint32_t *advance32(uint32_t *s, const size_t offset);
|
||||
|
||||
/**
|
||||
* Convert a string from the input/output encoding (`encoding` in this .h file) to UTF-32 internal representation.
|
||||
* Memory will be allocated for the converted string.
|
||||
*
|
||||
* @param <src> string to convert, zero-terminated
|
||||
* @return UTF-32 string, or NULL in case of error (then an error message was already printed on stderr)
|
||||
*/
|
||||
uint32_t *u32_strconv_from_input(const char *src);
|
||||
|
||||
/**
|
||||
* Convert a string from the given source encoding to UTF-32 internal representation.
|
||||
* Memory will be allocated for the converted string.
|
||||
*
|
||||
* @param <src> string to convert, zero-terminated
|
||||
* @param <sourceEncoding> the character encoding of <src>
|
||||
* @return UTF-32 string, or NULL in case of error (then an error message was already printed on stderr)
|
||||
*/
|
||||
uint32_t *u32_strconv_from_arg(const char *src, const char *sourceEncoding);
|
||||
|
||||
/**
|
||||
* Convert a string from UTF-32 internal representation to input/output encoding (`encoding` in this .h file).
|
||||
* Memory will be allocated for the converted string.
|
||||
*
|
||||
* @param <src> UTF-32 string to convert, zero-terminated
|
||||
* @return string in input/output encoding, or NULL on error (then an error message was already printed on stderr)
|
||||
*/
|
||||
char *u32_strconv_to_output(const uint32_t *src);
|
||||
|
||||
/**
|
||||
* Convert a string from UTF-32 internal representation to the given target encoding.
|
||||
* Memory will be allocated for the converted string.
|
||||
*
|
||||
* @param <src> UTF-32 string to convert, zero-terminated
|
||||
* @param <targetEncoding> the character encoding of the result
|
||||
* @return string in target encoding, or NULL in case of error (then an error message was already printed on stderr)
|
||||
*/
|
||||
char *u32_strconv_to_arg(const uint32_t *src, const char *targetEncoding);
|
||||
|
||||
/**
|
||||
* Check if the given <manual_encoding> can be used to convert anything. This should reveal invalid encoding names that
|
||||
* have been specified on the command line. If no <manual_encoding> was specified, or if an invalid encoding is
|
||||
* detected, we fall back to the system encoding. No new memory is allocated.
|
||||
*
|
||||
* @param <manual_encoding> the encoding set on the command line, may be NULL
|
||||
* @param <system_encoding> the system encoding
|
||||
* @return <manual_encoding> if it is set to a valid value, <system_encoding> otherwise
|
||||
*/
|
||||
const char *check_encoding(const char *manual_encoding, const char *system_encoding);
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
24
test/111_manual_encoding_iso.txt
Normal file
24
test/111_manual_encoding_iso.txt
Normal file
@ -0,0 +1,24 @@
|
||||
:ARGS
|
||||
-ac -n ISO_8859-15
|
||||
:INPUT
|
||||
ä
|
||||
äb
|
||||
äbç
|
||||
äbçd
|
||||
äbçdé
|
||||
äbçdéf
|
||||
äbçdéfg
|
||||
äbçdéfgh
|
||||
:OUTPUT-FILTER
|
||||
:EXPECTED
|
||||
/**************/
|
||||
/* ä */
|
||||
/* äb */
|
||||
/* äbç */
|
||||
/* äbçd */
|
||||
/* äbçdé */
|
||||
/* äbçdéf */
|
||||
/* äbçdéfg */
|
||||
/* äbçdéfgh */
|
||||
/**************/
|
||||
:EOF
|
Loading…
x
Reference in New Issue
Block a user