mirror of
https://github.com/ascii-boxes/boxes.git
synced 2025-02-15 01:00:40 +01:00
530 lines
14 KiB
Plaintext
530 lines
14 KiB
Plaintext
%top{
|
|
/*
|
|
* boxes - Command line filter to draw/remove ASCII boxes around text
|
|
* Copyright (c) 1999-2023 Thomas Jensen and the boxes contributors
|
|
*
|
|
* This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public
|
|
* License, version 3, as published by the Free Software Foundation.
|
|
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
|
|
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
|
* details.
|
|
* You should have received a copy of the GNU General Public License along with this program.
|
|
* If not, see <https://www.gnu.org/licenses/>.
|
|
*
|
|
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
|
|
*/
|
|
|
|
/*
|
|
* flex lexical analyzer for boxes configuration files
|
|
*/
|
|
|
|
#include "config.h"
|
|
#include "bxstring.h"
|
|
|
|
/**
 * User data attached to the scanner (see `%option extra-type`). The rule actions reach it through `yyextra`.
 */
typedef struct {
    /** number of lexer errors seen so far; reset to 0 by the `Box` rule and used to limit repeated error output */
    int yyerrcnt;

    /** the currently active string delimiter character */
    char sdel;

    /** the currently active string escape character */
    char sesc;
} pass_to_flex;
|
|
|
|
|
|
/*
 * Valid characters to be used as string delimiters.
 * The following list must correspond to the SDELIM definition below.
 *
 * LEX_SDELIM_RECOMMENDED is the shorter list that is suggested to the user in the "invalid string delimiter"
 * error message.
 */
#define LEX_SDELIM "\"~'`!@%&*=:;<>?/|.\\"
#define LEX_SDELIM_RECOMMENDED "\"~'!|"
|
|
|
|
|
|
/**
|
|
* User-defined initializations for the lexer.
|
|
*
|
|
* Since this scanner must use REJECT in order to be able to process the string delimiter commands, it cannot
|
|
* dynamically enlarge its input buffer to accommodate larger tokens. Thus, we simply set the buffer size to the
|
|
* input file size plus 10 bytes margin-of-error.
|
|
*
|
|
* @param yyscanner pointer to the scanner data block
|
|
* @param configfile the path to the config file we are reading
|
|
*/
|
|
void inflate_inbuf(void *yyscanner, const bxstr_t *configfile);
|
|
|
|
}
|
|
|
|
%{
|
|
#include <string.h>
|
|
#include <sys/stat.h>
|
|
#include <unistd.h>
|
|
#include <unitypes.h>
|
|
|
|
#include "boxes.h"
|
|
#include "shape.h"
|
|
#include "tools.h"
|
|
#include "parsing.h"
|
|
#include "parser.h"
|
|
#include "unicode.h"
|
|
|
|
|
|
#define LEX_MAX_WARN 3 /* number of lex errors per design */
|
|
|
|
static void report_state_char(char *symbol, char c, char *expected_state_str);
|
|
|
|
static void report_state(char *symbol, char *text, char *expected_state_str);
|
|
|
|
static int change_string_delimiters(pass_to_flex *extra, char *delim_expr);
|
|
|
|
%}
|
|
|
|
|
|
/*
 * Scanner options. Among others, they request a reentrant scanner with a bison bridge (so that actions use
 * `yylval->...`), case-insensitive patterns, and the `pass_to_flex *` user data available as `yyextra`.
 */
%option 8bit
%option bison-bridge
%option case-insensitive
%option ecs
%option extra-type="pass_to_flex *"
%option never-interactive
%option nodefault
%option noinput
%option nounput
%option noyywrap
%option reentrant
%option warn
%option yylineno


/*
 * Exclusive start conditions used by the rules below.
 */
%x BOX
%x SAMPLE
%x SHAPES
%x ELASTIC
%x DELIMSPEC
%x PARENT
|
|
|
|
/*
 * The following paragraph contains patterns to recognize UTF-8 characters from a byte stream, based on
 * - https://stackoverflow.com/a/10253320/1005481 by Zack Weinberg (under CC-BY-SA 3.0 license)
 * - https://www.w3.org/2005/03/23-lex-U by Eric Prud'hommeaux, W3C (under the W3C Document License)
 */
PBOM    \xEF\xBB\xBF
U2A     [\xC2-\xDF][\x80-\xBF]
U2B     \xE0[\xA0-\xBF][\x80-\xBF]
U3A     [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}
U3B     \xED[\x80-\x9F][\x80-\xBF]
U4A     \xF0[\x90-\xBF][\x80-\xBF]{2}
U4B     [\xF1-\xF3][\x80-\xBF]{3}
U4C     \xF4[\x80-\x8F][\x80-\xBF]{2}
UTF_8   {U2A}|{U2B}|{U3A}|{U3B}|{U4A}|{U4B}|{U4C}

/*
 * Building blocks for the rules. SDELIM must list the same characters as the LEX_SDELIM macro.
 */
PWORD       (?:[a-zA-Z]|{UTF_8})(?:[a-zA-Z0-9_-]|{UTF_8})*
PASCII_ID   [a-zA-Z][a-zA-Z0-9_-]*
PWHITE      [ \t\r\n]
SDELIM      [\"~\'`!@\%\&\*=:;<>\?/|\.\\]
PPARENT     parent
PFILENAME   [^\r\n]+
|
|
|
|
|
|
%%
|
|
|
|
    /*
     * Precedence of rules:
     * - The rule that matches the most text wins.
     * - If two rules match the same amount of text, the one defined first (further up) wins.
     */
|
|
|
|
|
|
<INITIAL,BOX,DELIMSPEC,ELASTIC,SHAPES>{PWHITE}|{PBOM} /* ignore whitespace and a UTF-8 byte order mark; the action is empty, so no token is returned */
|
|
|
|
<DELIMSPEC>[^ \t\r\n]+ {
    /*
     * String delimiter spec - like WORD, but allow any character.
     *
     * The matched text is handed to change_string_delimiters(), which validates it and installs the new escape
     * and delimiter characters. Returns YDELIMSPEC on success and YUNREC if the spec was rejected. In both cases
     * the scanner goes back to the BOX state.
     *
     * yytext is passed on directly: it is only read and stays valid for the duration of this action, so no
     * private copy is needed (the former strdup() copy was never freed and never checked for NULL).
     */
    yylval->s = bxs_from_ascii("IGNORED");
    BEGIN(BOX);
    report_state("YDELIMS", yytext, "BOX");
    if (change_string_delimiters(yyextra, yytext) != 0) {
        return YUNREC;
    }
    return YDELIMSPEC;
}
|
|
|
|
|
|
<BOX,SHAPES>{SDELIM}.*$ {
    /*
     * Strings -- first match everything starting from a potential string delimiter until the end of the line. We
     * will give back what we don't need and also detect unterminated strings. Strings always end on the same line.
     *
     * Processing works on a private copy of the text after the opening delimiter:
     * - an escape character is removed and the character following it is taken literally;
     * - the first unescaped delimiter ends the string; everything after it is pushed back with yyless().
     * Returns STRING with the unescaped content in yylval->s, or YUNREC if no closing delimiter was found.
     */
    int rest_len = yyleng - 1;    /* length of string pointed to by p */
    int qcnt = 0;                 /* esc char count in current string */

    if (yytext[0] != yyextra->sdel) {
        REJECT;                   /* that was not our delimiter */
    }

    char *str = (char *) strdup(yytext + 1);
    if (str == NULL) {
        perror (PROJECT);
        exit (EXIT_FAILURE);
    }
    char *p = str;

    while (*p) {
        if (*p == yyextra->sesc) {
            /* drop the escape character by shifting the rest of the string one to the left */
            memmove (p, p+1, rest_len);    /* incl. '\0' */
            ++qcnt;
            --rest_len;
            if (*p == '\0') {
                break;                     /* escape character was the last character on the line */
            }
        }
        else if (*p == yyextra->sdel) {
            *p = '\0';
            /* consumed input = content before p + opening and closing delimiter + removed escape characters */
            yyless ((p - str) + 2 + qcnt);    /* string plus quotes */
#ifdef LEXER_DEBUG
            fprintf (stderr, " STRING: \"%s\"\n", str);
#endif

            uint32_t *utf8 = u32_strconv_from_arg(str, CONFIG_FILE_ENCODING);
            yylval->s = bxs_from_unicode(utf8);
            BFREE(utf8);
            BFREE(str);
            return STRING;
        }
        /* also reached after an escape: this steps over the escaped character without interpreting it */
        --rest_len;
        ++p;
    }
    if ((yyextra->yyerrcnt)++ < 5) {
        yyerror(NULL, "Unterminated String -- %s", yytext);
    }
    BFREE(str);
    return YUNREC;
}
|
|
|
|
|
|
<INITIAL>{PPARENT} {
    /* the `parent` keyword: switch to the PARENT state, in which the rest of the line is read as a file name */
    BEGIN(PARENT);
    report_state("YPARENT", yytext, "PARENT");
    return YPARENT;
}
|
|
|
|
<PARENT>{PFILENAME} {
    /*
     * File name of a parent config file: everything up to the end of the line. The text is converted from the
     * config file encoding and trimmed; the trimmed string is returned in yylval->s with token FILENAME.
     */
    uint32_t *utf8 = u32_strconv_from_arg(yytext, CONFIG_FILE_ENCODING);
    bxstr_t *bxstr = bxs_from_unicode(utf8);
    yylval->s = bxs_trim(bxstr);

    /* the untrimmed intermediate values are no longer needed */
    BFREE(utf8);
    bxs_free(bxstr);

    BEGIN(INITIAL);
    /* NOTE(review): if bxs_to_output() returns newly allocated memory, it is never freed here -- confirm */
    report_state("FILENAM", bxs_to_output(yylval->s), "INITIAL");
    return FILENAME;
}
|
|
|
|
<PARENT>\r?\n {
    /* This is triggered only when no parent filename was specified. No token is returned. */
    BEGIN(INITIAL);
    report_state(" NL", "", "INITIAL");
}
|
|
|
|
|
|
<BOX>Sample {
    /* start of a sample block: switch to the SAMPLE state, where the text is collected until an `ends` line */
    BEGIN(SAMPLE);
    report_state("YSAMPLE", yytext, "SAMPLE");
    return YSAMPLE;
}
|
|
|
|
<SAMPLE>\n {
    /*
     * Keep a newline as part of the sample text being accumulated in yytext. A newline that is the only
     * character in yytext (nothing accumulated yet) is dropped instead.
     */
    if (yyleng > 1) {
        yymore();
    }
}
|
|
|
|
<SAMPLE>^[ \t]*ends[ \t\r]*$ {
    /*
     * End of the sample block. At this point, yytext holds the whole accumulated sample text followed by the
     * `ends` line. The sample text is copied out and returned as STRING; the `ends` keyword itself is pushed back
     * so that the next rule can return YENDSAMPLE for it. An empty sample yields YUNREC.
     */
    char *p = yytext + yyleng -1;
    size_t len;                          /* length of sample */

    while (*p == ' ' || *p == '\t' || *p == '\r') {
        --p;                             /* skip trailing whitespace */
    }
    p -= 2;                              /* almost skip "ends" statement */
    *p = '\0';                           /* p now points to 'n' */
    /* yytext is temporarily cut off after the 'e' of "ends", so the copy contains the sample text plus that 'e' */
    char *sample = (char *) strdup(yytext);
    if (sample == NULL) {
        perror (PROJECT);
        exit (EXIT_FAILURE);
    }
    *p-- = 'n';                          /* restore yytext; p now points to the 'e' */

    len = p - yytext;                    /* yyless(n): push back all but the first n */
    yyless (len);                        /* allow the lexer to return YENDSAMPLE */

    sample[len] = '\n';                  /* replace 'e' with newline */
    btrim(sample, &len);
    if (len > 0) {
        uint32_t *utf8 = u32_strconv_from_arg(sample, CONFIG_FILE_ENCODING);
        uint32_t *nl = u32_strconv_from_arg("\n", CONFIG_FILE_ENCODING);
        bxstr_t *bxstr = bxs_from_unicode(utf8);
        /* result is the right-trimmed sample followed by a single newline */
        bxstr_t *bxstr2 = bxs_rtrim(bxstr);
        bxs_free(bxstr);
        bxstr = bxs_strcat(bxstr2, nl);
        BFREE(nl);
        BFREE(utf8);
        BFREE(sample);
        bxs_free(bxstr2);
        yylval->s = bxstr;
        return STRING;
    }
    else {
        if ((yyextra->yyerrcnt)++ < 5) {
            yyerror(NULL, "SAMPLE block must not be empty");
        }
        BFREE(sample);
        return YUNREC;
    }
}
|
|
|
|
<SAMPLE>. yymore(); /* any other character belongs to the sample text: keep it in yytext and continue matching */
|
|
|
|
<SAMPLE>ends[ \t\r]*$ {
    /* reached because the other rule pushes it back so a proper end token can be returned */
    BEGIN(BOX);
    report_state("YENDSAM", yytext, "BOX");
    return YENDSAMPLE;
}
|
|
|
|
|
|
<BOX>Tags {
    /* the `tags` keyword; the scanner stays in the BOX state */
#ifdef LEXER_DEBUG
    fprintf (stderr, " YTAGS: %s\n", yytext);
#endif
    return YTAGS;
}

<BOX>Elastic {
    /* the `elastic` keyword: switch to the ELASTIC state, which is left again by the closing ')' */
    BEGIN(ELASTIC);
    report_state("YELASTC", yytext, "ELASTIC");
    return YELASTIC;
}

<BOX>Shapes {
    /* the `shapes` keyword: switch to the SHAPES state, which is left again by the closing '}' */
    BEGIN(SHAPES);
    report_state("YSHAPES", yytext, "SHAPES");
    return YSHAPES;
}
|
|
|
|
<INITIAL>Box {
    /*
     * Start of a box design: switch to the BOX state, reset the error counter, and set the string escape
     * character to backslash and the string delimiter to the double quote.
     */
    BEGIN(BOX);
    report_state(" YBOX", yytext, "BOX");
    yyextra->yyerrcnt = 0;
    change_string_delimiters(yyextra, "\\\"");
    return YBOX;
}
|
|
|
|
<BOX>Replace { /* plain keywords of the BOX state, one token each */ return YREPLACE; }
<BOX>Reverse { return YREVERSE; }
<BOX>Padding { return YPADDING; }
<BOX>To { return YTO; }
<BOX>With { return YWITH; }
<BOX>Global { /* both flag keywords share one token; yylval->c tells them apart */ yylval->c = 'g'; return YRXPFLAG; }
<BOX>Once { yylval->c = 'o'; return YRXPFLAG; }
|
|
<BOX>End {
    /*
     * End of a box design: go back to the INITIAL state and set the string escape character to backslash and
     * the string delimiter to the double quote again, in case the design changed them.
     */
    BEGIN(INITIAL);
    report_state(" YEND", yytext, "INITIAL");
    change_string_delimiters(yyextra, "\\\"");
    return YEND;
}
|
|
|
|
|
|
<SHAPES,ELASTIC>nw  { /* each shape name yields a SHAPE token with the matching constant in yylval->shape */ yylval->shape = NW; return SHAPE; }
<SHAPES,ELASTIC>nnw { yylval->shape = NNW; return SHAPE; }
<SHAPES,ELASTIC>n   { yylval->shape = N;   return SHAPE; }
<SHAPES,ELASTIC>nne { yylval->shape = NNE; return SHAPE; }
<SHAPES,ELASTIC>ne  { yylval->shape = NE;  return SHAPE; }
<SHAPES,ELASTIC>ene { yylval->shape = ENE; return SHAPE; }
<SHAPES,ELASTIC>e   { yylval->shape = E;   return SHAPE; }
<SHAPES,ELASTIC>ese { yylval->shape = ESE; return SHAPE; }
<SHAPES,ELASTIC>se  { yylval->shape = SE;  return SHAPE; }
<SHAPES,ELASTIC>sse { yylval->shape = SSE; return SHAPE; }
<SHAPES,ELASTIC>s   { yylval->shape = S;   return SHAPE; }
<SHAPES,ELASTIC>ssw { yylval->shape = SSW; return SHAPE; }
<SHAPES,ELASTIC>sw  { yylval->shape = SW;  return SHAPE; }
<SHAPES,ELASTIC>wsw { yylval->shape = WSW; return SHAPE; }
<SHAPES,ELASTIC>w   { yylval->shape = W;   return SHAPE; }
<SHAPES,ELASTIC>wnw { yylval->shape = WNW; return SHAPE; }
|
|
|
|
<ELASTIC>\) {
    /* the closing parenthesis ends the elastic list: return to the BOX state and pass the character on as token */
    BEGIN(BOX);
    report_state_char("SYMBOL", yytext[0], "BOX");
    return yytext[0];
}

<SHAPES>\} {
    /* the closing brace ends the shapes block: return to the BOX state and pass the character on as token */
    BEGIN(BOX);
    report_state_char("SYMBOL", yytext[0], "BOX");
    return yytext[0];
}
|
|
|
|
|
|
<BOX>author|designer|created|revision|revdate|indent {
    /*
     * general key words
     *
     * Returns KEYWORD with a heap-allocated copy of the matched text in yylval->ascii. The program is terminated
     * if the copy cannot be allocated.
     */
#ifdef LEXER_DEBUG
    fprintf (stderr, "KEYWORD: %s\n", yytext);
#endif
    yylval->ascii = strdup(yytext);
    if (yylval->ascii == NULL) {
        perror (PROJECT);
        exit (EXIT_FAILURE);
    }
    return KEYWORD;
}
|
|
|
|
|
|
<BOX>Delimiter|Delim {
    /*
     * Change string delimiting characters. The specification itself is read in the DELIMSPEC state.
     */
    BEGIN(DELIMSPEC);
    report_state("YCHGDEL", yytext, "DELIMSPEC");
    return YCHGDEL;
}
|
|
|
|
<INITIAL,BOX>{PASCII_ID} {
    /*
     * a free-floating word which is not a string, i.e. it does not have delimiting characters (ASCII version)
     *
     * Returns ASCII_ID with a heap-allocated copy of the matched text in yylval->ascii. The program is terminated
     * if the copy cannot be allocated.
     */
    yylval->ascii = strdup(yytext);
    if (yylval->ascii == NULL) {
        perror (PROJECT);
        exit (EXIT_FAILURE);
    }
#ifdef LEXER_DEBUG
    fprintf (stderr, "ASCIIID: %s\n", yylval->ascii);
#endif
    return ASCII_ID;
}
|
|
|
|
<INITIAL,BOX>{PWORD} {
    /*
     * a free-floating word which is not a string, i.e. it does not have delimiting characters
     *
     * This rule only wins for words that {PASCII_ID} does not match in full, because that rule is defined first.
     * Returns WORD with the text, converted from the config file encoding, in yylval->s.
     */
    uint32_t *utf8 = u32_strconv_from_arg(yytext, CONFIG_FILE_ENCODING);
    yylval->s = bxs_from_unicode(utf8);
    if (yylval->s == NULL) {
        perror (PROJECT);
        exit (EXIT_FAILURE);
    }
#ifdef LEXER_DEBUG
    fprintf (stderr, " WORD: %s\n", u32_strconv_to_output(utf8));
#endif
    BFREE(utf8);
    return WORD;
}
|
|
|
|
|
|
<BOX>[\+-]?[0-9]+ {
    /*
     * An optionally signed decimal integer. Returns YNUMBER with the value in yylval->num.
     * NOTE(review): atoi() does not detect values that are out of range for an int.
     */
#ifdef LEXER_DEBUG
    fprintf (stderr, "YNUMBER: %s\n", yytext);
#endif
    yylval->num = atoi (yytext);
    return YNUMBER;
}
|
|
|
|
|
|
<BOX,SHAPES,ELASTIC>[,(){}] {
    /*
     * Punctuation is passed to the parser as a single-character token. A ')' in the ELASTIC state and a '}' in
     * the SHAPES state never get here, because the rules for them are defined further up.
     */
#ifdef LEXER_DEBUG
    fprintf (stderr, " SYMBOL: \'%c\'\n", yytext[0]);
#endif
    return yytext[0];
}
|
|
|
|
|
|
<INITIAL,BOX,SHAPES,ELASTIC>#.*$ {
    /* ignore comments, which run from '#' to the end of the line; no token is returned */
#ifdef LEXER_DEBUG
    fprintf (stderr, "COMMENT: %s\n", yytext+1);
#endif
}
|
|
|
|
|
|
<INITIAL,BOX,SHAPES,ELASTIC>. {
    /* a character that made no sense where it was encountered. Let the parser handle it. */
#ifdef LEXER_DEBUG
    fprintf (stderr, " YUNREC: \'%c\'\n", yytext[0]);
#endif
    return YUNREC;
}
|
|
|
|
|
|
%%
|
|
|
|
|
|
void inflate_inbuf(void *yyscanner, const bxstr_t *configfile)
|
|
{
|
|
struct stat sinf;
|
|
|
|
char *utf8 = u32_strconv_to_arg(configfile->memory, "UTF-8");
|
|
if (stat(utf8, &sinf)) {
|
|
perror (PROJECT);
|
|
BFREE(utf8);
|
|
exit (EXIT_FAILURE);
|
|
}
|
|
BFREE(utf8);
|
|
struct yyguts_t *yyg = (struct yyguts_t *) yyscanner;
|
|
yy_delete_buffer(YY_CURRENT_BUFFER, yyscanner);
|
|
yy_switch_to_buffer (yy_create_buffer(yyin, sinf.st_size+10, yyscanner), yyscanner);
|
|
BEGIN(INITIAL);
|
|
}
|
|
|
|
|
|
|
|
/*
 * Debug helper: like report_state(), but for a single character, which is shown in single quotes.
 * Characters outside the printable ASCII range (' ' to 126) are shown as '?'.
 *
 * symbol:             name of the token being reported
 * c:                  the character to show
 * expected_state_str: name of the lexer state that is expected to be active now
 */
static void report_state_char(char *symbol, char c, char *expected_state_str)
{
    /*
     * The text always has the same size (quote, character, quote, '\0'), so a local buffer is sufficient.
     * This avoids an unchecked heap allocation.
     */
    char quoted[4];
    snprintf(quoted, sizeof quoted, "'%c'", c >= ' ' && c <= 126 ? c : '?');
    report_state(symbol, quoted, expected_state_str);
}
|
|
|
|
|
|
|
|
/*
 * Debug helper: print a recognized token together with the expected lexer state to stderr.
 * Prints nothing unless LEXER_DEBUG is defined at compile time.
 *
 * symbol:             name of the token being reported (right-aligned to 7 characters in the output)
 * text:               the text that was matched
 * expected_state_str: name of the lexer state that is expected to be active now
 */
static void report_state(char *symbol, char *text, char *expected_state_str)
{
    /* a runtime flag is used so that all parameters are referenced even in non-debug builds */
    int lexerDebug = 0;
#ifdef LEXER_DEBUG
    lexerDebug = 1;
#endif
    if (lexerDebug) {
        fprintf(stderr, "%7s: %s -- STATE %s\n", symbol, text, expected_state_str);
    }
}
|
|
|
|
|
|
|
|
/*
 * Validate a string delimiter specification and, if it is valid, make it the active one.
 *
 * A valid specification consists of exactly two different characters: the escape character followed by the
 * string delimiter. The delimiter must be one of the characters in LEX_SDELIM. If the specification is invalid,
 * an error is reported via yyerror() and the active delimiters remain unchanged.
 *
 * extra:      the scanner's user data, which receives the new escape and delimiter characters
 * delim_expr: the delimiter specification (must not be NULL)
 *
 * Returns 0 on success, 1 if the specification was rejected.
 */
static int change_string_delimiters(pass_to_flex *extra, char *delim_expr)
{
    if (strlen(delim_expr) != 2) {
        yyerror(NULL, "invalid string delimiter specification -- %s", delim_expr);
        return 1;
    }
    if (delim_expr[0] == delim_expr[1]) {
        yyerror(NULL, "string delimiter and escape char may not be the same");
        return 1;
    }
    /* only the delimiter is restricted to a fixed set of characters; the escape character is not checked */
    if (strchr (LEX_SDELIM, delim_expr[1]) == NULL) {
        yyerror(NULL, "invalid string delimiter -- %c (try one of %s)", delim_expr[1], LEX_SDELIM_RECOMMENDED);
        return 1;
    }

#ifdef LEXER_DEBUG
    fprintf(stderr, "YDELIMS: change_string_delimiters('%c', '%c')\n", delim_expr[0], delim_expr[1]);
#endif
    extra->sesc = delim_expr[0];
    extra->sdel = delim_expr[1];

    return 0;
}
|
|
|
|
|
|
/*EOF*/ /* vim: set cindent sw=4: */
|