mirror of
https://github.com/NikitaIvanovV/ctpv.git
synced 2025-01-23 08:18:34 +01:00
Add lexer for config parsing
This commit is contained in:
parent
0ae355868d
commit
55cd11f545
363
lexer.c
Normal file
363
lexer.c
Normal file
@ -0,0 +1,363 @@
|
||||
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>

#include "error.h"
#include "lexer.h"
#include "vector.h"
|
||||
|
||||
/*
 * Report a config parse error prefixed with the lexer's current line and
 * column.  `c` is a Lexer object (callers pass `*ctx`), and `format` must be
 * a string literal because it is concatenated with the location prefix.
 */
#define PARSEERROR(c, format, ...)                                        \
    print_errorf("config parse error:%u:%u " format, (c).line, (c).col   \
                 __VA_OPT__(, ) __VA_ARGS__)
|
||||
|
||||
/* Build a Token that only carries a type; its value member is zeroed. */
#define TOK_TYPE_ALIAS(t) ((Token){ .type = (t) })

/* Value-less tokens used as results by the read_*() helpers. */
#define NULL_TOK TOK_TYPE_ALIAS(TOK_NULL) /* reader did not match */
#define EOF_TOK  TOK_TYPE_ALIAS(TOK_EOF)  /* end of input */
#define END_TOK  TOK_TYPE_ALIAS(TOK_END)  /* newline */
#define ERR_TOK  TOK_TYPE_ALIAS(TOK_ERR)  /* parse error already reported */
|
||||
|
||||
/*
 * Call read_punct() for the punctuation string `s`, passing LEN(s) - 1 as its
 * length.  NOTE(review): LEN is defined elsewhere; presumably it yields the
 * element count, so `s` must be a char array (not a pointer) and the -1 drops
 * the terminating NUL - confirm.
 */
#define READ_PUNCT(c, t, s) read_punct((c), (t), (s), LEN(s) - 1)
|
||||
|
||||
/* Character-class test with the same signature as the <ctype.h> is*() functions. */
typedef int (*Predicate)(int);
|
||||
|
||||
/* Read-ahead buffer sitting between the input stream and the lexer. */
typedef struct {
    unsigned int pos; /* index in buf of the next unread byte */
    unsigned int len; /* number of valid bytes currently held in buf */
    unsigned int eof; /* non-zero once the stream has reported end-of-file */
    FILE *f;          /* stream the bytes are read from */
    char buf[1024];   /* raw bytes read from f */
} InputBuffer;
|
||||
|
||||
typedef struct {
|
||||
unsigned int back, front;
|
||||
Token toks[16];
|
||||
} TokenQueue;
|
||||
|
||||
struct Lexer {
|
||||
unsigned int line, col;
|
||||
InputBuffer input_buf;
|
||||
TokenQueue tok_queue;
|
||||
VectorChar *text_buf;
|
||||
};
|
||||
|
||||
/* Delimiters that open and close a block; the text between them becomes one string token. */
static char block_open[] = "{{{", block_close[] = "}}}";
|
||||
|
||||
/*
 * Append `tok` to the lexer's pending-token queue.
 *
 * The queue is a ring buffer in which back == front means "empty", so one
 * slot always has to stay unused.  Writing into the last free slot would make
 * a full queue look empty and silently drop every queued token; that is a
 * programming error and is caught by the assertion.
 */
static void add_token_queue(Lexer *ctx, Token tok)
{
    TokenQueue *q = &ctx->tok_queue;
    unsigned int next = (q->back + 1) % LEN(q->toks);

    assert(next != q->front);

    q->toks[q->back] = tok;
    q->back = next;
}
|
||||
|
||||
/*
 * Take the oldest token out of the pending-token queue and return it.
 * The caller is expected to check is_empty_token_queue() first; no emptiness
 * check is done here.
 */
static Token remove_token_queue(Lexer *ctx)
{
    TokenQueue *q = &ctx->tok_queue;
    unsigned int slot = q->front;

    q->front = (slot + 1) % LEN(q->toks);

    return q->toks[slot];
}
|
||||
|
||||
/* Return non-zero when no tokens are waiting in the queue. */
static inline int is_empty_token_queue(Lexer *ctx)
{
    const TokenQueue *q = &ctx->tok_queue;

    return q->front == q->back;
}
|
||||
|
||||
/*
 * Reset `b` to an empty buffer that reads from `f`.
 * The contents of b->buf are left untouched; with len == 0 none of it counts
 * as valid data.
 */
static void init_input_buf(InputBuffer *b, FILE *f)
{
    b->f = f;
    b->eof = 0;
    b->pos = b->len = 0;
}
|
||||
|
||||
/*
 * Look at the byte `i` positions ahead of the read position without
 * consuming anything.
 *
 * Returns the byte as a value in 0..255, or -1 when fewer than i + 1 bytes
 * remain before end of input.  `i` must be smaller than the buffer size.
 *
 * When the requested byte is not buffered yet, the bytes that have not been
 * consumed are moved to the front of the buffer and the free space behind
 * them is filled from the stream, so look-ahead works across refills.
 */
static int peekn_char(Lexer *ctx, unsigned int i)
{
    InputBuffer *b = &ctx->input_buf;

    /* Fast path: the byte is already in the buffer. */
    if (b->pos + i < b->len)
        return (unsigned char)b->buf[b->pos + i];

    /* Nothing more can be read, so the byte does not exist. */
    if (b->eof)
        return -1;

    assert(i < LEN(b->buf));

    /* pos may have been advanced past len by a read at end of input. */
    unsigned int unread = b->pos < b->len ? b->len - b->pos : 0;

    if (unread > 0)
        memmove(b->buf, b->buf + b->pos, unread);

    size_t space = LEN(b->buf) - unread;
    size_t got = fread(b->buf + unread, sizeof(*b->buf), space, b->f);

    b->pos = 0;
    b->len = unread + (unsigned int)got;

    /*
     * fread() only returns a short count on end-of-file or on error; in both
     * cases there is no point in reading from the stream again.
     */
    if (got < space) {
        if (ferror(b->f))
            PRINTINTERR("fread() failed");

        b->eof = 1;
    }

    if (i >= b->len)
        return -1;

    /* Cast keeps bytes >= 0x80 distinct from the -1 sentinel. */
    return (unsigned char)b->buf[i];
}
|
||||
|
||||
/*
 * Look at the next unread byte without consuming it.
 *
 * Returns whatever peekn_char(ctx, 0) returns, -1 meaning end of input.  The
 * result is an int rather than a char so that the -1 sentinel is not turned
 * into 255 on platforms where plain char is unsigned; callers that store the
 * result in a char keep working as before.
 */
static inline int peek_char(Lexer *ctx)
{
    return peekn_char(ctx, 0);
}
|
||||
|
||||
/*
 * Consume one byte of input and return the byte found `i` positions ahead.
 *
 * `i` only selects which byte is inspected and returned; the read position
 * always advances by exactly one.  The line/column counters are updated from
 * the returned byte: a newline starts a new line at column 1, anything else
 * moves one column to the right.
 */
static char nextn_char(Lexer *ctx, unsigned int i)
{
    char ch = peekn_char(ctx, i);

    if (ch == '\n') {
        ctx->line++;
        ctx->col = 1;
    } else {
        ctx->col++;
    }

    ctx->input_buf.pos++;

    return ch;
}
|
||||
|
||||
/* Consume the next byte of input and return it (see nextn_char()). */
static inline char next_char(Lexer *ctx)
{
    const unsigned int offset = 0;

    return nextn_char(ctx, offset);
}
|
||||
|
||||
/* Consume `n` bytes of input, discarding them; does nothing when n <= 0. */
static void skipn_char(Lexer *ctx, int n)
{
    while (n-- > 0)
        next_char(ctx);
}
|
||||
|
||||
/* Append one character to the lexer's text buffer. */
static inline void add_text_buf(Lexer *ctx, char c)
{
    VectorChar *text = ctx->text_buf;

    vectorChar_append(text, c);
}
|
||||
|
||||
/*
 * Return a pointer to the character at index `i` of the text buffer, as
 * reported by vector_get().
 */
static inline char *get_text_buf_at(Lexer *ctx, size_t i)
{
    Vector *text = (Vector *)ctx->text_buf;
    char *at = vector_get(text, i);

    return at;
}
|
||||
|
||||
/* Return the number of characters currently stored in the text buffer. */
static inline size_t get_text_buf_len(Lexer *ctx)
{
    const VectorChar *text = ctx->text_buf;

    return text->len;
}
|
||||
|
||||
/*
 * Set the text buffer's length to `len` via vectorChar_resize(); used to
 * discard text that was appended only temporarily.
 */
static inline void set_text_buf_len(Lexer *ctx, size_t len)
{
    VectorChar *text = ctx->text_buf;

    vectorChar_resize(text, len);
}
|
||||
|
||||
Lexer *lexer_init(FILE *f)
|
||||
{
|
||||
Lexer *ctx;
|
||||
|
||||
if (!(ctx = malloc(sizeof(*ctx)))) {
|
||||
PRINTINTERR(FUNCFAILED("malloc"), ERRNOS);
|
||||
abort();
|
||||
}
|
||||
|
||||
init_input_buf(&ctx->input_buf, f);
|
||||
ctx->text_buf = vectorChar_new(1024);
|
||||
ctx->line = 1;
|
||||
ctx->col = 1;
|
||||
ctx->tok_queue.back = 0;
|
||||
ctx->tok_queue.front = 0;
|
||||
|
||||
return ctx;
|
||||
}
|
||||
|
||||
/*
 * Release a lexer created by lexer_init(), including its text buffer.
 *
 * Passing NULL is allowed and does nothing, mirroring free().  The input
 * stream is not closed here.
 */
void lexer_free(Lexer *ctx)
{
    if (ctx == NULL)
        return;

    vectorChar_free(ctx->text_buf);
    free(ctx);
}
|
||||
|
||||
/*
 * Compare the next `n` bytes of input with the string `s` without consuming
 * any input.
 *
 * Returns 0 when all `n` bytes match (or when n <= 0).  Otherwise returns the
 * difference, as unsigned char values, between the first input byte that
 * differs and the corresponding byte of `s`; reaching the end of `s` before
 * `n` bytes were compared counts as a difference at the terminator.
 *
 * Only the bytes at offsets 0..n-1 are peeked, so the comparison never asks
 * the input buffer for data beyond the window it actually needs.
 */
static int cmp_nextn(Lexer *ctx, int n, char *s)
{
    for (int i = 0; i < n; i++) {
        unsigned char in = (unsigned char)peekn_char(ctx, i);
        unsigned char want = (unsigned char)s[i];

        if (want == '\0' || in != want)
            return in - want;
    }

    return 0;
}
|
||||
|
||||
/*
 * Consume input for as long as the predicate `p` accepts the next byte.
 *
 * When `add` is non-zero every consumed byte is appended to the text buffer
 * and a terminating NUL is appended afterwards (even if nothing matched).
 * Reading stops at end of input.
 *
 * The peeked value is kept in an int and taken straight from peekn_char() so
 * that the -1 end-of-input sentinel is recognised regardless of whether plain
 * char is signed, and so that `p` is never called with that sentinel.
 */
static void read_while(Lexer *ctx, Predicate p, int add)
{
    for (;;) {
        int c = peekn_char(ctx, 0);

        if (c < 0 || !p(c))
            break;

        if (add)
            add_text_buf(ctx, (char)c);

        next_char(ctx);
    }

    if (add)
        add_text_buf(ctx, '\0');
}
|
||||
|
||||
/*
 * Try to read the end of input.
 *
 * Returns EOF_TOK when no input is left and NULL_TOK otherwise.  The peeked
 * value is kept in an int and taken straight from peekn_char(): stored in a
 * plain char, the -1 sentinel would become 255 where char is unsigned and end
 * of input would never be detected.
 */
static inline Token read_eof(Lexer *ctx)
{
    int c = peekn_char(ctx, 0);

    if (c >= 0)
        return NULL_TOK;

    next_char(ctx);

    return EOF_TOK;
}
|
||||
|
||||
/*
 * Try to read a newline.
 * Consumes it and returns END_TOK on success, otherwise returns NULL_TOK and
 * leaves the input untouched.
 */
static inline Token read_newline(Lexer *ctx)
{
    if (peek_char(ctx) == '\n') {
        next_char(ctx);
        return END_TOK;
    }

    return NULL_TOK;
}
|
||||
|
||||
/*
 * Try to read a symbol: a letter followed by any number of letters or digits.
 *
 * On success the symbol is stored NUL-terminated in the text buffer and a
 * TOK_STR token holding its offset is returned; otherwise NULL_TOK is
 * returned and no input is consumed.
 *
 * The byte is converted to unsigned char before it is handed to isalpha():
 * passing a negative value other than EOF to a <ctype.h> function is
 * undefined behaviour.
 */
static inline Token read_symbol(Lexer *ctx)
{
    int c = peekn_char(ctx, 0);

    if (c < 0 || !isalpha((unsigned char)c))
        return NULL_TOK;

    size_t start = get_text_buf_len(ctx);

    read_while(ctx, isalnum, 1);

    return (Token){ TOK_STR, { .sp = start } };
}
|
||||
|
||||
/*
 * Try to read a non-negative decimal integer.
 *
 * Returns a TOK_INT token holding the value, NULL_TOK when the input does not
 * start with a digit (nothing is consumed), or ERR_TOK after reporting a
 * parse error when the number does not fit into an int.
 *
 * The digits are collected in the text buffer only temporarily; the buffer is
 * cut back to its previous length before returning.  strtol() is used instead
 * of atoi() because atoi() has undefined behaviour on out-of-range input, and
 * the byte is converted to unsigned char before it reaches isdigit() because
 * negative values other than EOF are not valid <ctype.h> arguments.
 */
static inline Token read_digit(Lexer *ctx)
{
    int c = peekn_char(ctx, 0);

    if (c < 0 || !isdigit((unsigned char)c))
        return NULL_TOK;

    size_t start = get_text_buf_len(ctx);

    read_while(ctx, isdigit, 1);

    errno = 0;
    long value = strtol(get_text_buf_at(ctx, start), NULL, 10);
    int too_large = errno == ERANGE || value > INT_MAX;

    set_text_buf_len(ctx, start);

    if (too_large) {
        PARSEERROR(*ctx, "integer is too large");
        return ERR_TOK;
    }

    return (Token){ TOK_INT, { .i = (int)value } };
}
|
||||
|
||||
/*
 * Try to read the punctuation string `s` of length `n`.
 *
 * Returns EOF_TOK at end of input, NULL_TOK when the input does not start
 * with `s` (nothing is consumed), and otherwise consumes the `n` bytes and
 * returns a token of the given `type` with a zeroed value.
 *
 * End of input is tested on the int returned by peekn_char(); going through a
 * plain char would hide the -1 sentinel where char is unsigned, and callers
 * that loop until they see EOF_TOK would then never terminate.
 */
static Token read_punct(Lexer *ctx, int type, char *s, int n)
{
    if (peekn_char(ctx, 0) < 0)
        return EOF_TOK;

    if (cmp_nextn(ctx, n, s) != 0)
        return NULL_TOK;

    skipn_char(ctx, n);

    return TOK_TYPE_ALIAS(type);
}
|
||||
|
||||
/*
 * Try to read the block opening delimiter.
 * Result is that of read_punct(): TOK_BLK_OPEN, NULL_TOK or EOF_TOK.
 */
static inline Token read_block_open(Lexer *ctx)
{
    return read_punct(ctx, TOK_BLK_OPEN, block_open, LEN(block_open) - 1);
}
|
||||
|
||||
/*
 * Try to read the block closing delimiter.
 * Result is that of read_punct(): TOK_BLK_CLS, NULL_TOK or EOF_TOK.
 */
static inline Token read_block_close(Lexer *ctx)
{
    return read_punct(ctx, TOK_BLK_CLS, block_close, LEN(block_close) - 1);
}
|
||||
|
||||
/*
 * Try to read a whole block: opening delimiter, raw body, closing delimiter.
 *
 * Returns NULL_TOK when the input does not start with the opening delimiter.
 * On success the opening token is returned and two more tokens are queued for
 * the following lexer_get_token() calls: a TOK_STR token with the body text
 * and the closing token.  If the input ends before the closing delimiter, a
 * parse error is reported, the partially collected body is removed from the
 * text buffer again and ERR_TOK is returned.
 */
static Token read_block(Lexer *ctx)
{
    Token open_tok = read_block_open(ctx);

    /* Anything but a real opener (including end of input) is "no block". */
    if (open_tok.type != TOK_BLK_OPEN)
        return NULL_TOK;

    size_t body_start = get_text_buf_len(ctx);
    Token close_tok;

    for (;;) {
        close_tok = read_block_close(ctx);

        if (close_tok.type == TOK_EOF) {
            /* Drop the unterminated body so it does not linger in the buffer. */
            set_text_buf_len(ctx, body_start);
            PARSEERROR(*ctx, "unclosed block");
            return ERR_TOK;
        }

        if (close_tok.type != TOK_NULL)
            break;

        add_text_buf(ctx, next_char(ctx));
    }

    add_text_buf(ctx, '\0');

    add_token_queue(ctx, (Token){ TOK_STR, { .sp = body_start } });
    add_token_queue(ctx, close_tok);

    return open_tok;
}
|
||||
|
||||
/*
 * Run the reader `func` on lexer `c` and return its token from the enclosing
 * function unless the reader reported "no match" (TOK_NULL).
 */
#define ATTEMPT_READ(c, func)                  \
    do {                                       \
        Token attempt_tok = (func)(c);         \
        if (attempt_tok.type != TOK_NULL)      \
            return attempt_tok;                \
    } while (0)
|
||||
|
||||
/*
 * Return the next token of the input.
 *
 * Tokens queued by an earlier call are handed out first.  Otherwise blanks
 * are skipped and the readers are tried in a fixed order (end of input,
 * newline, symbol, integer, block); the first one that matches supplies the
 * token.  If none matches, a parse error naming the offending character is
 * reported and ERR_TOK is returned; the character is not consumed.
 */
Token lexer_get_token(Lexer *ctx)
{
    static Token (*const readers[])(Lexer *) = {
        read_eof, read_newline, read_symbol, read_digit, read_block,
    };

    if (!is_empty_token_queue(ctx))
        return remove_token_queue(ctx);

    read_while(ctx, isblank, 0);

    for (size_t k = 0; k < sizeof(readers) / sizeof(readers[0]); k++) {
        Token tok = readers[k](ctx);

        if (tok.type != TOK_NULL)
            return tok;
    }

    PARSEERROR((*ctx), "cannot handle character: %c", peek_char(ctx));

    return ERR_TOK;
}
|
||||
|
||||
/*
 * Return the text of a TOK_STR token, or NULL for any other token type.
 * The pointer refers to the lexer's text buffer at the offset stored in the
 * token; it is not a copy.
 */
char *lexer_get_string(Lexer *ctx, Token tok)
{
    return tok.type == TOK_STR ? get_text_buf_at(ctx, tok.val.sp) : NULL;
}
|
31
lexer.h
Normal file
31
lexer.h
Normal file
@ -0,0 +1,31 @@
|
||||
#ifndef LEXER_H
|
||||
#define LEXER_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
typedef struct Lexer Lexer;
|
||||
|
||||
/* A lexical token: a type tag plus an optional value. */
typedef struct {
    enum {
        TOK_NULL,     /* no token; used internally for "reader did not match" */
        TOK_EOF,      /* end of input */
        TOK_ERR,      /* a parse error was reported */
        TOK_END,      /* newline */
        TOK_BLK_OPEN, /* block opening delimiter */
        TOK_BLK_CLS,  /* block closing delimiter */
        TOK_INT,      /* integer; value in val.i */
        TOK_STR,      /* string; text available via lexer_get_string() */
    } type;
    union {
        int i;     /* value of a TOK_INT token */
        size_t sp; /* position of a TOK_STR token's text inside the lexer */
    } val;
} Token;
|
||||
|
||||
Lexer *lexer_init(FILE *f);
|
||||
void lexer_free(Lexer *ctx);
|
||||
Token lexer_get_token(Lexer *ctx);
|
||||
char *lexer_get_string(Lexer *ctx, Token tok);
|
||||
|
||||
#endif
|
10
vector.c
10
vector.c
@ -58,17 +58,21 @@ static void resize_if_needed(Vector *vec, size_t new_len)
|
||||
vec->cap = cap;
|
||||
}
|
||||
|
||||
void vector_append_arr(Vector *vec, void *arr, size_t len)
|
||||
size_t vector_append_arr(Vector *vec, void *arr, size_t len)
|
||||
{
|
||||
size_t old_len = vec->len;
|
||||
|
||||
resize_if_needed(vec, vec->len + len);
|
||||
|
||||
memcpy(vec->buf + vec->len * vec->size, arr, len * vec->size);
|
||||
vec->len += len;
|
||||
|
||||
return old_len;
|
||||
}
|
||||
|
||||
void vector_append(Vector *vec, void *val)
|
||||
size_t vector_append(Vector *vec, void *val)
|
||||
{
|
||||
vector_append_arr(vec, val, 1);
|
||||
return vector_append_arr(vec, val, 1);
|
||||
}
|
||||
|
||||
void *vector_get(Vector *vec, size_t i)
|
||||
|
16
vector.h
16
vector.h
@ -18,9 +18,9 @@
|
||||
|
||||
#define VECTOR_SIGN_NEW(name, type) VECTOR_SIGN(name, type, new, Vector##name *, size_t cap)
|
||||
#define VECTOR_SIGN_FREE(name, type) VECTOR_SIGN_V(name, type, free, void)
|
||||
#define VECTOR_SIGN_APPEND_ARR(name, type) VECTOR_SIGN_V(name, type, append_arr, void, type *arr, size_t len)
|
||||
#define VECTOR_SIGN_APPEND(name, type) VECTOR_SIGN_V(name, type, append, void, type val)
|
||||
#define VECTOR_SIGN_GET(name, type) VECTOR_SIGN_V(name, type, get, type *, size_t i)
|
||||
#define VECTOR_SIGN_APPEND_ARR(name, type) VECTOR_SIGN_V(name, type, append_arr, size_t, type *arr, size_t len)
|
||||
#define VECTOR_SIGN_APPEND(name, type) VECTOR_SIGN_V(name, type, append, size_t, type val)
|
||||
#define VECTOR_SIGN_GET(name, type) VECTOR_SIGN_V(name, type, get, type, size_t i)
|
||||
#define VECTOR_SIGN_RESIZE(name, type) VECTOR_SIGN_V(name, type, resize, void, size_t len)
|
||||
|
||||
#define VECTOR_GEN_SOURCE_(name, type, spec) \
|
||||
@ -34,15 +34,15 @@
|
||||
} \
|
||||
inline spec VECTOR_SIGN_APPEND_ARR(name, type) \
|
||||
{ \
|
||||
vector_append_arr((Vector *)vec, arr, len); \
|
||||
return vector_append_arr((Vector *)vec, arr, len); \
|
||||
} \
|
||||
inline spec VECTOR_SIGN_APPEND(name, type) \
|
||||
{ \
|
||||
vector_append((Vector *)vec, &val); \
|
||||
return vector_append((Vector *)vec, &val); \
|
||||
} \
|
||||
inline spec VECTOR_SIGN_GET(name, type) \
|
||||
{ \
|
||||
return (type *)vector_get((Vector *)vec, i); \
|
||||
return *(type *)vector_get((Vector *)vec, i); \
|
||||
} \
|
||||
inline spec VECTOR_SIGN_RESIZE(name, type) \
|
||||
{ \
|
||||
@ -67,8 +67,8 @@ VECTOR_TYPE(, void);
|
||||
|
||||
Vector *vector_new(size_t size, size_t cap);
|
||||
void vector_free(Vector *vec);
|
||||
void vector_append_arr(Vector *vec, void *arr, size_t len);
|
||||
void vector_append(Vector *vec, void *arr);
|
||||
size_t vector_append_arr(Vector *vec, void *arr, size_t len);
|
||||
size_t vector_append(Vector *vec, void *arr);
|
||||
void *vector_get(Vector *vec, size_t i);
|
||||
void vector_resize(Vector *vec, size_t len);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user