Fix: broken strings if parsed file is large

Use unrolled linked list instead of vector
for storing text
This commit is contained in:
Nikita Ivanov 2022-06-11 11:13:04 +05:00
parent 88a2766221
commit 773df4c35a
No known key found for this signature in database
GPG Key ID: 6E656AC5B97B5133
5 changed files with 176 additions and 51 deletions

View File

@ -115,18 +115,13 @@ static int expect(enum TokenType type)
return STAT_ERR;
}
static inline char *get_str(Token tok)
{
return lexer_get_string(lexer, tok);
}
static int preview_type_ext(char **ext)
{
ACCEPT(TOK_DOT);
Token tok = token;
EXPECT(TOK_STR);
*ext = get_str(tok);
*ext = tok.val.s;
return STAT_OK;
}
@ -137,7 +132,7 @@ static int preview_type_mime_part(char **s)
Token t = token;
EXPECT(TOK_STR);
*s = get_str(t);
*s = t.val.s;
return STAT_OK;
}
@ -175,7 +170,7 @@ static int cmd_preview(void)
EXPECT(TOK_BLK_CLS);
add_preview(get_str(name), get_str(script), type, subtype, ext);
add_preview(name.val.s, script.val.s, type, subtype, ext);
return STAT_OK;
}
@ -187,9 +182,8 @@ static int cmd_priority(Token tok)
Token number = token;
int i = accept(TOK_INT) == STAT_OK ? number.val.i : 1;
char *name_str = get_str(name);
if (add_priority(name_str, i) != OK) {
PARSEERROR(name, "preview '%s' not found", name_str);
if (add_priority(name.val.s, i) != OK) {
PARSEERROR(name, "preview '%s' not found", name.val.s);
return STAT_ERR;
}
@ -200,10 +194,9 @@ static int cmd_remove(Token tok)
{
Token name = token;
EXPECT(TOK_STR);
char *name_str = get_str(name);
if (remove_preview(name_str) != OK) {
PARSEERROR(name, "preview '%s' not found", name_str);
if (remove_preview(name.val.s) != OK) {
PARSEERROR(name, "preview '%s' not found", name.val.s);
return STAT_ERR;
}
@ -215,15 +208,14 @@ static int command(void)
Token cmd = token;
EXPECT(TOK_STR);
char *cmd_str = get_str(cmd);
if (strcmp(cmd_str, "preview") == 0)
if (strcmp(cmd.val.s, "preview") == 0)
return cmd_preview();
else if (strcmp(cmd_str, "priority") == 0)
else if (strcmp(cmd.val.s, "priority") == 0)
return cmd_priority(cmd);
else if (strcmp(cmd_str, "remove") == 0)
else if (strcmp(cmd.val.s, "remove") == 0)
return cmd_remove(cmd);
PARSEERROR(cmd, "unknown command: %s", cmd_str);
PARSEERROR(cmd, "unknown command: %s", cmd.val.s);
return STAT_ERR;
}

View File

@ -3,7 +3,7 @@
#include "error.h"
#include "lexer.h"
#include "vector.h"
#include "ulist.h"
#define READ_PUNCT(c, t, s) read_punct((c), (t), (s), LEN(s) - 1)
@ -29,7 +29,7 @@ struct Lexer {
} tok_pos;
InputBuffer input_buf;
TokenQueue tok_queue;
VectorChar *text_buf;
UList *text_buf;
};
static char block_open[] = "{{",
@ -130,22 +130,17 @@ static void skipn_char(Lexer *ctx, int n)
static inline void add_text_buf(Lexer *ctx, char c)
{
vectorChar_append(ctx->text_buf, c);
ulist_append(ctx->text_buf, &c);
}
static inline char *get_text_buf_at(Lexer *ctx, size_t i)
static inline void record_text(Lexer *ctx)
{
return vector_get((Vector *)ctx->text_buf, i);
ulist_lock(ctx->text_buf);
}
static inline size_t get_text_buf_len(Lexer *ctx)
static inline char *get_text(Lexer *ctx)
{
return ctx->text_buf->len;
}
static inline void set_text_buf_len(Lexer *ctx, size_t len)
{
vectorChar_resize(ctx->text_buf, len);
return ulist_unlock(ctx->text_buf);
}
Lexer *lexer_init(FILE *f)
@ -158,7 +153,7 @@ Lexer *lexer_init(FILE *f)
}
init_input_buf(&ctx->input_buf, f);
ctx->text_buf = vectorChar_new(1024);
ctx->text_buf = ulist_new(sizeof(char), 1024);
ctx->line = 1;
ctx->col = 1;
ctx->tok_queue.back = 0;
@ -169,7 +164,7 @@ Lexer *lexer_init(FILE *f)
void lexer_free(Lexer *ctx)
{
vectorChar_free(ctx->text_buf);
ulist_free(ctx->text_buf);
free(ctx);
}
@ -256,11 +251,11 @@ static inline Token read_symbol(Lexer *ctx)
if (!isalpha(c))
return get_tok(ctx, TOK_NULL);
size_t p = get_text_buf_len(ctx);
record_text(ctx);
read_while(ctx, issymbol, 1);
Token tok = get_tok(ctx, TOK_STR);
tok.val.sp = p;
tok.val.s = get_text(ctx);
return tok;
}
@ -277,11 +272,9 @@ static inline Token read_int(Lexer *ctx)
if (!isdigit(peek_char(ctx)))
return get_tok(ctx, TOK_NULL);
size_t len = get_text_buf_len(ctx);
record_text(ctx);
read_while(ctx, isdigit, 1);
int i = atoi(get_text_buf_at(ctx, len));
set_text_buf_len(ctx, len);
int i = atoi(get_text(ctx));
if (!positive)
i *= -1;
@ -328,8 +321,7 @@ static Token read_block(Lexer *ctx)
if ((open_tok = read_block_open(ctx)).type == TOK_NULL)
return get_tok(ctx, TOK_NULL);
body_tok = get_tok(ctx, TOK_STR);
body_tok.val.sp = get_text_buf_len(ctx);
record_text(ctx);
while (1) {
close_tok = read_block_close(ctx);
@ -345,6 +337,10 @@ static Token read_block(Lexer *ctx)
}
add_text_buf(ctx, '\0');
body_tok = get_tok(ctx, TOK_STR);
body_tok.val.s = get_text(ctx);
add_token_queue(ctx, body_tok);
if (close_tok.type != TOK_NULL)
@ -397,14 +393,6 @@ Token lexer_get_token(Lexer *ctx)
return get_tok(ctx, TOK_ERR);
}
char *lexer_get_string(Lexer *ctx, Token tok)
{
if (tok.type != TOK_STR)
return NULL;
return get_text_buf_at(ctx, tok.val.sp);
}
char *lexer_token_type_str(enum TokenType type)
{
switch (type) {

View File

@ -27,14 +27,13 @@ typedef struct {
} type;
union {
int i;
size_t sp;
char *s;
} val;
} Token;
Lexer *lexer_init(FILE *f);
void lexer_free(Lexer *ctx);
Token lexer_get_token(Lexer *ctx);
char *lexer_get_string(Lexer *ctx, Token tok);
char *lexer_token_type_str(enum TokenType type);
#endif

127
src/ulist.c Normal file
View File

@ -0,0 +1,127 @@
#include <string.h>
#include "ulist.h"
#include "error.h"
/*
* Unrolled linked list
*/
/* Capacity (in elements) given to a node when the caller asks for 0. */
#define DEFAULT_CAP 256

/* Value of UList.lock_i while no lock is active. */
#define NO_LOCK (-1)

/*
 * Number of bytes to allocate for a node that stores `cap` elements of
 * `size` bytes each: the node header followed by the element storage.
 * Both arguments are parenthesized so that expressions may be passed.
 */
#define ULIST_NODE_SIZE(cap, size) \
    (sizeof(struct UListNode) + ((size_t)(cap) * (size_t)(size)))

/*
 * Address of the first element stored in node `list` (a struct UListNode
 * lvalue).  The result is a `char *` so that byte offsets can be added to
 * it in standard C; it converts implicitly to `void *` where one is
 * expected (memcpy arguments, `void *` return values).
 */
#define ULIST_BUF(list) ((char *)(list).buf)

struct UList {
    size_t size;    /* size of one element, in bytes */
    ssize_t lock_i; /* index in the tail node where the locked run starts,
                     * or NO_LOCK */
    struct UListNode *head, *tail;
};

struct UListNode {
    size_t len, cap; /* elements in use / elements allocated */
    struct UListNode *next;
    unsigned char buf[]; /* element storage, cap * size bytes */
};
/*
 * Tell whether a lock is currently active on the list, i.e. whether
 * lock_i holds something other than the NO_LOCK sentinel.
 * Returns non-zero when locked, 0 otherwise.
 */
static inline int is_locked(UList *l)
{
    return !(l->lock_i == NO_LOCK);
}
/*
 * Allocate an empty, unlinked node able to hold `cap` elements of the
 * list's element size.  A `cap` of 0 selects DEFAULT_CAP.
 * Reads l->size, so that field must already be set.
 * Reports the error and aborts the process if malloc fails.
 */
static struct UListNode *ulist_node_new(UList *l, size_t cap)
{
    size_t node_cap = cap != 0 ? cap : DEFAULT_CAP;
    struct UListNode *node = malloc(ULIST_NODE_SIZE(node_cap, l->size));

    if (node == NULL) {
        PRINTINTERR(FUNCFAILED("malloc"), ERRNOS);
        abort();
    }

    node->len = 0;
    node->cap = node_cap;
    node->next = NULL;

    return node;
}
/*
 * Create a list whose elements are `size` bytes each.  The list starts
 * with a single node of `cap` elements (0 selects the default capacity),
 * which is both head and tail, and with no lock active.
 * Reports the error and aborts the process if malloc fails.
 */
UList *ulist_new(size_t size, size_t cap)
{
    UList *list = malloc(sizeof *list);

    if (list == NULL) {
        PRINTINTERR(FUNCFAILED("malloc"), ERRNOS);
        abort();
    }

    list->lock_i = NO_LOCK;
    /* The element size must be in place before the first node is sized. */
    list->size = size;
    list->tail = list->head = ulist_node_new(list, cap);

    return list;
}
/*
 * Release every node of the list, then the list object itself.
 * Pointers previously obtained from ulist_unlock point into the nodes and
 * must not be used afterwards.
 */
void ulist_free(UList *l)
{
    struct UListNode *cur, *following;

    for (cur = l->head; cur != NULL; cur = following) {
        /* Fetch the successor before the current node is freed. */
        following = cur->next;
        free(cur);
    }

    free(l);
}
/*
 * Append `len` elements, copied from `arr`, to the end of the list.
 *
 * If the tail node has no room for them, a new node is allocated and
 * becomes the tail; existing nodes are never moved or resized.  The new
 * node's capacity is the old tail's capacity doubled until the old tail's
 * contents plus the new elements would fit.  When a lock is active, the
 * elements appended since ulist_lock are moved from the old tail into the
 * new node, so the locked run always stays contiguous in one node.
 *
 * Appending zero elements is a no-op.  Aborts the process if the required
 * capacity cannot be represented in a size_t.
 */
void ulist_append_arr(UList *l, void *arr, size_t len)
{
    struct UListNode *node = l->tail;
    size_t cap = node->cap;

    if (len == 0)
        return;

    /*
     * node->len never exceeds node->cap, so `cap - node->len` cannot wrap,
     * unlike `node->len + len`.
     */
    while (cap - node->len < len) {
        if (cap > (size_t)-1 / 2)
            abort(); /* doubling would wrap around and never terminate */
        cap *= 2;
    }

    if (cap != node->cap) {
        struct UListNode *fresh = ulist_node_new(l, cap);

        node->next = fresh;

        if (is_locked(l)) {
            size_t start = (size_t)l->lock_i;
            size_t moved = node->len - start;

            /* Carry the locked run over so it continues in the new node. */
            memcpy(ULIST_BUF(*fresh),
                   (char *)ULIST_BUF(*node) + start * l->size,
                   moved * l->size);
            fresh->len = moved;
            node->len = start;
            l->lock_i = 0;
        }

        node = l->tail = fresh;
    }

    memcpy((char *)ULIST_BUF(*node) + node->len * l->size, arr,
           len * l->size);
    node->len += len;
}
/*
 * Append a single element to the list.  `val` points to the element to
 * copy; this is ulist_append_arr with a length of 1.
 */
void ulist_append(UList *l, void *val)
{
ulist_append_arr(l, val, 1);
}
/*
 * Start a locked run at the current end of the tail node: the index of
 * the next element to be appended is remembered in lock_i.  A lock that
 * is already active is simply replaced.
 */
void ulist_lock(UList *l)
{
    const struct UListNode *last = l->tail;

    l->lock_i = (ssize_t)last->len;
}
/*
 * End the locked run started by ulist_lock and return a pointer to its
 * first element inside the tail node.  The elements appended while the
 * lock was active are contiguous from that address.  The storage belongs
 * to the list; nodes are only released by ulist_free.
 *
 * Returns NULL if no lock is active.
 */
void *ulist_unlock(UList *l)
{
    size_t start;

    /* Without a lock lock_i is NO_LOCK and must not be used as an index. */
    if (!is_locked(l))
        return NULL;

    start = (size_t)l->lock_i;
    l->lock_i = NO_LOCK;

    return (char *)ULIST_BUF(*l->tail) + start * l->size;
}

19
src/ulist.h Normal file
View File

@ -0,0 +1,19 @@
#ifndef ULIST_H
#define ULIST_H
#include <stdlib.h>
/*
* Unrolled linked list
*/
/* Opaque list handle; the structure is defined in ulist.c. */
typedef struct UList UList;
/*
 * Create a list of elements that are `size` bytes each.  `cap` is the
 * capacity, in elements, of the first node; 0 selects a default of 256.
 * Aborts the process if memory cannot be allocated.
 */
UList *ulist_new(size_t size, size_t cap);
/* Free all nodes of the list and the list itself. */
void ulist_free(UList *l);
/*
 * Append `len` elements copied from `arr`.  When the last node is full a
 * new, larger node is linked in; existing nodes are not reallocated.  If
 * a lock is active, the elements appended since ulist_lock are moved into
 * the new node so that they stay contiguous.
 */
void ulist_append_arr(UList *l, void *arr, size_t len);
/* Append the single element that `val` points to. */
void ulist_append(UList *l, void *val);
/* Mark the current end of the list as the start of a contiguous run. */
void ulist_lock(UList *l);
/*
 * Clear the lock and return a pointer to the first element appended since
 * ulist_lock.  Call only after ulist_lock.
 */
void *ulist_unlock(UList *l);
#endif