aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorkartofen <kartofen.mail.0@protonmail.com>2025-08-26 01:17:10 +0300
committerkartofen <kartofen.mail.0@protonmail.com>2025-08-26 01:17:10 +0300
commit46e786db9d1b48b8fbc3502e36f093b755f3e09f (patch)
tree9e279216e68f3fe4b0849d1e07184fe674dc551f
parent1c83c514c8108fccfec9764da5e4563b98eb871b (diff)
grammar for the grammar and lexing and parsing of a new language lbp
-rw-r--r--README.md17
-rwxr-xr-xbuild.sh26
-rw-r--r--demos/sample-files/gram-defs.c23
-rw-r--r--demos/sample-files/gram-skeleton.c12
-rw-r--r--demos/sample-files/gram.g42
-rw-r--r--demos/sample-files/lbp-code.lbp40
-rw-r--r--demos/sample-files/lbp-skeleton.c258
-rw-r--r--demos/sample-files/lbp.g39
-rw-r--r--lr-parser.c23
-rw-r--r--util/dict.h2
-rw-r--r--util/queue.h46
11 files changed, 496 insertions, 32 deletions
diff --git a/README.md b/README.md
index 5dade64..9e853ba 100644
--- a/README.md
+++ b/README.md
@@ -1,25 +1,24 @@
### Parser things
-This repo has simple implementations of concepts seen in parsing
+This repo has simple implementations of concepts seen in parsing
and compiler design, written in a way to be easily integrated and
combined while each file/concept has a standalone example.
-The main example if this is a simple compiler generator where the
+The main example of this is a simple compiler generator where the
table to be generated (lalr, clr, slr) and the definitions are loaded
as shared libraries.
The idea is to extend it to many types of table generation, parsing
-techniques and ways to add semanitic meaning.
+techniques and ways to add semantic meaning.
### TODO
-#### NOW:
-
-- Deal with conflicts (copy lemon for precedence)
-- EBNF parser to get the whatever-def.c file
-
#### Goals
+- The LR parser implementation is very dirty and bad
+- The building process is too complicated; the grammar parser
+ should also do the table generation
+
- Proper LALR generation
- LL table generation and parsing
- Possibly recursive ascent and recursive descent generation (a bit pointless)
@@ -27,7 +26,7 @@ techniques and ways to add semanitic meaning.
- Proper attribute grammar implementation, evaluation, and dealing with cycles
- (S)GLR - Scannerless Generalized LR (Masaru Tomita)
- It would be good to implement a compiler of C language (C, B, BCPL),
- and a high level language, maybe Prolog or something mine and with all
+ and a high level language, maybe Prolog or something mine and with all
being fairly optimized
### Building
diff --git a/build.sh b/build.sh
index ea1daef..ae18cd0 100755
--- a/build.sh
+++ b/build.sh
@@ -80,12 +80,24 @@ shared demos/sample-files/gram-defs
leak generate-parser "-o bin/gram -t lalr-table bin/gram-defs.so"
cc demos/sample-files/gram-skeleton "" gram-parser
-leak gram-parser < demos/sample-files/calc.g > bin/calc-gram.c
+# leak gram-parser < demos/sample-files/gram.g > bin/gram-gram.c
+# shared bin/gram-gram
+# leak generate-parser "-o bin/gram -t lalr-table bin/gram-gram.so"
+# cc demos/sample-files/gram-skeleton "" gram2-parser
-shared bin/calc-gram
-leak generate-parser "-o bin/calc -t lalr-table bin/calc-gram.so"
-cc demos/sample-files/calc-skeleton "" calc-parser
+# leak gram2-parser < demos/sample-files/calc.g > bin/calc-gram.c
-leak calc-parser "13*10+9"
-leak calc-parser "-13+20"
-leak calc-parser "1>52?2+3:53"
+# shared bin/calc-gram
+# leak generate-parser "-o bin/calc -t lalr-table bin/calc-gram.so"
+# cc demos/sample-files/calc-skeleton "" calc-parser
+
+# leak calc-parser "13*10+9"
+# leak calc-parser "-13+20"
+# leak calc-parser "1>52?2+3:53"
+
+leak gram-parser < demos/sample-files/lbp.g > bin/lbp-gram.c
+shared bin/lbp-gram
+leak generate-parser "-o bin/lbp -t lalr-table bin/lbp-gram.so"
+
+cc demos/sample-files/lbp-skeleton "util/dict.c" lbp-parser
+leak lbp-parser < demos/sample-files/lbp-code.lbp
diff --git a/demos/sample-files/gram-defs.c b/demos/sample-files/gram-defs.c
index 49329bd..b1ae268 100644
--- a/demos/sample-files/gram-defs.c
+++ b/demos/sample-files/gram-defs.c
@@ -1,13 +1,13 @@
#include "util/util.h"
-#define SYMBOLS(X) \
- X(TERMINAL) X(NONTERM) X(LEFT) X(RIGHT) \
- X(COLON) X(PIPE) X(SEMICOL) X(DOT) \
- X(IDEN) X(NUM) X(ACTION) X(END_INPUT) \
- \
- X(Sp) X(A) X(B) X(C) \
- X(Type) X(Prec) X(Prod) X(Preclist) X(Prodlist) \
- X(Actionlist) X(Idenlist) X(IorNlist) \
- X(SYMBOLS_END) \
+#define SYMBOLS(X) \
+ X(TERMINAL) X(NONTERM) X(LEFT) X(RIGHT) X(NOPREC) \
+ X(COLON) X(PIPE) X(SEMICOL) X(DOT) \
+ X(IDEN) X(NUM) X(ACTION) X(END_INPUT) \
+ \
+ X(S) X(A) X(B) X(C) \
+ X(Type) X(Prec) X(Prod) X(Preclist) X(Prodlist) \
+ X(Actionlist) X(Idenlist) X(IorNlist) \
+ X(SYMBOLS_END) \
#include "parts/symbol.h"
enum symbol { SYMBOLS(X_TO_ENUM) };
@@ -15,20 +15,21 @@ size_t total_symbols = SYMBOLS_END;
char **symbol_to_str = (char *([])){ SYMBOLS(X_TO_STR) };
-IMPLEMENT_FUNCPTR(int, symbol_is_terminal, (symbol s)) { return s < Sp; }
+IMPLEMENT_FUNCPTR(int, symbol_is_terminal, (symbol s)) { return s < S; }
IMPLEMENT_FUNCPTR(int, symbol_is_input_end, (symbol s)) { return s == END_INPUT; }
IMPLEMENT_FUNCPTR(int, symbol_is_valid, (symbol s)) { return s < SYMBOLS_END; }
#include "parts/grammar.h"
#define PROD(LHS, _, ...) {LHS, (symbol[]){__VA_ARGS__}, sizeof((symbol[]){__VA_ARGS__})/sizeof(symbol)}
#define GRAMMAR_ACTION_DEF(X) \
- X(PROD(Sp, -->, A, B, C, END_INPUT), "") \
+ X(PROD(S, -->, A, B, C, END_INPUT), "") \
\
X(PROD(A, -->, TERMINAL, Idenlist, \
SEMICOL, NONTERM, Idenlist, DOT), \
"handle_type(A(1), A(4))") \
\
X(PROD(B, -->, Preclist), "handle_prec(A(0));") \
+ X(PROD(B, -->, NOPREC, DOT), "handle_prec(NULL);") \
X(PROD(Preclist, -->, Prec, SEMICOL, Preclist), \
"v = list_new_head(A(2), A(0));") \
X(PROD(Preclist, -->, Prec, DOT), "v = A(0);") \
diff --git a/demos/sample-files/gram-skeleton.c b/demos/sample-files/gram-skeleton.c
index 7a54548..4e40c14 100644
--- a/demos/sample-files/gram-skeleton.c
+++ b/demos/sample-files/gram-skeleton.c
@@ -5,7 +5,7 @@
#include <ctype.h>
#define INPUT_CAP 4096
-#define ARENA_CAP 4096
+#define ARENA_CAP (4096*2) // parenthesized so the product survives any surrounding operator
#define ARENA_IMPLEMENTATION
#include "util/arena.h"
@@ -115,6 +115,13 @@ void handle_prec(struct list_head *preclist)
printf(" int *list;\n");
printf(" size_t nlist;\n");
printf("};\n");
+
+ if(!preclist) {
+ printf("struct precedence_def *precedence_defs = NULL;\n");
+ printf("size_t nprecedence_defs = 0;\n");
+ return;
+ }
+
printf("struct precedence_def *precedence_defs = (struct precedence_def[]){\n");
list_for_each_entry(struct prec_entry, entry, list, preclist) {
printf("{ %d, (int[]){", entry->flag);
@@ -211,7 +218,7 @@ int main(void)
intptr_t value;
if(lr_parser(&value)) {
- printf(input);
+ fprintf(stderr, input);
return 1;
}
@@ -274,6 +281,7 @@ static char *next_token(char *str)
else if(strcmp(s, "nonterminal") == 0) tok.s = NONTERM;
else if(strcmp(s, "left") == 0) tok.s = LEFT;
else if(strcmp(s, "right") == 0) tok.s = RIGHT;
+ else if(strcmp(s, "noprec") == 0) tok.s = NOPREC;
else { fprintf(stderr, "ERROR: Unknown directive '-%s'\n", s); goto fail; }
break;
case '{':
diff --git a/demos/sample-files/gram.g b/demos/sample-files/gram.g
new file mode 100644
index 0000000..f9daded
--- /dev/null
+++ b/demos/sample-files/gram.g
@@ -0,0 +1,42 @@
+-terminal
+ TERMINAL NONTERM LEFT RIGHT COLON NOPREC
+ PIPE SEMICOL DOT
+ IDEN NUM ACTION;
+-nonterminal
+ S A B C
+ Type Prec Prod Preclist Prodlist
+ Actionlist Idenlist IorNlist.
+
+-noprec.
+
+S: A B C {};
+
+A: TERMINAL Idenlist SEMICOL NONTERM Idenlist DOT { handle_type(A(1), A(4)) };
+
+
+B: Preclist { handle_prec(A(0)); }
+ | NOPREC DOT { handle_prec(NULL); };
+
+Preclist: Prec SEMICOL Preclist { v = list_new_head(A(2), A(0)); }
+ | Prec DOT { v = A(0); };
+
+Prec: LEFT IorNlist { v = prec_new(A(1), PRECEDENCE_LEFT_ASSOC); }
+ | RIGHT IorNlist { v = prec_new(A(1), PRECEDENCE_RIGHT_ASSOC); };
+
+
+C: Prodlist { handle_prod(A(0)); };
+
+Prodlist: Prod SEMICOL Prodlist { v = list_new_head(A(2), A(0)); }
+ | Prod DOT { v = A(0); };
+
+Prod: IDEN COLON Actionlist { v = prod_new(A(0), A(2)); };
+
+Actionlist: Idenlist ACTION PIPE Actionlist { v = list_new_head(A(3), action_new(A(0), A(1))); }
+ | Idenlist ACTION { v = action_new(A(0), A(1)); };
+
+Idenlist: IDEN Idenlist { v = list_new_head(A(1), ptr_new(A(0))); }
+ | IDEN { v = ptr_new(A(0)); };
+IorNlist: IDEN IorNlist { v = list_new_head(A(1), ptr_new(A(0))); }
+ | IDEN { v = ptr_new(A(0)); }
+ | NUM IorNlist { v = list_new_head(A(1), num_new(A(0))); }
+ | NUM { v = num_new(A(0)); }.
diff --git a/demos/sample-files/lbp-code.lbp b/demos/sample-files/lbp-code.lbp
new file mode 100644
index 0000000..df5bdcc
--- /dev/null
+++ b/demos/sample-files/lbp-code.lbp
@@ -0,0 +1,40 @@
+inbounds/int-function(low, high, val) {
+ > val low, < val high.
+},
+
+:downlink_fmt/enum {
+ (17 |_, :EXTENDED_SQUITTER);
+ (18 |_, :NON_TRANSPONDER).
+},
+
+:type_code/enum {
+ (inbounds 1 4 |_, :AIRCRAFT_IDEN);
+ (inbounds 5 8 |_, :SURFACE_POS);
+ (inbounds 9 18 |_, :AIR_POS);
+ (inbounds 20 22 |_, ---);
+ (19 |_, ---);
+ (28 |_, ---);
+ (29 |_, ---);
+ (31 |_, ---).
+},
+
+:aircraft_iden/struct {
+-.
+},
+
+:message/struct {
+ DF/enum(:downlinkfmt) |5,
+ CA/enum(:capabilities) |3,
+ ICAO/int-big |24,
+ TC/enum(:type_code) |5,
+
+ ((TC :type_code:AIRCRAFT_IDEN, aircraft_iden/struct(:aircraft_iden));
+ (TC :type_code:SURFACE_POS, surface_pos/struct {
+ POS |1,
+ - |_.
+ });
+ (TC :type_code:AIR_POS, air_pos/struct(:air_pos))
+ ) |51,
+
+ CRC |24.
+}.
diff --git a/demos/sample-files/lbp-skeleton.c b/demos/sample-files/lbp-skeleton.c
new file mode 100644
index 0000000..ae0a17f
--- /dev/null
+++ b/demos/sample-files/lbp-skeleton.c
@@ -0,0 +1,258 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <ctype.h>
+
+// TODO: lr parser is bad for debugging
+
+#define INPUT_CAP 4096
+#define ARENA_CAP 4096
+
+#define ARENA_IMPLEMENTATION
+#include "util/arena.h"
+
+static char buf[ARENA_CAP];
+static struct arena_ctx global_arena;
+/* Allocates sz bytes from global_arena. Never returns NULL: when
+ * arena_allocate() yields NULL an error is printed and the process exits. */
+static void *xalloc(size_t sz)
+{
+    void *mem = arena_allocate(&global_arena, sz);
+    if(mem) return mem;
+
+    fprintf(stderr, "ERROR: Arena empty\n");
+    exit(1);
+}
+
+// other things here
+#include "util/list.h"
+/* Makes `new` the head of a list: when `head` is non-NULL the two nodes are
+ * linked with list_add(new, head). Always returns `new`. */
+static inline struct list_head *list_new_head(struct list_head *head, struct list_head *new)
+{
+    if(head) list_add(new, head);
+    return new;
+}
+
+/* Wrapper for callers that pass and receive list nodes as intptr_t values.
+ * Arguments and the whole expansion are parenthesized so that the casts apply
+ * to the complete argument expressions and the result composes safely. */
+#define list_new_head(head, new) \
+    ((intptr_t)list_new_head((struct list_head *)(head), (struct list_head *)(new)))
+
+// generated
+#include "bin/lbp.h"
+#include "bin/lbp.c"
+
+#include "util/dict.h"
+static struct dict types_dict; // dictionary used by typelist_tokenize(); initialised and compiled in main()
+static struct string_token types_strings[] = { // type-list keywords and the terminal symbol each one lexes to
+ {"int", T_INT},
+ {"enum", T_ENUM},
+ {"struct", T_STRUCT},
+ {"function", ST_FUNCTION},
+ {"big", ST_BIG},
+ {"little", ST_LITTLE},
+ {"native", ST_NATIVE},
+};
+static size_t ntypes_strings = sizeof(types_strings)/sizeof(*types_strings); // number of entries in types_strings
+static uint8_t dict_lowercase_char_to_bit[256] = { // passed to DICT_INIT as char_to_bit: 'a'..'z' -> 2..27, NUL and ' ' -> 1, every other byte -> 0
+ ['a'] = 2, ['b'] = 3, ['c'] = 4, ['d'] = 5, ['e'] = 6, ['f'] = 7,
+ ['g'] = 8, ['h'] = 9, ['i'] = 10, ['j'] = 11, ['k'] = 12, ['l'] = 13,
+ ['m'] = 14, ['n'] = 15, ['o'] = 16, ['p'] = 17, ['q'] = 18, ['r'] = 19,
+ ['s'] = 20, ['t'] = 21, ['u'] = 22, ['v'] = 23, ['w'] = 24, ['x'] = 25,
+ ['y'] = 26, ['z'] = 27, [ 0 ] = 1, [' '] = 1
+};
+
+
+#include "parts/toklist.h"
+struct token { // one lexed token, stored in the tokbuf queue
+ symbol s; // grammar symbol of the token (see token_sym)
+ intptr_t v; // semantic value: the number for NUM, a pointer to the copied text for IDEN/ATOM (see next_token)
+};
+
+#include "util/queue.h"
+QUEUE_GENERATE(tokbuf, struct token, 16)
+
+/* Returns the grammar symbol stored in the token. */
+symbol token_sym(struct token *t)
+{
+    return t->s;
+}
+
+/* Returns the semantic value stored in the token. */
+intptr_t token_val(struct token *t)
+{
+    return t->v;
+}
+
+static void print_token(struct token *t);
+static char *next_token(char *str);
+
+static char *input;
+
+/*
+ * Removes the token at the front of tokbuf and returns a pointer to a static
+ * copy of it (overwritten by the next call). When the queue becomes empty the
+ * lexer is run on the remaining input to refill it.
+ *
+ * If the queue was already empty, an END_INPUT token is returned instead of
+ * whatever the static copy held from an earlier call.
+ */
+struct token *toklist_eat(void)
+{
+    static struct token t;
+
+    if(tokbuf_dequeue(&t)) t = (struct token){.s = END_INPUT};
+    if(tokbuf_empty()) input = next_token(input);
+    return &t;
+}
+
+/*
+ * Returns a pointer to a static copy of the token at the front of tokbuf
+ * without removing it. If the queue is empty, an END_INPUT token is returned
+ * instead of whatever the static copy held from an earlier call.
+ */
+struct token *toklist_peek(void)
+{
+    static struct token t;
+
+    if(tokbuf_peek(&t)) t = (struct token){.s = END_INPUT};
+    return &t;
+}
+
+// #define _LR_PARSER_DEBUG
+#include "lr-parser.c"
+
+/*
+ * Reads the program text from stdin, sets up the arena and the type-name
+ * dictionary, primes the token queue and runs the LR parser.
+ * Returns 0 on success, 1 when the input cannot be read, does not fit in
+ * INPUT_CAP, or fails to parse.
+ */
+int main(void)
+{
+    static char input_buf[INPUT_CAP];
+
+    // Read in units of one byte so the return value is a byte count that can
+    // be compared with INPUT_CAP. A completely filled buffer is rejected: it
+    // would leave no terminating '\0' (the static buffer starts zeroed) for
+    // the lexer to stop at.
+    size_t nread = fread(input_buf, 1, INPUT_CAP, stdin);
+    if(ferror(stdin)) {
+        fprintf(stderr, "ERROR: Failed to read input\n");
+        return 1;
+    }
+    if(nread == INPUT_CAP) {
+        fprintf(stderr, "INPUT_CAP reached\n");
+        return 1;
+    }
+
+    global_arena = ARENA_CTX_INIT(buf, ARENA_CAP);
+
+    types_dict = DICT_INIT(types_strings, ntypes_strings, dict_lowercase_char_to_bit);
+    dict_compile(&types_dict);
+
+    input = next_token(input_buf);
+
+    // while(1) {
+    //     struct token *tok = toklist_eat();
+    //     print_token(tok);
+    //     if(token_sym(tok) == END_INPUT) break;
+    // } return 0;
+
+    intptr_t value;
+    int failed = lr_parser(&value);
+    if(failed) {
+        // Echo the unconsumed input as data, never as a format string.
+        // next_token() returns NULL after a lexing error, so input may be NULL.
+        if(input) fprintf(stderr, "%s", input);
+    } else {
+        // %jd expects intmax_t, so convert explicitly.
+        fprintf(stderr, "OUTPUT: %jd\n", (intmax_t)value);
+    }
+
+    // Release the dictionary on both paths.
+    dict_free(&types_dict);
+    return failed ? 1 : 0;
+}
+
+/* Debug helper: prints the token's symbol name on stdout and, for IDEN and
+ * ATOM tokens, the token's string value on a second line. */
+static void print_token(struct token *tok)
+{
+    symbol sym = token_sym(tok);
+    int has_text = (sym == IDEN) || (sym == ATOM);
+
+    printf("%s\n", symbol_to_str[sym]);
+    if(has_text) {
+        printf(" %s\n", (char *)token_val(tok));
+    }
+}
+
+// STR UTIL
+
+/* Arena-backed replacement for strdup(): the copy is taken from xalloc() and
+ * is never freed individually. A NULL input (substring() returns NULL for
+ * over-long text) yields NULL instead of being dereferenced. */
+#define strdup(...) _strdup(__VA_ARGS__)
+static inline char *_strdup(char *str)
+{
+    if(!str) return NULL;
+
+    size_t size = strlen(str) + 1; // measured once, includes the terminator
+    return memcpy(xalloc(size), str, size);
+}
+
+/*
+ * Copies the first sub_end characters of str into a static 128-byte buffer,
+ * terminates it and returns the buffer; the contents are overwritten by the
+ * next call.
+ *
+ * - str == NULL: returns the buffer as left by the previous call.
+ * - sub_end does not leave room for the terminator: returns NULL.
+ */
+static inline char *substring(char *str, size_t sub_end)
+{
+    static char sub[128];
+    if(!str) return sub;
+
+    // ">=" rather than "sub_end+1 >": the addition wraps to 0 for SIZE_MAX
+    // and would let an out-of-range index through.
+    if(sub_end >= sizeof(sub)) return NULL;
+
+    sub[sub_end] = '\0';
+    return memcpy(sub, str, sub_end);
+}
+
+/* Returns the index of the first occurrence of ch within the first len
+ * characters of str, or len when ch does not occur there. */
+static inline size_t tillch(char *str, size_t len, char ch)
+{
+    size_t pos = 0;
+    while(pos < len && str[pos] != ch) pos++;
+    return pos;
+}
+
+// LEXER
+
+/* Returns non-zero when c ends a word: whitespace, the string terminator, or
+ * one of the punctuation characters / , ; . ( ) { } */
+static inline int issep(char c)
+{
+    // <ctype.h> functions require a value representable as unsigned char;
+    // a negative plain char (non-ASCII byte) would be undefined behaviour.
+    return isspace((unsigned char)c) || c == '\0' || c == '/' || c == ',' || c == ';' ||
+           c == '.' || c == '(' || c == ')' || c == '{' || c == '}';
+}
+
+/* Returns the number of characters at the start of str that precede the first
+ * separator (see issep); 0 when str starts with a separator. The terminating
+ * '\0' counts as a separator, so the scan stays inside the string. */
+static inline size_t tillsep(char *str)
+{
+    size_t i = 0;
+    while(!issep(str[i])) i++;
+    return i;
+}
+
+/*
+ * Lexes one element of the type list that follows a '/' and recurses for the
+ * elements after it. An element is a word ending at a separator or '-'; it is
+ * looked up in types_dict and enqueued as a token whose symbol and value are
+ * both the lookup result. After the word:
+ *   '-'  another element follows;
+ *   '('  the parenthesised part is lexed with next_token() up to a ')', after
+ *        which a '-' again introduces another element;
+ *   else the type list is finished.
+ *
+ * Returns the position after the type list, or NULL on an unknown or over-long
+ * name, a lexing error, or a '(' that is never closed.
+ */
+static char *typelist_tokenize(char *str)
+{
+    size_t off = 0;
+    while(!issep(str[off]) && str[off] != '-') off++;
+
+    if(off > 0) {
+        // substring() returns NULL when the word does not fit its buffer.
+        char *name = substring(str, off);
+        if(!name) {
+            fprintf(stderr, "ERROR: Type or subtype name too long\n");
+            return NULL;
+        }
+
+        int s = dict_check(&types_dict, name);
+        if(s < 0) {
+            fprintf(stderr, "ERROR: Unknown type or subtype %s\n", name);
+            return NULL;
+        }
+
+        tokbuf_enqueue(&(struct token){.s = s, .v = s});
+    }
+
+    str += off;
+
+    switch(str[0]) {
+    case '-': return typelist_tokenize(str+1);
+    case '(':
+        while((str = next_token(str))) {
+            // not really: the first ')' seen ends the scan, so nested
+            // parentheses are not handled
+            if(*(str-1) == ')') {
+                if(str[0] == '-') return typelist_tokenize(str+1);
+                else return str;
+            }
+
+            // End of input reached (or already stepped over) without a ')':
+            // stop instead of lexing past the terminator.
+            if(str[0] == '\0' || *(str-1) == '\0') {
+                fprintf(stderr, "ERROR: Unterminated '(' in type list\n");
+                return NULL;
+            }
+        }
+        return NULL;
+    default: return str;
+    }
+}
+
+/*
+ * Lexes the next token(s) starting at str and appends them to tokbuf.
+ *
+ *  - leading whitespace is skipped;
+ *  - at the end of the string an END_INPUT token is enqueued and the returned
+ *    pointer stays on the terminator, so repeated calls never read past it;
+ *  - , ; . ( ) { } each produce their own token;
+ *  - '/' produces TYPELIST_START, the tokens of the type list
+ *    (typelist_tokenize) and TYPELIST_END;
+ *  - a word starting with a digit produces NUM;
+ *  - any other word produces IDEN (ATOM when it starts with ':'), and every
+ *    further ':'-separated part produces COLON followed by IDEN.
+ *
+ * Returns the position after the consumed text. On error (type list failure,
+ * over-long word, full token queue) an END_INPUT token is enqueued if possible
+ * and NULL is returned. A NULL str is returned unchanged.
+ */
+static char *next_token(char *str)
+{
+    if(!str) return str;
+
+    // <ctype.h> needs an unsigned char value; plain char may be negative.
+    while(isspace((unsigned char)str[0])) str++;
+
+    struct token tok = {0};
+    size_t off = 0;
+    char c0 = str[0];
+
+    if(c0 == '\0') {
+        tok.s = END_INPUT;
+        if(tokbuf_enqueue(&tok)) return NULL;
+        return str; // do not step over the terminator
+    }
+
+    off = tillsep(str);
+    if(off == 0) { // sep
+        switch(str[off++]) {
+        case ',': tok.s = COMMA;   break;
+        case ';': tok.s = SEMICOL; break;
+        case '.': tok.s = DOT;     break;
+        case '(': tok.s = LPAREN;  break;
+        case ')': tok.s = RPAREN;  break;
+        case '{': tok.s = LBRACE;  break;
+        case '}': tok.s = RBRACE;  break;
+        case '/':
+            tok.s = TYPELIST_START;
+            if(tokbuf_enqueue(&tok)) goto fail;
+            if(!(str = typelist_tokenize(str+off))) goto fail;
+            tok.s = TYPELIST_END;
+            if(tokbuf_enqueue(&tok)) goto fail;
+            return str;
+        default: break; // whitespace and '\0' were handled above
+        }
+    } else if(c0 >= '0' && c0 <= '9') { // num
+        char *digits = substring(str, off);
+        if(!digits) goto too_long;
+
+        tok.s = NUM;
+        // not really: anything after the leading digits is silently ignored
+        tok.v = (intptr_t)strtol(digits, NULL, 10);
+    } else { // iden or atom (possibly with fields)
+        int hasfield = 0;
+
+        do {
+            // Length of the current part including its leading character;
+            // for field parts that leading character is the ':' itself.
+            size_t sub_off = tillch(str + 1, off - 1, ':') + 1;
+            char *text = substring(str + hasfield, sub_off - hasfield);
+            if(!text) goto too_long;
+
+            if(hasfield && tokbuf_enqueue(&(struct token){.s = COLON, .v = 0}))
+                goto fail;
+
+            struct token part = {
+                .s = (!hasfield && str[0] == ':') ? ATOM : IDEN,
+                .v = (intptr_t)strdup(text),
+            };
+            if(tokbuf_enqueue(&part)) goto fail;
+
+            hasfield = 1;
+            str += sub_off;
+            off -= sub_off;
+        } while(off > 0);
+
+        return str;
+    }
+
+    if(tokbuf_enqueue(&tok)) goto fail;
+    return str+off;
+
+too_long:
+    fprintf(stderr, "ERROR: Token too long\n");
+fail:
+    tokbuf_enqueue(&(struct token){.s = END_INPUT});
+    return NULL;
+}
diff --git a/demos/sample-files/lbp.g b/demos/sample-files/lbp.g
new file mode 100644
index 0000000..bc82cb3
--- /dev/null
+++ b/demos/sample-files/lbp.g
@@ -0,0 +1,39 @@
+-terminal NUM IDEN ATOM
+ COMMA SEMICOL DOT COLON
+ RPAREN LPAREN RBRACE LBRACE
+
+ TYPELIST_START TYPELIST_END
+ T_INT T_ENUM T_STRUCT
+ ST_FUNCTION ST_BIG ST_LITTLE ST_NATIVE;
+
+-nonterminal S exprlist expr sym fieldlist basetype subtypelist.
+
+-left LPAREN;
+-left COMMA SEMICOL.
+
+S: exprlist DOT {};
+
+exprlist: expr {}
+ | exprlist expr {}
+ | exprlist COMMA exprlist {}
+ | exprlist SEMICOL exprlist {};
+
+expr: NUM {}
+ | sym {}
+ | sym fieldlist {}
+ | sym TYPELIST_START basetype TYPELIST_END {}
+ | sym TYPELIST_START basetype subtypelist TYPELIST_END {}
+ | LBRACE exprlist DOT RBRACE {}
+ | LPAREN exprlist RPAREN {};
+
+sym: IDEN {} | ATOM {};
+
+fieldlist: COLON IDEN {}
+ | fieldlist fieldlist {};
+
+basetype: T_INT {}
+ | T_STRUCT {} | T_STRUCT LPAREN ATOM RPAREN {}
+ | T_ENUM {} | T_ENUM LPAREN ATOM RPAREN {};
+subtypelist: ST_FUNCTION LPAREN exprlist RPAREN {}
+ | ST_BIG {} | ST_LITTLE {} | ST_NATIVE {}
+ | subtypelist subtypelist {}.
diff --git a/lr-parser.c b/lr-parser.c
index bca8a52..336c222 100644
--- a/lr-parser.c
+++ b/lr-parser.c
@@ -21,11 +21,19 @@ typedef intmax_t stack_item;
static stack_item stack_bottom[STACK_CAP];
static stack_item *stack_head = stack_bottom;
+/*
+ * Debug helper: prints the symbols currently on the parser stack to stderr.
+ * The shift and reduce paths push (symbol, value, state) triples, so symbols
+ * sit at stack_bottom[1], [4], [7], ...
+ */
+static void print_stack(void)
+{
+    size_t top = (size_t)(stack_head - stack_bottom);
+
+    fprintf(stderr, "STACK: { ");
+    // Bounded by STACK_CAP as well: after a failed push stack_head points one
+    // past the array. Slots are read by value and converted, rather than
+    // through a symbol pointer into intmax_t storage.
+    for(size_t i = 1; i <= top && i < STACK_CAP; i += 3)
+        fprintf(stderr, "%s ", symbol_to_str[(symbol)stack_bottom[i]]);
+    fprintf(stderr, "}\n\n");
+}
+
int lr_parser(intptr_t *value)
{
#define push(item) do { \
if(++stack_head - stack_bottom < STACK_CAP ) *stack_head = item; \
- else { fprintf(stderr, "ERROR: STACK_CAP exceeded\n"); return 1; } \
+ else { fprintf(stderr, "ERROR: STACK_CAP exceeded\n"); print_stack(); return 1; } \
} while(0)
#define pop() (--stack_head)
#define eat() toklist_eat()
@@ -41,6 +49,10 @@ int lr_parser(intptr_t *value)
push(token_sym(t));
push(token_val(t));
push(a.arg);
+#ifdef _LR_PARSER_DEBUG
+ fprintf(stderr, "SHIFT %s\n", symbol_to_str[token_sym(t)]);
+ print_stack();
+#endif
break;
case ACTION_REDUCE:
intptr_t semantic_value = semantic_actions[a.arg](stack_head);
@@ -58,6 +70,9 @@ int lr_parser(intptr_t *value)
push(lhs);
push(semantic_value);
push(a_goto.arg);
+#ifdef _LR_PARSER_DEBUG
+ fprintf(stderr, "READUCE %s\n", symbol_to_str[lhs]);
+#endif
break;
case ACTION_ACCEPT:
for(size_t i = 0; i < 3; i++) push(0); // todo: better fix for reducing the final production expecting an END_INPUT on the stack
@@ -66,8 +81,10 @@ int lr_parser(intptr_t *value)
case ACTION_NOT_SET:
default:
fprintf(stderr,
- "ERROR: Unexpected symbol '%d' at state %zu\n",
- token_sym(peek()), (size_t)*stack_head);
+ "ERROR: Unexpected symbol '%s' at state %zu\n",
+ symbol_to_str[token_sym(peek())], (size_t)*stack_head);
+ // Expected ...
+ print_stack();
return 1;
}
}
diff --git a/util/dict.h b/util/dict.h
index 109c07a..2da8e6f 100644
--- a/util/dict.h
+++ b/util/dict.h
@@ -28,6 +28,8 @@ struct dict {
size_t num_levels;
};
+#define DICT_INIT(strings_, nstrings_, char_to_bit_) (struct dict){.strings = strings_, .nstrings = nstrings_, .char_to_bit = char_to_bit_} // struct dict value with strings, nstrings and char_to_bit set; all other members zero-initialised
+
int dict_compile(struct dict *d);
void dict_free(struct dict *d);
void dict_print(struct dict *d);
diff --git a/util/queue.h b/util/queue.h
new file mode 100644
index 0000000..31236f6
--- /dev/null
+++ b/util/queue.h
@@ -0,0 +1,46 @@
+#ifndef QUEUE_H
+#define QUEUE_H
+
+/*
+ * QUEUE_GENERATE(id, type, cap) defines a file-local fixed-capacity FIFO of
+ * `type` with room for `cap` elements, plus these static functions:
+ *
+ *   int id_enqueue(type *m)  append a copy of *m; returns 1 (and prints an
+ *                            error) when the queue is full, 0 otherwise
+ *   int id_dequeue(type *m)  move the oldest element into *m; returns 1 (and
+ *                            prints an error) when the queue is empty
+ *   int id_empty(void)       non-zero when the queue holds no elements
+ *   int id_peek(type *m)     copy the oldest element into *m without removing
+ *                            it; returns 1 (and prints an error) when empty
+ *
+ * start and end count elements ever dequeued/enqueued; buffer slots are those
+ * counters modulo cap. `cap` is parenthesized everywhere so an expression
+ * argument such as 8+8 keeps its value. The including file must provide
+ * <stdio.h> for fprintf.
+ */
+#define QUEUE_GENERATE(id, type, cap)                                         \
+    static struct                                                             \
+    { type buf[(cap)]; size_t start; size_t end; } _##id##_queue;             \
+                                                                              \
+    static int id##_enqueue(type *m)                                          \
+    {                                                                         \
+        if(_##id##_queue.end - _##id##_queue.start >= (size_t)(cap)) {        \
+            fprintf(stderr,                                                   \
+                    "ERROR: Queue capacity of %zu reached\n",                 \
+                    (size_t)(cap));                                           \
+            return 1;                                                         \
+        }                                                                     \
+                                                                              \
+        _##id##_queue.buf[_##id##_queue.end++ % (cap)] = *m;                  \
+        return 0;                                                             \
+    }                                                                         \
+                                                                              \
+    static int id##_dequeue(type *m)                                          \
+    {                                                                         \
+        if(_##id##_queue.start >= _##id##_queue.end) {                        \
+            fprintf(stderr, "ERROR: Trying to dequeue empty queue\n");        \
+            return 1;                                                         \
+        }                                                                     \
+                                                                              \
+        *m = _##id##_queue.buf[_##id##_queue.start++ % (cap)];                \
+        return 0;                                                             \
+    }                                                                         \
+                                                                              \
+    static int id##_empty(void)                                               \
+    { return _##id##_queue.start == _##id##_queue.end; }                      \
+                                                                              \
+    static int id##_peek(type *m)                                             \
+    {                                                                         \
+        if(id##_empty()) {                                                    \
+            fprintf(stderr,                                                   \
+                    "ERROR: Trying to peek into empty queue\n");              \
+            return 1;                                                         \
+        }                                                                     \
+                                                                              \
+        *m = _##id##_queue.buf[_##id##_queue.start % (cap)];                  \
+        return 0;                                                             \
+    }
+
+#endif