aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorkartofen <kartofen.mail.0@protonmail.com>2025-07-20 01:32:24 +0300
committerkartofen <kartofen.mail.0@protonmail.com>2025-07-20 01:32:24 +0300
commit34357640c0676f33ad13aac1fe28effc6f6e47c7 (patch)
treed656ee61da7d7a0b133aa57311266653ef100569
parent174e9b35ce3b6e99e500907f1bb24c6f31f481bf (diff)
start of grammar parsing
-rwxr-xr-xbuild.sh28
-rw-r--r--clr-table.c15
-rw-r--r--demos/generate-parser.c48
-rw-r--r--demos/sample-files/calc-defs.c41
-rw-r--r--demos/sample-files/calc-skeleton.c91
-rw-r--r--demos/sample-files/gram-defs.c65
-rw-r--r--demos/sample-files/gram-skeleton.c149
-rw-r--r--lr-parser.c17
-rw-r--r--parts/grammar.h1
-rw-r--r--parts/table.h6
-rw-r--r--parts/toklist.h5
-rw-r--r--util/arena.h30
12 files changed, 395 insertions, 101 deletions
diff --git a/build.sh b/build.sh
index aa0c308..1fe06a9 100755
--- a/build.sh
+++ b/build.sh
@@ -4,27 +4,27 @@ set -e
function log
{
- >&2 echo "-> $@"
+ echo "-> $@"
"$@"
}
function cc
{
mkdir -p bin
- [ -n "$3" ] && NAME=$3 || NAME=$(basename $1)
- log gcc -Wall -Wextra -Wpedantic -I. -g $2 $1.c -o "bin/$NAME"
+ [ -n "$3" ] && NAME="$3" || NAME=$(basename "$1")
+ log gcc -Wall -Wextra -Wpedantic -I. -g $2 "$1.c" -o "bin/$NAME"
}
function shared
{
mkdir -p bin
- [ -n "$3" ] && NAME=$3 || NAME=$(basename $1)
- log gcc -Wall -Wextra -Wpedantic -I. -g -shared -fPIC $2 $1.c -o "bin/$NAME.so"
+ [ -n "$3" ] && NAME="$3" || NAME=$(basename "$1")
+ log gcc -Wall -Wextra -Wpedantic -I. -g -shared -fPIC $2 "$1.c" -o "bin/$NAME.so"
}
function leak
{
- log valgrind --leak-check=full --show-leak-kinds=all -s bin/$1 "$2"
+ log valgrind --leak-check=full --show-leak-kinds=all -s bin/"$1" $2
}
# cc util/dict -D_DICT_STANDALONE
@@ -67,7 +67,15 @@ shared clr-table -D_LAZY_LALR lalr-table
shared demos/sample-files/lalr-defs
# --- Calc example ---
-shared demos/sample-files/calc-defs
-leak "generate-parser -t lalr-table bin/calc-defs.so"
-cc demos/sample-files/calc-skeleton "" parser
-leak parser "13*10+9 - (54*(10+8))"
+# shared demos/sample-files/calc-defs
+# leak generate-parser "-o bin/calc -t lalr-table bin/calc-defs.so"
+# cc demos/sample-files/calc-skeleton "" parser
+# leak parser "13*10+9 - (54*(10+8))" # wrong answer
+# leak parser "-13 + 20"
+# leak parser "1 > 52 ? 2 + 3 : 53"
+
+# --- Grammar Definition example ---
+shared demos/sample-files/gram-defs
+leak generate-parser "-o bin/gram -t lalr-table bin/gram-defs.so"
+cc demos/sample-files/gram-skeleton "" parser
+leak parser
diff --git a/clr-table.c b/clr-table.c
index af39441..764bbcc 100644
--- a/clr-table.c
+++ b/clr-table.c
@@ -3,8 +3,6 @@
#include <stdint.h>
#include <setjmp.h>
-// TODO: handle conflicts (itemset_insert returns 2 on table problem)
-
#ifndef XCALLOC_IMPLEMENTED
#define XCALLOC_IMPLEMENTED
void *xcalloc(size_t n, size_t size) { void *addr = calloc(n, size); return addr ? addr : (exit(1), NULL); }
@@ -44,7 +42,7 @@ static int item_eq(struct item *i1, struct item *i2) { return (i1->dot == i2->do
static int item_core_eq(struct item *i1, struct item *i2) { return (i1->dot == i2->dot && i1->prod_idx == i2->prod_idx) ? 1 : 0; }
#endif
-#define SEEN_SETS_CAP 64
+#define SEEN_SETS_CAP 256
static struct {
struct item *items;
size_t nitems;
@@ -72,7 +70,7 @@ static size_t itemset_handle(struct item *set, size_t nset)
#endif
// 1. is set in seen_sets
- for(size_t i = 0; i < nseen_sets; i++) {
+ for(size_t i = 0; i < nseen_sets; i++)
if(seen_sets[i].nitems == nset) {
int _seen = 0;
for(size_t j = 0; j < nset; j++) {
@@ -85,6 +83,7 @@ static size_t itemset_handle(struct item *set, size_t nset)
}
#ifdef _LAZY_LALR
+ for(size_t i = 0; i < nseen_sets; i++) {
int _same_core = 0;
for(size_t j = 0; j < nset; j++) {
_same_core = 0;
@@ -92,9 +91,9 @@ static size_t itemset_handle(struct item *set, size_t nset)
if(item_core_eq(&seen_sets[i].items[k], &set[j])) _same_core = 1;
if(!_same_core) break;
}
- if(_same_core) { (use_state != SIZE_MAX) && (exit(15), 1); use_state = seen_sets[i].state; }
-#endif
+ if(_same_core) { use_state = seen_sets[i].state; break; }
}
+#endif
// 2. add set to seen_sets
if(nseen_sets >= SEEN_SETS_CAP) {
@@ -126,8 +125,8 @@ static size_t itemset_handle(struct item *set, size_t nset)
return new_state;
}
-#define CLOSURE_SET_CAP 64
-#define GOTO_SET_CAP 32
+#define CLOSURE_SET_CAP 128
+#define GOTO_SET_CAP 128
static int itemset_insert(size_t state, struct item *initial_set, size_t ninitial)
{
struct item closure_set[CLOSURE_SET_CAP];
diff --git a/demos/generate-parser.c b/demos/generate-parser.c
index 23201fa..48fa48c 100644
--- a/demos/generate-parser.c
+++ b/demos/generate-parser.c
@@ -5,12 +5,13 @@
#include <unistd.h> // getopt
#include <assert.h>
-#define DEFUALT_PATH "./bin"
-#define DEFUALT_TYPE "lalr-table"
#define DEFAULT_OUTPUT "bin/a"
+#define DEFUALT_MODPATH "./bin"
+#define DEFUALT_TYPE "lalr-table"
#include "parts/symbol.h"
size_t total_symbols;
+char **symbol_to_str;
int (*symbol_is_terminal)(symbol s);
int (*symbol_is_input_end)(symbol s);
int (*symbol_is_valid)(symbol s);
@@ -42,8 +43,7 @@ void (*table_free)();
#include "util-tables.c"
-
-void *xdlsym(void *handle, char *sym)
+static void *xdlsym(void *handle, char *sym)
{
void *r = dlsym(handle, sym);
if(!r) {
@@ -58,18 +58,18 @@ void *xdlsym(void *handle, char *sym)
var = *(typeof(&var))xdlsym(handle, #var)
-char *modpath(char *name)
+static char *modpath(char *name)
{
static char fullpath[128];
// TODO: search the GENERATE_PARSER_PATH env var
- char *path = DEFUALT_PATH;
+ char *path = DEFUALT_MODPATH;
assert(snprintf(fullpath, 128, "%s/%s.so", path, name) < 128);
return fullpath;
}
-char *add_extension(char *str, char *ext)
+static char *add_extension(char *str, char *ext)
{
static char full[128];
assert((strlen(str) + strlen(ext) + 1) <= 128);
@@ -77,7 +77,7 @@ char *add_extension(char *str, char *ext)
return strcat(full, ext);
}
-void set_stdout(char *filename)
+static void set_stdout(char *filename)
{
if(!filename) filename = "/dev/tty";
assert(freopen(filename, "w", stdout));
@@ -114,6 +114,7 @@ int main(int argc, char **argv)
GET_VARIABLE(table_free, table_handle);
GET_VARIABLE(total_symbols, def_handle);
+ GET_VARIABLE(symbol_to_str, def_handle);
GET_VARIABLE(symbol_is_terminal, def_handle);
GET_VARIABLE(symbol_is_input_end, def_handle);
GET_VARIABLE(symbol_is_valid, def_handle);
@@ -132,8 +133,13 @@ int main(int argc, char **argv)
goto cleanup;
}
+ table_print();
+
set_stdout(add_extension(output_path, ".c"));
printf("size_t total_symbols = %zu;\n", total_symbols);
+ printf("char **symbol_to_string = (char *([])){\n");
+ for(size_t i = 0; i < total_symbols; i++) printf("\"%s\", ", symbol_to_str[i]);
+ printf("};\n");
printf("IMPLEMENT_FUNCPTR(int, symbol_is_valid, (symbol s)) {return s < total_symbols;}\n");
printf("struct production _grammar[] = {\n");
@@ -157,19 +163,33 @@ int main(int argc, char **argv)
for(size_t i = 0; i < total_productions; i++) {
printf("#define A(n) (*(stack_head-3*%zu+3*n-1))\n", grammar[i].nRHS-1);
- printf("int __prod%zu_action(int *stack_head)\n", i);
- printf("{ int v;\n");
- printf(semantic_action_str[i]);
+ printf("intptr_t __prod%zu_action(intmax_t *stack_head)\n", i);
+ printf("{ intptr_t v;\n");
+ puts(semantic_action_str[i]);
printf("return v; }\n");
printf("#undef A\n");
}
- printf("typedef int (*semantic_action_fn)(int *stack_head);\n");
+ printf("typedef intptr_t (*semantic_action_fn)(intmax_t *stack_head);\n");
printf("semantic_action_fn *semantic_actions = (semantic_action_fn[]){\n");
for(size_t i = 0; i < total_productions; i++)
printf("__prod%zu_action, ", i);
printf("};");
+
+ set_stdout(add_extension(output_path, ".h"));
+ printf("#ifndef GENERATED_H\n");
+ printf("#define GENERATED_H\n");
+ printf("#include \"parts/symbol.h\"\n");
+ printf("enum symbol {\n");
+ for(size_t i = 0; i < total_symbols; i++) printf("%s, ", symbol_to_str[i]);
+ printf("};\n");
+ printf("#include \"parts/grammar.h\"\n");
+ printf("#include \"parts/table.h\"\n");
+ printf("#include <stdint.h>\n");
+ printf("typedef intptr_t (*semantic_action_fn)(intmax_t *stack_head);\n");
+ printf("extern semantic_action_fn *semantic_actions;\n");
+ printf("#endif\n");
set_stdout(NULL);
cleanup:
@@ -189,10 +209,10 @@ void precedence_tables_fill()
for(size_t i = 0; i < nprecedence_defs; i++)
for(size_t j = 0; j < precedence_defs[i].nlist; j++)
- if(precedence_defs[i].flag >= 0)
+ if(precedence_defs[i].list[j] >= 0)
precedence_symbol[precedence_defs[i].list[j]] = PRECEDENCE_SET(precedence_defs[i].flag, i+1);
else
- precedence_production[precedence_defs[i].list[j]] = PRECEDENCE_SET(~precedence_defs[i].flag, i+1);
+ precedence_production[~precedence_defs[i].list[j]] = PRECEDENCE_SET(precedence_defs[i].flag, i+1);
for(size_t i = 0; i < total_productions; i++) {
if(precedence_production[i]) continue;
diff --git a/demos/sample-files/calc-defs.c b/demos/sample-files/calc-defs.c
index 7321a88..103b69e 100644
--- a/demos/sample-files/calc-defs.c
+++ b/demos/sample-files/calc-defs.c
@@ -1,17 +1,19 @@
-#include <stddef.h> // size_t
+#include "util/util.h"
+#define SYMBOLS(X) \
+ X(PLUS) X(MINUS) X(TIMES) X(MORE) X(LESS) X(EQUA) \
+ X(LPAREN) X(RPAREN) \
+ X(QMARK) X(COLON) \
+ X(NUM) X(END_INPUT) \
+ \
+ X(EP) X(E) \
+ X(SYMBOLS_END)
#include "parts/symbol.h"
-enum symbol {
- PLUS, MINUS, TIMES,
- LPAREN, RPAREN,
- NUM, END_INPUT,
-
- EP, E, T,
- SYMBOLS_END,
-};
-
+enum symbol { SYMBOLS(X_TO_ENUM) };
size_t total_symbols = SYMBOLS_END;
+// One name per SYMBOLS entry, in the same order as enum symbol; generate-parser loads it by name.
+// No `extern` here: this line is the definition, and GCC warns on an initialized `extern`.
+char **symbol_to_str = (char *([])){ SYMBOLS(X_TO_STR) };
+
IMPLEMENT_FUNCPTR(int, symbol_is_terminal, (symbol s)) { return s < EP; }
IMPLEMENT_FUNCPTR(int, symbol_is_input_end, (symbol s)) { return s == END_INPUT; }
IMPLEMENT_FUNCPTR(int, symbol_is_valid, (symbol s)) { return s < SYMBOLS_END; }
@@ -24,6 +26,12 @@ static struct production _grammar[] = {
PROD(E, -->, E, MINUS, E),
PROD(E, -->, E, TIMES, E),
PROD(E, -->, LPAREN, E, RPAREN),
+ PROD(E, -->, MINUS, E),
+ PROD(E, -->, E, QMARK, E, COLON, E),
+ PROD(E, -->, E, QMARK, E),
+ PROD(E, -->, E, MORE, E),
+ PROD(E, -->, E, LESS, E),
+ PROD(E, -->, E, EQUA, E),
PROD(E, -->, NUM),
};
@@ -37,20 +45,29 @@ char **semantic_action_str = (char *([])){
"v = A(0) - A(2);",
"v = A(0) * A(2);",
"v = A(1);",
+ "v = - A(1);",
+ "v = A(0) ? A(2) : A(4);",
+ "v = A(0) ? A(2) : 0;",
+ "v = A(0) > A(1);",
+ "v = A(0) < A(1);",
+ "v = A(0) = A(1);",
"v = A(0);",
};
#include "parts/precedence.h"
-
struct precedence_def {
int flag;
int *list;
size_t nlist;
};
#define PREC(f, ...) {f, (int[]){__VA_ARGS__}, sizeof((int[]){__VA_ARGS__})/sizeof(int)}
+#define USE_PROD(n) (~(n))
struct precedence_def _precedence_defs[] = {
+ PREC(0, QMARK),
+ PREC(0, USE_PROD(7)),
+ PREC(PRECEDENCE_LEFT_ASSOC, MORE, LESS, EQUA),
PREC(PRECEDENCE_LEFT_ASSOC, MINUS, PLUS),
- PREC(PRECEDENCE_LEFT_ASSOC, TIMES),
+ PREC(PRECEDENCE_LEFT_ASSOC, TIMES, USE_PROD(5)),
PREC(PRECEDENCE_LEFT_ASSOC, LPAREN, RPAREN),
};
diff --git a/demos/sample-files/calc-skeleton.c b/demos/sample-files/calc-skeleton.c
index b0cbf00..6e5d2d5 100644
--- a/demos/sample-files/calc-skeleton.c
+++ b/demos/sample-files/calc-skeleton.c
@@ -1,30 +1,57 @@
#include <stdio.h>
#include <string.h>
+#include <stdint.h>
#include <ctype.h>
-#include "lr-parser.c"
-#include "bin/a.c" // generated
-
-// these should come from a generated
-// header file by the parser generator
-#include "parts/symbol.h"
-enum symbol {
- PLUS, MINUS, TIMES,
- LPAREN, RPAREN,
- NUM, END_INPUT,
-
- EP, E, T,
- SYMBOLS_END,
-};
+// generated
+#include "bin/calc.h"
+#include "bin/calc.c"
+#include "parts/toklist.h"
static struct token {
symbol s;
int v;
} tok;
+static char *next_token(char *str);
+
+symbol token_sym(struct token *t) { return t->s; }
+intptr_t token_val(struct token *t) { return (intptr_t)t->v; }
+
+static char *input;
+
+struct token *toklist_eat()
+{
+ static struct token t;
+ t = tok;
+ input = next_token(input);
+ return &t;
+}
+
+struct token *toklist_peek() { return &tok; }
+
+#include "lr-parser.c"
+
+/*
+ * Parse the single command-line argument and print the value produced by the parser.
+ * Returns 1 when the argument count is wrong or lr_parser() reports failure, 0 otherwise.
+ */
+int main(int argc, char **argv)
+{
+    if(argc != 2) return 1;
+
+    // load the first token so toklist_peek() has something to show the parser
+    input = next_token(argv[1]);
+
+    intptr_t value;
+    if(lr_parser(&value)) return 1;
+
+    printf("INPUT: '%s'\n", argv[1]);
+    // %jd consumes an intmax_t; intptr_t need not be the same type, so convert explicitly
+    printf("OUTPUT: %jd\n", (intmax_t)value);
+
+    return 0;
+}
+
+// LEXER
+
static inline int issep(char c)
{
- return isspace(c) || c == '\0' || c == '(' || c == ')' || c == '+' || c == '-' || c == '*';;
+ return isspace(c) || c == '\0' || c == '(' || c == ')' || c == '+' || c == '-' || c == '*' || c == '>' || c == '<' || c == '=' || c == '?' || c == ':';
}
static inline int tillsep(char *str)
@@ -59,6 +86,11 @@ static char *next_token(char *str)
case '-': tok.s = MINUS; break;
case '+': tok.s = PLUS; break;
case '*': tok.s = TIMES; break;
+ case '>': tok.s = MORE; break;
+ case '<': tok.s = LESS; break;
+ case '=': tok.s = EQUA; break;
+ case '?': tok.s = QMARK; break;
+ case ':': tok.s = COLON; break;
}
} else if(c0 >= '0' && c0 <= '9') { // num
tok.s = NUM;
@@ -68,32 +100,3 @@ static char *next_token(char *str)
return str+off;
}
-
-static char *input;
-
-symbol token_sym(struct token *t) { return t->s; }
-int token_val(struct token *t) { return t->v; }
-
-struct token *toklist_eat()
-{
- static struct token t;
- t = tok;
- input = next_token(input);
- return &t;
-}
-struct token *toklist_peek() { return &tok; }
-
-int main(int argc, char **argv)
-{
- if(argc != 2) return 1;
-
- input = next_token(argv[1]);
-
- int value;
- if(lr_parser(&value)) return 1;
-
- printf("INPUT: '%s'\n", argv[1]);
- printf("OUTPUT: %d\n", value);
-
- return 0;
-}
diff --git a/demos/sample-files/gram-defs.c b/demos/sample-files/gram-defs.c
new file mode 100644
index 0000000..733a866
--- /dev/null
+++ b/demos/sample-files/gram-defs.c
@@ -0,0 +1,65 @@
+#include "util/util.h"
+// X-macro list of every grammar symbol: terminals first, nonterminals from Sp onward,
+// and SYMBOLS_END last so that its enum value equals the number of real symbols.
+#define SYMBOLS(X) \
+ X(COLON) X(PIPE) X(SEMICOL) X(DOT) \
+ X(D_LEFT) X(D_RIGHT) X(D_TERMINAL) X(D_NONTERM) \
+ X(IDEN) X(NUM) X(ACTION) X(END_INPUT) \
+ \
+ X(Sp) X(S) X(Slist) X(Prod) X(Prec) \
+ X(Prodlist) X(Idenlist) X(IorN) X(IorNlist) \
+ X(SYMBOLS_END) \
+
+#include "parts/symbol.h"
+enum symbol { SYMBOLS(X_TO_ENUM) };
+size_t total_symbols = SYMBOLS_END;
+
+// Built from the same list as the enum, so entry i belongs to the symbol with value i
+// (assuming X_TO_STR from util/util.h stringizes its argument -- TODO confirm).
+char **symbol_to_str = (char *([])){ SYMBOLS(X_TO_STR) };
+
+// Classification relies on the ordering of SYMBOLS above: everything before Sp is a terminal.
+IMPLEMENT_FUNCPTR(int, symbol_is_terminal, (symbol s)) { return s < Sp; }
+IMPLEMENT_FUNCPTR(int, symbol_is_input_end, (symbol s)) { return s == END_INPUT; }
+IMPLEMENT_FUNCPTR(int, symbol_is_valid, (symbol s)) { return s < SYMBOLS_END; }
+
+#include "parts/grammar.h"
+#define PROD(LHS, _, ...) {LHS, (symbol[]){__VA_ARGS__}, sizeof((symbol[]){__VA_ARGS__})/sizeof(symbol)}
+// Single table pairing each production with the source text of its semantic action.
+// X(production, action) is expanded once per row by whichever X the user passes in.
+// The action text is pasted into a generated function that declares `intptr_t v`
+// and returns it, so every action must assign v. A(n) reads an intmax_t stack slot,
+// hence the explicit conversion before handing it to %s.
+#define GRAMMAR_ACTION_DEF(X) \
+ X(PROD(Sp, ->, Slist, END_INPUT), "v = 0;") \
+ X(PROD(Slist, -->, S, SEMICOL, Slist), "v = 0;") \
+ X(PROD(Slist, -->, S, DOT), "v = 0;") \
+ X(PROD(S, -->, Prod), "v = 0;") \
+ X(PROD(S, -->, Prec), "v = 0;") \
+ X(PROD(Idenlist, -->, IDEN, Idenlist), "v = 0;") \
+ X(PROD(Idenlist, -->, IDEN), "v = 0;") \
+ X(PROD(Prod, -->, IDEN, COLON, Prodlist), "v = 0;") \
+ X(PROD(Prodlist, -->, Idenlist, ACTION, PIPE, Prodlist), "printf(\"ACTION: '%s'\\n\", (char *)(intptr_t)A(1)); v = 0;") \
+ X(PROD(Prodlist, -->, Idenlist, ACTION), "printf(\"ACTION: '%s'\\n\", (char *)(intptr_t)A(1)); v = 0;") \
+ X(PROD(Prec, -->, D_TERMINAL, Idenlist), "v = 0;") \
+ X(PROD(Prec, -->, D_NONTERM, Idenlist), "v = 0;") \
+ X(PROD(Prec, -->, D_LEFT, IorNlist), "v = 0;") \
+ X(PROD(Prec, -->, D_RIGHT, IorNlist), "v = 0;") \
+ X(PROD(IorNlist, -->, IorN, IorNlist), "v = 0;") \
+ X(PROD(IorNlist, -->, IorN), "v = 0;") \
+ X(PROD(IorN, -->, IDEN), "v = 0;") \
+ X(PROD(IorN, -->, NUM), "v = 0;")
+
+// Selectors for GRAMMAR_ACTION_DEF: keep only the production, or only the action text.
+#define X_GRAMMAR(G, A) G,
+#define X_ACTION(G, A) A,
+
+// Production table; row i here and row i of semantic_action_str come from the same X(...) entry.
+static struct production _grammar[] = {
+ GRAMMAR_ACTION_DEF(X_GRAMMAR)
+};
+
+struct production *grammar = _grammar;
+size_t total_productions = sizeof(_grammar) / sizeof(*_grammar);
+
+// #include "???.h"
+// Action source text, one string per production, in production order.
+char **semantic_action_str = (char *([])){
+ GRAMMAR_ACTION_DEF(X_ACTION)
+};
+
+#include "parts/precedence.h"
+// One precedence level. generate-parser reads list[j] >= 0 as a symbol and a negative
+// list[j] as ~production_index; flag is forwarded to PRECEDENCE_SET for every entry.
+struct precedence_def {
+ int flag;
+ int *list;
+ size_t nlist;
+};
+// This grammar declares no precedence levels.
+struct precedence_def *precedence_defs = NULL;
+size_t nprecedence_defs = 0;
diff --git a/demos/sample-files/gram-skeleton.c b/demos/sample-files/gram-skeleton.c
new file mode 100644
index 0000000..89ef6b4
--- /dev/null
+++ b/demos/sample-files/gram-skeleton.c
@@ -0,0 +1,149 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#define ARENA_IMPLEMENTATION
+#include "util/arena.h"
+
+// Fixed backing store for all lexer strings; nothing in this file resets or releases it.
+static char buf[1024];
+static struct arena_ctx global_arena = ARENA_CTX_INIT(buf, sizeof(buf));
+
+// Take sz bytes from global_arena. Never returns NULL: when the request does not fit,
+// an error is printed to stderr and the process exits with status 1.
+static void *xalloc(size_t sz)
+{
+    void *mem = arena_allocate(&global_arena, sz);
+    if(mem) return mem;
+
+    fprintf(stderr, "ERROR: Arena empty\n");
+    exit(1);
+}
+
+// generated
+#include "bin/gram.h"
+#include "bin/gram.c"
+
+#include "parts/toklist.h"
+// A lexed token. `tok` is the single lookahead slot that next_token() fills in.
+// v carries a pointer to an arena-allocated string for IDEN and ACTION tokens
+// and the parsed integer for NUM tokens; other token kinds do not set it.
+struct token {
+ symbol s;
+ intptr_t v;
+} tok;
+
+static char *next_token(char *str);
+
+// Accessors required by parts/toklist.h; t must not be NULL.
+symbol token_sym(struct token *t) { return t->s; }
+intptr_t token_val(struct token *t) { return t->v; }
+
+// Hard-coded sample grammar text and, at run time, the lexer's current read position.
+// Adjacent string literals are concatenated, so the pieces below form one string
+// with no newline characters between them.
+static char *input = (char []){
+ "-left B;"
+ "-right C;"
+ "-left D;"
+ ""
+ "A: B {a}"
+ " | C N {d}."
+};
+
+// Consume the lookahead token: hand back a copy of it and lex the next one into tok.
+// The copy lives in static storage and is overwritten by the following call.
+struct token *toklist_eat()
+{
+    static struct token eaten;
+
+    eaten = tok;
+    input = next_token(input);
+
+    return &eaten;
+}
+
+// Look at the lookahead token without consuming it.
+struct token *toklist_peek()
+{
+    return &tok;
+}
+
+#include "lr-parser.c"
+
+/*
+ * Run the generated grammar parser over the built-in sample text.
+ * Returns 1 when lr_parser() reports failure, 0 otherwise.
+ */
+int main(void)
+{
+    // load the first token so toklist_peek() has something to show the parser
+    input = next_token(input);
+
+    intptr_t value;
+    if(lr_parser(&value)) {
+        return 1;
+    }
+
+    // %jd consumes an intmax_t; intptr_t need not be the same type, so convert explicitly
+    printf("OUTPUT: %jd\n", (intmax_t)value);
+    return 0;
+}
+
+// STR UTIL
+
+// Route strdup() to an arena-backed copy so duplicated strings come from xalloc().
+#define strdup(...) _strdup(__VA_ARGS__)
+static inline char *_strdup(char *str)
+{
+    size_t nbytes = strlen(str) + 1; // include the terminator
+    char *copy = xalloc(nbytes);
+    return memcpy(copy, str, nbytes);
+}
+
+/*
+ * Copy the first sub_end characters of str into a static buffer and NUL-terminate it.
+ * Returns the buffer, or NULL when sub_end characters plus the terminator do not fit.
+ * The buffer is shared: every call overwrites the result of the previous one.
+ */
+static inline char *substring(char *str, size_t sub_end)
+{
+    static char sub[128];
+    // `sub_end + 1 > sizeof(sub)` would wrap to 0 for SIZE_MAX and let the write through
+    if(sub_end >= sizeof(sub)) return NULL;
+
+    memcpy(sub, str, sub_end);
+    sub[sub_end] = '\0';
+    return sub;
+}
+
+// LEXER
+
+// Return 1 when c ends a word: whitespace, the string terminator, or one of the
+// punctuation characters that start a token of their own; 0 otherwise.
+static inline int issep(char c)
+{
+    switch(c) {
+    case '\0': case ':': case '|': case ';': case '.': case '-': case '{':
+        return 1;
+    default:
+        // <ctype.h> is only defined for unsigned char values (and EOF); plain char may be negative
+        return isspace((unsigned char)c) != 0;
+    }
+}
+
+// Count the characters from str up to, but not including, the first separator.
+// Returns 0 when str already starts with a separator.
+static inline int tillsep(char *str)
+{
+    size_t len = 0;
+    while(!issep(str[len])) len++;
+    return len;
+}
+
+/*
+ * Lex one token starting at str into the global `tok`.
+ *
+ * Returns the position just past the token. At end of input it sets END_INPUT and
+ * returns the position of the terminator itself, so further calls keep yielding
+ * END_INPUT instead of reading past the string. On a lexical error it prints a
+ * message to stderr, sets END_INPUT and returns NULL; a NULL str is passed through
+ * untouched so that state persists across calls.
+ */
+static char *next_token(char *str)
+{
+    if(!str) return str;
+
+    // <ctype.h> is only defined for unsigned char values; plain char may be negative
+    while(isspace((unsigned char)*str)) str++;
+
+    char c0 = str[0];
+    if(c0 == '\0') {
+        tok.s = END_INPUT;
+        return str; // stay on the terminator
+    }
+
+    size_t off = tillsep(str);
+    char *s = NULL; // substring() result; NULL when the token exceeds its buffer
+
+    if(off == 0) { // c0 is itself a separator
+        switch(str[off++]) {
+        case ':': tok.s = COLON; break;
+        case '|': tok.s = PIPE; break;
+        case ';': tok.s = SEMICOL; break;
+        case '.': tok.s = DOT; break;
+        case '-': // directive: '-' immediately followed by a keyword
+            off = tillsep(++str);
+            if(!(s = substring(str, off))) goto too_long;
+            if(strcmp(s, "left") == 0) tok.s = D_LEFT;
+            else if(strcmp(s, "right") == 0) tok.s = D_RIGHT;
+            else if(strcmp(s, "terminal") == 0) tok.s = D_TERMINAL;
+            else if(strcmp(s, "nonterminal") == 0) tok.s = D_NONTERM;
+            else { fprintf(stderr, "ERROR: Unknown directive '-%s'\n", s); goto fail; }
+            break;
+        case '{': // action: everything through the matching '}', braces included
+            for(int depth = 1; depth != 0; off++)
+                if(str[off] == '\0') { fprintf(stderr, "ERROR: No closing '{'\n"); goto fail; }
+                else if(str[off] == '{') depth++;
+                else if(str[off] == '}') depth--;
+            if(!(s = substring(str, off))) goto too_long;
+            tok.s = ACTION;
+            tok.v = (intptr_t)strdup(s);
+            break;
+        default: // not reachable: whitespace and '\0' were handled above
+            break;
+        }
+    } else if(isalpha((unsigned char)c0)) { // iden or named symbol
+        if(!(s = substring(str, off))) goto too_long;
+        tok.s = IDEN;
+        tok.v = (intptr_t)strdup(s);
+    } else if(c0 >= '0' && c0 <= '9') { // num
+        if(!(s = substring(str, off))) goto too_long;
+        tok.s = NUM;
+        tok.v = (intptr_t)atoi(s);
+    } else {
+        // previously fell through and left the old token in tok
+        fprintf(stderr, "ERROR: Unexpected character '%c'\n", c0);
+        goto fail;
+    }
+
+    return str+off;
+
+too_long:
+    fprintf(stderr, "ERROR: Token too long\n");
+fail:
+    tok.s = END_INPUT;
+    return NULL;
+}
diff --git a/lr-parser.c b/lr-parser.c
index 799276d..3b6be84 100644
--- a/lr-parser.c
+++ b/lr-parser.c
@@ -1,5 +1,6 @@
#include <stdio.h>
#include <stdlib.h>
+#include <stdint.h>
// TODO: - check errors and fail safely and
// see connection with the lexer
@@ -11,16 +12,16 @@
#include "parts/table.h"
#include "parts/toklist.h"
// and
-typedef int (*semantic_action_fn)(int *stack_head);
+typedef intptr_t (*semantic_action_fn)(intmax_t *stack_head);
extern semantic_action_fn *semantic_actions;
-typedef int stack_item;
+typedef intmax_t stack_item;
#define STACK_CAP 128
static stack_item stack_bottom[STACK_CAP];
static stack_item *stack_head = stack_bottom;
-int lr_parser(int *value)
+int lr_parser(intptr_t *value)
{
#define push(item) do { \
if(++stack_head - stack_bottom < STACK_CAP ) *stack_head = item; \
@@ -42,7 +43,7 @@ int lr_parser(int *value)
push(a.arg);
break;
case ACTION_REDUCE:
- int semantic_value = semantic_actions[a.arg](stack_head);
+ intptr_t semantic_value = semantic_actions[a.arg](stack_head);
for(size_t i = 0; i < 3*grammar[a.arg].nRHS; i++) pop();
symbol lhs = grammar[a.arg].LHS;
@@ -136,7 +137,7 @@ struct token {
int v;
};
-static struct token toklist[] = {{N0}, {PLUS}, {N1}, {END_INPUT}};
+static struct token toklist[] = {{N0, 0}, {PLUS, 0}, {N1, 0}, {END_INPUT, 0}};
static const size_t ntoklist = sizeof(toklist)/sizeof(*toklist);
static size_t tok;
@@ -151,15 +152,15 @@ struct token *toklist_peek() { return toklist + tok; }
symbol token_sym(struct token *t) { return t->s; }
int token_val(struct token *t) { return t->v; }
-int none(int *stack_head) {(void)stack_head; return 0;}
+intptr_t none(intmax_t *stack_head) {(void)stack_head; return 0;}
semantic_action_fn *semantic_actions = (semantic_action_fn[]){none, none, none, none, none, none, none, none};
int main(void)
{
- int value;
+ intptr_t value;
if(lr_parser(&value)) return 1;
- printf("%d\n", value);
+ printf("%jd\n", value);
return 0;
}
diff --git a/parts/grammar.h b/parts/grammar.h
index d1bf176..e747855 100644
--- a/parts/grammar.h
+++ b/parts/grammar.h
@@ -1,6 +1,7 @@
#ifndef GRAMMAR_H
#define GRAMMAR_H
+#include "symbol.h"
#include <stddef.h> // size_t
extern struct production {
diff --git a/parts/table.h b/parts/table.h
index efd19bb..c60ad4f 100644
--- a/parts/table.h
+++ b/parts/table.h
@@ -2,7 +2,7 @@
#define TABLE_H
#include <stddef.h> // size_t
-#include "util/util.h"
+#include "util/util.h" // X_TO_...
#define ACTION_TYPE(X) \
X(ACTION_NOT_SET) \
@@ -28,7 +28,7 @@ extern void (*table_free)();
void table_print();
void table_print_cstyle();
-int table_insert(size_t state, symbol sym, struct action a); // should it be here??
+int table_insert(size_t state, symbol sym, struct action a); // should this be here??
#include "symbol.h"
@@ -103,7 +103,7 @@ int table_insert(size_t state, symbol sym, struct action a)
if(prec_num(tbl_a) > prec_num(new_a)) set_tbl_a = 0;
else if(prec_num(tbl_a) < prec_num(new_a)) set_tbl_a = 1;
else { report = 1;
- if(new_a->arg > tbl_a->arg) set_tbl_a = 1;
+ if(new_a->arg < tbl_a->arg) set_tbl_a = 1;
}
} else if(shift_reduce) {
int favor_shift = 0;
diff --git a/parts/toklist.h b/parts/toklist.h
index 760846f..08fce66 100644
--- a/parts/toklist.h
+++ b/parts/toklist.h
@@ -1,12 +1,13 @@
#ifndef TOKLIST_H
#define TOKLIST_H
+#include <stdint.h> // intptr_t
#include "symbol.h"
struct token;
-symbol token_sym(struct token *t); // UB for NULL
-int token_val(struct token *t); // UB for NULL
+symbol token_sym(struct token *t); // t != NULL
+intptr_t token_val(struct token *t); // t != NULL
struct token *toklist_eat(); // always non-NULL
struct token *toklist_peek(); // always non-NULL
diff --git a/util/arena.h b/util/arena.h
new file mode 100644
index 0000000..3d82b95
--- /dev/null
+++ b/util/arena.h
@@ -0,0 +1,30 @@
+#ifndef ARENA_H
+#define ARENA_H
+
+#include <stddef.h> // size_t
+
+// Bump allocator over a caller-supplied buffer.
+struct arena_ctx {
+ void *buffer; // start of the caller's storage
+ size_t size; // capacity of buffer in bytes
+ size_t offset; // bytes handed out so far; the next allocation starts here
+};
+
+// Build a context over `buffer` of `sz` bytes with nothing allocated yet.
+#define ARENA_CTX_INIT(buffer, sz) (struct arena_ctx){(buffer), (sz), 0}
+// Definitions are compiled only where ARENA_IMPLEMENTATION is defined before inclusion.
+void *arena_allocate(struct arena_ctx *ctx, size_t sz);
+void arena_reset(struct arena_ctx *ctx);
+
+#ifdef ARENA_IMPLEMENTATION
+
+/*
+ * Hand out the next sz bytes of ctx's buffer.
+ * Returns the start of the new region, or NULL when fewer than sz bytes remain.
+ * Regions are packed back to back; no alignment padding is inserted.
+ */
+void *arena_allocate(struct arena_ctx *ctx, size_t sz)
+{
+    // Compare against the space left: `offset + sz` can wrap around for a huge sz.
+    // offset never exceeds size, because only this function advances it.
+    if(sz > ctx->size - ctx->offset) return NULL;
+
+    // go through char *: arithmetic directly on void * is a GNU extension, not ISO C
+    void *region = (char *)ctx->buffer + ctx->offset;
+    ctx->offset += sz;
+    return region;
+}
+
+// Mark the whole buffer as free again; later allocations reuse it from the start.
+void arena_reset(struct arena_ctx *ctx)
+{
+    ctx->offset = 0;
+}
+
+#endif
+#endif