diff options
| author | kartofen <kartofen.mail.0@protonmail.com> | 2025-07-20 01:32:24 +0300 |
|---|---|---|
| committer | kartofen <kartofen.mail.0@protonmail.com> | 2025-07-20 01:32:24 +0300 |
| commit | 34357640c0676f33ad13aac1fe28effc6f6e47c7 (patch) | |
| tree | d656ee61da7d7a0b133aa57311266653ef100569 | |
| parent | 174e9b35ce3b6e99e500907f1bb24c6f31f481bf (diff) | |
start of grammar parsing
| -rwxr-xr-x | build.sh | 28 | ||||
| -rw-r--r-- | clr-table.c | 15 | ||||
| -rw-r--r-- | demos/generate-parser.c | 48 | ||||
| -rw-r--r-- | demos/sample-files/calc-defs.c | 41 | ||||
| -rw-r--r-- | demos/sample-files/calc-skeleton.c | 91 | ||||
| -rw-r--r-- | demos/sample-files/gram-defs.c | 65 | ||||
| -rw-r--r-- | demos/sample-files/gram-skeleton.c | 149 | ||||
| -rw-r--r-- | lr-parser.c | 17 | ||||
| -rw-r--r-- | parts/grammar.h | 1 | ||||
| -rw-r--r-- | parts/table.h | 6 | ||||
| -rw-r--r-- | parts/toklist.h | 5 | ||||
| -rw-r--r-- | util/arena.h | 30 |
12 files changed, 395 insertions, 101 deletions
@@ -4,27 +4,27 @@ set -e function log { - >&2 echo "-> $@" + echo "-> $@" "$@" } function cc { mkdir -p bin - [ -n "$3" ] && NAME=$3 || NAME=$(basename $1) - log gcc -Wall -Wextra -Wpedantic -I. -g $2 $1.c -o "bin/$NAME" + [ -n "$3" ] && NAME="$3" || NAME=$(basename "$1") + log gcc -Wall -Wextra -Wpedantic -I. -g $2 "$1.c" -o "bin/$NAME" } function shared { mkdir -p bin - [ -n "$3" ] && NAME=$3 || NAME=$(basename $1) - log gcc -Wall -Wextra -Wpedantic -I. -g -shared -fPIC $2 $1.c -o "bin/$NAME.so" + [ -n "$3" ] && NAME="$3" || NAME=$(basename "$1") + log gcc -Wall -Wextra -Wpedantic -I. -g -shared -fPIC $2 "$1.c" -o "bin/$NAME.so" } function leak { - log valgrind --leak-check=full --show-leak-kinds=all -s bin/$1 "$2" + log valgrind --leak-check=full --show-leak-kinds=all -s bin/"$1" $2 } # cc util/dict -D_DICT_STANDALONE @@ -67,7 +67,15 @@ shared clr-table -D_LAZY_LALR lalr-table shared demos/sample-files/lalr-defs # --- Calc example --- -shared demos/sample-files/calc-defs -leak "generate-parser -t lalr-table bin/calc-defs.so" -cc demos/sample-files/calc-skeleton "" parser -leak parser "13*10+9 - (54*(10+8))" +# shared demos/sample-files/calc-defs +# leak generate-parser "-o bin/calc -t lalr-table bin/calc-defs.so" +# cc demos/sample-files/calc-skeleton "" parser +# leak parser "13*10+9 - (54*(10+8))" # wrong answer +# leak parser "-13 + 20" +# leak parser "1 > 52 ? 2 + 3 : 53" + +# --- Grammar Definitino example --- +shared demos/sample-files/gram-defs +leak generate-parser "-o bin/gram -t lalr-table bin/gram-defs.so" +cc demos/sample-files/gram-skeleton "" parser +leak parser diff --git a/clr-table.c b/clr-table.c index af39441..764bbcc 100644 --- a/clr-table.c +++ b/clr-table.c @@ -3,8 +3,6 @@ #include <stdint.h> #include <setjmp.h> -// TODO: handle conflicts (itemset_insert returns 2 on table problem) - #ifndef XCALLOC_IMPLEMENTED #define XCALLOC_IMPLEMENTED void *xcalloc(size_t n, size_t size) { void *addr = calloc(n, size); return addr ? addr : (exit(1), NULL); } @@ -44,7 +42,7 @@ static int item_eq(struct item *i1, struct item *i2) { return (i1->dot == i2->do static int item_core_eq(struct item *i1, struct item *i2) { return (i1->dot == i2->dot && i1->prod_idx == i2->prod_idx) ? 1 : 0; } #endif -#define SEEN_SETS_CAP 64 +#define SEEN_SETS_CAP 256 static struct { struct item *items; size_t nitems; @@ -72,7 +70,7 @@ static size_t itemset_handle(struct item *set, size_t nset) #endif // 1. is set in seen_sets - for(size_t i = 0; i < nseen_sets; i++) { + for(size_t i = 0; i < nseen_sets; i++) if(seen_sets[i].nitems == nset) { int _seen = 0; for(size_t j = 0; j < nset; j++) { @@ -85,6 +83,7 @@ static size_t itemset_handle(struct item *set, size_t nset) } #ifdef _LAZY_LALR + for(size_t i = 0; i < nseen_sets; i++) { int _same_core = 0; for(size_t j = 0; j < nset; j++) { _same_core = 0; @@ -92,9 +91,9 @@ static size_t itemset_handle(struct item *set, size_t nset) if(item_core_eq(&seen_sets[i].items[k], &set[j])) _same_core = 1; if(!_same_core) break; } - if(_same_core) { (use_state != SIZE_MAX) && (exit(15), 1); use_state = seen_sets[i].state; } -#endif + if(_same_core) { use_state = seen_sets[i].state; break; } } +#endif // 2. add set to seen_sets if(nseen_sets >= SEEN_SETS_CAP) { @@ -126,8 +125,8 @@ static size_t itemset_handle(struct item *set, size_t nset) return new_state; } -#define CLOSURE_SET_CAP 64 -#define GOTO_SET_CAP 32 +#define CLOSURE_SET_CAP 128 +#define GOTO_SET_CAP 128 static int itemset_insert(size_t state, struct item *initial_set, size_t ninitial) { struct item closure_set[CLOSURE_SET_CAP]; diff --git a/demos/generate-parser.c b/demos/generate-parser.c index 23201fa..48fa48c 100644 --- a/demos/generate-parser.c +++ b/demos/generate-parser.c @@ -5,12 +5,13 @@ #include <unistd.h> // getopt #include <assert.h> -#define DEFUALT_PATH "./bin" -#define DEFUALT_TYPE "lalr-table" #define DEFAULT_OUTPUT "bin/a" +#define DEFUALT_MODPATH "./bin" +#define DEFUALT_TYPE "lalr-table" #include "parts/symbol.h" size_t total_symbols; +char **symbol_to_str; int (*symbol_is_terminal)(symbol s); int (*symbol_is_input_end)(symbol s); int (*symbol_is_valid)(symbol s); @@ -42,8 +43,7 @@ void (*table_free)(); #include "util-tables.c" - -void *xdlsym(void *handle, char *sym) +static void *xdlsym(void *handle, char *sym) { void *r = dlsym(handle, sym); if(!r) { @@ -58,18 +58,18 @@ void *xdlsym(void *handle, char *sym) var = *(typeof(&var))xdlsym(handle, #var) -char *modpath(char *name) +static char *modpath(char *name) { static char fullpath[128]; // TODO: search the GENERATE_PARSER_PATH env var - char *path = DEFUALT_PATH; + char *path = DEFUALT_MODPATH; assert(snprintf(fullpath, 128, "%s/%s.so", path, name) < 128); return fullpath; } -char *add_extension(char *str, char *ext) +static char *add_extension(char *str, char *ext) { static char full[128]; assert((strlen(str) + strlen(ext) + 1) <= 128); @@ -77,7 +77,7 @@ char *add_extension(char *str, char *ext) return strcat(full, ext); } -void set_stdout(char *filename) +static void set_stdout(char *filename) { if(!filename) filename = "/dev/tty"; assert(freopen(filename, "w", stdout)); @@ -114,6 +114,7 @@ int main(int argc, char **argv) GET_VARIABLE(table_free, table_handle); GET_VARIABLE(total_symbols, def_handle); + GET_VARIABLE(symbol_to_str, def_handle); GET_VARIABLE(symbol_is_terminal, def_handle); GET_VARIABLE(symbol_is_input_end, def_handle); GET_VARIABLE(symbol_is_valid, def_handle); @@ -132,8 +133,13 @@ int main(int argc, char **argv) goto cleanup; } + table_print(); + set_stdout(add_extension(output_path, ".c")); printf("size_t total_symbols = %zu;\n", total_symbols); + printf("char **symbol_to_string = (char *([])){\n"); + for(size_t i = 0; i < total_symbols; i++) printf("\"%s\", ", symbol_to_str[i]); + printf("};\n"); printf("IMPLEMENT_FUNCPTR(int, symbol_is_valid, (symbol s)) {return s < total_symbols;}\n"); printf("struct production _grammar[] = {\n"); @@ -157,19 +163,33 @@ int main(int argc, char **argv) for(size_t i = 0; i < total_productions; i++) { printf("#define A(n) (*(stack_head-3*%zu+3*n-1))\n", grammar[i].nRHS-1); - printf("int __prod%zu_action(int *stack_head)\n", i); - printf("{ int v;\n"); - printf(semantic_action_str[i]); + printf("intptr_t __prod%zu_action(intmax_t *stack_head)\n", i); + printf("{ intptr_t v;\n"); + puts(semantic_action_str[i]); printf("return v; }\n"); printf("#undef A\n"); } - printf("typedef int (*semantic_action_fn)(int *stack_head);\n"); + printf("typedef intptr_t (*semantic_action_fn)(intmax_t *stack_head);\n"); printf("semantic_action_fn *semantic_actions = (semantic_action_fn[]){\n"); for(size_t i = 0; i < total_productions; i++) printf("__prod%zu_action, ", i); printf("};"); + + set_stdout(add_extension(output_path, ".h")); + printf("#ifndef GENERATED_H\n"); + printf("#define GENERATED_H\n"); + printf("#include \"parts/symbol.h\"\n"); + printf("enum symbol {\n"); + for(size_t i = 0; i < total_symbols; i++) printf("%s, ", symbol_to_str[i]); + printf("};\n"); + printf("#include \"parts/grammar.h\"\n"); + printf("#include \"parts/table.h\"\n"); + printf("#include <stdint.h>\n"); + printf("typedef intptr_t (*semantic_action_fn)(intmax_t *stack_head);\n"); + printf("extern semantic_action_fn *semantic_actions;\n"); + printf("#endif\n"); set_stdout(NULL); cleanup: @@ -189,10 +209,10 @@ void precedence_tables_fill() for(size_t i = 0; i < nprecedence_defs; i++) for(size_t j = 0; j < precedence_defs[i].nlist; j++) - if(precedence_defs[i].flag >= 0) + if(precedence_defs[i].list[j] >= 0) precedence_symbol[precedence_defs[i].list[j]] = PRECEDENCE_SET(precedence_defs[i].flag, i+1); else - precedence_production[precedence_defs[i].list[j]] = PRECEDENCE_SET(~precedence_defs[i].flag, i+1); + precedence_production[~precedence_defs[i].list[j]] = PRECEDENCE_SET(precedence_defs[i].flag, i+1); for(size_t i = 0; i < total_productions; i++) { if(precedence_production[i]) continue; diff --git a/demos/sample-files/calc-defs.c b/demos/sample-files/calc-defs.c index 7321a88..103b69e 100644 --- a/demos/sample-files/calc-defs.c +++ b/demos/sample-files/calc-defs.c @@ -1,17 +1,19 @@ -#include <stddef.h> // size_t +#include "util/util.h" +#define SYMBOLS(X) \ + X(PLUS) X(MINUS) X(TIMES) X(MORE) X(LESS) X(EQUA) \ + X(LPAREN) X(RPAREN) \ + X(QMARK) X(COLON) \ + X(NUM) X(END_INPUT) \ + \ + X(EP) X(E) \ + X(SYMBOLS_END) #include "parts/symbol.h" -enum symbol { - PLUS, MINUS, TIMES, - LPAREN, RPAREN, - NUM, END_INPUT, - - EP, E, T, - SYMBOLS_END, -}; - +enum symbol { SYMBOLS(X_TO_ENUM) }; size_t total_symbols = SYMBOLS_END; +extern char **symbol_to_str = (char *([])){ SYMBOLS(X_TO_STR) }; + IMPLEMENT_FUNCPTR(int, symbol_is_terminal, (symbol s)) { return s < EP; } IMPLEMENT_FUNCPTR(int, symbol_is_input_end, (symbol s)) { return s == END_INPUT; } IMPLEMENT_FUNCPTR(int, symbol_is_valid, (symbol s)) { return s < SYMBOLS_END; } @@ -24,6 +26,12 @@ static struct production _grammar[] = { PROD(E, -->, E, MINUS, E), PROD(E, -->, E, TIMES, E), PROD(E, -->, LPAREN, E, RPAREN), + PROD(E, -->, MINUS, E), + PROD(E, -->, E, QMARK, E, COLON, E), + PROD(E, -->, E, QMARK, E), + PROD(E, -->, E, MORE, E), + PROD(E, -->, E, LESS, E), + PROD(E, -->, E, EQUA, E), PROD(E, -->, NUM), }; @@ -37,20 +45,29 @@ char **semantic_action_str = (char *([])){ "v = A(0) - A(2);", "v = A(0) * A(2);", "v = A(1);", + "v = - A(1);", + "v = A(0) ? A(2) : A(4);", + "v = A(0) ? A(2) : 0;", + "v = A(0) > A(1);", + "v = A(0) < A(1);", + "v = A(0) = A(1);", "v = A(0);", }; #include "parts/precedence.h" - struct precedence_def { int flag; int *list; size_t nlist; }; #define PREC(f, ...) {f, (int[]){__VA_ARGS__}, sizeof((int[]){__VA_ARGS__})/sizeof(int)} +#define USE_PROD(n) (~(n)) struct precedence_def _precedence_defs[] = { + PREC(0, QMARK), + PREC(0, USE_PROD(7)), + PREC(PRECEDENCE_LEFT_ASSOC, MORE, LESS, EQUA), PREC(PRECEDENCE_LEFT_ASSOC, MINUS, PLUS), - PREC(PRECEDENCE_LEFT_ASSOC, TIMES), + PREC(PRECEDENCE_LEFT_ASSOC, TIMES, USE_PROD(5)), PREC(PRECEDENCE_LEFT_ASSOC, LPAREN, RPAREN), }; diff --git a/demos/sample-files/calc-skeleton.c b/demos/sample-files/calc-skeleton.c index b0cbf00..6e5d2d5 100644 --- a/demos/sample-files/calc-skeleton.c +++ b/demos/sample-files/calc-skeleton.c @@ -1,30 +1,57 @@ #include <stdio.h> #include <string.h> +#include <stdint.h> #include <ctype.h> -#include "lr-parser.c" -#include "bin/a.c" // generated - -// these should come from a generated -// header file by the parser generator -#include "parts/symbol.h" -enum symbol { - PLUS, MINUS, TIMES, - LPAREN, RPAREN, - NUM, END_INPUT, - - EP, E, T, - SYMBOLS_END, -}; +// generated +#include "bin/calc.h" +#include "bin/calc.c" +#include "parts/toklist.h" static struct token { symbol s; int v; } tok; +static char *next_token(char *str); + +symbol token_sym(struct token *t) { return t->s; } +intptr_t token_val(struct token *t) { return (intptr_t)t->v; } + +static char *input; + +struct token *toklist_eat() +{ + static struct token t; + t = tok; + input = next_token(input); + return &t; +} + +struct token *toklist_peek() { return &tok; } + +#include "lr-parser.c" + +int main(int argc, char **argv) +{ + if(argc != 2) return 1; + + input = next_token(argv[1]); + + intptr_t value; + if(lr_parser(&value)) return 1; + + printf("INPUT: '%s'\n", argv[1]); + printf("OUTPUT: %jd\n", value); + + return 0; +} + +// LEXER + static inline int issep(char c) { - return isspace(c) || c == '\0' || c == '(' || c == ')' || c == '+' || c == '-' || c == '*';; + return isspace(c) || c == '\0' || c == '(' || c == ')' || c == '+' || c == '-' || c == '*' || c == '>' || c == '<' || c == '=' || c == '?' || c == ':'; } static inline int tillsep(char *str) @@ -59,6 +86,11 @@ static char *next_token(char *str) case '-': tok.s = MINUS; break; case '+': tok.s = PLUS; break; case '*': tok.s = TIMES; break; + case '>': tok.s = MORE; break; + case '<': tok.s = LESS; break; + case '=': tok.s = EQUA; break; + case '?': tok.s = QMARK; break; + case ':': tok.s = COLON; break; } } else if(c0 >= '0' && c0 <= '9') { // num tok.s = NUM; @@ -68,32 +100,3 @@ static char *next_token(char *str) return str+off; } - -static char *input; - -symbol token_sym(struct token *t) { return t->s; } -int token_val(struct token *t) { return t->v; } - -struct token *toklist_eat() -{ - static struct token t; - t = tok; - input = next_token(input); - return &t; -} -struct token *toklist_peek() { return &tok; } - -int main(int argc, char **argv) -{ - if(argc != 2) return 1; - - input = next_token(argv[1]); - - int value; - if(lr_parser(&value)) return 1; - - printf("INPUT: '%s'\n", argv[1]); - printf("OUTPUT: %d\n", value); - - return 0; -} diff --git a/demos/sample-files/gram-defs.c b/demos/sample-files/gram-defs.c new file mode 100644 index 0000000..733a866 --- /dev/null +++ b/demos/sample-files/gram-defs.c @@ -0,0 +1,65 @@ +#include "util/util.h" +#define SYMBOLS(X) \ + X(COLON) X(PIPE) X(SEMICOL) X(DOT) \ + X(D_LEFT) X(D_RIGHT) X(D_TERMINAL) X(D_NONTERM) \ + X(IDEN) X(NUM) X(ACTION) X(END_INPUT) \ + \ + X(Sp) X(S) X(Slist) X(Prod) X(Prec) \ + X(Prodlist) X(Idenlist) X(IorN) X(IorNlist) \ + X(SYMBOLS_END) \ + +#include "parts/symbol.h" +enum symbol { SYMBOLS(X_TO_ENUM) }; +size_t total_symbols = SYMBOLS_END; + +char **symbol_to_str = (char *([])){ SYMBOLS(X_TO_STR) }; + +IMPLEMENT_FUNCPTR(int, symbol_is_terminal, (symbol s)) { return s < Sp; } +IMPLEMENT_FUNCPTR(int, symbol_is_input_end, (symbol s)) { return s == END_INPUT; } +IMPLEMENT_FUNCPTR(int, symbol_is_valid, (symbol s)) { return s < SYMBOLS_END; } + +#include "parts/grammar.h" +#define PROD(LHS, _, ...) {LHS, (symbol[]){__VA_ARGS__}, sizeof((symbol[]){__VA_ARGS__})/sizeof(symbol)} +#define GRAMMAR_ACTION_DEF(X) \ + X(PROD(Sp, ->, Slist, END_INPUT), "v = 0;") \ + X(PROD(Slist, -->, S, SEMICOL, Slist), "v = 0;") \ + X(PROD(Slist, -->, S, DOT), "v = 0;") \ + X(PROD(S, -->, Prod), "v = 0;") \ + X(PROD(S, -->, Prec), "v = 0;") \ + X(PROD(Idenlist, -->, IDEN, Idenlist), "v = 0;") \ + X(PROD(Idenlist, -->, IDEN), "v = 0;") \ + X(PROD(Prod, -->, IDEN, COLON, Prodlist), "v = 0;") \ + X(PROD(Prodlist, -->, Idenlist, ACTION, PIPE, Prodlist), "printf(\"ACTION: '%s'\\n\", A(1));") \ + X(PROD(Prodlist, -->, Idenlist, ACTION), "printf(\"ACTION: '%s'\\n\", A(1));") \ + X(PROD(Prec, -->, D_TERMINAL, Idenlist), "v = 0;") \ + X(PROD(Prec, -->, D_NONTERM, Idenlist), "v = 0;") \ + X(PROD(Prec, -->, D_LEFT, IorNlist), "v = 0;") \ + X(PROD(Prec, -->, D_RIGHT, IorNlist), "v = 0;") \ + X(PROD(IorNlist, -->, IorN, IorNlist), "v = 0;") \ + X(PROD(IorNlist, -->, IorN), "v = 0;") \ + X(PROD(IorN, -->, IDEN), "v = 0;") \ + X(PROD(IorN, -->, NUM), "v = 0;") + +#define X_GRAMMAR(G, A) G, +#define X_ACTION(G, A) A, + +static struct production _grammar[] = { + GRAMMAR_ACTION_DEF(X_GRAMMAR) +}; + +struct production *grammar = _grammar; +size_t total_productions = sizeof(_grammar) / sizeof(*_grammar); + +// #include "???.h" +char **semantic_action_str = (char *([])){ + GRAMMAR_ACTION_DEF(X_ACTION) +}; + +#include "parts/precedence.h" +struct precedence_def { + int flag; + int *list; + size_t nlist; +}; +struct precedence_def *precedence_defs = NULL; +size_t nprecedence_defs = 0; diff --git a/demos/sample-files/gram-skeleton.c b/demos/sample-files/gram-skeleton.c new file mode 100644 index 0000000..89ef6b4 --- /dev/null +++ b/demos/sample-files/gram-skeleton.c @@ -0,0 +1,149 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> + +#define ARENA_IMPLEMENTATION +#include "util/arena.h" + +static char buf[1024]; +static struct arena_ctx global_arena = ARENA_CTX_INIT(buf, sizeof(buf)); +static void *xalloc(size_t sz) { + void *addr = arena_allocate(&global_arena, sz); + if(!addr) { + fprintf(stderr, "ERROR: Arena empty\n"); exit(1); + } + + return addr; +} + +// generated +#include "bin/gram.h" +#include "bin/gram.c" + +#include "parts/toklist.h" +struct token { + symbol s; + intptr_t v; +} tok; + +static char *next_token(char *str); + +symbol token_sym(struct token *t) { return t->s; } +intptr_t token_val(struct token *t) { return t->v; } + +static char *input = (char []){ + "-left B;" + "-right C;" + "-left D;" + "" + "A: B {a}" + " | C N {d}." +}; + +struct token *toklist_eat() +{ + static struct token t; + t = tok; + input = next_token(input); + return &t; +} + +struct token *toklist_peek() { return &tok; } + +#include "lr-parser.c" + +int main(void) +{ + input = next_token(input); + + intptr_t value; + if(lr_parser(&value)) { + return 1; + } + + printf("OUTPUT: %jd\n", value); + return 0; +} + +// STR UTIL + +#define strdup(...) _strdup(__VA_ARGS__) +static inline char *_strdup(char *str) +{ + return memcpy(xalloc(strlen(str) + 1), str, strlen(str)+1); +} + +static inline char *substring(char *str, size_t sub_end) +{ + static char sub[128]; + if(sub_end+1 > sizeof(sub)) return NULL; + + sub[sub_end] = '\0'; + return memcpy(sub, str, sub_end); +} + +// LEXER + +static inline int issep(char c) +{ + return isspace(c) || c == '\0' || c == ':' || c == '|' || c == ';' || c == '.' || c == '-' || c == '{'; +} + +static inline int tillsep(char *str) +{ + size_t i = 0; + while(!issep(str[i++])); + return i-1; +} + +static char *next_token(char *str) +{ + if(!str) return str; + + size_t off = 0; + char c0 = str[0]; + + if(c0 == '\0') tok.s = END_INPUT; + if(isspace(c0)) return next_token(str+1); + else { + off = tillsep(str); + if(off == 0) { // sep + switch(str[off++]) { + case ':': tok.s = COLON; break; + case '|': tok.s = PIPE; break; + case ';': tok.s = SEMICOL; break; + case '.': tok.s = DOT; break; + case '-': + off = tillsep(++str); + char *s = substring(str, off); + if(strcmp(s, "left") == 0) tok.s = D_LEFT; + else if(strcmp(s, "right") == 0) tok.s = D_RIGHT; + else if(strcmp(s, "terminal") == 0) tok.s = D_TERMINAL; + else if(strcmp(s, "nonterminal") == 0) tok.s = D_NONTERM; + else { fprintf(stderr, "ERROR: Unknown directive '-%s'\n", s); goto fail; } + break; + case '{': + for(int c = 1; c != 0; off++) + if(str[off] == '\0') { fprintf(stderr, "ERROR: No closing '{'\n"); goto fail; } + else if(str[off] == '{') c++; + else if(str[off] == '}') c--; + tok.s = ACTION; + tok.v = (intptr_t)strdup(substring(str, off)); + break; + } + } else if(isalpha(c0)) { // iden or named symbol + tok.s = IDEN; + tok.v = (intptr_t)strdup(substring(str, off)); + } else if(c0 >= '0' && c0 <= '9') { // num + tok.s = NUM; + tok.v = (intptr_t)atoi(substring(str, off)); + } + } + + return str+off; + +fail: + tok.s = END_INPUT; + return NULL; +} diff --git a/lr-parser.c b/lr-parser.c index 799276d..3b6be84 100644 --- a/lr-parser.c +++ b/lr-parser.c @@ -1,5 +1,6 @@ #include <stdio.h> #include <stdlib.h> +#include <stdint.h> // TODO: - check erros and fail safely and // see connection with the lexer @@ -11,16 +12,16 @@ #include "parts/table.h" #include "parts/toklist.h" // and -typedef int (*semantic_action_fn)(int *stack_head); +typedef intptr_t (*semantic_action_fn)(intmax_t *stack_head); extern semantic_action_fn *semantic_actions; -typedef int stack_item; +typedef intmax_t stack_item; #define STACK_CAP 128 static stack_item stack_bottom[STACK_CAP]; static stack_item *stack_head = stack_bottom; -int lr_parser(int *value) +int lr_parser(intptr_t *value) { #define push(item) do { \ if(++stack_head - stack_bottom < STACK_CAP ) *stack_head = item; \ @@ -42,7 +43,7 @@ int lr_parser(int *value) push(a.arg); break; case ACTION_REDUCE: - int semantic_value = semantic_actions[a.arg](stack_head); + intptr_t semantic_value = semantic_actions[a.arg](stack_head); for(size_t i = 0; i < 3*grammar[a.arg].nRHS; i++) pop(); symbol lhs = grammar[a.arg].LHS; @@ -136,7 +137,7 @@ struct token { int v; }; -static struct token toklist[] = {{N0}, {PLUS}, {N1}, {END_INPUT}}; +static struct token toklist[] = {{N0, 0}, {PLUS, 0}, {N1, 0}, {END_INPUT, 0}}; static const size_t ntoklist = sizeof(toklist)/sizeof(*toklist); static size_t tok; @@ -151,15 +152,15 @@ struct token *toklist_peek() { return toklist + tok; } symbol token_sym(struct token *t) { return t->s; } int token_val(struct token *t) { return t->v; } -int none(int *stack_head) {(void)stack_head; return 0;} +intptr_t none(intmax_t *stack_head) {(void)stack_head; return 0;} semantic_action_fn *semantic_actions = (semantic_action_fn[]){none, none, none, none, none, none, none, none}; int main(void) { - int value; + intptr_t value; if(lr_parser(&value)) return 1; - printf("%d\n", value); + printf("%jd\n", value); return 0; } diff --git a/parts/grammar.h b/parts/grammar.h index d1bf176..e747855 100644 --- a/parts/grammar.h +++ b/parts/grammar.h @@ -1,6 +1,7 @@ #ifndef GRAMMAR_H #define GRAMMAR_H +#include "symbol.h" #include <stddef.h> // size_t extern struct production { diff --git a/parts/table.h b/parts/table.h index efd19bb..c60ad4f 100644 --- a/parts/table.h +++ b/parts/table.h @@ -2,7 +2,7 @@ #define TABLE_H #include <stddef.h> // size_t -#include "util/util.h" +#include "util/util.h" // X_TO_... #define ACTION_TYPE(X) \ X(ACTION_NOT_SET) \ @@ -28,7 +28,7 @@ extern void (*table_free)(); void table_print(); void table_print_cstyle(); -int table_insert(size_t state, symbol sym, struct action a); // should it be here?? +int table_insert(size_t state, symbol sym, struct action a); // should this be here?? #include "symbol.h" @@ -103,7 +103,7 @@ int table_insert(size_t state, symbol sym, struct action a) if(prec_num(tbl_a) > prec_num(new_a)) set_tbl_a = 0; else if(prec_num(tbl_a) < prec_num(new_a)) set_tbl_a = 1; else { report = 1; - if(new_a->arg > tbl_a->arg) set_tbl_a = 1; + if(new_a->arg < tbl_a->arg) set_tbl_a = 1; } } else if(shift_reduce) { int favor_shift = 0; diff --git a/parts/toklist.h b/parts/toklist.h index 760846f..08fce66 100644 --- a/parts/toklist.h +++ b/parts/toklist.h @@ -1,12 +1,13 @@ #ifndef TOKLIST_H #define TOKLIST_H +#include <stdint.h> // intptr_t #include "symbol.h" struct token; -symbol token_sym(struct token *t); // UB for NULL -int token_val(struct token *t); // UB for NULL +symbol token_sym(struct token *t); // t != NULL +intptr_t token_val(struct token *t); // t != NULL struct token *toklist_eat(); // always non-NULL struct token *toklist_peek(); // always non-NULL diff --git a/util/arena.h b/util/arena.h new file mode 100644 index 0000000..3d82b95 --- /dev/null +++ b/util/arena.h @@ -0,0 +1,30 @@ +#ifndef ARENA_H +#define ARENA_H + +#include <stddef.h> // size_t + +struct arena_ctx { + void *buffer; + size_t size; + size_t offset; +}; + +#define ARENA_CTX_INIT(buffer, sz) (struct arena_ctx){(buffer), (sz), 0} +void *arena_allocate(struct arena_ctx *ctx, size_t sz); +void arena_reset(struct arena_ctx *ctx); + +#ifdef ARENA_IMPLEMENTATION + +void *arena_allocate(struct arena_ctx *ctx, size_t sz) +{ + if(ctx->offset + sz > ctx->size) return NULL; + + void *off = ctx->buffer + ctx->offset; + ctx->offset += sz; + return off; +} + +void arena_reset(struct arena_ctx *ctx) { ctx->offset = 0; } + +#endif +#endif |
