diff options
| author | kartofen <kartofen.mail.0@protonmail.com> | 2025-09-13 15:24:28 +0300 |
|---|---|---|
| committer | kartofen <kartofen.mail.0@protonmail.com> | 2025-09-13 15:24:28 +0300 |
| commit | db1b9c8dcb0d115217a33c2fe8e0760d49143e11 (patch) | |
| tree | c93743adff3d78ea066c14879b7d2bfeb3ce42fb | |
| parent | 46e786db9d1b48b8fbc3502e36f093b755f3e09f (diff) | |
ast nearly build and proper errors
| -rwxr-xr-x | build.sh | 4 | ||||
| -rw-r--r-- | demos/generate-parser.c | 19 | ||||
| -rw-r--r-- | demos/sample-files/calc-defs.c | 4 | ||||
| -rw-r--r-- | demos/sample-files/calc-skeleton.c | 6 | ||||
| -rw-r--r-- | demos/sample-files/calc.g | 2 | ||||
| -rw-r--r-- | demos/sample-files/gram-defs.c | 12 | ||||
| -rw-r--r-- | demos/sample-files/gram-skeleton.c | 62 | ||||
| -rw-r--r-- | demos/sample-files/lbp-code.lbp | 4 | ||||
| -rw-r--r-- | demos/sample-files/lbp-skeleton.c | 267 | ||||
| -rw-r--r-- | demos/sample-files/lbp.g | 46 | ||||
| -rw-r--r-- | lr-parser.c | 117 | ||||
| -rw-r--r-- | util/dict.c | 36 | ||||
| -rw-r--r-- | util/dict.h | 11 | ||||
| -rw-r--r-- | util/list.h | 32 |
14 files changed, 447 insertions, 175 deletions
@@ -80,13 +80,13 @@ shared demos/sample-files/gram-defs leak generate-parser "-o bin/gram -t lalr-table bin/gram-defs.so" cc demos/sample-files/gram-skeleton "" gram-parser +# !!! gram.g is outdated !!! # leak gram-parser < demos/sample-files/gram.g > bin/gram-gram.c # shared bin/gram-gram # leak generate-parser "-o bin/gram -t lalr-table bin/gram-gram.so" # cc demos/sample-files/gram-skeleton "" gram2-parser -# leak gram2-parser < demos/sample-files/calc.g > bin/calc-gram.c - +# leak gram-parser < demos/sample-files/calc.g > bin/calc-gram.c # shared bin/calc-gram # leak generate-parser "-o bin/calc -t lalr-table bin/calc-gram.so" # cc demos/sample-files/calc-skeleton "" calc-parser diff --git a/demos/generate-parser.c b/demos/generate-parser.c index d1391d5..e0ec5f5 100644 --- a/demos/generate-parser.c +++ b/demos/generate-parser.c @@ -20,6 +20,7 @@ int (*symbol_is_valid)(symbol s); struct production *grammar; size_t total_productions; +char *stack_item_type; char **semantic_action_str; #include "parts/precedence.h" @@ -120,6 +121,12 @@ int main(int argc, char **argv) GET_VARIABLE(symbol_is_valid, def_handle); GET_VARIABLE(grammar, def_handle); GET_VARIABLE(total_productions, def_handle); + + if(!dlsym(def_handle, "stack_item_type")) + stack_item_type = "intptr_t"; + else + GET_VARIABLE(stack_item_type, def_handle); + GET_VARIABLE(semantic_action_str, def_handle); GET_VARIABLE(precedence_defs, def_handle); GET_VARIABLE(nprecedence_defs, def_handle); @@ -162,15 +169,15 @@ int main(int argc, char **argv) printf("};\n"); for(size_t i = 0; i < total_productions; i++) { - printf("#define A(n) (*(stack_head-3*%zu+3*n-1))\n", grammar[i].nRHS-1); - printf("intptr_t __prod%zu_action(intmax_t *stack_head)\n", i); - printf("{ intptr_t v = 0;\n"); + printf("#define A(n) (*(stack_head-%zu+n))\n", grammar[i].nRHS-1); + printf("stack_item __prod%zu_action(stack_item *stack_head)\n", i); + printf("{ stack_item v = {0};\n"); puts(semantic_action_str[i]); printf("return v; }\n"); printf("#undef A\n"); } - printf("typedef intptr_t (*semantic_action_fn)(intmax_t *stack_head);\n"); + printf("typedef stack_item (*semantic_action_fn)(stack_item *stack_head);\n"); printf("semantic_action_fn *semantic_actions = (semantic_action_fn[]){\n"); for(size_t i = 0; i < total_productions; i++) @@ -186,8 +193,8 @@ int main(int argc, char **argv) printf("};\n"); printf("#include \"parts/grammar.h\"\n"); printf("#include \"parts/table.h\"\n"); - printf("#include <stdint.h>\n"); - printf("typedef intptr_t (*semantic_action_fn)(intmax_t *stack_head);\n"); + printf("typedef %s stack_item;\n", stack_item_type); + printf("typedef stack_item (*semantic_action_fn)(stack_item *stack_head);\n"); printf("extern semantic_action_fn *semantic_actions;\n"); printf("#endif\n"); set_stdout(NULL); diff --git a/demos/sample-files/calc-defs.c b/demos/sample-files/calc-defs.c index b9d1788..9385a02 100644 --- a/demos/sample-files/calc-defs.c +++ b/demos/sample-files/calc-defs.c @@ -12,7 +12,7 @@ enum symbol { SYMBOLS(X_TO_ENUM) }; size_t total_symbols = SYMBOLS_END; -extern char **symbol_to_str = (char *([])){ SYMBOLS(X_TO_STR) }; +char **symbol_to_str = (char *([])){ SYMBOLS(X_TO_STR) }; IMPLEMENT_FUNCPTR(int, symbol_is_terminal, (symbol s)) { return s < EP; } IMPLEMENT_FUNCPTR(int, symbol_is_input_end, (symbol s)) { return s == END_INPUT; } @@ -38,6 +38,8 @@ static struct production _grammar[] = { struct production *grammar = _grammar; size_t total_productions = sizeof(_grammar)/sizeof(*_grammar); +char *stack_item_type = "int"; + // #include "???.h" char **semantic_action_str = (char *([])){ "v = A(0);", diff --git a/demos/sample-files/calc-skeleton.c b/demos/sample-files/calc-skeleton.c index ad4aba9..414293e 100644 --- a/demos/sample-files/calc-skeleton.c +++ b/demos/sample-files/calc-skeleton.c @@ -18,7 +18,7 @@ static struct token { static char *next_token(char *str); symbol token_sym(struct token *t) { return t->s; } -intptr_t token_val(struct token *t) { return (intptr_t)t->v; } +intptr_t token_val(struct token *t) { return (intptr_t)&t->v; } static char *input; @@ -40,11 +40,11 @@ int main(int argc, char **argv) input = next_token(argv[1]); - intptr_t value; + int value; if(lr_parser(&value)) return 1; printf("INPUT: '%s'\n", argv[1]); - printf("OUTPUT: %jd\n", value); + printf("OUTPUT: %d\n", value); return 0; } diff --git a/demos/sample-files/calc.g b/demos/sample-files/calc.g index 804e072..aa5b5db 100644 --- a/demos/sample-files/calc.g +++ b/demos/sample-files/calc.g @@ -3,6 +3,8 @@ -nonterminal EP E. +-stacktype { int }. + -left LPAREN; -left 5; -left TIMES; diff --git a/demos/sample-files/gram-defs.c b/demos/sample-files/gram-defs.c index b1ae268..97312c7 100644 --- a/demos/sample-files/gram-defs.c +++ b/demos/sample-files/gram-defs.c @@ -1,10 +1,10 @@ #include "util/util.h" #define SYMBOLS(X) \ X(TERMINAL) X(NONTERM) X(LEFT) X(RIGHT) X(NOPREC) \ - X(COLON) X(PIPE) X(SEMICOL) X(DOT) \ + X(STYPE) X(COLON) X(PIPE) X(SEMICOL) X(DOT) \ X(IDEN) X(NUM) X(ACTION) X(END_INPUT) \ \ - X(S) X(A) X(B) X(C) \ + X(SP) X(S) X(A) X(T) X(B) X(C) \ X(Type) X(Prec) X(Prod) X(Preclist) X(Prodlist) \ X(Actionlist) X(Idenlist) X(IorNlist) \ X(SYMBOLS_END) \ @@ -22,11 +22,15 @@ IMPLEMENT_FUNCPTR(int, symbol_is_valid, (symbol s)) { return s < SYMBOLS_END; } #include "parts/grammar.h" #define PROD(LHS, _, ...) {LHS, (symbol[]){__VA_ARGS__}, sizeof((symbol[]){__VA_ARGS__})/sizeof(symbol)} #define GRAMMAR_ACTION_DEF(X) \ - X(PROD(S, -->, A, B, C, END_INPUT), "") \ + X(PROD(SP, -->, S, END_INPUT), "") \ + X(PROD(S, -->, A, B, C), "") \ + X(PROD(S, -->, A, T, B, C), "") \ \ X(PROD(A, -->, TERMINAL, Idenlist, \ SEMICOL, NONTERM, Idenlist, DOT), \ - "handle_type(A(1), A(4))") \ + "handle_enum(A(1), A(4));") \ + X(PROD(T, -->, STYPE, ACTION, DOT), \ + "handle_stype(A(1));") \ \ X(PROD(B, -->, Preclist), "handle_prec(A(0));") \ X(PROD(B, -->, NOPREC, DOT), "handle_prec(NULL);") \ diff --git a/demos/sample-files/gram-skeleton.c b/demos/sample-files/gram-skeleton.c index 4e40c14..9898c6b 100644 --- a/demos/sample-files/gram-skeleton.c +++ b/demos/sample-files/gram-skeleton.c @@ -24,10 +24,10 @@ static void *xalloc(size_t sz) { #include "parts/precedence.h" #include "util/list.h" -static inline struct list_head *list_new_head(struct list_head *head, struct list_head *new) +static inline struct list_head *list_new_head(struct list_head *list, struct list_head *newhead) { - if(head) list_add(new, head); - return new; + newhead->next = list; + return newhead; } struct ptr_entry { @@ -79,7 +79,7 @@ struct list_head *prod_new(char *iden, struct list_head *actionlist) entry->ptrlist = actionlist; }) -void handle_type(struct list_head *terminals, struct list_head *nonterminals) +void handle_enum(struct list_head *terminals, struct list_head *nonterminals) { printf("#include \"parts/symbol.h\"\n"); printf("enum symbol { "); @@ -107,6 +107,12 @@ void handle_type(struct list_head *terminals, struct list_head *nonterminals) } +static inline char *substring(char *str, size_t sub_end); +void handle_stype(char *action) +{ + printf("char *stack_item_type = \"%s\";\n", substring(action+1, strlen(action)-2)); +} + void handle_prec(struct list_head *preclist) { printf("#include \"parts/precedence.h\"\n"); @@ -142,8 +148,7 @@ void handle_prod(struct list_head *prodlist) printf("struct production *grammar = (struct production[]){\n"); list_for_each_entry(struct strnptr_entry, e1, list, prodlist) { if(productions == 0) - list_add(list_get_tail(list_entry(e1->ptrlist, struct strnptr_entry, list)->ptrlist), - ptr_new("END_INPUT")); + list_get_tail(list_entry(e1->ptrlist, struct strnptr_entry, list)->ptrlist)->next = ptr_new("END_INPUT"); productions += list_len(e1->ptrlist); list_for_each_entry(struct strnptr_entry, e2, list, e1->ptrlist) { @@ -171,7 +176,8 @@ void handle_prod(struct list_head *prodlist) #define action_new(idenlist, action) (intptr_t)action_new((struct list_head *)idenlist, (char *)action) #define prod_new(iden, actionlist) (intptr_t)prod_new((char *)iden, (struct list_head *)actionlist) -#define handle_type(terminals, nonterminals) handle_type((struct list_head *)terminals, (struct list_head *)nonterminals); +#define handle_enum(terminals, nonterminals) handle_enum((struct list_head *)terminals, (struct list_head *)nonterminals); +#define handle_stype(action) handle_stype((char *)action); #define handle_prec(preclist) handle_prec((struct list_head *)preclist); #define handle_prod(prodlist) handle_prod((struct list_head *)prodlist); @@ -188,7 +194,7 @@ struct token { static char *next_token(char *str); symbol token_sym(struct token *t) { return t->s; } -intptr_t token_val(struct token *t) { return t->v; } +intptr_t token_val(struct token *t) { return (intptr_t)&t->v; } static char *input; @@ -234,10 +240,43 @@ static inline char *_strdup(char *str) return memcpy(xalloc(strlen(str) + 1), str, strlen(str)+1); } +static inline size_t tillch(char *str, size_t len, char ch) +{ + for(size_t i = 0; i < len; i++) if(str[i] == ch) return i; + return len; +} + +static inline char *escapeNL(char *str) +{ +// static char new[256]; +// char *new_ptr = new; + +// #define addch(ch) if(new_ptr - new > sizeof(new) - 1) { \ +// fprintf(stderr, "ERROR: escapeNL cap reached"); return NULL; \ +// } else *new_ptr++ = ch; + +// for(size_t i = 0; i < strlen(str); i++) +// if(str[i] == '\n') { +// addch('\\'); addch('n'); +// continue; +// } else addch(str[i]); + +// *new_ptr = '\0'; +// return new; + + // temp + for(size_t i = 0; i < strlen(str); i++) if(str[i] == '\n') str[i] = ' '; + return str; +} + static inline char *substring(char *str, size_t sub_end) { - static char sub[128]; - if(sub_end+1 > sizeof(sub)) return NULL; + // static char sub[256]; + static char sub[512]; + if(sub_end+1 > sizeof(sub)) { + fprintf(stderr, "ERROR: substring cap reached"); + return NULL; + } sub[sub_end] = '\0'; return memcpy(sub, str, sub_end); @@ -282,6 +321,7 @@ static char *next_token(char *str) else if(strcmp(s, "left") == 0) tok.s = LEFT; else if(strcmp(s, "right") == 0) tok.s = RIGHT; else if(strcmp(s, "noprec") == 0) tok.s = NOPREC; + else if(strcmp(s, "stacktype") == 0) tok.s = STYPE; else { fprintf(stderr, "ERROR: Unknown directive '-%s'\n", s); goto fail; } break; case '{': @@ -290,7 +330,7 @@ static char *next_token(char *str) else if(str[off] == '{') c++; else if(str[off] == '}') c--; tok.s = ACTION; - tok.v = (intptr_t)strdup(substring(str, off)); + tok.v = (intptr_t)strdup(escapeNL(substring(str, off))); break; } } else if(isalpha(c0)) { // iden or named symbol diff --git a/demos/sample-files/lbp-code.lbp b/demos/sample-files/lbp-code.lbp index df5bdcc..3750623 100644 --- a/demos/sample-files/lbp-code.lbp +++ b/demos/sample-files/lbp-code.lbp @@ -18,9 +18,7 @@ inbounds/int-function(low, high, val) { (31 |_, ---). }, -:aircraft_iden/struct { --. -}, +:aircraft_iden/struct { -. tova_tuk_e_sintaktichna_greshka. }, :message/struct { DF/enum(:downlinkfmt) |5, diff --git a/demos/sample-files/lbp-skeleton.c b/demos/sample-files/lbp-skeleton.c index ae0a17f..bf7bdca 100644 --- a/demos/sample-files/lbp-skeleton.c +++ b/demos/sample-files/lbp-skeleton.c @@ -4,34 +4,84 @@ #include <stdint.h> #include <ctype.h> -// TODO: lr parser is bad for debugging +// TODO: - lr parser is bad for debugging +// - deal with errors #define INPUT_CAP 4096 -#define ARENA_CAP 4096 +#define ARENA_CAP 8192 //4096 #define ARENA_IMPLEMENTATION #include "util/arena.h" static char buf[ARENA_CAP]; static struct arena_ctx global_arena; -static void *xalloc(size_t sz) { - void *addr = arena_allocate(&global_arena, sz); +static void *xalloc(size_t size) { + void *addr = arena_allocate(&global_arena, size); if(!addr) { fprintf(stderr, "ERROR: Arena empty\n"); exit(1); } return addr; } +static void *xcalloc(size_t nmemb, size_t size) { return xalloc(nmemb * size); } +static void xfree(void *ptr) { (void)ptr; return; } + +// macros to create the g_ variant of the function +// where everything is cast to and from intptr_t +// so it plays nicely with the lr parser +// #define EMPTY() +// #define DEFER(m) m EMPTY() +// #define EVAL3(...) EVAL2(EVAL2(EVAL2(EVAL2(__VA_ARGS__)))) +// #define EVAL2(...) EVAL1(EVAL1(EVAL1(EVAL1(__VA_ARGS__)))) +// #define EVAL1(...) __VA_ARGS__ + +// #define A1(type, name) type name +// #define A2(type, name) intptr_t name +// #define A3(type, name) (type)name + +// #define _map2() map2 +// #define map2(F, type, name, ...) F(type, name) __VA_OPT__(, DEFER(_map2)()(F, __VA_ARGS__)) +// #define map(a, ...) DEFER(_map2)()(a, EVAL1 __VA_ARGS__) + +// #define g(ret, name, args) EVAL3( \ + ret name(map(A1, args)); \ + intptr_t g_##name(map(A2, args)) \ + { return (intptr_t)name(map(A3, args)); } \ + ret name(map(A1, args))) -// other things here #include "util/list.h" -static inline struct list_head *list_new_head(struct list_head *head, struct list_head *new) -{ - if(head) list_add(new, head); - return new; -} -#define list_new_head(head, new) (intptr_t)list_new_head((struct list_head *)head, (struct list_head *)new) +struct ast_strlist { char *str; struct list_head list; }; + +struct ast_expr { + enum { AST_NUMBER, AST_VARIABLE, AST_FIELDLIST, AST_DECLARATION, AST_DEFINITION, AST_OPERATION, AST_PARENLIST } type; + union { + int number; + struct ast_vrbl { int is_atom; char *iden; } variable; + struct ast_fiel { struct ast_vrbl variable; struct list_head *fields_strlist; } fieldlist; + struct ast_decl { struct ast_vrbl variable; int typed; } declaration; + struct ast_defn { struct ast_decl declartion; struct list_head *block_exprlist; } definition; + struct ast_oprn { enum { AST_OP_AND, AST_OP_OR } optype; struct list_head *left_exprlist; struct list_head *right_exprlist; } operation; + struct list_head *paren_exprlist; + }; + struct list_head list; +}; + +#define NEW(t, ...) ((struct ast_##t){__VA_ARGS__}) +#define g_NEW(t, ...) ((stack_item){.t = (struct ast_##t){__VA_ARGS__}}) + +#define LST(v) ({ typeof(v) *r = xalloc(sizeof(v)); *r = v; LIST_EMPTY(&r->list); &r->list; }) +#define g_LST(v) ((stack_item){.list = LST(v)}) + +void ast_vrbl_print(struct ast_vrbl *vrbl); +void ast_fiel_print(struct ast_fiel *fiel); +void ast_decl_print(struct ast_decl *decl); +void ast_defn_print(struct ast_defn *defn); +void ast_oprn_print(struct ast_oprn *oprn); +void ast_expr_print(struct ast_expr *expr); +void ast_exprlist_print(struct list_head *list); + +void ast_exprlist_free(struct list_head *list); // generated #include "bin/lbp.h" @@ -57,23 +107,31 @@ static uint8_t dict_lowercase_char_to_bit[256] = { ['y'] = 26, ['z'] = 27, [ 0 ] = 1, [' '] = 1 }; - #include "parts/toklist.h" struct token { symbol s; - intptr_t v; + stack_item v; }; - -#include "util/queue.h" -QUEUE_GENERATE(tokbuf, struct token, 16) +#define TOKEN_INIT(sym, val) (struct token){ .s = sym, .v = val } +static void print_token(struct token *t); symbol token_sym(struct token *t) { return t->s; } -intptr_t token_val(struct token *t) { return t->v; } +intptr_t token_val(struct token *t) { return (intptr_t)&t->v; } -static void print_token(struct token *t); +static char *input; +static size_t line = 1; +static size_t active_region; static char *next_token(char *str); -static char *input; +static inline char *substring(char *str, size_t sub_end); +static inline char *linestart(char *strstart, char *pos); +static inline size_t tillch(char *str, size_t len, char ch); +#define strdup(...) _strdup(__VA_ARGS__) +static inline char *_strdup(char *str); + + +#include "util/queue.h" +QUEUE_GENERATE(tokbuf, struct token, 16) struct token *toklist_eat() { @@ -85,7 +143,7 @@ struct token *toklist_eat() struct token *toklist_peek() { static struct token t; - tokbuf_peek(&t); // err not checked + tokbuf_peek(&t); // err not checked return &t; } @@ -94,6 +152,8 @@ struct token *toklist_peek() { int main(void) { + char *filename = "stdin"; + static char input_buf[INPUT_CAP]; if(fread(input_buf, INPUT_CAP, 1, stdin) == INPUT_CAP) { fprintf(stderr, "INPUT_CAP reached\n"); @@ -103,6 +163,7 @@ int main(void) global_arena = ARENA_CTX_INIT(buf, ARENA_CAP); types_dict = DICT_INIT(types_strings, ntypes_strings, dict_lowercase_char_to_bit); + // DICT_SET_ALLOCATOR(&types_dict, xcalloc, xfree); dict_compile(&types_dict); input = next_token(input_buf); @@ -113,22 +174,43 @@ int main(void) // if(token_sym(tok) == END_INPUT) break; // } return 0; - intptr_t value; - if(lr_parser(&value)) { - fprintf(stderr, input); - return 1; + stack_item value; + struct lr_errinfo *errinfo; + if((errinfo = lr_parser(&value))) { + char *l = linestart(input_buf, input); + + fprintf(stderr, "%s:%zu:%zu: ERROR: %s\n", filename, line, input - l - active_region+1, lr_err_str(errinfo)); + + size_t indent = fprintf(stderr, " %zu ", line); + fprintf(stderr, "| %s\n", substring(l, tillch(l, strlen(l), '\n'))); + + fprintf(stderr, "%*s| %*s", indent, "", input - l - active_region, ""); + if(active_region == 0) active_region = 1; + fprintf(stderr, "^"); for(size_t i = 0; i < active_region-1; i++) fprintf(stderr, "~"); + + fprintf(stderr, "\n\n"); + goto cleanup; } - fprintf(stderr, "OUTPUT: %jd\n", value); + ast_exprlist_print(value.list); + // ast_exprlist_free(value.list); +cleanup: dict_free(&types_dict); return 0; } static void print_token(struct token *tok) { + // TODO: unfinished function printf("%s\n", symbol_to_str[token_sym(tok)]); - if(token_sym(tok) == IDEN || token_sym(tok) == ATOM) printf(" %s\n", (char *)token_val(tok)); + switch(token_sym(tok)) { + case IDEN: + case ATOM: + printf(" %s\n", (char *)token_val(tok)); + break; + default: break; + } } // STR UTIL @@ -150,6 +232,12 @@ static inline char *substring(char *str, size_t sub_end) return memcpy(sub, str, sub_end); } +static inline char *linestart(char *strstart, char *pos) +{ + while(pos-- > strstart) if(*pos == '\n') return pos+1; + return strstart; +} + static inline size_t tillch(char *str, size_t len, char ch) { for(size_t i = 0; i < len; i++) if(str[i] == ch) return i; @@ -160,8 +248,10 @@ static inline size_t tillch(char *str, size_t len, char ch) static inline int issep(char c) { - return isspace(c) || c == '\0' || c == '/' || c == ',' || c == ';' || - c == '.' || c == '(' || c == ')' || c == '{' || c == '}'; + return isspace(c) || + c == '\0' || c == '/' || c == ',' || c == ';' || + c == '.' || c == '(' || c == ')' || c == '{' || + c == '}'; } static inline int tillsep(char *str) @@ -180,10 +270,9 @@ static char *typelist_tokenize(char *str) int s = dict_check(&types_dict, substring(str, off)); if(s < 0) { fprintf(stderr, "ERROR: Unknown type or subtype %s\n", substring(NULL, 0)); - return NULL; + } else { + tokbuf_enqueue(&TOKEN_INIT(s, { .num = s })); } - - tokbuf_enqueue(&(struct token){.s = s, .v = s}); } str += off; @@ -191,9 +280,12 @@ static char *typelist_tokenize(char *str) switch(str[0]) { case '-': return typelist_tokenize(str+1); case '(': + size_t depth = 0; while((str = next_token(str))) - if(*(str-1)== ')') { // not really - if(str[0] == '-') return typelist_tokenize(str+1); + if(str[0] == '(') depth++; + else if(*(str-1) == ')') { + if(depth > 0) depth--; + else if(str[0] == '-') return typelist_tokenize(str+1); else return str; } return NULL; @@ -209,8 +301,10 @@ static char *next_token(char *str) size_t off = 0; char c0 = str[0]; - if(c0 == '\0') tok.s = END_INPUT; + if(c0 == '\n') line++; if(isspace(c0)) return next_token(str+1); + + if(c0 == '\0') tok.s = END_INPUT; else { off = tillsep(str); if(off == 0) { // sep @@ -223,26 +317,28 @@ static char *next_token(char *str) case '{': tok.s = LBRACE; break; case '}': tok.s = RBRACE; break; case '/': + char *s2 = str; tok.s = TYPELIST_START; tokbuf_enqueue(&tok); - if(!(str = typelist_tokenize(str+off))) goto fail; + if(!(s2 = typelist_tokenize(s2+off))) goto fail; tok.s = TYPELIST_END; tokbuf_enqueue(&tok); - return str; - default: break; + active_region = s2 - str; return s2; + default: fprintf(stderr, "ERROR: Unknown sep '%c'\n", c0); break; } } else if(c0 >= '0' && c0 <= '9') { // num tok.s = NUM; - tok.v = (intptr_t)atoi(substring(str, off)); // not really + tok.v = (stack_item){ .num = atoi(substring(str, off)) }; // not really } else { // iden or atom (possibly with fields) + active_region = off; int hasfield = 0; size_t sub_off; do { sub_off = tillch(str + 1, off - 1, ':') + 1; - if(hasfield) - tokbuf_enqueue(&(struct token){.s = COLON, .v = 0}); + if(hasfield) tokbuf_enqueue(&TOKEN_INIT(COLON, { .num=0 })); - tokbuf_enqueue(&(struct token){.s = (!hasfield && str[0] == ':') ? ATOM : IDEN, - .v = (intptr_t)strdup(substring(str+hasfield, sub_off-hasfield))}); + tokbuf_enqueue( + &TOKEN_INIT((!hasfield && str[0] == ':') ? ATOM : IDEN, + { .str = strdup(substring(str+hasfield, sub_off-hasfield))})); } while(hasfield = 1, str += sub_off, off -= sub_off, off > 0); return str; @@ -250,9 +346,92 @@ static char *next_token(char *str) } tokbuf_enqueue(&tok); + active_region = off; return str+off; fail: - tokbuf_enqueue(&(struct token){.s = END_INPUT}); - return NULL; + exit(14); + // tok.s = END_INPUT; tokbuf_enqueue(&tok); + // return NULL; +} + +// ast printing + +void ast_vrbl_print(struct ast_vrbl *vrbl) +{ + printf("%s%s", vrbl->is_atom ? ":" : "", vrbl->iden); +} + +void ast_fiel_print(struct ast_fiel *fiel) +{ + ast_vrbl_print(&fiel->variable); + list_for_each_entry(struct ast_strlist, entry, list, fiel->fields_strlist) { + printf(":%s", entry->str); + } +} + +void ast_decl_print(struct ast_decl *decl) +{ + // TODO: implement +} + +void ast_defn_print(struct ast_defn *defn) +{ + // TODO: implement +} + +void ast_oprn_print(struct ast_oprn *oprn) +{ + ast_exprlist_print(oprn->left_exprlist); + if(oprn->optype == AST_OP_AND) printf(",\n"); else printf(";\n"); + ast_exprlist_print(oprn->right_exprlist); +} + +void ast_expr_print(struct ast_expr *expr) { ast_exprlist_print(&expr->list); } +void ast_exprlist_print(struct list_head *list) +{ + list_for_each_entry(struct ast_expr, entry, list, list) + { + switch(entry->type) { + case AST_NUMBER: printf("%d", entry->number); break; + case AST_VARIABLE: ast_vrbl_print(&entry->variable); break; + case AST_FIELDLIST: ast_fiel_print(&entry->fieldlist); break; + case AST_DECLARATION: ast_decl_print(&entry->declaration); break; + case AST_OPERATION: ast_oprn_print(&entry->operation); break; + case AST_PARENLIST: + printf("("); + ast_exprlist_print(entry->paren_exprlist); + printf(")"); + break; + default: fprintf(stderr, "UNKNOWN TYPE: %d\n", entry->type); + } + printf(" "); + } + printf("\n"); +} + +void ast_exprlist_free(struct list_head *list) +{ + list_for_each_safe(l, list) { + struct ast_expr *entry = list_entry(l, typeof(*entry), list); + + switch(entry->type) { + case AST_NUMBER: break; + case AST_VARIABLE: break; + case AST_FIELDLIST: + list_for_each_safe(l, entry->fieldlist.fields_strlist) + free(list_entry(l, struct ast_strlist, list)); + break; + case AST_DECLARATION: break; + case AST_DEFINITION: break; + case AST_OPERATION: + ast_exprlist_free(entry->operation.left_exprlist); + ast_exprlist_free(entry->operation.right_exprlist); + break; + case AST_PARENLIST: ast_exprlist_free(entry->paren_exprlist); break; + default: fprintf(stderr, "UNKNOWN TYPE: %d\n", entry->type); + } + + free(entry); + } } diff --git a/demos/sample-files/lbp.g b/demos/sample-files/lbp.g index bc82cb3..1dd176c 100644 --- a/demos/sample-files/lbp.g +++ b/demos/sample-files/lbp.g @@ -8,28 +8,44 @@ -nonterminal S exprlist expr sym fieldlist basetype subtypelist. +-stacktype { union { + int num; + char *str; + + struct ast_vrbl vrbl; + struct ast_fiel fiel; + struct ast_decl decl; + struct ast_defn defn; + struct ast_oprn oprn; + + struct ast_expr expr; + struct ast_strlist strlist; + struct list_head *list; + }}. + -left LPAREN; -left COMMA SEMICOL. -S: exprlist DOT {}; +S: exprlist DOT { v = A(0); }; -exprlist: expr {} - | exprlist expr {} - | exprlist COMMA exprlist {} - | exprlist SEMICOL exprlist {}; +exprlist: expr { v = g_LST(A(0).expr); } + | expr exprlist { v = g_LST(A(0).expr); v.list->next = A(1).list; } + | exprlist COMMA exprlist { v = g_LST(NEW(expr, .type = AST_OPERATION, .operation = NEW(oprn, .optype = AST_OP_AND, .left_exprlist = A(0).list, .right_exprlist = A(2).list))); } + | exprlist SEMICOL exprlist { v = g_LST(NEW(expr, .type = AST_OPERATION, .operation = NEW(oprn, .optype = AST_OP_OR, .left_exprlist = A(0).list, .right_exprlist = A(2).list)));}; -expr: NUM {} - | sym {} - | sym fieldlist {} - | sym TYPELIST_START basetype TYPELIST_END {} - | sym TYPELIST_START basetype subtypelist TYPELIST_END {} - | LBRACE exprlist DOT RBRACE {} - | LPAREN exprlist RPAREN {}; +expr: NUM { v = g_NEW(expr, .type = AST_NUMBER, .number = A(0).num); } + | sym { v = g_NEW(expr, .type = AST_VARIABLE, .variable = A(0).vrbl); } + | sym fieldlist { v = g_NEW(expr, .type = AST_FIELDLIST, .fieldlist = NEW(fiel, .variable = A(0).vrbl, .fields_strlist = A(1).list)); } + | sym TYPELIST_START basetype TYPELIST_END { v = g_NEW(expr, .type = AST_VARIABLE, .variable = A(0).vrbl); } + | sym TYPELIST_START basetype subtypelist TYPELIST_END { v = g_NEW(expr, .type = AST_VARIABLE, .variable = A(0).vrbl); } + | LBRACE exprlist DOT RBRACE { v = g_NEW(expr, .type = AST_PARENLIST, .paren_exprlist = A(1).list); } + | LPAREN exprlist RPAREN { v = g_NEW(expr, .type = AST_PARENLIST, .paren_exprlist = A(1).list); }; -sym: IDEN {} | ATOM {}; +sym: IDEN { v = g_NEW(vrbl, .is_atom = 0, .iden = A(0).str); } + | ATOM { v = g_NEW(vrbl, .is_atom = 1, .iden = A(0).str); }; -fieldlist: COLON IDEN {} - | fieldlist fieldlist {}; +fieldlist: COLON IDEN { v = g_LST(NEW(strlist, .str = A(1).str)); } + | fieldlist fieldlist { A(0).list->next = A(1).list; v = A(0); }; basetype: T_INT {} | T_STRUCT {} | T_STRUCT LPAREN ATOM RPAREN {} diff --git a/lr-parser.c b/lr-parser.c index 336c222..a909f7f 100644 --- a/lr-parser.c +++ b/lr-parser.c @@ -12,84 +12,118 @@ #include "parts/table.h" #include "parts/toklist.h" // and -typedef intptr_t (*semantic_action_fn)(intmax_t *stack_head); +typedef stack_item (*semantic_action_fn)(stack_item *item_head); extern semantic_action_fn *semantic_actions; -typedef intmax_t stack_item; - #define STACK_CAP 128 -static stack_item stack_bottom[STACK_CAP]; -static stack_item *stack_head = stack_bottom; +static stack_item item_bottom[STACK_CAP]; +static stack_item *item_head = item_bottom; + +static int state_bottom[STACK_CAP]; +static int *state_head = state_bottom; static void print_stack() { fprintf(stderr, "STACK: { "); - for(stack_item *s = stack_bottom+1; s <= stack_head; s += 3) + for(int *s = state_bottom+1; s <= state_head; s += 2) fprintf(stderr, "%s ", symbol_to_str[*(symbol *)s]); fprintf(stderr, "}\n\n"); } -int lr_parser(intptr_t *value) +struct lr_errinfo { + enum lr_errtype { LR_ERR_STACKCAP_EXCEEDED, LR_ERR_UNEXPECTED_SYMBOL, LR_ERR_NO_GOTO_ENTRY } type; + union { + size_t stack_cap; + struct { symbol sym; size_t state; } idx; + }; +}; + +struct lr_errinfo *lr_parser(void *value) { -#define push(item) do { \ - if(++stack_head - stack_bottom < STACK_CAP ) *stack_head = item; \ - else { fprintf(stderr, "ERROR: STACK_CAP exceeded\n"); print_stack(); return 1; } \ + static struct lr_errinfo errinfo; + +#define push(stack_head, stack_bottom, item) do { \ + if(++stack_head - stack_bottom < STACK_CAP ) *stack_head = item; \ + else { errinfo = (struct lr_errinfo){.type = LR_ERR_STACKCAP_EXCEEDED, .stack_cap = STACK_CAP }; return &errinfo; } \ } while(0) -#define pop() (--stack_head) +#define pop(stack_head) (--stack_head) + +#define spush(item) push(state_head, state_bottom, item) +#define spop() pop(state_head) +#define ipush(item) push(item_head, item_bottom, item) +#define ipop() pop(item_head) + #define eat() toklist_eat() #define peek() toklist_peek() while(1) { - struct action a = table[(size_t)*stack_head][token_sym(peek())]; + struct action a = table[(size_t)*state_head][token_sym(peek())]; switch(a.type) { case ACTION_SHIFT:; struct token *t = eat(); - push(token_sym(t)); - push(token_val(t)); - push(a.arg); + ipush(*(stack_item*)token_val(t)); + spush(token_sym(t)); + spush(a.arg); #ifdef _LR_PARSER_DEBUG fprintf(stderr, "SHIFT %s\n", symbol_to_str[token_sym(t)]); print_stack(); #endif break; case ACTION_REDUCE: - intptr_t semantic_value = semantic_actions[a.arg](stack_head); - for(size_t i = 0; i < 3*grammar[a.arg].nRHS; i++) pop(); + stack_item semantic_value = semantic_actions[a.arg](item_head); + for(size_t i = 0; i < grammar[a.arg].nRHS; i++) { + ipop(); spop(); spop(); + } symbol lhs = grammar[a.arg].LHS; - struct action a_goto = table[(size_t)*stack_head][lhs]; + struct action a_goto = table[(size_t)*state_head][lhs]; if(a_goto.type != ACTION_GOTO) { - fprintf(stderr, - "ERROR: Expected goto action for token '%d' at state %zu\n", - lhs, (size_t)*stack_head); - return 1; + errinfo = (struct lr_errinfo){.type = LR_ERR_NO_GOTO_ENTRY, .idx = {lhs, (size_t)*state_head}}; + return &errinfo; } - push(lhs); - push(semantic_value); - push(a_goto.arg); + ipush(semantic_value); + spush(lhs); + spush(a_goto.arg); #ifdef _LR_PARSER_DEBUG fprintf(stderr, "READUCE %s\n", symbol_to_str[lhs]); + print_stack(); #endif break; case ACTION_ACCEPT: - for(size_t i = 0; i < 3; i++) push(0); // todo: better fix for reducing the final production expecting an END_INPUT on the stack - *value = semantic_actions[0](stack_head); - return 0; + ipush((stack_item){0}); + spush(0); spush(0); + // todo: better fix for reducing the final production expecting an END_INPUT on the stack + *(stack_item *)value = semantic_actions[0](item_head); + return NULL; case ACTION_NOT_SET: default: - fprintf(stderr, - "ERROR: Unexpected symbol '%s' at state %zu\n", - symbol_to_str[token_sym(peek())], (size_t)*stack_head); - // Expected ... - print_stack(); - return 1; + errinfo = (struct lr_errinfo){.type = LR_ERR_UNEXPECTED_SYMBOL, .idx = {token_sym(peek()), (size_t)*state_head}}; + return &errinfo; } } +} + +char *lr_err_str(struct lr_errinfo *errinfo) +{ + // TODO: check if strbuf cap is exceeded + static char strbuf[128]; + + switch(errinfo->type) { + case LR_ERR_STACKCAP_EXCEEDED: + snprintf(strbuf, sizeof(strbuf), "LR parser stack capacity of %zu has been exceeded", errinfo->stack_cap); + break; + case LR_ERR_UNEXPECTED_SYMBOL: + snprintf(strbuf, sizeof(strbuf), "Unexpected symbol '%s' at state '%zu'", symbol_to_str[errinfo->idx.sym], errinfo->idx.state); + break; + case LR_ERR_NO_GOTO_ENTRY: + snprintf(strbuf, sizeof(strbuf), "No GOTO state for symbol '%s' at state '%zu'", symbol_to_str[errinfo->idx.sym], errinfo->idx.state); + break; + } - return 1; + return strbuf; } #ifdef _LR_PARSER_STANDALONE @@ -107,6 +141,7 @@ enum symbol { SYMBOLS_END, }; +char **symbol_to_str = (char*[]){"+", "-", "(", ")", "0", "1", "IN_END", "EP", "E", "T", "N", "S_END"}; size_t total_symbols = SYMBOLS_END; IMPLEMENT_FUNCPTR(int, symbol_is_valid, (symbol s)) { return s < SYMBOLS_END; } @@ -152,7 +187,7 @@ size_t table_states = 13; // implement toklist.h struct token { symbol s; - int v; + intmax_t v; }; static struct token toklist[] = {{N0, 0}, {PLUS, 0}, {N1, 0}, {END_INPUT, 0}}; @@ -167,18 +202,18 @@ struct token *toklist_eat() } struct token *toklist_peek() { return toklist + tok; } -symbol token_sym(struct token *t) { return t->s; } -int token_val(struct token *t) { return t->v; } +symbol token_sym(struct token *t) { return t->s; } +intptr_t token_val(struct token *t) { return (intptr_t)&t->v; } -intptr_t none(intmax_t *stack_head) {(void)stack_head; return 0;} +intmax_t none(intmax_t *stack_head) {(void)stack_head; return 0;} semantic_action_fn *semantic_actions = (semantic_action_fn[]){none, none, none, none, none, none, none, none}; int main(void) { - intptr_t value; + intmax_t value; if(lr_parser(&value)) return 1; - printf("%jd\n", value); + printf("%ld\n", value); return 0; } diff --git a/util/dict.c b/util/dict.c index 75fcf3b..d50b07b 100644 --- a/util/dict.c +++ b/util/dict.c @@ -4,7 +4,7 @@ #include "util/dict.h" -#define CHAR_TO_PTR(c) (bit_to_ptr[char_to_bit[c]]) +#define CHAR_TO_PTR(c) (bit_to_ptr[char_to_bit(c)]) #define popcount(x) (__builtin_popcount(x)) #define start_level d->start_level @@ -12,17 +12,19 @@ #define num_levels d->num_levels #define strings d->strings #define nstrings d->nstrings -#define char_to_bit d->char_to_bit +#define char_to_bit(c) d->char_to_bit[(size_t)(c)] int dict_compile(struct dict *d) { + void *(*new_calloc)(size_t, size_t) = d->calloc ? d->calloc : calloc; + // max number of levels for(size_t i = 0; i < nstrings; i++) if(strlen(strings[i].s) > num_levels) num_levels = strlen(strings[i].s); // allocated levels for(size_t i = 0; i < MAPPED_CHARS; i++) { - bit_to_ptr[i] = calloc(num_levels, sizeof(*bit_to_ptr[i])); + bit_to_ptr[i] = new_calloc(num_levels, sizeof(*bit_to_ptr[i])); if(!bit_to_ptr[i]) return 1; } @@ -32,7 +34,7 @@ int dict_compile(struct dict *d) for(size_t i = 0; i < nstrings; i++) { struct level *l = &start_level; for(size_t j = 0; j < strlen(strings[i].s)+1; j++) { - uint8_t bit = char_to_bit[strings[i].s[j]]; + uint8_t bit = char_to_bit(strings[i].s[j]); l->bit_mask |= 1 << bit; l = &bit_to_ptr[bit][j]; @@ -44,10 +46,12 @@ int dict_compile(struct dict *d) for(size_t i = 0; i < MAPPED_CHARS; i++) { struct level *l = &start_level; for(size_t j = 0; j < num_levels + 1; j++) { - l->token_masks = realloc(l->token_masks, popcount(l->bit_mask) - * sizeof(*l->token_masks)); - memset(l->token_masks, 0, popcount(l->bit_mask) - * sizeof(*l->token_masks)); + if(!l->token_masks) { + l->token_masks = new_calloc(popcount(l->bit_mask), sizeof(*l->token_masks)); + if(!l->token_masks) return 1; + memset(l->token_masks, 0, popcount(l->bit_mask) + * sizeof(*l->token_masks)); + } l = &bit_to_ptr[i][j]; } @@ -57,7 +61,7 @@ int dict_compile(struct dict *d) for(size_t i = 0; i < nstrings; i++) { struct level *l = &start_level; for(size_t j = 0; j < strlen(strings[i].s)+1; j++) { - uint8_t bit = char_to_bit[strings[i].s[j]]; + uint8_t bit = char_to_bit(strings[i].s[j]); uint8_t idx = popcount(l->bit_mask & ((1 << bit) - 1)); l->token_masks[idx] |= 1 << strings[i].t; @@ -93,13 +97,15 @@ void dict_print(struct dict *d) void dict_free(struct dict *d) { - free(start_level.token_masks); + void (*new_free)(void *) = d->free ? d->free : free; + + new_free(start_level.token_masks); for(size_t i = 0; i < MAPPED_CHARS; i++) { for(size_t j = 0; j < num_levels; j++) { if(bit_to_ptr[i][j].token_masks) - free(bit_to_ptr[i][j].token_masks); + new_free(bit_to_ptr[i][j].token_masks); } - free(bit_to_ptr[i]); + new_free(bit_to_ptr[i]); } } @@ -110,9 +116,9 @@ int dict_check(struct dict *d, char *string) for(size_t i = 0; i < strlen(string) + 1; i++) { struct level *l = (i == 0) ? &start_level - : &bit_to_ptr[char_to_bit[string[i-1]]][i-1]; + : &bit_to_ptr[char_to_bit(string[i-1])][i-1]; - uint8_t bit = char_to_bit[string[i]]; + uint8_t bit = char_to_bit(string[i]); if((l->bit_mask & (1 << bit)) == 0) return -1; @@ -175,7 +181,7 @@ int main(void) d.strings = strings; d.nstrings = nstrings; d.char_to_bit = char_to_bit; - + dict_compile(&d); int t; diff --git a/util/dict.h b/util/dict.h index 2da8e6f..b19e766 100644 --- a/util/dict.h +++ b/util/dict.h @@ -26,9 +26,18 @@ struct dict { struct level start_level; struct level *bit_to_ptr[MAPPED_CHARS]; size_t num_levels; + + // allocator + void *(*calloc)(size_t, size_t); + void (*free)(void *); }; -#define DICT_INIT(strings_, nstrings_, char_to_bit_) (struct dict){.strings = strings_, .nstrings = nstrings_, .char_to_bit = char_to_bit_} +#define DICT_INIT(strings_, nstrings_, char_to_bit_) (struct dict){.strings = strings_, .nstrings = nstrings_, .char_to_bit = char_to_bit_, .calloc = NULL, .free = NULL} + +#define DICT_SET_ALLOCATOR(dict, calloc_, free_) do { \ + (dict)->calloc = (calloc_); \ + (dict)->free = (free_); \ + } while(0) int dict_compile(struct dict *d); void dict_free(struct dict *d); diff --git a/util/list.h b/util/list.h index d9531ea..73eb900 100644 --- a/util/list.h +++ b/util/list.h @@ -5,16 +5,13 @@ #define container_of(ptr, type, member) ((type *)((char *)(ptr) - offsetof(type, member))) struct list_head { - struct list_head *prev; struct list_head *next; }; #define LIST_END NULL -#define LIST_EMPTY(list) do { \ - (list)->next = LIST_END; \ - (list)->prev = LIST_END; \ - } while(0); +#define LIST_EMPTY(list) (list)->next = LIST_END +#define LIST_INIT_EMPTY() (struct list_head){.next = NULL} #define list_entry(ptr, type, member) \ container_of(ptr, type, member) @@ -35,44 +32,21 @@ struct list_head { pos && (__next = pos->next,1); \ pos = __next) -static inline int list_is_head(struct list_head *l) -{ - return l->prev == LIST_END; -} - static inline int list_is_tail(struct list_head *l) { return l->next == LIST_END; } -static inline struct list_head *list_get_head(struct list_head *l) -{ - while(!list_is_head(l)) l = l->prev; - return l; -} - static inline struct list_head *list_get_tail(struct list_head *l) { while(!list_is_tail(l)) l = l->next; return l; } -static inline struct list_head *list_add( - struct list_head *head, - struct list_head *new) -{ - if(head) { - // new->next = head->next; - head->next = new; - } - new->prev = head; - return new; -} - static inline size_t list_len(struct list_head *head) { size_t n = 0; - list_for_each(pos, list_get_head(head)) n++; + list_for_each(pos, head) n++; return n; } |
