diff options
| author | kartofen <kartofen.mail.0@protonmail.com> | 2025-09-24 00:06:54 +0300 |
|---|---|---|
| committer | kartofen <kartofen.mail.0@protonmail.com> | 2025-09-24 00:06:54 +0300 |
| commit | fec8e3a95becfb3dc2a3eb0f512a120a7a4551c5 (patch) | |
| tree | 8d3da9f0c3fa36f5e06bc49cc1bfdc0db3099ea1 | |
| parent | db1b9c8dcb0d115217a33c2fe8e0760d49143e11 (diff) | |
debug info through the lr parser
| -rw-r--r-- | demos/generate-parser.c | 8 | ||||
| -rw-r--r-- | demos/sample-files/gram-skeleton.c | 5 | ||||
| -rw-r--r-- | demos/sample-files/lbp-code.lbp | 2 | ||||
| -rw-r--r-- | demos/sample-files/lbp-skeleton.c | 267 | ||||
| -rw-r--r-- | demos/sample-files/lbp.g | 66 | ||||
| -rw-r--r-- | lr-parser.c | 73 |
6 files changed, 285 insertions, 136 deletions
diff --git a/demos/generate-parser.c b/demos/generate-parser.c index e0ec5f5..c9c71f3 100644 --- a/demos/generate-parser.c +++ b/demos/generate-parser.c @@ -170,14 +170,14 @@ int main(int argc, char **argv) for(size_t i = 0; i < total_productions; i++) { printf("#define A(n) (*(stack_head-%zu+n))\n", grammar[i].nRHS-1); - printf("stack_item __prod%zu_action(stack_item *stack_head)\n", i); + printf("void *__prod%zu_action(stack_item *r, stack_item *stack_head)\n", i); printf("{ stack_item v = {0};\n"); puts(semantic_action_str[i]); - printf("return v; }\n"); + printf("*r = v; return NULL; }\n"); printf("#undef A\n"); } - printf("typedef stack_item (*semantic_action_fn)(stack_item *stack_head);\n"); + printf("typedef void *(*semantic_action_fn)(stack_item *, stack_item *);\n"); printf("semantic_action_fn *semantic_actions = (semantic_action_fn[]){\n"); for(size_t i = 0; i < total_productions; i++) @@ -194,7 +194,7 @@ int main(int argc, char **argv) printf("#include \"parts/grammar.h\"\n"); printf("#include \"parts/table.h\"\n"); printf("typedef %s stack_item;\n", stack_item_type); - printf("typedef stack_item (*semantic_action_fn)(stack_item *stack_head);\n"); + printf("typedef void *(*semantic_action_fn)(stack_item *, stack_item *);\n"); printf("extern semantic_action_fn *semantic_actions;\n"); printf("#endif\n"); set_stdout(NULL); diff --git a/demos/sample-files/gram-skeleton.c b/demos/sample-files/gram-skeleton.c index 9898c6b..d9616cc 100644 --- a/demos/sample-files/gram-skeleton.c +++ b/demos/sample-files/gram-skeleton.c @@ -222,8 +222,9 @@ int main(void) input = next_token(input_buf); - intptr_t value; - if(lr_parser(&value)) { + struct lr_parseinfo parseinfo; + intptr_t value = *(intptr_t *)lr_parser(&parseinfo); + if(parseinfo.type) { fprintf(stderr, input); return 1; } diff --git a/demos/sample-files/lbp-code.lbp b/demos/sample-files/lbp-code.lbp index 3750623..8339951 100644 --- a/demos/sample-files/lbp-code.lbp +++ b/demos/sample-files/lbp-code.lbp @@ -18,7 +18,7 @@ inbounds/int-function(low, high, val) { (31 |_, ---). }, -:aircraft_iden/struct { -. tova_tuk_e_sintaktichna_greshka. }, +:aircraft_iden/struct { -, tova_tuk_e_sintaktichna_greshka. }, :message/struct { DF/enum(:downlinkfmt) |5, diff --git a/demos/sample-files/lbp-skeleton.c b/demos/sample-files/lbp-skeleton.c index bf7bdca..1ee54d8 100644 --- a/demos/sample-files/lbp-skeleton.c +++ b/demos/sample-files/lbp-skeleton.c @@ -3,12 +3,20 @@ #include <string.h> #include <stdint.h> #include <ctype.h> +#include <stdarg.h> -// TODO: - lr parser is bad for debugging -// - deal with errors +// TODO: - lr parser is bad for debugging (now its better) +// - deal with errors (the token queue for example)!!!! +// - debuginfo in the token that gets propagaded through the +// stack_items and lr_parse returns a generic errorinfo +// with user-implemented compilation error messages +// - ast should show the specific operation (match, assignment, etc) + +#define MIN(a, b) ((a) > (b) ? (b) : (a)) +#define MAX(a, b) ((a) > (b) ? (a) : (b)) #define INPUT_CAP 4096 -#define ARENA_CAP 8192 //4096 +#define ARENA_CAP 4096 * 3 #define ARENA_IMPLEMENTATION #include "util/arena.h" @@ -51,40 +59,75 @@ static void xfree(void *ptr) { (void)ptr; return; } #include "util/list.h" -struct ast_strlist { char *str; struct list_head list; }; +struct datatype { + int typeflags; + // TODO: add ptr + char *iden; + struct list_head *function_exprlist; +}; + +struct strlist { char *str; struct list_head list; }; + +// TODO: +// - type of each each exprlist, be it assignement, match, function call, etc +// - revise expr types, etc + +// struct ast_exprlist { +// enum { AST_OP_ASSIGNMENT, AST_OP_TYPE, AST_OP_CALL } type; +// struct list_head *exprlist; +// } + +// struct ast_exprlist { +// enum { AST_OP_ASSIGNMENT, AST_OP_IMPLEMENT, AST_OP_MATCH, AST_OP_EQUAL, AST_OP_CALL } type; +// struct list_head *exprlist; +// }; + +// struct ast_expr { +// enum { AST_LITERAL, AST_IDENTIFIER, AST_DECLARATION, AST_SUB } type; + +// struct datatype datatype; +// struct list_head list; +// } struct ast_expr { - enum { AST_NUMBER, AST_VARIABLE, AST_FIELDLIST, AST_DECLARATION, AST_DEFINITION, AST_OPERATION, AST_PARENLIST } type; + enum { AST_NUMBER, AST_VARIABLE, AST_DECLARATION, AST_FIELDLIST, AST_OPERATION, AST_PARENLIST, AST_BRACELIST } type; union { int number; struct ast_vrbl { int is_atom; char *iden; } variable; struct ast_fiel { struct ast_vrbl variable; struct list_head *fields_strlist; } fieldlist; - struct ast_decl { struct ast_vrbl variable; int typed; } declaration; - struct ast_defn { struct ast_decl declartion; struct list_head *block_exprlist; } definition; + struct ast_oprn { enum { AST_OP_AND, AST_OP_OR } optype; struct list_head *left_exprlist; struct list_head *right_exprlist; } operation; - struct list_head *paren_exprlist; + struct list_head *exprlist; }; + + struct datatype datatype; struct list_head list; }; -#define NEW(t, ...) ((struct ast_##t){__VA_ARGS__}) -#define g_NEW(t, ...) ((stack_item){.t = (struct ast_##t){__VA_ARGS__}}) +#define AST(t, ...) ((struct ast_##t){__VA_ARGS__}) +#define g_AST(t, ...) ((stack_item){.t = {__VA_ARGS__}}) -#define LST(v) ({ typeof(v) *r = xalloc(sizeof(v)); *r = v; LIST_EMPTY(&r->list); &r->list; }) -#define g_LST(v) ((stack_item){.list = LST(v)}) +#define NEW(t, ...) ((struct t){__VA_ARGS__}) +#define g_NEW(t, ...) ((stack_item){.t = {__VA_ARGS__}}) -void ast_vrbl_print(struct ast_vrbl *vrbl); -void ast_fiel_print(struct ast_fiel *fiel); -void ast_decl_print(struct ast_decl *decl); -void ast_defn_print(struct ast_defn *defn); -void ast_oprn_print(struct ast_oprn *oprn); -void ast_expr_print(struct ast_expr *expr); -void ast_exprlist_print(struct list_head *list); +#define LST(v) ({ typeof(v) *r = malloc(sizeof(v)); *r = v; LIST_EMPTY(&r->list); &r->list; }) +#define g_LST(v) ((stack_item){.list = LST(v)}) -void ast_exprlist_free(struct list_head *list); +// void ast_decide_datatype(struct list_head *list); +void ast_print(struct list_head *list); +void ast_free(struct list_head *list); // generated #include "bin/lbp.h" + +enum { DATATYPE_INT = 0, DATATYPE_STRUCT, DATATYPE_ENUM }; +enum { SUBTYPE_FUNCTION = 1 << 0, SUBTYPE_LITTLE = 1 << 1, + SUBTYPE_BIG = 1 << 2, SUBTYPE_NATIVE = 1 << 3 }; + +#define TYPEFLAGS(type, subtype) (((subtype) << 3) | (type)) +#define flags2type(flags) ((flags) & 7) +#define flags2subtype(flags) ((flags) >> 3) + #include "bin/lbp.c" #include "util/dict.h" @@ -107,32 +150,31 @@ static uint8_t dict_lowercase_char_to_bit[256] = { ['y'] = 26, ['z'] = 27, [ 0 ] = 1, [' '] = 1 }; +static inline char *substring(char *str, size_t sub_end); +static inline char *linestart(char *strstart, char *pos); +static inline size_t tillch(char *str, size_t len, char ch); +#define strdup(...) _strdup(__VA_ARGS__) +static inline char *_strdup(char *str); + #include "parts/toklist.h" struct token { symbol s; stack_item v; }; #define TOKEN_INIT(sym, val) (struct token){ .s = sym, .v = val } -static void print_token(struct token *t); - symbol token_sym(struct token *t) { return t->s; } intptr_t token_val(struct token *t) { return (intptr_t)&t->v; } +static void print_token(struct token *t); -static char *input; static size_t line = 1; static size_t active_region; static char *next_token(char *str); -static inline char *substring(char *str, size_t sub_end); -static inline char *linestart(char *strstart, char *pos); -static inline size_t tillch(char *str, size_t len, char ch); -#define strdup(...) _strdup(__VA_ARGS__) -static inline char *_strdup(char *str); - - #include "util/queue.h" QUEUE_GENERATE(tokbuf, struct token, 16) +static char *input; + struct token *toklist_eat() { static struct token t; @@ -147,13 +189,22 @@ struct token *toklist_peek() { return &t; } +struct debuginfo { + size_t line; + size_t active_region; + char *end_ptr; +}; +#define debuginfo_merge(out, ...) _debuginfo_merge(out, __VA_ARGS__, NULL); +void _debuginfo_merge(struct debuginfo *out, ...); + +void errmsg_print(char *filename, char *input_buf, struct debuginfo *debuginfo, char *message); + // #define _LR_PARSER_DEBUG #include "lr-parser.c" int main(void) { - char *filename = "stdin"; - + static char *filename = "stdin"; static char input_buf[INPUT_CAP]; if(fread(input_buf, INPUT_CAP, 1, stdin) == INPUT_CAP) { fprintf(stderr, "INPUT_CAP reached\n"); @@ -174,26 +225,17 @@ int main(void) // if(token_sym(tok) == END_INPUT) break; // } return 0; - stack_item value; - struct lr_errinfo *errinfo; - if((errinfo = lr_parser(&value))) { - char *l = linestart(input_buf, input); - - fprintf(stderr, "%s:%zu:%zu: ERROR: %s\n", filename, line, input - l - active_region+1, lr_err_str(errinfo)); - - size_t indent = fprintf(stderr, " %zu ", line); - fprintf(stderr, "| %s\n", substring(l, tillch(l, strlen(l), '\n'))); - - fprintf(stderr, "%*s| %*s", indent, "", input - l - active_region, ""); - if(active_region == 0) active_region = 1; - fprintf(stderr, "^"); for(size_t i = 0; i < active_region-1; i++) fprintf(stderr, "~"); - - fprintf(stderr, "\n\n"); + struct lr_parseinfo parseinfo; + void *value = lr_parser(&parseinfo); + if(parseinfo.type == LR_ABORTED) { + goto cleanup; + } else if(parseinfo.type) { + errmsg_print(filename, input_buf, NULL, lr_err_str(&parseinfo)); goto cleanup; } - ast_exprlist_print(value.list); - // ast_exprlist_free(value.list); + ast_print(((stack_item *)value)->list); + ast_free(((stack_item *)value)->list); cleanup: dict_free(&types_dict); @@ -213,6 +255,46 @@ static void print_token(struct token *tok) } } +void _debuginfo_merge(struct debuginfo *out, ...) +{ + va_list ap; + va_start(ap, out); + + *out = *va_arg(ap, struct debuginfo *); + + struct debuginfo *arg; + while((arg = va_arg(ap, struct debuginfo *))) { + intptr_t start = MIN((intptr_t)out->end_ptr - out->active_region, + (intptr_t)arg->end_ptr - arg->active_region); + intptr_t end = MAX((intptr_t)out->end_ptr, (intptr_t)arg->end_ptr); + + out->active_region = end - start; + out->end_ptr = (char *)end; + } + + va_end(ap); +} + +void errmsg_print(char *filename, char *input_buf, struct debuginfo *debuginfo, char *message) +{ + if(!debuginfo) debuginfo = &(struct debuginfo){.line = line, .active_region = active_region, .end_ptr = input}; + + char *l = linestart(input_buf, debuginfo->end_ptr); + + fprintf(stderr, "%s:%zu:%zu: ERROR: %s\n", filename, debuginfo->line, + debuginfo->end_ptr - l - debuginfo->active_region+1, + message); + + int indent = fprintf(stderr, " %zu ", debuginfo->line); + fprintf(stderr, "| %s\n", substring(l, tillch(l, strlen(l), '\n'))); + + fprintf(stderr, "%*s| %*s", indent, "", debuginfo->end_ptr - l - debuginfo->active_region, ""); + if(active_region == 0) debuginfo->active_region = 1; + for(size_t i = 0; i < debuginfo->active_region; i++) fprintf(stderr, "~"); + + fprintf(stderr, "\n"); +} + // STR UTIL #define strdup(...) _strdup(__VA_ARGS__) @@ -336,9 +418,10 @@ static char *next_token(char *str) sub_off = tillch(str + 1, off - 1, ':') + 1; if(hasfield) tokbuf_enqueue(&TOKEN_INIT(COLON, { .num=0 })); + int skip = hasfield || str[0] == ':'; tokbuf_enqueue( - &TOKEN_INIT((!hasfield && str[0] == ':') ? ATOM : IDEN, - { .str = strdup(substring(str+hasfield, sub_off-hasfield))})); + &TOKEN_INIT((skip && !hasfield) ? ATOM : IDEN, + { .str = strdup(substring(str+skip, sub_off-skip))})); } while(hasfield = 1, str += sub_off, off -= sub_off, off > 0); return str; @@ -357,6 +440,34 @@ fail: // ast printing +int ident = 0; +#define INDENT() printf("%*s", ident*2, ""); + +void ast_exprlist_print(struct list_head *list); + +void datatype_print(struct datatype *datatype) +{ + // TOOD: fix, very messy + switch(flags2type(datatype->typeflags)) { + case DATATYPE_INT: printf("int"); break; + case DATATYPE_ENUM: printf("enum"); break; + case DATATYPE_STRUCT: printf("struct"); break; + } + + if(datatype->iden) printf("(%s)", datatype->iden); + + for(int i = 0; i < 3; i++) + switch(flags2subtype(datatype->typeflags) & (1 << i)) { + case 0: break; + case SUBTYPE_FUNCTION: + printf("-function("); + ast_exprlist_print(datatype->function_exprlist); + printf(")"); + break; + default: printf("-%d", i); break; + } +} + void ast_vrbl_print(struct ast_vrbl *vrbl) { printf("%s%s", vrbl->is_atom ? ":" : "", vrbl->iden); @@ -365,25 +476,25 @@ void ast_vrbl_print(struct ast_vrbl *vrbl) void ast_fiel_print(struct ast_fiel *fiel) { ast_vrbl_print(&fiel->variable); - list_for_each_entry(struct ast_strlist, entry, list, fiel->fields_strlist) { + list_for_each_entry(struct strlist, entry, list, fiel->fields_strlist) { printf(":%s", entry->str); } } -void ast_decl_print(struct ast_decl *decl) -{ - // TODO: implement -} - -void ast_defn_print(struct ast_defn *defn) +void ast_decl_print(struct ast_vrbl *vrbl, struct datatype *datatype) { - // TODO: implement + ast_vrbl_print(vrbl); + printf("/"); + datatype_print(datatype); } void ast_oprn_print(struct ast_oprn *oprn) { ast_exprlist_print(oprn->left_exprlist); + if(oprn->optype == AST_OP_AND) printf(",\n"); else printf(";\n"); + INDENT(); + ast_exprlist_print(oprn->right_exprlist); } @@ -396,21 +507,34 @@ void ast_exprlist_print(struct list_head *list) case AST_NUMBER: printf("%d", entry->number); break; case AST_VARIABLE: ast_vrbl_print(&entry->variable); break; case AST_FIELDLIST: ast_fiel_print(&entry->fieldlist); break; - case AST_DECLARATION: ast_decl_print(&entry->declaration); break; + case AST_DECLARATION: ast_decl_print(&entry->variable, &entry->datatype); break; case AST_OPERATION: ast_oprn_print(&entry->operation); break; case AST_PARENLIST: - printf("("); - ast_exprlist_print(entry->paren_exprlist); - printf(")"); + printf("(\n"); ident++; + INDENT(); ast_exprlist_print(entry->exprlist); + printf("\n"); ident--; + INDENT(); printf(")"); + break; + case AST_BRACELIST: + printf("{\n"); ident++; + INDENT(); ast_exprlist_print(entry->exprlist); + printf(".\n"); ident--; + INDENT(); printf("}"); break; default: fprintf(stderr, "UNKNOWN TYPE: %d\n", entry->type); } - printf(" "); + + if(entry->list.next) printf(" "); } - printf("\n"); } -void ast_exprlist_free(struct list_head *list) +void ast_print(struct list_head *list) +{ + ast_exprlist_print(list); + printf(".\n"); +} + +void ast_free(struct list_head *list) { list_for_each_safe(l, list) { struct ast_expr *entry = list_entry(l, typeof(*entry), list); @@ -420,18 +544,19 @@ void ast_exprlist_free(struct list_head *list) case AST_VARIABLE: break; case AST_FIELDLIST: list_for_each_safe(l, entry->fieldlist.fields_strlist) - free(list_entry(l, struct ast_strlist, list)); + free(list_entry(l, struct strlist, list)); break; case AST_DECLARATION: break; - case AST_DEFINITION: break; case AST_OPERATION: - ast_exprlist_free(entry->operation.left_exprlist); - ast_exprlist_free(entry->operation.right_exprlist); + ast_free(entry->operation.left_exprlist); + ast_free(entry->operation.right_exprlist); break; - case AST_PARENLIST: ast_exprlist_free(entry->paren_exprlist); break; + case AST_BRACELIST: + case AST_PARENLIST: ast_free(entry->exprlist); break; default: fprintf(stderr, "UNKNOWN TYPE: %d\n", entry->type); } + ast_free(entry->datatype.function_exprlist); free(entry); } } diff --git a/demos/sample-files/lbp.g b/demos/sample-files/lbp.g index 1dd176c..a5532ca 100644 --- a/demos/sample-files/lbp.g +++ b/demos/sample-files/lbp.g @@ -9,18 +9,16 @@ -nonterminal S exprlist expr sym fieldlist basetype subtypelist. -stacktype { union { + struct ast_expr expr; + int num; char *str; - struct ast_vrbl vrbl; - struct ast_fiel fiel; - struct ast_decl decl; - struct ast_defn defn; - struct ast_oprn oprn; - - struct ast_expr expr; - struct ast_strlist strlist; + struct strlist strlist; struct list_head *list; + + struct { int type; char *iden; } basetype; + struct { int type; struct list_head *exprlist; } subtype; }}. -left LPAREN; @@ -28,28 +26,44 @@ S: exprlist DOT { v = A(0); }; -exprlist: expr { v = g_LST(A(0).expr); } +exprlist: expr { v = g_LST(A(0).expr); /* determine type??? */ } | expr exprlist { v = g_LST(A(0).expr); v.list->next = A(1).list; } - | exprlist COMMA exprlist { v = g_LST(NEW(expr, .type = AST_OPERATION, .operation = NEW(oprn, .optype = AST_OP_AND, .left_exprlist = A(0).list, .right_exprlist = A(2).list))); } - | exprlist SEMICOL exprlist { v = g_LST(NEW(expr, .type = AST_OPERATION, .operation = NEW(oprn, .optype = AST_OP_OR, .left_exprlist = A(0).list, .right_exprlist = A(2).list)));}; + | exprlist COMMA exprlist + { v = g_LST(AST(expr, .type = AST_OPERATION, .operation = + AST(oprn, .optype = AST_OP_AND, .left_exprlist = A(0).list, .right_exprlist = A(2).list))); } + | exprlist SEMICOL exprlist + { v = g_LST(AST(expr, .type = AST_OPERATION, .operation = + AST(oprn, .optype = AST_OP_OR, .left_exprlist = A(0).list, .right_exprlist = A(2).list)));}; -expr: NUM { v = g_NEW(expr, .type = AST_NUMBER, .number = A(0).num); } - | sym { v = g_NEW(expr, .type = AST_VARIABLE, .variable = A(0).vrbl); } - | sym fieldlist { v = g_NEW(expr, .type = AST_FIELDLIST, .fieldlist = NEW(fiel, .variable = A(0).vrbl, .fields_strlist = A(1).list)); } - | sym TYPELIST_START basetype TYPELIST_END { v = g_NEW(expr, .type = AST_VARIABLE, .variable = A(0).vrbl); } - | sym TYPELIST_START basetype subtypelist TYPELIST_END { v = g_NEW(expr, .type = AST_VARIABLE, .variable = A(0).vrbl); } - | LBRACE exprlist DOT RBRACE { v = g_NEW(expr, .type = AST_PARENLIST, .paren_exprlist = A(1).list); } - | LPAREN exprlist RPAREN { v = g_NEW(expr, .type = AST_PARENLIST, .paren_exprlist = A(1).list); }; +expr: NUM { v = g_AST(expr, .type = AST_NUMBER, .number = A(0).num); } + | sym { v = g_AST(expr, .type = AST_VARIABLE, .variable = A(0).vrbl); } + | sym fieldlist + { v = g_AST(expr, .type = AST_FIELDLIST, .fieldlist = AST(fiel, .variable = A(0).vrbl, .fields_strlist = A(1).list)); } + | sym TYPELIST_START basetype TYPELIST_END + { v = g_AST(expr, .type = AST_DECLARATION, .variable = A(0).vrbl, + .datatype = NEW(datatype, .typeflags = A(2).basetype.type, + .iden = A(2).basetype.iden)); } + | sym TYPELIST_START basetype subtypelist TYPELIST_END + { v = g_AST(expr, .type = AST_DECLARATION, .variable = A(0).vrbl, + .datatype = NEW(datatype, .typeflags = TYPEFLAGS(A(2).basetype.type, A(3).subtype.type), + .iden = A(2).basetype.iden, + .function_exprlist = A(3).subtype.exprlist)); } + | LBRACE exprlist DOT RBRACE { v = g_AST(expr, .type = AST_BRACELIST, .exprlist = A(1).list); } + | LPAREN exprlist RPAREN { v = g_AST(expr, .type = AST_PARENLIST, .exprlist = A(1).list); }; -sym: IDEN { v = g_NEW(vrbl, .is_atom = 0, .iden = A(0).str); } - | ATOM { v = g_NEW(vrbl, .is_atom = 1, .iden = A(0).str); }; +sym: IDEN { v = g_AST(vrbl, .is_atom = 0, .iden = A(0).str); } + | ATOM { v = g_AST(vrbl, .is_atom = 1, .iden = A(0).str); }; fieldlist: COLON IDEN { v = g_LST(NEW(strlist, .str = A(1).str)); } | fieldlist fieldlist { A(0).list->next = A(1).list; v = A(0); }; -basetype: T_INT {} - | T_STRUCT {} | T_STRUCT LPAREN ATOM RPAREN {} - | T_ENUM {} | T_ENUM LPAREN ATOM RPAREN {}; -subtypelist: ST_FUNCTION LPAREN exprlist RPAREN {} - | ST_BIG {} | ST_LITTLE {} | ST_NATIVE {} - | subtypelist subtypelist {}. +basetype: T_INT { v = g_NEW(basetype, .type = DATATYPE_INT); } + | T_STRUCT { v = g_NEW(basetype, .type = DATATYPE_STRUCT); } + | T_ENUM { v = g_NEW(basetype, .type = DATATYPE_ENUM); } + | T_STRUCT LPAREN ATOM RPAREN { v = g_NEW(basetype, .type = DATATYPE_STRUCT, .iden = A(2).str); } + | T_ENUM LPAREN ATOM RPAREN { v = g_NEW(basetype, .type = DATATYPE_ENUM, .iden = A(2).str); }; +subtypelist: ST_FUNCTION LPAREN exprlist RPAREN { v = g_NEW(subtype, .type = SUBTYPE_FUNCTION, .exprlist = A(2).list); } + | ST_BIG { v = g_NEW(subtype, .type = SUBTYPE_BIG); } + | ST_LITTLE { v = g_NEW(subtype, .type = SUBTYPE_LITTLE); } + | ST_NATIVE { v = g_NEW(subtype, .type = SUBTYPE_NATIVE); } + | subtypelist subtypelist { v = A(1); v.subtype.type |= A(0).subtype.type; if(A(0).subtype.exprlist) v.subtype.exprlist = A(0).subtype.exprlist; }. diff --git a/lr-parser.c b/lr-parser.c index a909f7f..68b874a 100644 --- a/lr-parser.c +++ b/lr-parser.c @@ -12,14 +12,15 @@ #include "parts/table.h" #include "parts/toklist.h" // and -typedef stack_item (*semantic_action_fn)(stack_item *item_head); +typedef void *(*semantic_action_fn)(stack_item *, stack_item *); extern semantic_action_fn *semantic_actions; -#define STACK_CAP 128 +#define ARR_LEN(arr) (sizeof(arr)/sizeof(*arr)) +#define STACK_CAP 64 static stack_item item_bottom[STACK_CAP]; static stack_item *item_head = item_bottom; -static int state_bottom[STACK_CAP]; +static int state_bottom[STACK_CAP*2]; static int *state_head = state_bottom; static void print_stack() @@ -30,38 +31,36 @@ static void print_stack() fprintf(stderr, "}\n\n"); } -struct lr_errinfo { - enum lr_errtype { LR_ERR_STACKCAP_EXCEEDED, LR_ERR_UNEXPECTED_SYMBOL, LR_ERR_NO_GOTO_ENTRY } type; +struct lr_parseinfo { + enum { LR_SUCCESS = 0, LR_ABORTED, LR_ERR_STACKCAP_EXCEEDED, LR_ERR_UNEXPECTED_SYMBOL, LR_ERR_NO_GOTO_ENTRY } type; union { size_t stack_cap; struct { symbol sym; size_t state; } idx; }; }; -struct lr_errinfo *lr_parser(void *value) +void *lr_parser(struct lr_parseinfo *parseinfo) { - static struct lr_errinfo errinfo; + void *semantic_action_r; + stack_item semantic_value; #define push(stack_head, stack_bottom, item) do { \ - if(++stack_head - stack_bottom < STACK_CAP ) *stack_head = item; \ - else { errinfo = (struct lr_errinfo){.type = LR_ERR_STACKCAP_EXCEEDED, .stack_cap = STACK_CAP }; return &errinfo; } \ + if((size_t)(++stack_head - stack_bottom) < (size_t)ARR_LEN(stack_bottom) ) *stack_head = item; \ + else { *parseinfo = (struct lr_parseinfo){.type = LR_ERR_STACKCAP_EXCEEDED, .stack_cap = STACK_CAP }; return NULL; } \ } while(0) -#define pop(stack_head) (--stack_head) +#define pop(stack_head, stack_bottom) if(--stack_head < stack_bottom) exit(33); #define spush(item) push(state_head, state_bottom, item) -#define spop() pop(state_head) +#define spop() pop(state_head, state_bottom) #define ipush(item) push(item_head, item_bottom, item) -#define ipop() pop(item_head) - -#define eat() toklist_eat() -#define peek() toklist_peek() +#define ipop() pop(item_head, item_bottom) while(1) { - struct action a = table[(size_t)*state_head][token_sym(peek())]; + struct action a = table[(size_t)*state_head][token_sym(toklist_peek())]; switch(a.type) { case ACTION_SHIFT:; - struct token *t = eat(); + struct token *t = toklist_eat(); ipush(*(stack_item*)token_val(t)); spush(token_sym(t)); @@ -71,8 +70,9 @@ struct lr_errinfo *lr_parser(void *value) print_stack(); #endif break; - case ACTION_REDUCE: - stack_item semantic_value = semantic_actions[a.arg](item_head); + case ACTION_REDUCE:; + if((semantic_action_r = semantic_actions[a.arg](&semantic_value, item_head))) goto abort; + for(size_t i = 0; i < grammar[a.arg].nRHS; i++) { ipop(); spop(); spop(); } @@ -80,8 +80,8 @@ struct lr_errinfo *lr_parser(void *value) symbol lhs = grammar[a.arg].LHS; struct action a_goto = table[(size_t)*state_head][lhs]; if(a_goto.type != ACTION_GOTO) { - errinfo = (struct lr_errinfo){.type = LR_ERR_NO_GOTO_ENTRY, .idx = {lhs, (size_t)*state_head}}; - return &errinfo; + *parseinfo = (struct lr_parseinfo){.type = LR_ERR_NO_GOTO_ENTRY, .idx = {lhs, (size_t)*state_head}}; + return NULL; } ipush(semantic_value); @@ -95,32 +95,41 @@ struct lr_errinfo *lr_parser(void *value) case ACTION_ACCEPT: ipush((stack_item){0}); spush(0); spush(0); - // todo: better fix for reducing the final production expecting an END_INPUT on the stack - *(stack_item *)value = semantic_actions[0](item_head); - return NULL; + + // todo: better fix for reducing the final production expecting an END_INPUT on the stack + if((semantic_action_r = semantic_actions[0](&semantic_value, item_head))) goto abort; + + ipush(semantic_value); + parseinfo->type = LR_SUCCESS; + return item_head; case ACTION_NOT_SET: default: - errinfo = (struct lr_errinfo){.type = LR_ERR_UNEXPECTED_SYMBOL, .idx = {token_sym(peek()), (size_t)*state_head}}; - return &errinfo; + *parseinfo = (struct lr_parseinfo){.type = LR_ERR_UNEXPECTED_SYMBOL, .idx = {token_sym(toklist_peek()), (size_t)*state_head}}; + return NULL; } } + +abort: + parseinfo->type = LR_ABORTED; + return semantic_action_r; } -char *lr_err_str(struct lr_errinfo *errinfo) +char *lr_err_str(struct lr_parseinfo *parseinfo) { // TODO: check if strbuf cap is exceeded static char strbuf[128]; - switch(errinfo->type) { + switch(parseinfo->type) { case LR_ERR_STACKCAP_EXCEEDED: - snprintf(strbuf, sizeof(strbuf), "LR parser stack capacity of %zu has been exceeded", errinfo->stack_cap); + snprintf(strbuf, sizeof(strbuf), "LR parser stack capacity of %zu has been exceeded", parseinfo->stack_cap); break; case LR_ERR_UNEXPECTED_SYMBOL: - snprintf(strbuf, sizeof(strbuf), "Unexpected symbol '%s' at state '%zu'", symbol_to_str[errinfo->idx.sym], errinfo->idx.state); + snprintf(strbuf, sizeof(strbuf), "Unexpected symbol '%s' at state '%zu'", symbol_to_str[parseinfo->idx.sym], parseinfo->idx.state); break; case LR_ERR_NO_GOTO_ENTRY: - snprintf(strbuf, sizeof(strbuf), "No GOTO state for symbol '%s' at state '%zu'", symbol_to_str[errinfo->idx.sym], errinfo->idx.state); + snprintf(strbuf, sizeof(strbuf), "No GOTO state for symbol '%s' at state '%zu'", symbol_to_str[parseinfo->idx.sym], parseinfo->idx.state); break; + default: break; } return strbuf; @@ -205,7 +214,7 @@ struct token *toklist_peek() { return toklist + tok; } symbol token_sym(struct token *t) { return t->s; } intptr_t token_val(struct token *t) { return (intptr_t)&t->v; } -intmax_t none(intmax_t *stack_head) {(void)stack_head; return 0;} +void *none(intmax_t *v, intmax_t *stack_head) {(void)stack_head; return 0;} semantic_action_fn *semantic_actions = (semantic_action_fn[]){none, none, none, none, none, none, none, none}; int main(void) |
