aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorkartofen <kartofen.mail.0@protonmail.com>2025-09-24 00:06:54 +0300
committerkartofen <kartofen.mail.0@protonmail.com>2025-09-24 00:06:54 +0300
commitfec8e3a95becfb3dc2a3eb0f512a120a7a4551c5 (patch)
tree8d3da9f0c3fa36f5e06bc49cc1bfdc0db3099ea1
parentdb1b9c8dcb0d115217a33c2fe8e0760d49143e11 (diff)
debug info through the lr parser
-rw-r--r--demos/generate-parser.c8
-rw-r--r--demos/sample-files/gram-skeleton.c5
-rw-r--r--demos/sample-files/lbp-code.lbp2
-rw-r--r--demos/sample-files/lbp-skeleton.c267
-rw-r--r--demos/sample-files/lbp.g66
-rw-r--r--lr-parser.c73
6 files changed, 285 insertions, 136 deletions
diff --git a/demos/generate-parser.c b/demos/generate-parser.c
index e0ec5f5..c9c71f3 100644
--- a/demos/generate-parser.c
+++ b/demos/generate-parser.c
@@ -170,14 +170,14 @@ int main(int argc, char **argv)
for(size_t i = 0; i < total_productions; i++) {
printf("#define A(n) (*(stack_head-%zu+n))\n", grammar[i].nRHS-1);
- printf("stack_item __prod%zu_action(stack_item *stack_head)\n", i);
+ printf("void *__prod%zu_action(stack_item *r, stack_item *stack_head)\n", i);
printf("{ stack_item v = {0};\n");
puts(semantic_action_str[i]);
- printf("return v; }\n");
+ printf("*r = v; return NULL; }\n");
printf("#undef A\n");
}
- printf("typedef stack_item (*semantic_action_fn)(stack_item *stack_head);\n");
+ printf("typedef void *(*semantic_action_fn)(stack_item *, stack_item *);\n");
printf("semantic_action_fn *semantic_actions = (semantic_action_fn[]){\n");
for(size_t i = 0; i < total_productions; i++)
@@ -194,7 +194,7 @@ int main(int argc, char **argv)
printf("#include \"parts/grammar.h\"\n");
printf("#include \"parts/table.h\"\n");
printf("typedef %s stack_item;\n", stack_item_type);
- printf("typedef stack_item (*semantic_action_fn)(stack_item *stack_head);\n");
+ printf("typedef void *(*semantic_action_fn)(stack_item *, stack_item *);\n");
printf("extern semantic_action_fn *semantic_actions;\n");
printf("#endif\n");
set_stdout(NULL);
diff --git a/demos/sample-files/gram-skeleton.c b/demos/sample-files/gram-skeleton.c
index 9898c6b..d9616cc 100644
--- a/demos/sample-files/gram-skeleton.c
+++ b/demos/sample-files/gram-skeleton.c
@@ -222,8 +222,9 @@ int main(void)
input = next_token(input_buf);
- intptr_t value;
- if(lr_parser(&value)) {
+ struct lr_parseinfo parseinfo;
+ intptr_t value = *(intptr_t *)lr_parser(&parseinfo);
+ if(parseinfo.type) {
fprintf(stderr, input);
return 1;
}
diff --git a/demos/sample-files/lbp-code.lbp b/demos/sample-files/lbp-code.lbp
index 3750623..8339951 100644
--- a/demos/sample-files/lbp-code.lbp
+++ b/demos/sample-files/lbp-code.lbp
@@ -18,7 +18,7 @@ inbounds/int-function(low, high, val) {
(31 |_, ---).
},
-:aircraft_iden/struct { -. tova_tuk_e_sintaktichna_greshka. },
+:aircraft_iden/struct { -, tova_tuk_e_sintaktichna_greshka. },
:message/struct {
DF/enum(:downlinkfmt) |5,
diff --git a/demos/sample-files/lbp-skeleton.c b/demos/sample-files/lbp-skeleton.c
index bf7bdca..1ee54d8 100644
--- a/demos/sample-files/lbp-skeleton.c
+++ b/demos/sample-files/lbp-skeleton.c
@@ -3,12 +3,20 @@
#include <string.h>
#include <stdint.h>
#include <ctype.h>
+#include <stdarg.h>
-// TODO: - lr parser is bad for debugging
-// - deal with errors
+// TODO: - lr parser is bad for debugging (now it's better)
+// - deal with errors (the token queue for example)!!!!
+// - debuginfo in the token that gets propagated through the
+// stack_items and lr_parse returns a generic errorinfo
+// with user-implemented compilation error messages
+// - ast should show the specific operation (match, assignment, etc)
+
+#define MIN(a, b) ((a) > (b) ? (b) : (a))
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define INPUT_CAP 4096
-#define ARENA_CAP 8192 //4096
+#define ARENA_CAP (4096 * 3) // parenthesized so the value survives expansion inside larger expressions
#define ARENA_IMPLEMENTATION
#include "util/arena.h"
@@ -51,40 +59,75 @@ static void xfree(void *ptr) { (void)ptr; return; }
#include "util/list.h"
-struct ast_strlist { char *str; struct list_head list; };
+struct datatype {
+ int typeflags; // packed with TYPEFLAGS(): base type in the low 3 bits, subtype bits above them
+ // TODO: add ptr
+ char *iden; // optional identifier given in parentheses after struct/enum; NULL when absent
+ struct list_head *function_exprlist; // expression list of a function subtype; NULL otherwise
+};
+
+struct strlist { char *str; struct list_head list; };
+
+// TODO:
+// - type of each exprlist, be it assignment, match, function call, etc
+// - revise expr types, etc
+
+// struct ast_exprlist {
+// enum { AST_OP_ASSIGNMENT, AST_OP_TYPE, AST_OP_CALL } type;
+// struct list_head *exprlist;
+// }
+
+// struct ast_exprlist {
+// enum { AST_OP_ASSIGNMENT, AST_OP_IMPLEMENT, AST_OP_MATCH, AST_OP_EQUAL, AST_OP_CALL } type;
+// struct list_head *exprlist;
+// };
+
+// struct ast_expr {
+// enum { AST_LITERAL, AST_IDENTIFIER, AST_DECLARATION, AST_SUB } type;
+
+// struct datatype datatype;
+// struct list_head list;
+// }
struct ast_expr {
- enum { AST_NUMBER, AST_VARIABLE, AST_FIELDLIST, AST_DECLARATION, AST_DEFINITION, AST_OPERATION, AST_PARENLIST } type;
+ enum { AST_NUMBER, AST_VARIABLE, AST_DECLARATION, AST_FIELDLIST, AST_OPERATION, AST_PARENLIST, AST_BRACELIST } type;
union {
int number;
struct ast_vrbl { int is_atom; char *iden; } variable;
struct ast_fiel { struct ast_vrbl variable; struct list_head *fields_strlist; } fieldlist;
- struct ast_decl { struct ast_vrbl variable; int typed; } declaration;
- struct ast_defn { struct ast_decl declartion; struct list_head *block_exprlist; } definition;
+
struct ast_oprn { enum { AST_OP_AND, AST_OP_OR } optype; struct list_head *left_exprlist; struct list_head *right_exprlist; } operation;
- struct list_head *paren_exprlist;
+ struct list_head *exprlist;
};
+
+ struct datatype datatype;
struct list_head list;
};
-#define NEW(t, ...) ((struct ast_##t){__VA_ARGS__})
-#define g_NEW(t, ...) ((stack_item){.t = (struct ast_##t){__VA_ARGS__}})
+#define AST(t, ...) ((struct ast_##t){__VA_ARGS__})
+#define g_AST(t, ...) ((stack_item){.t = {__VA_ARGS__}})
-#define LST(v) ({ typeof(v) *r = xalloc(sizeof(v)); *r = v; LIST_EMPTY(&r->list); &r->list; })
-#define g_LST(v) ((stack_item){.list = LST(v)})
+#define NEW(t, ...) ((struct t){__VA_ARGS__})
+#define g_NEW(t, ...) ((stack_item){.t = {__VA_ARGS__}})
-void ast_vrbl_print(struct ast_vrbl *vrbl);
-void ast_fiel_print(struct ast_fiel *fiel);
-void ast_decl_print(struct ast_decl *decl);
-void ast_defn_print(struct ast_defn *defn);
-void ast_oprn_print(struct ast_oprn *oprn);
-void ast_expr_print(struct ast_expr *expr);
-void ast_exprlist_print(struct list_head *list);
+#define LST(v) ({ typeof(v) *r = malloc(sizeof(v)); *r = v; LIST_EMPTY(&r->list); &r->list; })
+#define g_LST(v) ((stack_item){.list = LST(v)})
-void ast_exprlist_free(struct list_head *list);
+// void ast_decide_datatype(struct list_head *list);
+void ast_print(struct list_head *list);
+void ast_free(struct list_head *list);
// generated
#include "bin/lbp.h"
+
+enum { DATATYPE_INT = 0, DATATYPE_STRUCT, DATATYPE_ENUM };
+enum { SUBTYPE_FUNCTION = 1 << 0, SUBTYPE_LITTLE = 1 << 1,
+ SUBTYPE_BIG = 1 << 2, SUBTYPE_NATIVE = 1 << 3 };
+
+#define TYPEFLAGS(type, subtype) (((subtype) << 3) | (type))
+#define flags2type(flags) ((flags) & 7)
+#define flags2subtype(flags) ((flags) >> 3)
+
#include "bin/lbp.c"
#include "util/dict.h"
@@ -107,32 +150,31 @@ static uint8_t dict_lowercase_char_to_bit[256] = {
['y'] = 26, ['z'] = 27, [ 0 ] = 1, [' '] = 1
};
+static inline char *substring(char *str, size_t sub_end);
+static inline char *linestart(char *strstart, char *pos);
+static inline size_t tillch(char *str, size_t len, char ch);
+#define strdup(...) _strdup(__VA_ARGS__)
+static inline char *_strdup(char *str);
+
#include "parts/toklist.h"
struct token {
symbol s;
stack_item v;
};
#define TOKEN_INIT(sym, val) (struct token){ .s = sym, .v = val }
-static void print_token(struct token *t);
-
symbol token_sym(struct token *t) { return t->s; }
intptr_t token_val(struct token *t) { return (intptr_t)&t->v; }
+static void print_token(struct token *t);
-static char *input;
static size_t line = 1;
static size_t active_region;
static char *next_token(char *str);
-static inline char *substring(char *str, size_t sub_end);
-static inline char *linestart(char *strstart, char *pos);
-static inline size_t tillch(char *str, size_t len, char ch);
-#define strdup(...) _strdup(__VA_ARGS__)
-static inline char *_strdup(char *str);
-
-
#include "util/queue.h"
QUEUE_GENERATE(tokbuf, struct token, 16)
+static char *input;
+
struct token *toklist_eat()
{
static struct token t;
@@ -147,13 +189,22 @@ struct token *toklist_peek() {
return &t;
}
+struct debuginfo {
+ size_t line; // line number reported in error messages
+ size_t active_region; // length of the highlighted span; the span starts at end_ptr - active_region
+ char *end_ptr; // presumably points just past the span inside the input buffer — TODO confirm
+};
+#define debuginfo_merge(out, ...) _debuginfo_merge(out, __VA_ARGS__, (struct debuginfo *)NULL); // typed sentinel: va_arg reads it as struct debuginfo *
+void _debuginfo_merge(struct debuginfo *out, ...);
+
+void errmsg_print(char *filename, char *input_buf, struct debuginfo *debuginfo, char *message);
+
// #define _LR_PARSER_DEBUG
#include "lr-parser.c"
int main(void)
{
- char *filename = "stdin";
-
+ static char *filename = "stdin";
static char input_buf[INPUT_CAP];
if(fread(input_buf, INPUT_CAP, 1, stdin) == INPUT_CAP) {
fprintf(stderr, "INPUT_CAP reached\n");
@@ -174,26 +225,17 @@ int main(void)
// if(token_sym(tok) == END_INPUT) break;
// } return 0;
- stack_item value;
- struct lr_errinfo *errinfo;
- if((errinfo = lr_parser(&value))) {
- char *l = linestart(input_buf, input);
-
- fprintf(stderr, "%s:%zu:%zu: ERROR: %s\n", filename, line, input - l - active_region+1, lr_err_str(errinfo));
-
- size_t indent = fprintf(stderr, " %zu ", line);
- fprintf(stderr, "| %s\n", substring(l, tillch(l, strlen(l), '\n')));
-
- fprintf(stderr, "%*s| %*s", indent, "", input - l - active_region, "");
- if(active_region == 0) active_region = 1;
- fprintf(stderr, "^"); for(size_t i = 0; i < active_region-1; i++) fprintf(stderr, "~");
-
- fprintf(stderr, "\n\n");
+ struct lr_parseinfo parseinfo;
+ void *value = lr_parser(&parseinfo);
+ if(parseinfo.type == LR_ABORTED) {
+ goto cleanup;
+ } else if(parseinfo.type) {
+ errmsg_print(filename, input_buf, NULL, lr_err_str(&parseinfo));
goto cleanup;
}
- ast_exprlist_print(value.list);
- // ast_exprlist_free(value.list);
+ ast_print(((stack_item *)value)->list);
+ ast_free(((stack_item *)value)->list);
cleanup:
dict_free(&types_dict);
@@ -213,6 +255,46 @@ static void print_token(struct token *tok)
}
}
+void _debuginfo_merge(struct debuginfo *out, ...) // merges a NULL-terminated list of debuginfo spans into *out
+{
+ va_list ap;
+ va_start(ap, out);
+ // The first variadic argument seeds *out (including its line) and is dereferenced unchecked.
+ *out = *va_arg(ap, struct debuginfo *);
+ // Every further argument, up to the terminating NULL, widens the span kept in *out.
+ struct debuginfo *arg;
+ while((arg = va_arg(ap, struct debuginfo *))) {
+ intptr_t start = MIN((intptr_t)out->end_ptr - out->active_region,
+ (intptr_t)arg->end_ptr - arg->active_region);
+ intptr_t end = MAX((intptr_t)out->end_ptr, (intptr_t)arg->end_ptr);
+ // The merged span runs from the earliest span start to the latest end pointer.
+ out->active_region = end - start;
+ out->end_ptr = (char *)end;
+ }
+
+ va_end(ap);
+}
+
+void errmsg_print(char *filename, char *input_buf, struct debuginfo *debuginfo, char *message)
+{
+ // Prints "file:line:col: ERROR: message", the offending source line and a ~~~ underline to stderr.
+ // A NULL debuginfo means "use the lexer's current position"; the fallback lives at function
+ // scope because a compound literal created inside the `if` body would die with that block.
+ struct debuginfo current = {.line = line, .active_region = active_region, .end_ptr = input};
+ if(!debuginfo) debuginfo = &current;
+
+ char *l = linestart(input_buf, debuginfo->end_ptr);
+ size_t column = debuginfo->end_ptr - l - debuginfo->active_region; // 0-based start of the span
+ size_t width = debuginfo->active_region ? debuginfo->active_region : 1; // underline at least one char
+
+ fprintf(stderr, "%s:%zu:%zu: ERROR: %s\n", filename, debuginfo->line, column + 1, message);
+ int indent = fprintf(stderr, " %zu ", debuginfo->line);
+ fprintf(stderr, "| %s\n", substring(l, tillch(l, strlen(l), '\n')));
+ fprintf(stderr, "%*s| %*s", indent, "", (int)column, ""); // `*` field widths must be int
+ for(size_t i = 0; i < width; i++) fprintf(stderr, "~");
+ fprintf(stderr, "\n");
+}
+
// STR UTIL
#define strdup(...) _strdup(__VA_ARGS__)
@@ -336,9 +418,10 @@ static char *next_token(char *str)
sub_off = tillch(str + 1, off - 1, ':') + 1;
if(hasfield) tokbuf_enqueue(&TOKEN_INIT(COLON, { .num=0 }));
+ int skip = hasfield || str[0] == ':';
tokbuf_enqueue(
- &TOKEN_INIT((!hasfield && str[0] == ':') ? ATOM : IDEN,
- { .str = strdup(substring(str+hasfield, sub_off-hasfield))}));
+ &TOKEN_INIT((skip && !hasfield) ? ATOM : IDEN,
+ { .str = strdup(substring(str+skip, sub_off-skip))}));
} while(hasfield = 1, str += sub_off, off -= sub_off, off > 0);
return str;
@@ -357,6 +440,34 @@ fail:
// ast printing
+int ident = 0;
+#define INDENT() printf("%*s", ident*2, "");
+
+void ast_exprlist_print(struct list_head *list);
+
+void datatype_print(struct datatype *datatype)
+{
+ // Prints the base type, an optional "(iden)" and one "-suffix" per set subtype bit.
+ switch(flags2type(datatype->typeflags)) {
+ case DATATYPE_INT: printf("int"); break;
+ case DATATYPE_ENUM: printf("enum"); break;
+ case DATATYPE_STRUCT: printf("struct"); break;
+ }
+
+ if(datatype->iden) printf("(%s)", datatype->iden);
+ // SUBTYPE_NATIVE is bit 3, so the scan has to cover four bits (FUNCTION, LITTLE, BIG, NATIVE).
+ for(int i = 0; i < 4; i++)
+ switch(flags2subtype(datatype->typeflags) & (1 << i)) {
+ case 0: break;
+ case SUBTYPE_FUNCTION:
+ printf("-function(");
+ ast_exprlist_print(datatype->function_exprlist);
+ printf(")");
+ break;
+ default: printf("-%d", i); break; // TODO: print the subtype name instead of its bit index
+ }
+}
+
void ast_vrbl_print(struct ast_vrbl *vrbl)
{
printf("%s%s", vrbl->is_atom ? ":" : "", vrbl->iden);
@@ -365,25 +476,25 @@ void ast_vrbl_print(struct ast_vrbl *vrbl)
void ast_fiel_print(struct ast_fiel *fiel)
{
ast_vrbl_print(&fiel->variable);
- list_for_each_entry(struct ast_strlist, entry, list, fiel->fields_strlist) {
+ list_for_each_entry(struct strlist, entry, list, fiel->fields_strlist) {
printf(":%s", entry->str);
}
}
-void ast_decl_print(struct ast_decl *decl)
-{
- // TODO: implement
-}
-
-void ast_defn_print(struct ast_defn *defn)
+void ast_decl_print(struct ast_vrbl *vrbl, struct datatype *datatype)
{
- // TODO: implement
+ ast_vrbl_print(vrbl);
+ printf("/");
+ datatype_print(datatype);
}
void ast_oprn_print(struct ast_oprn *oprn)
{
ast_exprlist_print(oprn->left_exprlist);
+
if(oprn->optype == AST_OP_AND) printf(",\n"); else printf(";\n");
+ INDENT();
+
ast_exprlist_print(oprn->right_exprlist);
}
@@ -396,21 +507,34 @@ void ast_exprlist_print(struct list_head *list)
case AST_NUMBER: printf("%d", entry->number); break;
case AST_VARIABLE: ast_vrbl_print(&entry->variable); break;
case AST_FIELDLIST: ast_fiel_print(&entry->fieldlist); break;
- case AST_DECLARATION: ast_decl_print(&entry->declaration); break;
+ case AST_DECLARATION: ast_decl_print(&entry->variable, &entry->datatype); break;
case AST_OPERATION: ast_oprn_print(&entry->operation); break;
case AST_PARENLIST:
- printf("(");
- ast_exprlist_print(entry->paren_exprlist);
- printf(")");
+ printf("(\n"); ident++;
+ INDENT(); ast_exprlist_print(entry->exprlist);
+ printf("\n"); ident--;
+ INDENT(); printf(")");
+ break;
+ case AST_BRACELIST:
+ printf("{\n"); ident++;
+ INDENT(); ast_exprlist_print(entry->exprlist);
+ printf(".\n"); ident--;
+ INDENT(); printf("}");
break;
default: fprintf(stderr, "UNKNOWN TYPE: %d\n", entry->type);
}
- printf(" ");
+
+ if(entry->list.next) printf(" ");
}
- printf("\n");
}
-void ast_exprlist_free(struct list_head *list)
+void ast_print(struct list_head *list)
+{
+ ast_exprlist_print(list); // the expressions themselves
+ fputs(".\n", stdout); // closing dot that terminates the top-level list
+}
+
+void ast_free(struct list_head *list)
{
list_for_each_safe(l, list) {
struct ast_expr *entry = list_entry(l, typeof(*entry), list);
@@ -420,18 +544,19 @@ void ast_exprlist_free(struct list_head *list)
case AST_VARIABLE: break;
case AST_FIELDLIST:
list_for_each_safe(l, entry->fieldlist.fields_strlist)
- free(list_entry(l, struct ast_strlist, list));
+ free(list_entry(l, struct strlist, list));
break;
case AST_DECLARATION: break;
- case AST_DEFINITION: break;
case AST_OPERATION:
- ast_exprlist_free(entry->operation.left_exprlist);
- ast_exprlist_free(entry->operation.right_exprlist);
+ ast_free(entry->operation.left_exprlist);
+ ast_free(entry->operation.right_exprlist);
break;
- case AST_PARENLIST: ast_exprlist_free(entry->paren_exprlist); break;
+ case AST_BRACELIST:
+ case AST_PARENLIST: ast_free(entry->exprlist); break;
default: fprintf(stderr, "UNKNOWN TYPE: %d\n", entry->type);
}
+ ast_free(entry->datatype.function_exprlist);
free(entry);
}
}
diff --git a/demos/sample-files/lbp.g b/demos/sample-files/lbp.g
index 1dd176c..a5532ca 100644
--- a/demos/sample-files/lbp.g
+++ b/demos/sample-files/lbp.g
@@ -9,18 +9,16 @@
-nonterminal S exprlist expr sym fieldlist basetype subtypelist.
-stacktype { union {
+ struct ast_expr expr;
+
int num;
char *str;
-
struct ast_vrbl vrbl;
- struct ast_fiel fiel;
- struct ast_decl decl;
- struct ast_defn defn;
- struct ast_oprn oprn;
-
- struct ast_expr expr;
- struct ast_strlist strlist;
+ struct strlist strlist;
struct list_head *list;
+
+ struct { int type; char *iden; } basetype;
+ struct { int type; struct list_head *exprlist; } subtype;
}}.
-left LPAREN;
@@ -28,28 +26,44 @@
S: exprlist DOT { v = A(0); };
-exprlist: expr { v = g_LST(A(0).expr); }
+exprlist: expr { v = g_LST(A(0).expr); /* determine type??? */ }
| expr exprlist { v = g_LST(A(0).expr); v.list->next = A(1).list; }
- | exprlist COMMA exprlist { v = g_LST(NEW(expr, .type = AST_OPERATION, .operation = NEW(oprn, .optype = AST_OP_AND, .left_exprlist = A(0).list, .right_exprlist = A(2).list))); }
- | exprlist SEMICOL exprlist { v = g_LST(NEW(expr, .type = AST_OPERATION, .operation = NEW(oprn, .optype = AST_OP_OR, .left_exprlist = A(0).list, .right_exprlist = A(2).list)));};
+ | exprlist COMMA exprlist
+ { v = g_LST(AST(expr, .type = AST_OPERATION, .operation =
+ AST(oprn, .optype = AST_OP_AND, .left_exprlist = A(0).list, .right_exprlist = A(2).list))); }
+ | exprlist SEMICOL exprlist
+ { v = g_LST(AST(expr, .type = AST_OPERATION, .operation =
+ AST(oprn, .optype = AST_OP_OR, .left_exprlist = A(0).list, .right_exprlist = A(2).list)));};
-expr: NUM { v = g_NEW(expr, .type = AST_NUMBER, .number = A(0).num); }
- | sym { v = g_NEW(expr, .type = AST_VARIABLE, .variable = A(0).vrbl); }
- | sym fieldlist { v = g_NEW(expr, .type = AST_FIELDLIST, .fieldlist = NEW(fiel, .variable = A(0).vrbl, .fields_strlist = A(1).list)); }
- | sym TYPELIST_START basetype TYPELIST_END { v = g_NEW(expr, .type = AST_VARIABLE, .variable = A(0).vrbl); }
- | sym TYPELIST_START basetype subtypelist TYPELIST_END { v = g_NEW(expr, .type = AST_VARIABLE, .variable = A(0).vrbl); }
- | LBRACE exprlist DOT RBRACE { v = g_NEW(expr, .type = AST_PARENLIST, .paren_exprlist = A(1).list); }
- | LPAREN exprlist RPAREN { v = g_NEW(expr, .type = AST_PARENLIST, .paren_exprlist = A(1).list); };
+expr: NUM { v = g_AST(expr, .type = AST_NUMBER, .number = A(0).num); }
+ | sym { v = g_AST(expr, .type = AST_VARIABLE, .variable = A(0).vrbl); }
+ | sym fieldlist
+ { v = g_AST(expr, .type = AST_FIELDLIST, .fieldlist = AST(fiel, .variable = A(0).vrbl, .fields_strlist = A(1).list)); }
+ | sym TYPELIST_START basetype TYPELIST_END
+ { v = g_AST(expr, .type = AST_DECLARATION, .variable = A(0).vrbl,
+ .datatype = NEW(datatype, .typeflags = A(2).basetype.type,
+ .iden = A(2).basetype.iden)); }
+ | sym TYPELIST_START basetype subtypelist TYPELIST_END
+ { v = g_AST(expr, .type = AST_DECLARATION, .variable = A(0).vrbl,
+ .datatype = NEW(datatype, .typeflags = TYPEFLAGS(A(2).basetype.type, A(3).subtype.type),
+ .iden = A(2).basetype.iden,
+ .function_exprlist = A(3).subtype.exprlist)); }
+ | LBRACE exprlist DOT RBRACE { v = g_AST(expr, .type = AST_BRACELIST, .exprlist = A(1).list); }
+ | LPAREN exprlist RPAREN { v = g_AST(expr, .type = AST_PARENLIST, .exprlist = A(1).list); };
-sym: IDEN { v = g_NEW(vrbl, .is_atom = 0, .iden = A(0).str); }
- | ATOM { v = g_NEW(vrbl, .is_atom = 1, .iden = A(0).str); };
+sym: IDEN { v = g_AST(vrbl, .is_atom = 0, .iden = A(0).str); }
+ | ATOM { v = g_AST(vrbl, .is_atom = 1, .iden = A(0).str); };
fieldlist: COLON IDEN { v = g_LST(NEW(strlist, .str = A(1).str)); }
| fieldlist fieldlist { A(0).list->next = A(1).list; v = A(0); };
-basetype: T_INT {}
- | T_STRUCT {} | T_STRUCT LPAREN ATOM RPAREN {}
- | T_ENUM {} | T_ENUM LPAREN ATOM RPAREN {};
-subtypelist: ST_FUNCTION LPAREN exprlist RPAREN {}
- | ST_BIG {} | ST_LITTLE {} | ST_NATIVE {}
- | subtypelist subtypelist {}.
+basetype: T_INT { v = g_NEW(basetype, .type = DATATYPE_INT); }
+ | T_STRUCT { v = g_NEW(basetype, .type = DATATYPE_STRUCT); }
+ | T_ENUM { v = g_NEW(basetype, .type = DATATYPE_ENUM); }
+ | T_STRUCT LPAREN ATOM RPAREN { v = g_NEW(basetype, .type = DATATYPE_STRUCT, .iden = A(2).str); }
+ | T_ENUM LPAREN ATOM RPAREN { v = g_NEW(basetype, .type = DATATYPE_ENUM, .iden = A(2).str); };
+subtypelist: ST_FUNCTION LPAREN exprlist RPAREN { v = g_NEW(subtype, .type = SUBTYPE_FUNCTION, .exprlist = A(2).list); }
+ | ST_BIG { v = g_NEW(subtype, .type = SUBTYPE_BIG); }
+ | ST_LITTLE { v = g_NEW(subtype, .type = SUBTYPE_LITTLE); }
+ | ST_NATIVE { v = g_NEW(subtype, .type = SUBTYPE_NATIVE); }
+ | subtypelist subtypelist { v = A(1); v.subtype.type |= A(0).subtype.type; if(A(0).subtype.exprlist) v.subtype.exprlist = A(0).subtype.exprlist; }.
diff --git a/lr-parser.c b/lr-parser.c
index a909f7f..68b874a 100644
--- a/lr-parser.c
+++ b/lr-parser.c
@@ -12,14 +12,15 @@
#include "parts/table.h"
#include "parts/toklist.h"
// and
-typedef stack_item (*semantic_action_fn)(stack_item *item_head);
+typedef void *(*semantic_action_fn)(stack_item *, stack_item *);
extern semantic_action_fn *semantic_actions;
-#define STACK_CAP 128
+#define ARR_LEN(arr) (sizeof(arr)/sizeof(*arr))
+#define STACK_CAP 64
static stack_item item_bottom[STACK_CAP];
static stack_item *item_head = item_bottom;
-static int state_bottom[STACK_CAP];
+static int state_bottom[STACK_CAP*2];
static int *state_head = state_bottom;
static void print_stack()
@@ -30,38 +31,36 @@ static void print_stack()
fprintf(stderr, "}\n\n");
}
-struct lr_errinfo {
- enum lr_errtype { LR_ERR_STACKCAP_EXCEEDED, LR_ERR_UNEXPECTED_SYMBOL, LR_ERR_NO_GOTO_ENTRY } type;
+struct lr_parseinfo {
+ enum { LR_SUCCESS = 0, LR_ABORTED, LR_ERR_STACKCAP_EXCEEDED, LR_ERR_UNEXPECTED_SYMBOL, LR_ERR_NO_GOTO_ENTRY } type;
union {
size_t stack_cap;
struct { symbol sym; size_t state; } idx;
};
};
-struct lr_errinfo *lr_parser(void *value)
+void *lr_parser(struct lr_parseinfo *parseinfo)
{
- static struct lr_errinfo errinfo;
+ void *semantic_action_r;
+ stack_item semantic_value;
#define push(stack_head, stack_bottom, item) do { \
- if(++stack_head - stack_bottom < STACK_CAP ) *stack_head = item; \
- else { errinfo = (struct lr_errinfo){.type = LR_ERR_STACKCAP_EXCEEDED, .stack_cap = STACK_CAP }; return &errinfo; } \
+ if((size_t)(++stack_head - stack_bottom) < (size_t)ARR_LEN(stack_bottom) ) *stack_head = item; \
+ else { *parseinfo = (struct lr_parseinfo){.type = LR_ERR_STACKCAP_EXCEEDED, .stack_cap = STACK_CAP }; return NULL; } \
} while(0)
-#define pop(stack_head) (--stack_head)
+#define pop(stack_head, stack_bottom) if(--stack_head < stack_bottom) exit(33);
#define spush(item) push(state_head, state_bottom, item)
-#define spop() pop(state_head)
+#define spop() pop(state_head, state_bottom)
#define ipush(item) push(item_head, item_bottom, item)
-#define ipop() pop(item_head)
-
-#define eat() toklist_eat()
-#define peek() toklist_peek()
+#define ipop() pop(item_head, item_bottom)
while(1) {
- struct action a = table[(size_t)*state_head][token_sym(peek())];
+ struct action a = table[(size_t)*state_head][token_sym(toklist_peek())];
switch(a.type) {
case ACTION_SHIFT:;
- struct token *t = eat();
+ struct token *t = toklist_eat();
ipush(*(stack_item*)token_val(t));
spush(token_sym(t));
@@ -71,8 +70,9 @@ struct lr_errinfo *lr_parser(void *value)
print_stack();
#endif
break;
- case ACTION_REDUCE:
- stack_item semantic_value = semantic_actions[a.arg](item_head);
+ case ACTION_REDUCE:;
+ if((semantic_action_r = semantic_actions[a.arg](&semantic_value, item_head))) goto abort;
+
for(size_t i = 0; i < grammar[a.arg].nRHS; i++) {
ipop(); spop(); spop();
}
@@ -80,8 +80,8 @@ struct lr_errinfo *lr_parser(void *value)
symbol lhs = grammar[a.arg].LHS;
struct action a_goto = table[(size_t)*state_head][lhs];
if(a_goto.type != ACTION_GOTO) {
- errinfo = (struct lr_errinfo){.type = LR_ERR_NO_GOTO_ENTRY, .idx = {lhs, (size_t)*state_head}};
- return &errinfo;
+ *parseinfo = (struct lr_parseinfo){.type = LR_ERR_NO_GOTO_ENTRY, .idx = {lhs, (size_t)*state_head}};
+ return NULL;
}
ipush(semantic_value);
@@ -95,32 +95,41 @@ struct lr_errinfo *lr_parser(void *value)
case ACTION_ACCEPT:
ipush((stack_item){0});
spush(0); spush(0);
- // todo: better fix for reducing the final production expecting an END_INPUT on the stack
- *(stack_item *)value = semantic_actions[0](item_head);
- return NULL;
+
+ // todo: better fix for reducing the final production expecting an END_INPUT on the stack
+ if((semantic_action_r = semantic_actions[0](&semantic_value, item_head))) goto abort;
+
+ ipush(semantic_value);
+ parseinfo->type = LR_SUCCESS;
+ return item_head;
case ACTION_NOT_SET:
default:
- errinfo = (struct lr_errinfo){.type = LR_ERR_UNEXPECTED_SYMBOL, .idx = {token_sym(peek()), (size_t)*state_head}};
- return &errinfo;
+ *parseinfo = (struct lr_parseinfo){.type = LR_ERR_UNEXPECTED_SYMBOL, .idx = {token_sym(toklist_peek()), (size_t)*state_head}};
+ return NULL;
}
}
+
+abort:
+ parseinfo->type = LR_ABORTED;
+ return semantic_action_r;
}
-char *lr_err_str(struct lr_errinfo *errinfo)
+char *lr_err_str(struct lr_parseinfo *parseinfo)
{
// TODO: check if strbuf cap is exceeded
static char strbuf[128];
- switch(errinfo->type) {
+ switch(parseinfo->type) {
case LR_ERR_STACKCAP_EXCEEDED:
- snprintf(strbuf, sizeof(strbuf), "LR parser stack capacity of %zu has been exceeded", errinfo->stack_cap);
+ snprintf(strbuf, sizeof(strbuf), "LR parser stack capacity of %zu has been exceeded", parseinfo->stack_cap);
break;
case LR_ERR_UNEXPECTED_SYMBOL:
- snprintf(strbuf, sizeof(strbuf), "Unexpected symbol '%s' at state '%zu'", symbol_to_str[errinfo->idx.sym], errinfo->idx.state);
+ snprintf(strbuf, sizeof(strbuf), "Unexpected symbol '%s' at state '%zu'", symbol_to_str[parseinfo->idx.sym], parseinfo->idx.state);
break;
case LR_ERR_NO_GOTO_ENTRY:
- snprintf(strbuf, sizeof(strbuf), "No GOTO state for symbol '%s' at state '%zu'", symbol_to_str[errinfo->idx.sym], errinfo->idx.state);
+ snprintf(strbuf, sizeof(strbuf), "No GOTO state for symbol '%s' at state '%zu'", symbol_to_str[parseinfo->idx.sym], parseinfo->idx.state);
break;
+ default: break;
}
return strbuf;
@@ -205,7 +214,7 @@ struct token *toklist_peek() { return toklist + tok; }
symbol token_sym(struct token *t) { return t->s; }
intptr_t token_val(struct token *t) { return (intptr_t)&t->v; }
-intmax_t none(intmax_t *stack_head) {(void)stack_head; return 0;}
+void *none(intmax_t *v, intmax_t *stack_head) {(void)stack_head; return 0;}
semantic_action_fn *semantic_actions = (semantic_action_fn[]){none, none, none, none, none, none, none, none};
int main(void)