diff options
| author | kartofen <kartofen.mail.0@protonmail.com> | 2025-07-24 23:43:25 +0300 |
|---|---|---|
| committer | kartofen <kartofen.mail.0@protonmail.com> | 2025-07-24 23:43:25 +0300 |
| commit | 059ee9afcc575572f87f224c93288e2835cd1a52 (patch) | |
| tree | 07c315c5bfa192722e4bfb974b1df651089fbacc | |
| parent | 1d6f6e7c6a07832b3524871fdec86f5329736598 (diff) | |
actually parsing grammar and generating a def.c file
| -rw-r--r-- | demos/generate-parser.c | 2 | ||||
| -rw-r--r-- | demos/sample-files/gram-defs.c | 72 | ||||
| -rw-r--r-- | demos/sample-files/gram-skeleton.c | 175 | ||||
| -rw-r--r-- | lr-parser.c | 2 | ||||
| -rw-r--r-- | util/arena.h | 10 | ||||
| -rw-r--r-- | util/list.h | 79 |
6 files changed, 298 insertions, 42 deletions
diff --git a/demos/generate-parser.c b/demos/generate-parser.c index 9987265..d1391d5 100644 --- a/demos/generate-parser.c +++ b/demos/generate-parser.c @@ -164,7 +164,7 @@ int main(int argc, char **argv) for(size_t i = 0; i < total_productions; i++) { printf("#define A(n) (*(stack_head-3*%zu+3*n-1))\n", grammar[i].nRHS-1); printf("intptr_t __prod%zu_action(intmax_t *stack_head)\n", i); - printf("{ intptr_t v;\n"); + printf("{ intptr_t v = 0;\n"); puts(semantic_action_str[i]); printf("return v; }\n"); printf("#undef A\n"); diff --git a/demos/sample-files/gram-defs.c b/demos/sample-files/gram-defs.c index 733a866..b93a950 100644 --- a/demos/sample-files/gram-defs.c +++ b/demos/sample-files/gram-defs.c @@ -1,12 +1,13 @@ #include "util/util.h" -#define SYMBOLS(X) \ - X(COLON) X(PIPE) X(SEMICOL) X(DOT) \ - X(D_LEFT) X(D_RIGHT) X(D_TERMINAL) X(D_NONTERM) \ - X(IDEN) X(NUM) X(ACTION) X(END_INPUT) \ - \ - X(Sp) X(S) X(Slist) X(Prod) X(Prec) \ - X(Prodlist) X(Idenlist) X(IorN) X(IorNlist) \ - X(SYMBOLS_END) \ +#define SYMBOLS(X) \ + X(TERMINAL) X(NONTERM) X(LEFT) X(RIGHT) \ + X(COLON) X(PIPE) X(SEMICOL) X(DOT) \ + X(IDEN) X(NUM) X(ACTION) X(END_INPUT) \ + \ + X(Sp) X(A) X(B) X(C) \ + X(Type) X(Prec) X(Prod) X(Preclist) X(Prodlist) \ + X(Actionlist) X(Idenlist) X(IorNlist) \ + X(SYMBOLS_END) \ #include "parts/symbol.h" enum symbol { SYMBOLS(X_TO_ENUM) }; @@ -20,25 +21,41 @@ IMPLEMENT_FUNCPTR(int, symbol_is_valid, (symbol s)) { return s < SYMBOLS_END; } #include "parts/grammar.h" #define PROD(LHS, _, ...) {LHS, (symbol[]){__VA_ARGS__}, sizeof((symbol[]){__VA_ARGS__})/sizeof(symbol)} -#define GRAMMAR_ACTION_DEF(X) \ - X(PROD(Sp, ->, Slist, END_INPUT), "v = 0;") \ - X(PROD(Slist, -->, S, SEMICOL, Slist), "v = 0;") \ - X(PROD(Slist, -->, S, DOT), "v = 0;") \ - X(PROD(S, -->, Prod), "v = 0;") \ - X(PROD(S, -->, Prec), "v = 0;") \ - X(PROD(Idenlist, -->, IDEN, Idenlist), "v = 0;") \ - X(PROD(Idenlist, -->, IDEN), "v = 0;") \ - X(PROD(Prod, -->, IDEN, COLON, Prodlist), "v = 0;") \ - X(PROD(Prodlist, -->, Idenlist, ACTION, PIPE, Prodlist), "printf(\"ACTION: '%s'\\n\", A(1));") \ - X(PROD(Prodlist, -->, Idenlist, ACTION), "printf(\"ACTION: '%s'\\n\", A(1));") \ - X(PROD(Prec, -->, D_TERMINAL, Idenlist), "v = 0;") \ - X(PROD(Prec, -->, D_NONTERM, Idenlist), "v = 0;") \ - X(PROD(Prec, -->, D_LEFT, IorNlist), "v = 0;") \ - X(PROD(Prec, -->, D_RIGHT, IorNlist), "v = 0;") \ - X(PROD(IorNlist, -->, IorN, IorNlist), "v = 0;") \ - X(PROD(IorNlist, -->, IorN), "v = 0;") \ - X(PROD(IorN, -->, IDEN), "v = 0;") \ - X(PROD(IorN, -->, NUM), "v = 0;") +#define GRAMMAR_ACTION_DEF(X) \ + X(PROD(Sp, -->, A, B, C, END_INPUT), "") \ + \ + X(PROD(A, -->, TERMINAL, Idenlist, \ + SEMICOL, NONTERM, Idenlist, DOT), "handle_type(A(1), A(4))") \ + \ + X(PROD(B, -->, Preclist), "handle_prec(A(0));") \ + X(PROD(Preclist, -->, Prec, SEMICOL, Preclist), \ + "v = list_new_head(A(2), A(0));") \ + X(PROD(Preclist, -->, Prec, DOT), "v = A(0);") \ + X(PROD(Prec, -->, LEFT, IorNlist), \ + "v = prec_new(A(1), PRECEDENCE_LEFT_ASSOC);") \ + X(PROD(Prec, -->, RIGHT, IorNlist), \ + "v = prec_new(A(1), PRECEDENCE_RIGHT_ASSOC);") \ + \ + X(PROD(C, -->, Prodlist), "handle_prod(A(0));") \ + X(PROD(Prodlist, -->, Prod, SEMICOL, Prodlist), \ + "v = list_new_head(A(2), A(0));") \ + X(PROD(Prodlist, -->, Prod, DOT), "v = A(0);") \ + X(PROD(Prod, -->, IDEN, COLON, Actionlist), \ + "v = prod_new(A(0), A(2));") \ + X(PROD(Actionlist, -->, Idenlist, ACTION, PIPE, Actionlist), \ + "v = list_new_head(A(3), action_new(A(0), A(1)));") \ + X(PROD(Actionlist, -->, Idenlist, ACTION), \ + "v = action_new(A(0), A(1));") \ + \ + X(PROD(Idenlist, -->, IDEN, Idenlist), \ + "v = list_new_head(A(1), ptr_new(A(0)));") \ + X(PROD(Idenlist, -->, IDEN), "v = ptr_new(A(0));") \ + X(PROD(IorNlist, -->, IDEN, IorNlist), \ + "v = list_new_head(A(1), ptr_new(A(0)));") \ + X(PROD(IorNlist, -->, IDEN), "v = ptr_new(A(0));") \ + X(PROD(IorNlist, -->, NUM, IorNlist), \ + "v = list_new_head(A(1), num_new(A(0)));") \ + X(PROD(IorNlist, -->, NUM), "v = num_new(A(0));") \ #define X_GRAMMAR(G, A) G, #define X_ACTION(G, A) A, @@ -61,5 +78,6 @@ struct precedence_def { int *list; size_t nlist; }; + struct precedence_def *precedence_defs = NULL; size_t nprecedence_defs = 0; diff --git a/demos/sample-files/gram-skeleton.c b/demos/sample-files/gram-skeleton.c index 89ef6b4..a5899ac 100644 --- a/demos/sample-files/gram-skeleton.c +++ b/demos/sample-files/gram-skeleton.c @@ -1,13 +1,14 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <stdint.h> #include <ctype.h> #define ARENA_IMPLEMENTATION #include "util/arena.h" -static char buf[1024]; -static struct arena_ctx global_arena = ARENA_CTX_INIT(buf, sizeof(buf)); +static char buf[2048]; +static struct arena_ctx global_arena; static void *xalloc(size_t sz) { void *addr = arena_allocate(&global_arena, sz); if(!addr) { @@ -17,6 +18,146 @@ static void *xalloc(size_t sz) { return addr; } +#include "parts/precedence.h" +#include "util/list.h" + +static inline struct list_head *list_new_head(struct list_head *head, struct list_head *new) +{ + if(head) list_add(new, head); + return new; +} + +struct ptr_entry { + intptr_t data; + struct list_head list; +}; + +struct prec_entry { + enum precedence_flag flag; + struct list_head *ptrlist; + struct list_head list; +}; + +struct strnptr_entry { + char *str; + struct list_head *ptrlist; + struct list_head list; +}; + +#define new_entry(type, entry, __do__) \ + { \ + type *entry = xalloc(sizeof(type)); \ + LIST_EMPTY(&entry->list); \ + __do__; \ + return &entry->list; \ + } + +struct list_head *ptr_new(void *ptr) + new_entry(struct ptr_entry, entry, { + entry->data = (intptr_t)ptr; + }); +struct list_head *num_new(intmax_t num) + new_entry(struct ptr_entry, entry, { + entry->data = (num << 1) | 0x1; + }); +struct list_head *prec_new(struct list_head *idenlist, enum precedence_flag flag) + new_entry(struct prec_entry, entry, { + entry->ptrlist = idenlist; + entry->flag = flag; + }); +struct list_head *action_new(struct list_head *idenlist, char *action) + new_entry(struct strnptr_entry, entry, { + entry->str = action; + entry->ptrlist = idenlist; + }); +struct list_head *prod_new(char *iden, struct list_head *actionlist) + new_entry(struct strnptr_entry, entry, { + entry->str = iden; + entry->ptrlist = actionlist; + }); + +void handle_type(struct list_head *terminals, struct list_head *nonterminals) +{ + printf("#include \"parts/symbol.h\"\n"); + printf("enum symbol { "); + list_for_each_entry(struct ptr_entry, entry, list, terminals) + printf("%s, ", (char *)entry->data); + printf("END_INPUT, "); + list_for_each_entry(struct ptr_entry, entry, list, nonterminals) + printf("%s, ", (char *)entry->data); + printf("SYMBOLS_END };\n"); + + printf("char **symbol_to_str = (char *([])){ "); + list_for_each_entry(struct ptr_entry, entry, list, terminals) + printf("\"%s\", ", (char *)entry->data); + printf("\"END_INPUT\","); + list_for_each_entry(struct ptr_entry, entry, list, nonterminals) + printf("\"%s\", ", (char *)entry->data); + printf("\"SYMBOLS_END\" };\n"); + + printf("IMPLEMENT_FUNCPTR(int, symbol_is_terminal, (symbol s)) { return s < %s; }\n", + (char *)container_of(nonterminals, struct ptr_entry, list)->data); + printf("IMPLEMENT_FUNCPTR(int, symbol_is_input_end, (symbol s)) { return s == END_INPUT; }\n"); + printf("IMPLEMENT_FUNCPTR(int, symbol_is_valid, (symbol s)) { return s < SYMBOLS_END; }\n"); +} + +void handle_prec(struct list_head *preclist) +{ + printf("#include \"parts/precedence.h\"\n"); + printf("struct precedence_def {\n"); + printf(" int flag;\n"); + printf(" int *list;\n"); + printf(" size_t nlist;\n"); + printf("};\n"); + printf("struct precedence_def *precedence_defs = (struct precedence_def[]){\n"); + list_for_each_entry(struct prec_entry, entry, list, preclist) { + printf("{ %d, (int[]){", entry->flag); + list_for_each_entry(struct ptr_entry, e, list, entry->ptrlist) + if((e->data & 0x1) == 0) printf("%s, ", (char *)e->data); + else printf("~%ju, ", e->data >> 1); + printf("}, %zu}, ", list_len(entry->ptrlist)); + } + printf("};\n"); + printf("size_t nprecedence_defs = %zu;\n", list_len(preclist)); +} + +void handle_prod(struct list_head *prodlist) +{ + size_t productions = 0; + + printf("#include \"parts/grammar.h\"\n"); + printf("struct production *grammar = (struct production[]){\n"); + list_for_each_entry(struct strnptr_entry, e1, list, prodlist) { + productions += list_len(e1->ptrlist); + list_for_each_entry(struct strnptr_entry, e2, list, e1->ptrlist) { + printf("{%s, (symbol[]){ ", e1->str); + list_for_each_entry(struct ptr_entry, e3, list, e2->ptrlist) + printf("%s, ", (char *)e3->data); + printf("}, %zu}, \n", list_len(e2->ptrlist)); + } + } + printf("};\n"); + printf("size_t total_productions = %zu;\n", productions); + + printf("char **semantic_action_str = (char *([])){"); + list_for_each_entry(struct strnptr_entry, e1, list, prodlist) + list_for_each_entry(struct strnptr_entry, e2, list, e1->ptrlist) + printf("\"%s\", ", e2->str); // todo: escape the quotes + printf("};\n"); +} + +#define list_new_head(head, new) (intptr_t)list_new_head((struct list_head *)head, (struct list_head *)new) + +#define ptr_new(iden) (intptr_t)ptr_new((void *)iden) +#define num_new(num) (intptr_t)num_new(num) +#define prec_new(idenlist, flag) (intptr_t)prec_new((struct list_head *)idenlist, flag) +#define action_new(idenlist, action) (intptr_t)action_new((struct list_head *)idenlist, (char *)action) +#define prod_new(iden, actionlist) (intptr_t)prod_new((char *)iden, (struct list_head *)actionlist) + +#define handle_type(terminals, nonterminals) handle_type((struct list_head *)terminals, (struct list_head *)nonterminals); +#define handle_prec(preclist) handle_prec((struct list_head *)preclist); +#define handle_prod(prodlist) handle_prod((struct list_head *)prodlist); + // generated #include "bin/gram.h" #include "bin/gram.c" @@ -33,12 +174,19 @@ symbol token_sym(struct token *t) { return t->s; } intptr_t token_val(struct token *t) { return t->v; } static char *input = (char []){ - "-left B;" - "-right C;" - "-left D;" + "-terminal ID EQUAL STAR;" + "-nonterminal EP E L R." "" - "A: B {a}" - " | C N {d}." + "-left ID;" + "-right STAR;" + "-left EQUAL." + "" + "EP: E END_INPUT {1};" + "E : L EQUAL R {2}" + " | R {3};" + "L : STAR R {4}" + " | ID {5};" + "R : L {6}." }; struct token *toklist_eat() @@ -55,14 +203,17 @@ struct token *toklist_peek() { return &tok; } int main(void) { + global_arena = ARENA_CTX_INIT(buf, sizeof(buf)); + input = next_token(input); intptr_t value; if(lr_parser(&value)) { + printf(input); return 1; } - printf("OUTPUT: %jd\n", value); + fprintf(stderr, "OUTPUT: %jd\n", value); return 0; } @@ -117,10 +268,10 @@ static char *next_token(char *str) case '-': off = tillsep(++str); char *s = substring(str, off); - if(strcmp(s, "left") == 0) tok.s = D_LEFT; - else if(strcmp(s, "right") == 0) tok.s = D_RIGHT; - else if(strcmp(s, "terminal") == 0) tok.s = D_TERMINAL; - else if(strcmp(s, "nonterminal") == 0) tok.s = D_NONTERM; + if(strcmp(s, "terminal") == 0) tok.s = TERMINAL; + else if(strcmp(s, "nonterminal") == 0) tok.s = NONTERM; + else if(strcmp(s, "left") == 0) tok.s = LEFT; + else if(strcmp(s, "right") == 0) tok.s = RIGHT; else { fprintf(stderr, "ERROR: Unknown directive '-%s'\n", s); goto fail; } break; case '{': diff --git a/lr-parser.c b/lr-parser.c index 3b6be84..621a9e7 100644 --- a/lr-parser.c +++ b/lr-parser.c @@ -60,7 +60,7 @@ int lr_parser(intptr_t *value) push(a_goto.arg); break; case ACTION_ACCEPT: - *value = *(stack_head-1); + *value = semantic_actions[0](stack_head); return 0; case ACTION_NOT_SET: default: diff --git a/util/arena.h b/util/arena.h index 3d82b95..1142321 100644 --- a/util/arena.h +++ b/util/arena.h @@ -9,18 +9,26 @@ struct arena_ctx { size_t offset; }; + #define ARENA_CTX_INIT(buffer, sz) (struct arena_ctx){(buffer), (sz), 0} void *arena_allocate(struct arena_ctx *ctx, size_t sz); void arena_reset(struct arena_ctx *ctx); #ifdef ARENA_IMPLEMENTATION +#define ARENA_ALLIGNMENT 2 +// #include <assert.h> void *arena_allocate(struct arena_ctx *ctx, size_t sz) { + if(sz % ARENA_ALLIGNMENT != 0) + sz += ARENA_ALLIGNMENT - (sz % ARENA_ALLIGNMENT); + if(ctx->offset + sz > ctx->size) return NULL; - void *off = ctx->buffer + ctx->offset; + void *off = (void *)((intptr_t)ctx->buffer + ctx->offset); ctx->offset += sz; + + // assert(((intptr_t)off & 0x1) == 0); return off; } diff --git a/util/list.h b/util/list.h new file mode 100644 index 0000000..d9531ea --- /dev/null +++ b/util/list.h @@ -0,0 +1,79 @@ +#ifndef LIST_H +#define LIST_H + +// #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) +#define container_of(ptr, type, member) ((type *)((char *)(ptr) - offsetof(type, member))) + +struct list_head { + struct list_head *prev; + struct list_head *next; +}; + +#define LIST_END NULL + +#define LIST_EMPTY(list) do { \ + (list)->next = LIST_END; \ + (list)->prev = LIST_END; \ + } while(0); + +#define list_entry(ptr, type, member) \ + container_of(ptr, type, member) + +#define list_next_entry(entry, type, member) \ + list_entry(entry->member.next, type, member) + +#define list_for_each(pos, start) \ + for(struct list_head *pos = start; pos; pos = pos->next) +#define list_for_each_entry(type, entry, member, start) \ + for(type *entry = list_entry((start), type, member); \ + entry; \ + entry = (entry->member.next == LIST_END ? NULL : \ + list_next_entry(entry, type, member))) + +#define list_for_each_safe(pos, start) \ + for(struct list_head *pos = (start), *__next = LIST_END; \ + pos && (__next = pos->next,1); \ + pos = __next) + +static inline int list_is_head(struct list_head *l) +{ + return l->prev == LIST_END; +} + +static inline int list_is_tail(struct list_head *l) +{ + return l->next == LIST_END; +} + +static inline struct list_head *list_get_head(struct list_head *l) +{ + while(!list_is_head(l)) l = l->prev; + return l; +} + +static inline struct list_head *list_get_tail(struct list_head *l) +{ + while(!list_is_tail(l)) l = l->next; + return l; +} + +static inline struct list_head *list_add( + struct list_head *head, + struct list_head *new) +{ + if(head) { + // new->next = head->next; + head->next = new; + } + new->prev = head; + return new; +} + +static inline size_t list_len(struct list_head *head) +{ + size_t n = 0; + list_for_each(pos, list_get_head(head)) n++; + return n; +} + +#endif |
