aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorkartofen <kartofen.mail.0@protonmail.com>2025-07-24 23:43:25 +0300
committerkartofen <kartofen.mail.0@protonmail.com>2025-07-24 23:43:25 +0300
commit059ee9afcc575572f87f224c93288e2835cd1a52 (patch)
tree07c315c5bfa192722e4bfb974b1df651089fbacc
parent1d6f6e7c6a07832b3524871fdec86f5329736598 (diff)
actually parsing grammar and generating a def.c file
-rw-r--r--demos/generate-parser.c2
-rw-r--r--demos/sample-files/gram-defs.c72
-rw-r--r--demos/sample-files/gram-skeleton.c175
-rw-r--r--lr-parser.c2
-rw-r--r--util/arena.h10
-rw-r--r--util/list.h79
6 files changed, 298 insertions, 42 deletions
diff --git a/demos/generate-parser.c b/demos/generate-parser.c
index 9987265..d1391d5 100644
--- a/demos/generate-parser.c
+++ b/demos/generate-parser.c
@@ -164,7 +164,7 @@ int main(int argc, char **argv)
for(size_t i = 0; i < total_productions; i++) {
printf("#define A(n) (*(stack_head-3*%zu+3*n-1))\n", grammar[i].nRHS-1);
printf("intptr_t __prod%zu_action(intmax_t *stack_head)\n", i);
- printf("{ intptr_t v;\n");
+ printf("{ intptr_t v = 0;\n");
puts(semantic_action_str[i]);
printf("return v; }\n");
printf("#undef A\n");
diff --git a/demos/sample-files/gram-defs.c b/demos/sample-files/gram-defs.c
index 733a866..b93a950 100644
--- a/demos/sample-files/gram-defs.c
+++ b/demos/sample-files/gram-defs.c
@@ -1,12 +1,13 @@
#include "util/util.h"
-#define SYMBOLS(X) \
- X(COLON) X(PIPE) X(SEMICOL) X(DOT) \
- X(D_LEFT) X(D_RIGHT) X(D_TERMINAL) X(D_NONTERM) \
- X(IDEN) X(NUM) X(ACTION) X(END_INPUT) \
- \
- X(Sp) X(S) X(Slist) X(Prod) X(Prec) \
- X(Prodlist) X(Idenlist) X(IorN) X(IorNlist) \
- X(SYMBOLS_END) \
+#define SYMBOLS(X) \
+ X(TERMINAL) X(NONTERM) X(LEFT) X(RIGHT) \
+ X(COLON) X(PIPE) X(SEMICOL) X(DOT) \
+ X(IDEN) X(NUM) X(ACTION) X(END_INPUT) \
+ \
+ X(Sp) X(A) X(B) X(C) \
+ X(Type) X(Prec) X(Prod) X(Preclist) X(Prodlist) \
+ X(Actionlist) X(Idenlist) X(IorNlist) \
+ X(SYMBOLS_END) \
#include "parts/symbol.h"
enum symbol { SYMBOLS(X_TO_ENUM) };
@@ -20,25 +21,41 @@ IMPLEMENT_FUNCPTR(int, symbol_is_valid, (symbol s)) { return s < SYMBOLS_END; }
#include "parts/grammar.h"
#define PROD(LHS, _, ...) {LHS, (symbol[]){__VA_ARGS__}, sizeof((symbol[]){__VA_ARGS__})/sizeof(symbol)}
-#define GRAMMAR_ACTION_DEF(X) \
- X(PROD(Sp, ->, Slist, END_INPUT), "v = 0;") \
- X(PROD(Slist, -->, S, SEMICOL, Slist), "v = 0;") \
- X(PROD(Slist, -->, S, DOT), "v = 0;") \
- X(PROD(S, -->, Prod), "v = 0;") \
- X(PROD(S, -->, Prec), "v = 0;") \
- X(PROD(Idenlist, -->, IDEN, Idenlist), "v = 0;") \
- X(PROD(Idenlist, -->, IDEN), "v = 0;") \
- X(PROD(Prod, -->, IDEN, COLON, Prodlist), "v = 0;") \
- X(PROD(Prodlist, -->, Idenlist, ACTION, PIPE, Prodlist), "printf(\"ACTION: '%s'\\n\", A(1));") \
- X(PROD(Prodlist, -->, Idenlist, ACTION), "printf(\"ACTION: '%s'\\n\", A(1));") \
- X(PROD(Prec, -->, D_TERMINAL, Idenlist), "v = 0;") \
- X(PROD(Prec, -->, D_NONTERM, Idenlist), "v = 0;") \
- X(PROD(Prec, -->, D_LEFT, IorNlist), "v = 0;") \
- X(PROD(Prec, -->, D_RIGHT, IorNlist), "v = 0;") \
- X(PROD(IorNlist, -->, IorN, IorNlist), "v = 0;") \
- X(PROD(IorNlist, -->, IorN), "v = 0;") \
- X(PROD(IorN, -->, IDEN), "v = 0;") \
- X(PROD(IorN, -->, NUM), "v = 0;")
+#define GRAMMAR_ACTION_DEF(X) \
+ X(PROD(Sp, -->, A, B, C, END_INPUT), "") \
+ \
+ X(PROD(A, -->, TERMINAL, Idenlist, \
+ SEMICOL, NONTERM, Idenlist, DOT), "handle_type(A(1), A(4))") \
+ \
+ X(PROD(B, -->, Preclist), "handle_prec(A(0));") \
+ X(PROD(Preclist, -->, Prec, SEMICOL, Preclist), \
+ "v = list_new_head(A(2), A(0));") \
+ X(PROD(Preclist, -->, Prec, DOT), "v = A(0);") \
+ X(PROD(Prec, -->, LEFT, IorNlist), \
+ "v = prec_new(A(1), PRECEDENCE_LEFT_ASSOC);") \
+ X(PROD(Prec, -->, RIGHT, IorNlist), \
+ "v = prec_new(A(1), PRECEDENCE_RIGHT_ASSOC);") \
+ \
+ X(PROD(C, -->, Prodlist), "handle_prod(A(0));") \
+ X(PROD(Prodlist, -->, Prod, SEMICOL, Prodlist), \
+ "v = list_new_head(A(2), A(0));") \
+ X(PROD(Prodlist, -->, Prod, DOT), "v = A(0);") \
+ X(PROD(Prod, -->, IDEN, COLON, Actionlist), \
+ "v = prod_new(A(0), A(2));") \
+ X(PROD(Actionlist, -->, Idenlist, ACTION, PIPE, Actionlist), \
+ "v = list_new_head(A(3), action_new(A(0), A(1)));") \
+ X(PROD(Actionlist, -->, Idenlist, ACTION), \
+ "v = action_new(A(0), A(1));") \
+ \
+ X(PROD(Idenlist, -->, IDEN, Idenlist), \
+ "v = list_new_head(A(1), ptr_new(A(0)));") \
+ X(PROD(Idenlist, -->, IDEN), "v = ptr_new(A(0));") \
+ X(PROD(IorNlist, -->, IDEN, IorNlist), \
+ "v = list_new_head(A(1), ptr_new(A(0)));") \
+ X(PROD(IorNlist, -->, IDEN), "v = ptr_new(A(0));") \
+ X(PROD(IorNlist, -->, NUM, IorNlist), \
+ "v = list_new_head(A(1), num_new(A(0)));") \
+ X(PROD(IorNlist, -->, NUM), "v = num_new(A(0));") \
#define X_GRAMMAR(G, A) G,
#define X_ACTION(G, A) A,
@@ -61,5 +78,6 @@ struct precedence_def {
int *list;
size_t nlist;
};
+
struct precedence_def *precedence_defs = NULL;
size_t nprecedence_defs = 0;
diff --git a/demos/sample-files/gram-skeleton.c b/demos/sample-files/gram-skeleton.c
index 89ef6b4..a5899ac 100644
--- a/demos/sample-files/gram-skeleton.c
+++ b/demos/sample-files/gram-skeleton.c
@@ -1,13 +1,14 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <stdint.h>
#include <ctype.h>
#define ARENA_IMPLEMENTATION
#include "util/arena.h"
-static char buf[1024];
-static struct arena_ctx global_arena = ARENA_CTX_INIT(buf, sizeof(buf));
+static char buf[2048];
+static struct arena_ctx global_arena;
static void *xalloc(size_t sz) {
void *addr = arena_allocate(&global_arena, sz);
if(!addr) {
@@ -17,6 +18,146 @@ static void *xalloc(size_t sz) {
return addr;
}
+#include "parts/precedence.h"
+#include "util/list.h"
+
+static inline struct list_head *list_new_head(struct list_head *head, struct list_head *new)
+{
+ if(head) list_add(new, head);
+ return new;
+}
+
+struct ptr_entry {
+ intptr_t data;
+ struct list_head list;
+};
+
+struct prec_entry {
+ enum precedence_flag flag;
+ struct list_head *ptrlist;
+ struct list_head list;
+};
+
+struct strnptr_entry {
+ char *str;
+ struct list_head *ptrlist;
+ struct list_head list;
+};
+
+#define new_entry(type, entry, __do__) \
+ { \
+ type *entry = xalloc(sizeof(type)); \
+ LIST_EMPTY(&entry->list); \
+ __do__; \
+ return &entry->list; \
+ }
+
+struct list_head *ptr_new(void *ptr)
+ new_entry(struct ptr_entry, entry, {
+ entry->data = (intptr_t)ptr;
+ });
+struct list_head *num_new(intmax_t num)
+ new_entry(struct ptr_entry, entry, {
+ entry->data = (num << 1) | 0x1;
+ });
+struct list_head *prec_new(struct list_head *idenlist, enum precedence_flag flag)
+ new_entry(struct prec_entry, entry, {
+ entry->ptrlist = idenlist;
+ entry->flag = flag;
+ });
+struct list_head *action_new(struct list_head *idenlist, char *action)
+ new_entry(struct strnptr_entry, entry, {
+ entry->str = action;
+ entry->ptrlist = idenlist;
+ });
+struct list_head *prod_new(char *iden, struct list_head *actionlist)
+ new_entry(struct strnptr_entry, entry, {
+ entry->str = iden;
+ entry->ptrlist = actionlist;
+ });
+
+void handle_type(struct list_head *terminals, struct list_head *nonterminals)
+{
+ printf("#include \"parts/symbol.h\"\n");
+ printf("enum symbol { ");
+ list_for_each_entry(struct ptr_entry, entry, list, terminals)
+ printf("%s, ", (char *)entry->data);
+ printf("END_INPUT, ");
+ list_for_each_entry(struct ptr_entry, entry, list, nonterminals)
+ printf("%s, ", (char *)entry->data);
+ printf("SYMBOLS_END };\n");
+
+ printf("char **symbol_to_str = (char *([])){ ");
+ list_for_each_entry(struct ptr_entry, entry, list, terminals)
+ printf("\"%s\", ", (char *)entry->data);
+ printf("\"END_INPUT\",");
+ list_for_each_entry(struct ptr_entry, entry, list, nonterminals)
+ printf("\"%s\", ", (char *)entry->data);
+ printf("\"SYMBOLS_END\" };\n");
+
+ printf("IMPLEMENT_FUNCPTR(int, symbol_is_terminal, (symbol s)) { return s < %s; }\n",
+ (char *)container_of(nonterminals, struct ptr_entry, list)->data);
+ printf("IMPLEMENT_FUNCPTR(int, symbol_is_input_end, (symbol s)) { return s == END_INPUT; }\n");
+ printf("IMPLEMENT_FUNCPTR(int, symbol_is_valid, (symbol s)) { return s < SYMBOLS_END; }\n");
+}
+
+void handle_prec(struct list_head *preclist)
+{
+ printf("#include \"parts/precedence.h\"\n");
+ printf("struct precedence_def {\n");
+ printf(" int flag;\n");
+ printf(" int *list;\n");
+ printf(" size_t nlist;\n");
+ printf("};\n");
+ printf("struct precedence_def *precedence_defs = (struct precedence_def[]){\n");
+ list_for_each_entry(struct prec_entry, entry, list, preclist) {
+ printf("{ %d, (int[]){", entry->flag);
+ list_for_each_entry(struct ptr_entry, e, list, entry->ptrlist)
+ if((e->data & 0x1) == 0) printf("%s, ", (char *)e->data);
+ else printf("~%ju, ", e->data >> 1);
+ printf("}, %zu}, ", list_len(entry->ptrlist));
+ }
+ printf("};\n");
+ printf("size_t nprecedence_defs = %zu;\n", list_len(preclist));
+}
+
+void handle_prod(struct list_head *prodlist)
+{
+ size_t productions = 0;
+
+ printf("#include \"parts/grammar.h\"\n");
+ printf("struct production *grammar = (struct production[]){\n");
+ list_for_each_entry(struct strnptr_entry, e1, list, prodlist) {
+ productions += list_len(e1->ptrlist);
+ list_for_each_entry(struct strnptr_entry, e2, list, e1->ptrlist) {
+ printf("{%s, (symbol[]){ ", e1->str);
+ list_for_each_entry(struct ptr_entry, e3, list, e2->ptrlist)
+ printf("%s, ", (char *)e3->data);
+ printf("}, %zu}, \n", list_len(e2->ptrlist));
+ }
+ }
+ printf("};\n");
+ printf("size_t total_productions = %zu;\n", productions);
+
+ printf("char **semantic_action_str = (char *([])){");
+ list_for_each_entry(struct strnptr_entry, e1, list, prodlist)
+ list_for_each_entry(struct strnptr_entry, e2, list, e1->ptrlist)
+ printf("\"%s\", ", e2->str); // todo: escape the quotes
+ printf("};\n");
+}
+
+#define list_new_head(head, new) (intptr_t)list_new_head((struct list_head *)head, (struct list_head *)new)
+
+#define ptr_new(iden) (intptr_t)ptr_new((void *)iden)
+#define num_new(num) (intptr_t)num_new(num)
+#define prec_new(idenlist, flag) (intptr_t)prec_new((struct list_head *)idenlist, flag)
+#define action_new(idenlist, action) (intptr_t)action_new((struct list_head *)idenlist, (char *)action)
+#define prod_new(iden, actionlist) (intptr_t)prod_new((char *)iden, (struct list_head *)actionlist)
+
+#define handle_type(terminals, nonterminals) handle_type((struct list_head *)terminals, (struct list_head *)nonterminals);
+#define handle_prec(preclist) handle_prec((struct list_head *)preclist);
+#define handle_prod(prodlist) handle_prod((struct list_head *)prodlist);
+
// generated
#include "bin/gram.h"
#include "bin/gram.c"
@@ -33,12 +174,19 @@ symbol token_sym(struct token *t) { return t->s; }
intptr_t token_val(struct token *t) { return t->v; }
static char *input = (char []){
- "-left B;"
- "-right C;"
- "-left D;"
+ "-terminal ID EQUAL STAR;"
+ "-nonterminal EP E L R."
""
- "A: B {a}"
- " | C N {d}."
+ "-left ID;"
+ "-right STAR;"
+ "-left EQUAL."
+ ""
+ "EP: E END_INPUT {1};"
+ "E : L EQUAL R {2}"
+ " | R {3};"
+ "L : STAR R {4}"
+ " | ID {5};"
+ "R : L {6}."
};
struct token *toklist_eat()
@@ -55,14 +203,17 @@ struct token *toklist_peek() { return &tok; }
int main(void)
{
+ global_arena = ARENA_CTX_INIT(buf, sizeof(buf));
+
input = next_token(input);
intptr_t value;
if(lr_parser(&value)) {
+ printf(input);
return 1;
}
- printf("OUTPUT: %jd\n", value);
+ fprintf(stderr, "OUTPUT: %jd\n", value);
return 0;
}
@@ -117,10 +268,10 @@ static char *next_token(char *str)
case '-':
off = tillsep(++str);
char *s = substring(str, off);
- if(strcmp(s, "left") == 0) tok.s = D_LEFT;
- else if(strcmp(s, "right") == 0) tok.s = D_RIGHT;
- else if(strcmp(s, "terminal") == 0) tok.s = D_TERMINAL;
- else if(strcmp(s, "nonterminal") == 0) tok.s = D_NONTERM;
+ if(strcmp(s, "terminal") == 0) tok.s = TERMINAL;
+ else if(strcmp(s, "nonterminal") == 0) tok.s = NONTERM;
+ else if(strcmp(s, "left") == 0) tok.s = LEFT;
+ else if(strcmp(s, "right") == 0) tok.s = RIGHT;
else { fprintf(stderr, "ERROR: Unknown directive '-%s'\n", s); goto fail; }
break;
case '{':
diff --git a/lr-parser.c b/lr-parser.c
index 3b6be84..621a9e7 100644
--- a/lr-parser.c
+++ b/lr-parser.c
@@ -60,7 +60,7 @@ int lr_parser(intptr_t *value)
push(a_goto.arg);
break;
case ACTION_ACCEPT:
- *value = *(stack_head-1);
+ *value = semantic_actions[0](stack_head);
return 0;
case ACTION_NOT_SET:
default:
diff --git a/util/arena.h b/util/arena.h
index 3d82b95..1142321 100644
--- a/util/arena.h
+++ b/util/arena.h
@@ -9,18 +9,26 @@ struct arena_ctx {
size_t offset;
};
+
#define ARENA_CTX_INIT(buffer, sz) (struct arena_ctx){(buffer), (sz), 0}
void *arena_allocate(struct arena_ctx *ctx, size_t sz);
void arena_reset(struct arena_ctx *ctx);
#ifdef ARENA_IMPLEMENTATION
+#define ARENA_ALLIGNMENT 2
+// #include <assert.h>
void *arena_allocate(struct arena_ctx *ctx, size_t sz)
{
+ if(sz % ARENA_ALLIGNMENT != 0)
+ sz += ARENA_ALLIGNMENT - (sz % ARENA_ALLIGNMENT);
+
if(ctx->offset + sz > ctx->size) return NULL;
- void *off = ctx->buffer + ctx->offset;
+ void *off = (void *)((intptr_t)ctx->buffer + ctx->offset);
ctx->offset += sz;
+
+ // assert(((intptr_t)off & 0x1) == 0);
return off;
}
diff --git a/util/list.h b/util/list.h
new file mode 100644
index 0000000..d9531ea
--- /dev/null
+++ b/util/list.h
@@ -0,0 +1,79 @@
+#ifndef LIST_H
+#define LIST_H
+
+// #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
+#define container_of(ptr, type, member) ((type *)((char *)(ptr) - offsetof(type, member)))
+
+struct list_head {
+ struct list_head *prev;
+ struct list_head *next;
+};
+
+#define LIST_END NULL
+
+#define LIST_EMPTY(list) do { \
+ (list)->next = LIST_END; \
+ (list)->prev = LIST_END; \
+ } while(0);
+
+#define list_entry(ptr, type, member) \
+ container_of(ptr, type, member)
+
+#define list_next_entry(entry, type, member) \
+ list_entry(entry->member.next, type, member)
+
+#define list_for_each(pos, start) \
+ for(struct list_head *pos = start; pos; pos = pos->next)
+#define list_for_each_entry(type, entry, member, start) \
+ for(type *entry = list_entry((start), type, member); \
+ entry; \
+ entry = (entry->member.next == LIST_END ? NULL : \
+ list_next_entry(entry, type, member)))
+
+#define list_for_each_safe(pos, start) \
+ for(struct list_head *pos = (start), *__next = LIST_END; \
+ pos && (__next = pos->next,1); \
+ pos = __next)
+
+static inline int list_is_head(struct list_head *l)
+{
+ return l->prev == LIST_END;
+}
+
+static inline int list_is_tail(struct list_head *l)
+{
+ return l->next == LIST_END;
+}
+
+static inline struct list_head *list_get_head(struct list_head *l)
+{
+ while(!list_is_head(l)) l = l->prev;
+ return l;
+}
+
+static inline struct list_head *list_get_tail(struct list_head *l)
+{
+ while(!list_is_tail(l)) l = l->next;
+ return l;
+}
+
+static inline struct list_head *list_add(
+ struct list_head *head,
+ struct list_head *new)
+{
+ if(head) {
+ // new->next = head->next;
+ head->next = new;
+ }
+ new->prev = head;
+ return new;
+}
+
+static inline size_t list_len(struct list_head *head)
+{
+ size_t n = 0;
+ list_for_each(pos, list_get_head(head)) n++;
+ return n;
+}
+
+#endif