aboutsummaryrefslogtreecommitdiff
path: root/demos
diff options
context:
space:
mode:
author kartofen <kartofen.mail.0@protonmail.com> 2025-08-26 01:17:10 +0300
committer kartofen <kartofen.mail.0@protonmail.com> 2025-08-26 01:17:10 +0300
commit 46e786db9d1b48b8fbc3502e36f093b755f3e09f (patch)
tree 9e279216e68f3fe4b0849d1e07184fe674dc551f /demos
parent 1c83c514c8108fccfec9764da5e4563b98eb871b (diff)
grammar for the grammar and lexing and parsing of a new language lbp
Diffstat (limited to 'demos')
-rw-r--r-- demos/sample-files/gram-defs.c 23
-rw-r--r-- demos/sample-files/gram-skeleton.c 12
-rw-r--r-- demos/sample-files/gram.g 42
-rw-r--r-- demos/sample-files/lbp-code.lbp 40
-rw-r--r-- demos/sample-files/lbp-skeleton.c 258
-rw-r--r-- demos/sample-files/lbp.g 39
6 files changed, 401 insertions, 13 deletions
diff --git a/demos/sample-files/gram-defs.c b/demos/sample-files/gram-defs.c
index 49329bd..b1ae268 100644
--- a/demos/sample-files/gram-defs.c
+++ b/demos/sample-files/gram-defs.c
@@ -1,13 +1,13 @@
#include "util/util.h"
-#define SYMBOLS(X) \
- X(TERMINAL) X(NONTERM) X(LEFT) X(RIGHT) \
- X(COLON) X(PIPE) X(SEMICOL) X(DOT) \
- X(IDEN) X(NUM) X(ACTION) X(END_INPUT) \
- \
- X(Sp) X(A) X(B) X(C) \
- X(Type) X(Prec) X(Prod) X(Preclist) X(Prodlist) \
- X(Actionlist) X(Idenlist) X(IorNlist) \
- X(SYMBOLS_END) \
+// X-macro list of every grammar symbol. It is expanded below with X_TO_ENUM
+// (to build enum symbol) and with X_TO_STR (to build symbol_to_str).
+// Order is significant: symbol_is_terminal tests `s < S`, so every entry
+// listed before S counts as a terminal, and SYMBOLS_END must stay last
+// because total_symbols and symbol_is_valid compare against it.
+#define SYMBOLS(X) \
+ X(TERMINAL) X(NONTERM) X(LEFT) X(RIGHT) X(NOPREC) \
+ X(COLON) X(PIPE) X(SEMICOL) X(DOT) \
+ X(IDEN) X(NUM) X(ACTION) X(END_INPUT) \
+ \
+ X(S) X(A) X(B) X(C) \
+ X(Type) X(Prec) X(Prod) X(Preclist) X(Prodlist) \
+ X(Actionlist) X(Idenlist) X(IorNlist) \
+ X(SYMBOLS_END) \
#include "parts/symbol.h"
enum symbol { SYMBOLS(X_TO_ENUM) };
@@ -15,20 +15,21 @@ size_t total_symbols = SYMBOLS_END;
char **symbol_to_str = (char *([])){ SYMBOLS(X_TO_STR) };
-IMPLEMENT_FUNCPTR(int, symbol_is_terminal, (symbol s)) { return s < Sp; }
+IMPLEMENT_FUNCPTR(int, symbol_is_terminal, (symbol s)) { return s < S; }
IMPLEMENT_FUNCPTR(int, symbol_is_input_end, (symbol s)) { return s == END_INPUT; }
IMPLEMENT_FUNCPTR(int, symbol_is_valid, (symbol s)) { return s < SYMBOLS_END; }
#include "parts/grammar.h"
#define PROD(LHS, _, ...) {LHS, (symbol[]){__VA_ARGS__}, sizeof((symbol[]){__VA_ARGS__})/sizeof(symbol)}
#define GRAMMAR_ACTION_DEF(X) \
- X(PROD(Sp, -->, A, B, C, END_INPUT), "") \
+ X(PROD(S, -->, A, B, C, END_INPUT), "") \
\
X(PROD(A, -->, TERMINAL, Idenlist, \
SEMICOL, NONTERM, Idenlist, DOT), \
"handle_type(A(1), A(4))") \
\
X(PROD(B, -->, Preclist), "handle_prec(A(0));") \
+ X(PROD(B, -->, NOPREC, DOT), "handle_prec(NULL);") \
X(PROD(Preclist, -->, Prec, SEMICOL, Preclist), \
"v = list_new_head(A(2), A(0));") \
X(PROD(Preclist, -->, Prec, DOT), "v = A(0);") \
diff --git a/demos/sample-files/gram-skeleton.c b/demos/sample-files/gram-skeleton.c
index 7a54548..4e40c14 100644
--- a/demos/sample-files/gram-skeleton.c
+++ b/demos/sample-files/gram-skeleton.c
@@ -5,7 +5,7 @@
#include <ctype.h>
#define INPUT_CAP 4096
-#define ARENA_CAP 4096
+#define ARENA_CAP (4096*2) // parenthesized so the expansion is a single operand wherever it is used
#define ARENA_IMPLEMENTATION
#include "util/arena.h"
@@ -115,6 +115,13 @@ void handle_prec(struct list_head *preclist)
printf(" int *list;\n");
printf(" size_t nlist;\n");
printf("};\n");
+
+ if(!preclist) {
+ printf("struct precedence_def *precedence_defs = NULL;\n");
+ printf("size_t nprecedence_defs = 0;\n");
+ return;
+ }
+
printf("struct precedence_def *precedence_defs = (struct precedence_def[]){\n");
list_for_each_entry(struct prec_entry, entry, list, preclist) {
printf("{ %d, (int[]){", entry->flag);
@@ -211,7 +218,7 @@ int main(void)
intptr_t value;
if(lr_parser(&value)) {
- printf(input);
+ fprintf(stderr, input);
return 1;
}
@@ -274,6 +281,7 @@ static char *next_token(char *str)
else if(strcmp(s, "nonterminal") == 0) tok.s = NONTERM;
else if(strcmp(s, "left") == 0) tok.s = LEFT;
else if(strcmp(s, "right") == 0) tok.s = RIGHT;
+ else if(strcmp(s, "noprec") == 0) tok.s = NOPREC;
else { fprintf(stderr, "ERROR: Unknown directive '-%s'\n", s); goto fail; }
break;
case '{':
diff --git a/demos/sample-files/gram.g b/demos/sample-files/gram.g
new file mode 100644
index 0000000..f9daded
--- /dev/null
+++ b/demos/sample-files/gram.g
@@ -0,0 +1,42 @@
+-terminal
+ TERMINAL NONTERM LEFT RIGHT COLON NOPREC
+ PIPE SEMICOL DOT
+ IDEN NUM ACTION;
+-nonterminal
+ S A B C
+ Type Prec Prod Preclist Prodlist
+ Actionlist Idenlist IorNlist.
+
+-noprec.
+
+S: A B C {};
+
+A: TERMINAL Idenlist SEMICOL NONTERM Idenlist DOT { handle_type(A(1), A(4)) };
+
+
+B: Preclist { handle_prec(A(0)); }
+ | NOPREC DOT { handle_prec(NULL); };
+
+Preclist: Prec SEMICOL Preclist { v = list_new_head(A(2), A(0)); }
+ | Prec DOT { v = A(0); };
+
+Prec: LEFT IorNlist { v = prec_new(A(1), PRECEDENCE_LEFT_ASSOC); }
+ | RIGHT IorNlist { v = prec_new(A(1), PRECEDENCE_RIGHT_ASSOC); };
+
+
+C: Prodlist { handle_prod(A(0)); };
+
+Prodlist: Prod SEMICOL Prodlist { v = list_new_head(A(2), A(0)); }
+ | Prod DOT { v = A(0); };
+
+Prod: IDEN COLON Actionlist { v = prod_new(A(0), A(2)); };
+
+Actionlist: Idenlist ACTION PIPE Actionlist { v = list_new_head(A(3), action_new(A(0), A(1))); }
+ | Idenlist ACTION { v = action_new(A(0), A(1)); };
+
+Idenlist: IDEN Idenlist { v = list_new_head(A(1), ptr_new(A(0))); }
+ | IDEN { v = ptr_new(A(0)); };
+IorNlist: IDEN IorNlist { v = list_new_head(A(1), ptr_new(A(0))); }
+ | IDEN { v = ptr_new(A(0)); }
+ | NUM IorNlist { v = list_new_head(A(1), num_new(A(0))); }
+ | NUM { v = num_new(A(0)); }.
diff --git a/demos/sample-files/lbp-code.lbp b/demos/sample-files/lbp-code.lbp
new file mode 100644
index 0000000..df5bdcc
--- /dev/null
+++ b/demos/sample-files/lbp-code.lbp
@@ -0,0 +1,40 @@
+inbounds/int-function(low, high, val) {
+ > val low, < val high.
+},
+
+:downlink_fmt/enum {
+ (17 |_, :EXTENDED_SQUITTER);
+ (18 |_, :NON_TRANSPONDER).
+},
+
+:type_code/enum {
+ (inbounds 1 4 |_, :AIRCRAFT_IDEN);
+ (inbounds 5 8 |_, :SURFACE_POS);
+ (inbounds 9 18 |_, :AIR_POS);
+ (inbounds 20 22 |_, ---);
+ (19 |_, ---);
+ (28 |_, ---);
+ (29 |_, ---);
+ (31 |_, ---).
+},
+
+:aircraft_iden/struct {
+-.
+},
+
+:message/struct {
+    DF/enum(:downlink_fmt) |5,
+    CA/enum(:capabilities) |3,
+    ICAO/int-big |24,
+    TC/enum(:type_code) |5,
+
+    ((TC :type_code:AIRCRAFT_IDEN, aircraft_iden/struct(:aircraft_iden));
+     (TC :type_code:SURFACE_POS, surface_pos/struct {
+         POS |1,
+         - |_.
+     });
+     (TC :type_code:AIR_POS, air_pos/struct(:air_pos))
+    ) |51,
+
+    CRC |24.
+}.
diff --git a/demos/sample-files/lbp-skeleton.c b/demos/sample-files/lbp-skeleton.c
new file mode 100644
index 0000000..ae0a17f
--- /dev/null
+++ b/demos/sample-files/lbp-skeleton.c
@@ -0,0 +1,258 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <ctype.h>
+
+// TODO: lr parser is bad for debugging
+
+#define INPUT_CAP 4096
+#define ARENA_CAP 4096
+
+#define ARENA_IMPLEMENTATION
+#include "util/arena.h"
+
+static char buf[ARENA_CAP];
+static struct arena_ctx global_arena;
+// Allocate `sz` bytes from global_arena. Never returns NULL: if the arena
+// cannot satisfy the request an error is printed and the process exits
+// with status 1.
+static void *xalloc(size_t sz) {
+    void *addr = arena_allocate(&global_arena, sz);
+    if(addr) return addr;
+
+    fprintf(stderr, "ERROR: Arena empty\n");
+    exit(1);
+}
+
+// other things here
+#include "util/list.h"
+// Make `new` the front of a list. When `head` is non-NULL the two nodes are
+// linked with list_add(new, head); when it is NULL there is no list yet and
+// `new` is returned untouched. Always returns `new`.
+// NOTE(review): the exact insertion position is whatever util/list.h's
+// list_add does - confirm there.
+static inline struct list_head *list_new_head(struct list_head *head, struct list_head *new)
+{
+    if(head) list_add(new, head);
+    return new;
+}
+
+// Same-named wrapper that casts both arguments to struct list_head * and the
+// result to intptr_t (presumably so integer-typed parser values can be passed
+// straight through - confirm against the generated actions). The arguments
+// and the whole expansion are parenthesized so the casts bind correctly to
+// arbitrary expressions. A macro is not re-expanded inside its own
+// expansion, so the inner name refers to the function above.
+#define list_new_head(head, new) \
+    ((intptr_t)list_new_head((struct list_head *)(head), (struct list_head *)(new)))
+
+// generated
+#include "bin/lbp.h"
+#include "bin/lbp.c"
+
+#include "util/dict.h"
+// Dictionary queried by typelist_tokenize; initialized in main from the
+// table below.
+static struct dict types_dict;
+
+// Spelling -> symbol pairs for the base types (T_*) and subtypes (ST_*).
+static struct string_token types_strings[] = {
+    { "int",      T_INT       },
+    { "enum",     T_ENUM      },
+    { "struct",   T_STRUCT    },
+    { "function", ST_FUNCTION },
+    { "big",      ST_BIG      },
+    { "little",   ST_LITTLE   },
+    { "native",   ST_NATIVE   },
+};
+
+// Number of entries in types_strings.
+static size_t ntypes_strings = sizeof types_strings / sizeof types_strings[0];
+// Per-byte lookup table handed to DICT_INIT together with types_strings.
+// NUL and space both map to 1, 'a'..'z' map to 2..27, and every other byte
+// is left at 0.
+// NOTE(review): how util/dict.h interprets these values is not visible here.
+static uint8_t dict_lowercase_char_to_bit[256] = {
+    [ 0 ] = 1,  [' '] = 1,
+    ['a'] = 2,  ['b'] = 3,  ['c'] = 4,  ['d'] = 5,
+    ['e'] = 6,  ['f'] = 7,  ['g'] = 8,  ['h'] = 9,
+    ['i'] = 10, ['j'] = 11, ['k'] = 12, ['l'] = 13,
+    ['m'] = 14, ['n'] = 15, ['o'] = 16, ['p'] = 17,
+    ['q'] = 18, ['r'] = 19, ['s'] = 20, ['t'] = 21,
+    ['u'] = 22, ['v'] = 23, ['w'] = 24, ['x'] = 25,
+    ['y'] = 26, ['z'] = 27,
+};
+
+
+#include "parts/toklist.h"
+// One lexed token as stored in tokbuf and handed to the parser.
+struct token {
+    // grammar symbol of the token
+    symbol s;
+    // payload set by the lexer: the parsed number for NUM, a pointer to an
+    // arena copy of the text for IDEN/ATOM, the symbol itself for type
+    // keywords, 0 otherwise
+    intptr_t v;
+};
+
+#include "util/queue.h"
+QUEUE_GENERATE(tokbuf, struct token, 16)
+
+// Return the grammar symbol stored in token `t`.
+symbol token_sym(struct token *t)
+{
+    return t->s;
+}
+
+// Return the payload value stored in token `t`.
+intptr_t token_val(struct token *t)
+{
+    return t->v;
+}
+
+static void print_token(struct token *t);
+static char *next_token(char *str);
+
+static char *input;
+
+// Remove the token at the front of tokbuf and return it. If that leaves the
+// queue empty the lexer is run once on the remaining input so the queue is
+// refilled. The returned pointer refers to a static slot that the next call
+// overwrites.
+struct token *toklist_eat()
+{
+    static struct token current;
+
+    tokbuf_dequeue(&current); // err not checked
+    if(tokbuf_empty()) {
+        input = next_token(input);
+    }
+
+    return &current;
+}
+
+// Copy the token at the front of tokbuf, without removing it, into a static
+// slot and return a pointer to that slot. The slot is overwritten by the
+// next call.
+struct token *toklist_peek() {
+    static struct token front;
+
+    tokbuf_peek(&front); // err not checked
+    return &front;
+}
+
+// #define _LR_PARSER_DEBUG
+#include "lr-parser.c"
+
+// Entry point: read the whole program from stdin, set up the arena and the
+// type-keyword dictionary, prime the token queue and run the LR parser.
+// Returns 0 on success and 1 on a read error, on input that does not fit in
+// INPUT_CAP, or on a parse failure.
+int main(void)
+{
+    static char input_buf[INPUT_CAP];
+
+    // Read in units of one byte so the return value is a byte count; with a
+    // single INPUT_CAP-sized item fread can only ever return 0 or 1.
+    size_t nread = fread(input_buf, 1, INPUT_CAP, stdin);
+    if(ferror(stdin)) {
+        fprintf(stderr, "ERROR: Could not read input\n");
+        return 1;
+    }
+    if(nread == INPUT_CAP) {
+        // a full buffer leaves no room for the terminator the lexer relies on
+        fprintf(stderr, "INPUT_CAP reached\n");
+        return 1;
+    }
+    input_buf[nread] = '\0';
+
+    global_arena = ARENA_CTX_INIT(buf, ARENA_CAP);
+
+    types_dict = DICT_INIT(types_strings, ntypes_strings, dict_lowercase_char_to_bit);
+    dict_compile(&types_dict);
+
+    input = next_token(input_buf);
+
+    // while(1) {
+    //     struct token *tok = toklist_eat();
+    //     print_token(tok);
+    //     if(token_sym(tok) == END_INPUT) break;
+    // } return 0;
+
+    intptr_t value;
+    if(lr_parser(&value)) {
+        // Show the unconsumed input. It is user data, so it must never be
+        // used as a format string, and it is NULL after a lexer failure.
+        if(input) fputs(input, stderr);
+        return 1;
+    }
+
+    // %jd expects an intmax_t argument
+    fprintf(stderr, "OUTPUT: %jd\n", (intmax_t)value);
+
+    dict_free(&types_dict);
+    return 0;
+}
+
+// Debug helper: print the token's symbol name on one line and, for IDEN and
+// ATOM tokens, the text carried in the token value on a second line.
+static void print_token(struct token *tok)
+{
+    symbol sym = token_sym(tok);
+
+    printf("%s\n", symbol_to_str[sym]);
+    if(sym != IDEN && sym != ATOM) return;
+
+    printf(" %s\n", (char *)token_val(tok));
+}
+
+// STR UTIL
+
+// From here on every strdup() call in this file resolves to the
+// arena-backed copy below instead of the C library function.
+#define strdup(...) _strdup(__VA_ARGS__)
+
+// Copy the NUL-terminated string `str` into memory obtained from xalloc and
+// return the copy. `str` must not be NULL.
+static inline char *_strdup(const char *str)
+{
+    size_t size = strlen(str) + 1; // measured once, terminator included
+    return memcpy(xalloc(size), str, size);
+}
+
+// Copy the first `sub_end` bytes of `str` into a static 128-byte scratch
+// buffer, terminate the copy and return the buffer.
+// - str == NULL: return the buffer unchanged (the previous result).
+// - copy plus terminator would not fit: return NULL, buffer untouched.
+// The result is only valid until the next call with a non-NULL `str`.
+static inline char *substring(char *str, size_t sub_end)
+{
+    static char sub[128];
+
+    if(str == NULL) return sub;
+    if(sub_end+1 > sizeof(sub)) return NULL;
+
+    memcpy(sub, str, sub_end);
+    sub[sub_end] = '\0';
+    return sub;
+}
+
+// Return the index of the first occurrence of `ch` within the first `len`
+// bytes of `str`, or `len` when it does not occur there.
+static inline size_t tillch(char *str, size_t len, char ch)
+{
+    size_t idx = 0;
+
+    while(idx < len && str[idx] != ch) idx++;
+    return idx;
+}
+
+// LEXER
+
+// Return 1 when `c` ends a word for the lexer: whitespace, the string
+// terminator, or one of / , ; . ( ) { }. Return 0 otherwise.
+static inline int issep(char c)
+{
+    switch(c) {
+    case '\0': case '/': case ',': case ';': case '.':
+    case '(':  case ')': case '{': case '}':
+        return 1;
+    default:
+        // <ctype.h> functions are undefined for negative values other than
+        // EOF, and plain char may be signed: convert to unsigned char first
+        return isspace((unsigned char)c) != 0;
+    }
+}
+
+// Return the number of bytes at the start of `str` that come before the
+// first separator (as defined by issep). The string terminator is itself a
+// separator, so the scan stops at the end of the string at the latest.
+static inline int tillsep(char *str)
+{
+    size_t len = 0;
+
+    while(!issep(str[len])) len++;
+    return len;
+}
+
+// Lex one element of a '-'-separated type list (the text after a '/') and
+// continue recursively with the following elements.
+//
+// A leading keyword, if present, is looked up in types_dict and enqueued
+// with both symbol and value set to the dictionary result. Then:
+// - '-' : the next element follows, recurse after it;
+// - '(' : run next_token until a call returns just past a ')' and, if a '-'
+//         follows, keep going with the next element;
+// - anything else: the type list is finished.
+//
+// Returns the position after the type list, or NULL after an error (unknown
+// or over-long keyword, or input exhausted inside the parentheses).
+// NOTE(review): the ')' test only looks at the last consumed byte, so nested
+// parentheses end the argument list early (the original marks this check as
+// "not really").
+static char *typelist_tokenize(char *str)
+{
+    size_t off = 0;
+    while(!issep(str[off]) && str[off] != '-') off++;
+
+    if(off > 0) {
+        // substring returns NULL when the word does not fit its buffer; that
+        // must not reach dict_check or the %s below
+        char *name = substring(str, off);
+        if(!name) {
+            fprintf(stderr, "ERROR: Type or subtype name too long\n");
+            return NULL;
+        }
+
+        int s = dict_check(&types_dict, name);
+        if(s < 0) {
+            fprintf(stderr, "ERROR: Unknown type or subtype %s\n", name);
+            return NULL;
+        }
+
+        tokbuf_enqueue(&(struct token){.s = s, .v = s});
+    }
+
+    str += off;
+
+    switch(str[0]) {
+    case '-': return typelist_tokenize(str+1);
+    case '(':
+        while((str = next_token(str)))
+            if(*(str-1) == ')') { // not really
+                if(str[0] == '-') return typelist_tokenize(str+1);
+                else return str;
+            }
+        return NULL;
+    default: return str;
+    }
+}
+
+// Lex the next word of `str` and enqueue the resulting token(s) on tokbuf.
+//
+// - end of string        -> END_INPUT
+// - , ; . ( ) { }        -> the matching punctuation symbol
+// - /                    -> TYPELIST_START, the type list, TYPELIST_END
+// - word starting 0-9    -> NUM with the parsed value
+// - any other word       -> ATOM if it starts with ':', else IDEN; each
+//                           further ':'-separated part is enqueued as
+//                           COLON followed by IDEN
+//
+// Returns the position after the consumed text. At the end of the string the
+// position of the terminator itself is returned, so repeated calls keep
+// yielding END_INPUT instead of reading past the buffer. A NULL `str` is
+// returned unchanged. On a lexing error END_INPUT is enqueued and NULL is
+// returned.
+// NOTE(review): tokbuf_enqueue results are not checked; one call can enqueue
+// several tokens.
+static char *next_token(char *str)
+{
+    if(!str) return str;
+
+    // <ctype.h> functions need an unsigned char value
+    while(isspace((unsigned char)str[0])) str++;
+
+    struct token tok = {0};
+
+    if(str[0] == '\0') {
+        tok.s = END_INPUT;
+        tokbuf_enqueue(&tok);
+        return str; // stay on the terminator
+    }
+
+    size_t off = tillsep(str);
+    if(off == 0) { // sep
+        off = 1;
+        switch(str[0]) {
+        case ',': tok.s = COMMA; break;
+        case ';': tok.s = SEMICOL; break;
+        case '.': tok.s = DOT; break;
+        case '(': tok.s = LPAREN; break;
+        case ')': tok.s = RPAREN; break;
+        case '{': tok.s = LBRACE; break;
+        case '}': tok.s = RBRACE; break;
+        case '/':
+            tok.s = TYPELIST_START; tokbuf_enqueue(&tok);
+            if(!(str = typelist_tokenize(str+off))) goto fail;
+            tok.s = TYPELIST_END; tokbuf_enqueue(&tok);
+            return str;
+        default:
+            // every separator is handled above; refuse anything else
+            // instead of emitting a zero-initialized token
+            fprintf(stderr, "ERROR: Unexpected character '%c'\n", str[0]);
+            goto fail;
+        }
+    } else if(str[0] >= '0' && str[0] <= '9') { // num
+        char *digits = substring(str, off);
+        if(!digits) { // word does not fit substring's buffer
+            fprintf(stderr, "ERROR: Number too long\n");
+            goto fail;
+        }
+
+        tok.s = NUM;
+        // strtol, unlike atoi, is defined for out-of-range values; trailing
+        // non-digit characters in the word are still ignored
+        tok.v = (intptr_t)strtol(digits, NULL, 10);
+    } else { // iden or atom (possibly with fields)
+        int hasfield = 0; // 0 for the first part, 1 for every ':' field
+
+        while(off > 0) {
+            // length of this part, measured up to the next ':' (a ':' in
+            // the very first position belongs to the part itself)
+            size_t sub_off = tillch(str + 1, off - 1, ':') + 1;
+
+            // for fields, skip the ':' that starts the part
+            char *name = substring(str + hasfield, sub_off - hasfield);
+            if(!name) {
+                fprintf(stderr, "ERROR: Identifier too long\n");
+                goto fail;
+            }
+
+            if(hasfield)
+                tokbuf_enqueue(&(struct token){.s = COLON, .v = 0});
+
+            tokbuf_enqueue(&(struct token){
+                .s = (!hasfield && str[0] == ':') ? ATOM : IDEN,
+                .v = (intptr_t)strdup(name)});
+
+            hasfield = 1;
+            str += sub_off;
+            off -= sub_off;
+        }
+
+        return str;
+    }
+
+    tokbuf_enqueue(&tok);
+    return str+off;
+
+fail:
+    tokbuf_enqueue(&(struct token){.s = END_INPUT});
+    return NULL;
+}
diff --git a/demos/sample-files/lbp.g b/demos/sample-files/lbp.g
new file mode 100644
index 0000000..bc82cb3
--- /dev/null
+++ b/demos/sample-files/lbp.g
@@ -0,0 +1,39 @@
+-terminal NUM IDEN ATOM
+ COMMA SEMICOL DOT COLON
+ RPAREN LPAREN RBRACE LBRACE
+
+ TYPELIST_START TYPELIST_END
+ T_INT T_ENUM T_STRUCT
+ ST_FUNCTION ST_BIG ST_LITTLE ST_NATIVE;
+
+-nonterminal S exprlist expr sym fieldlist basetype subtypelist.
+
+-left LPAREN;
+-left COMMA SEMICOL.
+
+S: exprlist DOT {};
+
+exprlist: expr {}
+ | exprlist expr {}
+ | exprlist COMMA exprlist {}
+ | exprlist SEMICOL exprlist {};
+
+expr: NUM {}
+ | sym {}
+ | sym fieldlist {}
+ | sym TYPELIST_START basetype TYPELIST_END {}
+ | sym TYPELIST_START basetype subtypelist TYPELIST_END {}
+ | LBRACE exprlist DOT RBRACE {}
+ | LPAREN exprlist RPAREN {};
+
+sym: IDEN {} | ATOM {};
+
+fieldlist: COLON IDEN {}
+ | fieldlist fieldlist {};
+
+basetype: T_INT {}
+ | T_STRUCT {} | T_STRUCT LPAREN ATOM RPAREN {}
+ | T_ENUM {} | T_ENUM LPAREN ATOM RPAREN {};
+subtypelist: ST_FUNCTION LPAREN exprlist RPAREN {}
+ | ST_BIG {} | ST_LITTLE {} | ST_NATIVE {}
+ | subtypelist subtypelist {}.