diff options
| author | kartofen <kartofen.mail.0@protonmail.com> | 2025-08-26 01:17:10 +0300 |
|---|---|---|
| committer | kartofen <kartofen.mail.0@protonmail.com> | 2025-08-26 01:17:10 +0300 |
| commit | 46e786db9d1b48b8fbc3502e36f093b755f3e09f (patch) | |
| tree | 9e279216e68f3fe4b0849d1e07184fe674dc551f /demos | |
| parent | 1c83c514c8108fccfec9764da5e4563b98eb871b (diff) | |
grammar for the grammar and lexing and parsing of a new language lbp
Diffstat (limited to 'demos')
| -rw-r--r-- | demos/sample-files/gram-defs.c | 23 | ||||
| -rw-r--r-- | demos/sample-files/gram-skeleton.c | 12 | ||||
| -rw-r--r-- | demos/sample-files/gram.g | 42 | ||||
| -rw-r--r-- | demos/sample-files/lbp-code.lbp | 40 | ||||
| -rw-r--r-- | demos/sample-files/lbp-skeleton.c | 258 | ||||
| -rw-r--r-- | demos/sample-files/lbp.g | 39 |
6 files changed, 401 insertions, 13 deletions
diff --git a/demos/sample-files/gram-defs.c b/demos/sample-files/gram-defs.c index 49329bd..b1ae268 100644 --- a/demos/sample-files/gram-defs.c +++ b/demos/sample-files/gram-defs.c @@ -1,13 +1,13 @@ #include "util/util.h" -#define SYMBOLS(X) \ - X(TERMINAL) X(NONTERM) X(LEFT) X(RIGHT) \ - X(COLON) X(PIPE) X(SEMICOL) X(DOT) \ - X(IDEN) X(NUM) X(ACTION) X(END_INPUT) \ - \ - X(Sp) X(A) X(B) X(C) \ - X(Type) X(Prec) X(Prod) X(Preclist) X(Prodlist) \ - X(Actionlist) X(Idenlist) X(IorNlist) \ - X(SYMBOLS_END) \ +#define SYMBOLS(X) \ + X(TERMINAL) X(NONTERM) X(LEFT) X(RIGHT) X(NOPREC) \ + X(COLON) X(PIPE) X(SEMICOL) X(DOT) \ + X(IDEN) X(NUM) X(ACTION) X(END_INPUT) \ + \ + X(S) X(A) X(B) X(C) \ + X(Type) X(Prec) X(Prod) X(Preclist) X(Prodlist) \ + X(Actionlist) X(Idenlist) X(IorNlist) \ + X(SYMBOLS_END) \ #include "parts/symbol.h" enum symbol { SYMBOLS(X_TO_ENUM) }; @@ -15,20 +15,21 @@ size_t total_symbols = SYMBOLS_END; char **symbol_to_str = (char *([])){ SYMBOLS(X_TO_STR) }; -IMPLEMENT_FUNCPTR(int, symbol_is_terminal, (symbol s)) { return s < Sp; } +IMPLEMENT_FUNCPTR(int, symbol_is_terminal, (symbol s)) { return s < S; } IMPLEMENT_FUNCPTR(int, symbol_is_input_end, (symbol s)) { return s == END_INPUT; } IMPLEMENT_FUNCPTR(int, symbol_is_valid, (symbol s)) { return s < SYMBOLS_END; } #include "parts/grammar.h" #define PROD(LHS, _, ...) {LHS, (symbol[]){__VA_ARGS__}, sizeof((symbol[]){__VA_ARGS__})/sizeof(symbol)} #define GRAMMAR_ACTION_DEF(X) \ - X(PROD(Sp, -->, A, B, C, END_INPUT), "") \ + X(PROD(S, -->, A, B, C, END_INPUT), "") \ \ X(PROD(A, -->, TERMINAL, Idenlist, \ SEMICOL, NONTERM, Idenlist, DOT), \ "handle_type(A(1), A(4))") \ \ X(PROD(B, -->, Preclist), "handle_prec(A(0));") \ + X(PROD(B, -->, NOPREC, DOT), "handle_prec(NULL);") \ X(PROD(Preclist, -->, Prec, SEMICOL, Preclist), \ "v = list_new_head(A(2), A(0));") \ X(PROD(Preclist, -->, Prec, DOT), "v = A(0);") \ diff --git a/demos/sample-files/gram-skeleton.c b/demos/sample-files/gram-skeleton.c index 7a54548..4e40c14 100644 --- a/demos/sample-files/gram-skeleton.c +++ b/demos/sample-files/gram-skeleton.c @@ -5,7 +5,7 @@ #include <ctype.h> #define INPUT_CAP 4096 -#define ARENA_CAP 4096 +#define ARENA_CAP 4096*2 #define ARENA_IMPLEMENTATION #include "util/arena.h" @@ -115,6 +115,13 @@ void handle_prec(struct list_head *preclist) printf(" int *list;\n"); printf(" size_t nlist;\n"); printf("};\n"); + + if(!preclist) { + printf("struct precedence_def *precedence_defs = NULL;\n"); + printf("size_t nprecedence_defs = 0;\n"); + return; + } + printf("struct precedence_def *precedence_defs = (struct precedence_def[]){\n"); list_for_each_entry(struct prec_entry, entry, list, preclist) { printf("{ %d, (int[]){", entry->flag); @@ -211,7 +218,7 @@ int main(void) intptr_t value; if(lr_parser(&value)) { - printf(input); + fprintf(stderr, input); return 1; } @@ -274,6 +281,7 @@ static char *next_token(char *str) else if(strcmp(s, "nonterminal") == 0) tok.s = NONTERM; else if(strcmp(s, "left") == 0) tok.s = LEFT; else if(strcmp(s, "right") == 0) tok.s = RIGHT; + else if(strcmp(s, "noprec") == 0) tok.s = NOPREC; else { fprintf(stderr, "ERROR: Unknown directive '-%s'\n", s); goto fail; } break; case '{': diff --git a/demos/sample-files/gram.g b/demos/sample-files/gram.g new file mode 100644 index 0000000..f9daded --- /dev/null +++ b/demos/sample-files/gram.g @@ -0,0 +1,42 @@ +-terminal + TERMINAL NONTERM LEFT RIGHT COLON NOPREC + PIPE SEMICOL DOT + IDEN NUM ACTION; +-nonterminal + S A B C + Type Prec Prod Preclist Prodlist + Actionlist Idenlist IorNlist. + +-noprec. + +S: A B C {}; + +A: TERMINAL Idenlist SEMICOL NONTERM Idenlist DOT { handle_type(A(1), A(4)) }; + + +B: Preclist { handle_prec(A(0)); } + | NOPREC DOT { handle_prec(NULL); }; + +Preclist: Prec SEMICOL Preclist { v = list_new_head(A(2), A(0)); } + | Prec DOT { v = A(0); }; + +Prec: LEFT IorNlist { v = prec_new(A(1), PRECEDENCE_LEFT_ASSOC); } + | RIGHT IorNlist { v = prec_new(A(1), PRECEDENCE_RIGHT_ASSOC); }; + + +C: Prodlist { handle_prod(A(0)); }; + +Prodlist: Prod SEMICOL Prodlist { v = list_new_head(A(2), A(0)); } + | Prod DOT { v = A(0); }; + +Prod: IDEN COLON Actionlist { v = prod_new(A(0), A(2)); }; + +Actionlist: Idenlist ACTION PIPE Actionlist { v = list_new_head(A(3), action_new(A(0), A(1))); } + | Idenlist ACTION { v = action_new(A(0), A(1)); }; + +Idenlist: IDEN Idenlist { v = list_new_head(A(1), ptr_new(A(0))); } + | IDEN { v = ptr_new(A(0)); }; +IorNlist: IDEN IorNlist { v = list_new_head(A(1), ptr_new(A(0))); } + | IDEN { v = ptr_new(A(0)); } + | NUM IorNlist { v = list_new_head(A(1), num_new(A(0))); } + | NUM { v = num_new(A(0)); }. diff --git a/demos/sample-files/lbp-code.lbp b/demos/sample-files/lbp-code.lbp new file mode 100644 index 0000000..df5bdcc --- /dev/null +++ b/demos/sample-files/lbp-code.lbp @@ -0,0 +1,40 @@ +inbounds/int-function(low, high, val) { + > val low, < val high. +}, + +:downlink_fmt/enum { + (17 |_, :EXTENDED_SQUITTER); + (18 |_, :NON_TRANSPONDER). +}, + +:type_code/enum { + (inbounds 1 4 |_, :AIRCRAFT_IDEN); + (inbounds 5 8 |_, :SURFACE_POS); + (inbounds 9 18 |_, :AIR_POS); + (inbounds 20 22 |_, ---); + (19 |_, ---); + (28 |_, ---); + (29 |_, ---); + (31 |_, ---). +}, + +:aircraft_iden/struct { +-. +}, + +:message/struct { + DF/enum(:downlinkfmt) |5, + CA/enum(:capabilities) |3, + ICAO/int-big |24, + TC/enum(:type_code) |5, + + ((TC :type_code:AIRCRAFT_IDEN, aircraft_iden/struct(:aircraft_iden)); + (TC :type_code:SURFACE_POS, surface_pos/struct { + POS |1, + - |_. + }); + (TC :type_code:AIR_POS, air_pos/struct(:air_pos)) + ) |51, + + CRC |24. +}. diff --git a/demos/sample-files/lbp-skeleton.c b/demos/sample-files/lbp-skeleton.c new file mode 100644 index 0000000..ae0a17f --- /dev/null +++ b/demos/sample-files/lbp-skeleton.c @@ -0,0 +1,258 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <ctype.h> + +// TODO: lr parser is bad for debugging + +#define INPUT_CAP 4096 +#define ARENA_CAP 4096 + +#define ARENA_IMPLEMENTATION +#include "util/arena.h" + +static char buf[ARENA_CAP]; +static struct arena_ctx global_arena; +static void *xalloc(size_t sz) { + void *addr = arena_allocate(&global_arena, sz); + if(!addr) { + fprintf(stderr, "ERROR: Arena empty\n"); exit(1); + } + + return addr; +} + +// other things here +#include "util/list.h" +static inline struct list_head *list_new_head(struct list_head *head, struct list_head *new) +{ + if(head) list_add(new, head); + return new; +} + +#define list_new_head(head, new) (intptr_t)list_new_head((struct list_head *)head, (struct list_head *)new) + +// generated +#include "bin/lbp.h" +#include "bin/lbp.c" + +#include "util/dict.h" +static struct dict types_dict; +static struct string_token types_strings[] = { + {"int", T_INT}, + {"enum", T_ENUM}, + {"struct", T_STRUCT}, + {"function", ST_FUNCTION}, + {"big", ST_BIG}, + {"little", ST_LITTLE}, + {"native", ST_NATIVE}, +}; +static size_t ntypes_strings = sizeof(types_strings)/sizeof(*types_strings); +static uint8_t dict_lowercase_char_to_bit[256] = { + ['a'] = 2, ['b'] = 3, ['c'] = 4, ['d'] = 5, ['e'] = 6, ['f'] = 7, + ['g'] = 8, ['h'] = 9, ['i'] = 10, ['j'] = 11, ['k'] = 12, ['l'] = 13, + ['m'] = 14, ['n'] = 15, ['o'] = 16, ['p'] = 17, ['q'] = 18, ['r'] = 19, + ['s'] = 20, ['t'] = 21, ['u'] = 22, ['v'] = 23, ['w'] = 24, ['x'] = 25, + ['y'] = 26, ['z'] = 27, [ 0 ] = 1, [' '] = 1 +}; + + +#include "parts/toklist.h" +struct token { + symbol s; + intptr_t v; +}; + +#include "util/queue.h" +QUEUE_GENERATE(tokbuf, struct token, 16) + +symbol token_sym(struct token *t) { return t->s; } +intptr_t token_val(struct token *t) { return t->v; } + +static void print_token(struct token *t); +static char *next_token(char *str); + +static char *input; + +struct token *toklist_eat() +{ + static struct token t; + tokbuf_dequeue(&t); // err not checked + if(tokbuf_empty()) input = next_token(input); + return &t; +} + +struct token *toklist_peek() { + static struct token t; + tokbuf_peek(&t); // err not checked + return &t; +} + +// #define _LR_PARSER_DEBUG +#include "lr-parser.c" + +int main(void) +{ + static char input_buf[INPUT_CAP]; + if(fread(input_buf, INPUT_CAP, 1, stdin) == INPUT_CAP) { + fprintf(stderr, "INPUT_CAP reached\n"); + return 1; + } + + global_arena = ARENA_CTX_INIT(buf, ARENA_CAP); + + types_dict = DICT_INIT(types_strings, ntypes_strings, dict_lowercase_char_to_bit); + dict_compile(&types_dict); + + input = next_token(input_buf); + + // while(1) { + // struct token *tok = toklist_eat(); + // print_token(tok); + // if(token_sym(tok) == END_INPUT) break; + // } return 0; + + intptr_t value; + if(lr_parser(&value)) { + fprintf(stderr, input); + return 1; + } + + fprintf(stderr, "OUTPUT: %jd\n", value); + + dict_free(&types_dict); + return 0; +} + +static void print_token(struct token *tok) +{ + printf("%s\n", symbol_to_str[token_sym(tok)]); + if(token_sym(tok) == IDEN || token_sym(tok) == ATOM) printf(" %s\n", (char *)token_val(tok)); +} + +// STR UTIL + +#define strdup(...) _strdup(__VA_ARGS__) +static inline char *_strdup(char *str) +{ + return memcpy(xalloc(strlen(str) + 1), str, strlen(str)+1); +} + +static inline char *substring(char *str, size_t sub_end) +{ + static char sub[128]; + if(!str) return sub; + + if(sub_end+1 > sizeof(sub)) return NULL; + + sub[sub_end] = '\0'; + return memcpy(sub, str, sub_end); +} + +static inline size_t tillch(char *str, size_t len, char ch) +{ + for(size_t i = 0; i < len; i++) if(str[i] == ch) return i; + return len; +} + +// LEXER + +static inline int issep(char c) +{ + return isspace(c) || c == '\0' || c == '/' || c == ',' || c == ';' || + c == '.' || c == '(' || c == ')' || c == '{' || c == '}'; +} + +static inline int tillsep(char *str) +{ + size_t i = 0; + while(!issep(str[i++])); + return i-1; +} + +static char *typelist_tokenize(char *str) +{ + size_t off = 0; + while(!issep(str[off]) && str[off] != '-') off++; + + if(off > 0) { + int s = dict_check(&types_dict, substring(str, off)); + if(s < 0) { + fprintf(stderr, "ERROR: Unknown type or subtype %s\n", substring(NULL, 0)); + return NULL; + } + + tokbuf_enqueue(&(struct token){.s = s, .v = s}); + } + + str += off; + + switch(str[0]) { + case '-': return typelist_tokenize(str+1); + case '(': + while((str = next_token(str))) + if(*(str-1)== ')') { // not really + if(str[0] == '-') return typelist_tokenize(str+1); + else return str; + } + return NULL; + default: return str; + } +} + +static char *next_token(char *str) +{ + if(!str) return str; + + struct token tok = {0}; + size_t off = 0; + char c0 = str[0]; + + if(c0 == '\0') tok.s = END_INPUT; + if(isspace(c0)) return next_token(str+1); + else { + off = tillsep(str); + if(off == 0) { // sep + switch(str[off++]) { + case ',': tok.s = COMMA; break; + case ';': tok.s = SEMICOL; break; + case '.': tok.s = DOT; break; + case '(': tok.s = LPAREN; break; + case ')': tok.s = RPAREN; break; + case '{': tok.s = LBRACE; break; + case '}': tok.s = RBRACE; break; + case '/': + tok.s = TYPELIST_START; tokbuf_enqueue(&tok); + if(!(str = typelist_tokenize(str+off))) goto fail; + tok.s = TYPELIST_END; tokbuf_enqueue(&tok); + return str; + default: break; + } + } else if(c0 >= '0' && c0 <= '9') { // num + tok.s = NUM; + tok.v = (intptr_t)atoi(substring(str, off)); // not really + } else { // iden or atom (possibly with fields) + int hasfield = 0; + size_t sub_off; + + do { + sub_off = tillch(str + 1, off - 1, ':') + 1; + if(hasfield) + tokbuf_enqueue(&(struct token){.s = COLON, .v = 0}); + + tokbuf_enqueue(&(struct token){.s = (!hasfield && str[0] == ':') ? ATOM : IDEN, + .v = (intptr_t)strdup(substring(str+hasfield, sub_off-hasfield))}); + } while(hasfield = 1, str += sub_off, off -= sub_off, off > 0); + + return str; + } + } + + tokbuf_enqueue(&tok); + return str+off; + +fail: + tokbuf_enqueue(&(struct token){.s = END_INPUT}); + return NULL; +} diff --git a/demos/sample-files/lbp.g b/demos/sample-files/lbp.g new file mode 100644 index 0000000..bc82cb3 --- /dev/null +++ b/demos/sample-files/lbp.g @@ -0,0 +1,39 @@ +-terminal NUM IDEN ATOM + COMMA SEMICOL DOT COLON + RPAREN LPAREN RBRACE LBRACE + + TYPELIST_START TYPELIST_END + T_INT T_ENUM T_STRUCT + ST_FUNCTION ST_BIG ST_LITTLE ST_NATIVE; + +-nonterminal S exprlist expr sym fieldlist basetype subtypelist. + +-left LPAREN; +-left COMMA SEMICOL. + +S: exprlist DOT {}; + +exprlist: expr {} + | exprlist expr {} + | exprlist COMMA exprlist {} + | exprlist SEMICOL exprlist {}; + +expr: NUM {} + | sym {} + | sym fieldlist {} + | sym TYPELIST_START basetype TYPELIST_END {} + | sym TYPELIST_START basetype subtypelist TYPELIST_END {} + | LBRACE exprlist DOT RBRACE {} + | LPAREN exprlist RPAREN {}; + +sym: IDEN {} | ATOM {}; + +fieldlist: COLON IDEN {} + | fieldlist fieldlist {}; + +basetype: T_INT {} + | T_STRUCT {} | T_STRUCT LPAREN ATOM RPAREN {} + | T_ENUM {} | T_ENUM LPAREN ATOM RPAREN {}; +subtypelist: ST_FUNCTION LPAREN exprlist RPAREN {} + | ST_BIG {} | ST_LITTLE {} | ST_NATIVE {} + | subtypelist subtypelist {}. |
