#include #include #include #include #include // TODO: lr parser is bad for debugging #define INPUT_CAP 4096 #define ARENA_CAP 4096 #define ARENA_IMPLEMENTATION #include "util/arena.h" static char buf[ARENA_CAP]; static struct arena_ctx global_arena; static void *xalloc(size_t sz) { void *addr = arena_allocate(&global_arena, sz); if(!addr) { fprintf(stderr, "ERROR: Arena empty\n"); exit(1); } return addr; } // other things here #include "util/list.h" static inline struct list_head *list_new_head(struct list_head *head, struct list_head *new) { if(head) list_add(new, head); return new; } #define list_new_head(head, new) (intptr_t)list_new_head((struct list_head *)head, (struct list_head *)new) // generated #include "bin/lbp.h" #include "bin/lbp.c" #include "util/dict.h" static struct dict types_dict; static struct string_token types_strings[] = { {"int", T_INT}, {"enum", T_ENUM}, {"struct", T_STRUCT}, {"function", ST_FUNCTION}, {"big", ST_BIG}, {"little", ST_LITTLE}, {"native", ST_NATIVE}, }; static size_t ntypes_strings = sizeof(types_strings)/sizeof(*types_strings); static uint8_t dict_lowercase_char_to_bit[256] = { ['a'] = 2, ['b'] = 3, ['c'] = 4, ['d'] = 5, ['e'] = 6, ['f'] = 7, ['g'] = 8, ['h'] = 9, ['i'] = 10, ['j'] = 11, ['k'] = 12, ['l'] = 13, ['m'] = 14, ['n'] = 15, ['o'] = 16, ['p'] = 17, ['q'] = 18, ['r'] = 19, ['s'] = 20, ['t'] = 21, ['u'] = 22, ['v'] = 23, ['w'] = 24, ['x'] = 25, ['y'] = 26, ['z'] = 27, [ 0 ] = 1, [' '] = 1 }; #include "parts/toklist.h" struct token { symbol s; intptr_t v; }; #include "util/queue.h" QUEUE_GENERATE(tokbuf, struct token, 16) symbol token_sym(struct token *t) { return t->s; } intptr_t token_val(struct token *t) { return t->v; } static void print_token(struct token *t); static char *next_token(char *str); static char *input; struct token *toklist_eat() { static struct token t; tokbuf_dequeue(&t); // err not checked if(tokbuf_empty()) input = next_token(input); return &t; } struct token *toklist_peek() { static struct token t; tokbuf_peek(&t); // err not checked return &t; } // #define _LR_PARSER_DEBUG #include "lr-parser.c" int main(void) { static char input_buf[INPUT_CAP]; if(fread(input_buf, INPUT_CAP, 1, stdin) == INPUT_CAP) { fprintf(stderr, "INPUT_CAP reached\n"); return 1; } global_arena = ARENA_CTX_INIT(buf, ARENA_CAP); types_dict = DICT_INIT(types_strings, ntypes_strings, dict_lowercase_char_to_bit); dict_compile(&types_dict); input = next_token(input_buf); // while(1) { // struct token *tok = toklist_eat(); // print_token(tok); // if(token_sym(tok) == END_INPUT) break; // } return 0; intptr_t value; if(lr_parser(&value)) { fprintf(stderr, input); return 1; } fprintf(stderr, "OUTPUT: %jd\n", value); dict_free(&types_dict); return 0; } static void print_token(struct token *tok) { printf("%s\n", symbol_to_str[token_sym(tok)]); if(token_sym(tok) == IDEN || token_sym(tok) == ATOM) printf(" %s\n", (char *)token_val(tok)); } // STR UTIL #define strdup(...) _strdup(__VA_ARGS__) static inline char *_strdup(char *str) { return memcpy(xalloc(strlen(str) + 1), str, strlen(str)+1); } static inline char *substring(char *str, size_t sub_end) { static char sub[128]; if(!str) return sub; if(sub_end+1 > sizeof(sub)) return NULL; sub[sub_end] = '\0'; return memcpy(sub, str, sub_end); } static inline size_t tillch(char *str, size_t len, char ch) { for(size_t i = 0; i < len; i++) if(str[i] == ch) return i; return len; } // LEXER static inline int issep(char c) { return isspace(c) || c == '\0' || c == '/' || c == ',' || c == ';' || c == '.' || c == '(' || c == ')' || c == '{' || c == '}'; } static inline int tillsep(char *str) { size_t i = 0; while(!issep(str[i++])); return i-1; } static char *typelist_tokenize(char *str) { size_t off = 0; while(!issep(str[off]) && str[off] != '-') off++; if(off > 0) { int s = dict_check(&types_dict, substring(str, off)); if(s < 0) { fprintf(stderr, "ERROR: Unknown type or subtype %s\n", substring(NULL, 0)); return NULL; } tokbuf_enqueue(&(struct token){.s = s, .v = s}); } str += off; switch(str[0]) { case '-': return typelist_tokenize(str+1); case '(': while((str = next_token(str))) if(*(str-1)== ')') { // not really if(str[0] == '-') return typelist_tokenize(str+1); else return str; } return NULL; default: return str; } } static char *next_token(char *str) { if(!str) return str; struct token tok = {0}; size_t off = 0; char c0 = str[0]; if(c0 == '\0') tok.s = END_INPUT; if(isspace(c0)) return next_token(str+1); else { off = tillsep(str); if(off == 0) { // sep switch(str[off++]) { case ',': tok.s = COMMA; break; case ';': tok.s = SEMICOL; break; case '.': tok.s = DOT; break; case '(': tok.s = LPAREN; break; case ')': tok.s = RPAREN; break; case '{': tok.s = LBRACE; break; case '}': tok.s = RBRACE; break; case '/': tok.s = TYPELIST_START; tokbuf_enqueue(&tok); if(!(str = typelist_tokenize(str+off))) goto fail; tok.s = TYPELIST_END; tokbuf_enqueue(&tok); return str; default: break; } } else if(c0 >= '0' && c0 <= '9') { // num tok.s = NUM; tok.v = (intptr_t)atoi(substring(str, off)); // not really } else { // iden or atom (possibly with fields) int hasfield = 0; size_t sub_off; do { sub_off = tillch(str + 1, off - 1, ':') + 1; if(hasfield) tokbuf_enqueue(&(struct token){.s = COLON, .v = 0}); tokbuf_enqueue(&(struct token){.s = (!hasfield && str[0] == ':') ? ATOM : IDEN, .v = (intptr_t)strdup(substring(str+hasfield, sub_off-hasfield))}); } while(hasfield = 1, str += sub_off, off -= sub_off, off > 0); return str; } } tokbuf_enqueue(&tok); return str+off; fail: tokbuf_enqueue(&(struct token){.s = END_INPUT}); return NULL; }