#define _GNU_SOURCE #define _POSIX_C_SOURCE 200809L #define i_implement #include"lexer.h" typedef struct { const char *txt; TokenType type; } KW; static KW KWS[] = { {"return", TOK_RETURN}, {"if", TOK_IF}, {"break", TOK_BREAK}, {"goto", TOK_GOTO}, {"end", TOK_END}, {"do", TOK_DO}, {"for", TOK_FOR}, {"repeat", TOK_REPEAT}, {"until", TOK_UNTIL}, {"local", TOK_LOCAL}, {"then", TOK_THEN}, {"else", TOK_ELSE}, {"elseif", TOK_ELSEIF}, {"false", TOK_FALSE}, {"true", TOK_TRUE}, {"nil", TOK_NIL}, {"function", TOK_FUNCTION}, {"while", TOK_WHILE}, }; static TokenType is_kw(const char *str, size_t len) { for(int i = 0; i < sizeof(KWS) / sizeof(*KWS); i++) { if(len == strlen(KWS[i].txt) && !strncmp(KWS[i].txt, str, len)) { return KWS[i].type; } } return TOK_NAME; } vec_Token ltokenize(const char *buf, size_t len) { vec_Token tokens = {}; size_t row = 1; while(len) { if(isspace(buf[0])) { if(buf[0] == '\n') { row++; } buf++, len--; } else if(isalpha(buf[0])) { size_t idlen = 0; while(idlen < len && isalnum(buf[idlen])) { idlen++; } TokenType tt = is_kw(buf, idlen); vec_Token_push(&tokens, (Token) {.text = tt == TOK_NAME ? strndup(buf, idlen) : NULL, .type = tt}); buf += idlen, len -= idlen; } else if(buf[0] == '+') { vec_Token_push(&tokens, (Token) {.type = TOK_PLUS}); buf++, len--; } else if(buf[0] == '=') { if(len > 1 && buf[1] == '=') { vec_Token_push(&tokens, (Token) {.type = TOK_DOUBLE_EQUAL}); buf++, len--; buf++, len--; } else { vec_Token_push(&tokens, (Token) {.type = TOK_EQUAL}); buf++, len--; } } else if(buf[0] == '(') { vec_Token_push(&tokens, (Token) {.type = TOK_PAREN_L}); buf++, len--; } else if(buf[0] == ')') { vec_Token_push(&tokens, (Token) {.type = TOK_PAREN_R}); buf++, len--; } else if(buf[0] == '[') { vec_Token_push(&tokens, (Token) {.type = TOK_SQUAREN_L}); buf++, len--; } else if(buf[0] == ']') { vec_Token_push(&tokens, (Token) {.type = TOK_SQUAREN_R}); buf++, len--; } else if(buf[0] == '.') { vec_Token_push(&tokens, (Token) {.type = TOK_DOT}); buf++, len--; } else if(buf[0] == ',') { vec_Token_push(&tokens, (Token) {.type = TOK_COMMA}); buf++, len--; } else if(buf[0] == '%') { vec_Token_push(&tokens, (Token) {.type = TOK_PERCENT}); buf++, len--; } else if(buf[0] == '{') { vec_Token_push(&tokens, (Token) {.type = TOK_SQUIGGLY_L}); buf++, len--; } else if(buf[0] == '}') { vec_Token_push(&tokens, (Token) {.type = TOK_SQUIGGLY_R}); buf++, len--; } else if(buf[0] == '#') { vec_Token_push(&tokens, (Token) {.type = TOK_SHARP}); buf++, len--; } else if(len > 1 && buf[0] == '~' && buf[1] == '=') { vec_Token_push(&tokens, (Token) {.type = TOK_NOT_EQUAL}); buf++, len--; buf++, len--; } else if(isdigit(buf[0]) || (len > 1 && buf[0] == '-' && isdigit(buf[1]))) { size_t idlen = 0; if(buf[0] == '-') { idlen++; } while(idlen < len && isdigit(buf[idlen])) { idlen++; } vec_Token_push(&tokens, (Token) {.text = strndup(buf, idlen), .type = TOK_NUMBER}); buf += idlen, len -= idlen; } else if(buf[0] == '-' && (len == 1 || buf[1] != '-')) { vec_Token_push(&tokens, (Token) {.type = TOK_MINUS}); buf++, len--; } else if(len >= 2 && buf[0] == '-' && buf[1] == '-') { while(*buf != '\n') { buf++, len--; } row++; } else if(buf[0] == '\'' || buf[0] == '\"') { bool single = buf[0] == '\''; buf++, len--; size_t strlen = 1; while(strlen < len) { if(buf[strlen] == '\\') { strlen += 2; continue; } else if(buf[strlen] == (single ? '\'' : '\"')) { strlen++; break; } strlen++; } char *str = strndup(buf, strlen - 1); // TODO: unescaping vec_Token_push(&tokens, (Token) {.text = str, .type = TOK_STRING}); buf += strlen, len -= strlen; } else { assert(false); } } return tokens; } void lfreetoks(vec_Token *toks) { for(size_t i = 0; i < toks->size; i++) { Token *tok = &toks->data[i]; if(tok->text) { free(tok->text); } } vec_Token_drop(toks); }