#define _GNU_SOURCE
#define _POSIX_C_SOURCE 200809L

#include <assert.h>
#include <ctype.h>
#include <stdbool.h>
#include <string.h>

#define i_implement
#include "lexer.h"
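
// Note: defining i_implement right before including lexer.h follows the STC
// (Smart Template Containers) convention, asking the header to emit the
// vec_Token container implementation into this translation unit.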

typedef struct {
    const char *txt;  // keyword spelling
    TokenType type;   // token type it maps to
} KW;

static KW KWS[] = {
    {"return", TOK_RETURN},
    {"if", TOK_IF},
    {"break", TOK_BREAK},
    {"goto", TOK_GOTO},
    {"end", TOK_END},
    {"do", TOK_DO},
    {"for", TOK_FOR},
    {"repeat", TOK_REPEAT},
    {"until", TOK_UNTIL},
    {"local", TOK_LOCAL},
    {"then", TOK_THEN},
    {"else", TOK_ELSE},
    {"elseif", TOK_ELSEIF},
    {"false", TOK_FALSE},
    {"true", TOK_TRUE},
    {"nil", TOK_NIL},
    {"function", TOK_FUNCTION},
    {"while", TOK_WHILE},
};
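
// Linear scan over the keyword table; with only 18 keywords this is plenty
// fast, and is_kw falls back to TOK_NAME for ordinary identifiers.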
static TokenType is_kw(const char *str, size_t len) {
    for(size_t i = 0; i < sizeof(KWS) / sizeof(*KWS); i++) {
        if(len == strlen(KWS[i].txt) && !strncmp(KWS[i].txt, str, len)) {
            return KWS[i].type;
        }
    }
    return TOK_NAME;
}
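
// Tokenize the len bytes at buf. Recognized forms: keywords and names,
// decimal numbers (with an optional leading '-'), single- or double-quoted
// strings (escape sequences are kept verbatim for now), and the operators
// + = == ~= ( ) [ ] { } . , %. Any other byte trips the assert below.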
vec_Token ltokenize(const char *buf, size_t len) {
    vec_Token tokens = {0};

    size_t row = 1; // current line; tracked for future diagnostics, not yet stored per token

    while(len) {
        if(isspace((unsigned char)buf[0])) {
            if(buf[0] == '\n') {
                row++;
            }
            buf++, len--;
        } else if(isalpha((unsigned char)buf[0]) || buf[0] == '_') {
            // Identifier or keyword: [A-Za-z_][A-Za-z0-9_]*.
            size_t idlen = 0;
            while(idlen < len && (isalnum((unsigned char)buf[idlen]) || buf[idlen] == '_')) {
                idlen++;
            }

            TokenType tt = is_kw(buf, idlen);
            // Keywords carry no text; only plain names keep their spelling.
            vec_Token_push(&tokens, (Token) {.text = tt == TOK_NAME ? strndup(buf, idlen) : NULL, .type = tt});

            buf += idlen, len -= idlen;
        } else if(buf[0] == '+') {
            vec_Token_push(&tokens, (Token) {.type = TOK_PLUS});
            buf++, len--;
        } else if(buf[0] == '=') {
            if(len > 1 && buf[1] == '=') {
                vec_Token_push(&tokens, (Token) {.type = TOK_DOUBLE_EQUAL});
                buf += 2, len -= 2;
            } else {
                vec_Token_push(&tokens, (Token) {.type = TOK_EQUAL});
                buf++, len--;
            }
|
        } else if(buf[0] == '(') {
            vec_Token_push(&tokens, (Token) {.type = TOK_PAREN_L});
            buf++, len--;
        } else if(buf[0] == ')') {
            vec_Token_push(&tokens, (Token) {.type = TOK_PAREN_R});
            buf++, len--;
        } else if(buf[0] == '[') {
            vec_Token_push(&tokens, (Token) {.type = TOK_SQUAREN_L});
            buf++, len--;
        } else if(buf[0] == ']') {
            vec_Token_push(&tokens, (Token) {.type = TOK_SQUAREN_R});
            buf++, len--;
        } else if(buf[0] == '.') {
            vec_Token_push(&tokens, (Token) {.type = TOK_DOT});
            buf++, len--;
        } else if(buf[0] == ',') {
            vec_Token_push(&tokens, (Token) {.type = TOK_COMMA});
            buf++, len--;
        } else if(buf[0] == '%') {
            vec_Token_push(&tokens, (Token) {.type = TOK_PERCENT});
            buf++, len--;
        } else if(buf[0] == '{') {
            vec_Token_push(&tokens, (Token) {.type = TOK_SQUIGGLY_L});
            buf++, len--;
        } else if(buf[0] == '}') {
            vec_Token_push(&tokens, (Token) {.type = TOK_SQUIGGLY_R});
            buf++, len--;
        } else if(len > 1 && buf[0] == '~' && buf[1] == '=') {
            vec_Token_push(&tokens, (Token) {.type = TOK_NOT_EQUAL});
            buf += 2, len -= 2;
        } else if(isdigit((unsigned char)buf[0]) || (len > 1 && buf[0] == '-' && isdigit((unsigned char)buf[1]))) {
            // Number literal; a leading '-' is folded into the literal since
            // there is no standalone minus token yet.
            size_t numlen = 0;

            if(buf[0] == '-') {
                numlen++;
            }
            while(numlen < len && isdigit((unsigned char)buf[numlen])) {
                numlen++;
            }

            vec_Token_push(&tokens, (Token) {.text = strndup(buf, numlen), .type = TOK_NUMBER});

            buf += numlen, len -= numlen;
        } else if(buf[0] == '\'' || buf[0] == '\"') {
            bool single = buf[0] == '\'';

            buf++, len--; // skip the opening quote

            // Scan for the matching close quote; slen counts the string
            // contents plus the closing quote itself.
            size_t slen = 0;
            bool closed = false;

            while(slen < len) {
                if(buf[slen] == '\\') {
                    slen += 2; // keep escape pairs verbatim for now
                    continue;
                } else if(buf[slen] == (single ? '\'' : '\"')) {
                    slen++;
                    closed = true;
                    break;
                }
                slen++;
            }
            assert(closed); // unterminated string literal

            char *str = strndup(buf, slen - 1); // contents, without the closing quote

            // TODO: unescaping

            vec_Token_push(&tokens, (Token) {.text = str, .type = TOK_STRING});

            buf += slen, len -= slen;
        } else {
            assert(false); // unrecognized character
        }
    }

    return tokens;
}
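
// A minimal usage sketch, compiled only with -DLEXER_DEMO. The iteration
// below assumes vec_Token exposes STC-style public `data` and `size`
// members; that is an assumption about lexer.h, not a documented API, so
// adjust it to whatever accessor the container really provides.
#ifdef LEXER_DEMO
#include <stdio.h>

int main(void) {
    const char *src = "local x = 1\nif x == 1 then return 'ok' end";
    vec_Token tokens = ltokenize(src, strlen(src));

    for(size_t i = 0; i < (size_t)tokens.size; i++) { // assumed field name
        Token t = tokens.data[i];                     // assumed field name
        printf("type=%d text=%s\n", (int)t.type, t.text ? t.text : "-");
    }
    return 0;
}
#endif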