From 945bb2a672641cb75d300c946d4b49f77267c3c2 Mon Sep 17 00:00:00 2001 From: Mid <245600-midn@users.noreply.gitlab.com> Date: Sun, 27 Aug 2023 19:48:06 +0300 Subject: [PATCH] Initial commit --- .gitignore | 9 + Makefile | 21 + README.md | 23 ++ src/ast.c | 33 ++ src/ast.h | 264 +++++++++++++ src/cg.c | 319 +++++++++++++++ src/cg.h | 8 + src/dstr.c | 141 +++++++ src/dstr.h | 15 + src/lexer.c | 320 +++++++++++++++ src/lexer.h | 54 +++ src/ntc.c | 42 ++ src/ntc.h | 6 + src/optims.c | 93 +++++ src/optims.h | 5 + src/parse.c | 867 +++++++++++++++++++++++++++++++++++++++++ src/parse.h | 8 + src/reporting.c | 19 + src/reporting.h | 10 + src/types.c | 155 ++++++++ src/types.h | 73 ++++ src/utils.h | 17 + src/vartable.c | 43 ++ src/vartable.h | 56 +++ tests/arrays.nct | 2 + tests/bf.nct | 81 ++++ tests/bit-rounding.nct | 2 + tests/cat.nct | 10 + tests/functions.nct | 9 + tests/if.nct | 11 + tests/mbr.nct | 22 ++ tests/ops.nct | 8 + tests/scoping.nct | 5 + 33 files changed, 2751 insertions(+) create mode 100644 .gitignore create mode 100644 Makefile create mode 100644 README.md create mode 100644 src/ast.c create mode 100644 src/ast.h create mode 100644 src/cg.c create mode 100644 src/cg.h create mode 100644 src/dstr.c create mode 100644 src/dstr.h create mode 100644 src/lexer.c create mode 100644 src/lexer.h create mode 100644 src/ntc.c create mode 100644 src/ntc.h create mode 100644 src/optims.c create mode 100644 src/optims.h create mode 100644 src/parse.c create mode 100644 src/parse.h create mode 100644 src/reporting.c create mode 100644 src/reporting.h create mode 100644 src/types.c create mode 100644 src/types.h create mode 100644 src/utils.h create mode 100644 src/vartable.c create mode 100644 src/vartable.h create mode 100644 tests/arrays.nct create mode 100644 tests/bf.nct create mode 100644 tests/bit-rounding.nct create mode 100644 tests/cat.nct create mode 100644 tests/functions.nct create mode 100644 tests/if.nct create mode 100644 tests/mbr.nct create mode 
100644 tests/ops.nct create mode 100644 tests/scoping.nct diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..08419f7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +a.out +tests/*.o +tests/*.asm +ntc +ntc.exe +*.o +*.err +dos4gw.exe +massif.out.* diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..ac3531c --- /dev/null +++ b/Makefile @@ -0,0 +1,21 @@ +rwildcard=$(wildcard $1$2) $(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2)) + +SOURCES := $(call rwildcard,src/,*.c) +HEADERS := $(call rwildcard,src/,*.h) + +PREFIX = /usr/local + +.PHONY: install clean + +ntc: $(SOURCES) $(HEADERS) +ifdef OW + wcl $(if $(GAS),-DSYNTAX_GAS=1,) $(if $(DEBUG),-DDEBUG=1,) -fe="ntc.exe" -0 -bcl=dos -mt $(if $(DEBUG),,-d0 -os -om -ob -oi -ol -ox) -lr -za99 -i=src $(SOURCES) +else + cc $(if $(GAS),-DSYNTAX_GAS=1,) $(if $(DEBUG),-DDEBUG=1,) -Wall -o ntc -fno-PIE -no-pie -std=gnu11 $(if $(DEBUG),-O0 -g,-Os -s) -fms-extensions -Isrc $(SOURCES) +endif + +install: ntc + mv ./ntc $(PREFIX)/bin + +clean: + rm ./ntc diff --git a/README.md b/README.md new file mode 100644 index 0000000..5666daa --- /dev/null +++ b/README.md @@ -0,0 +1,23 @@ +# N19 Reference Compiler + +Made to compile fast and produce not great, but acceptable output. Currently only 386 output supported (protected and partially real mode). + +# Installation + + make + + sudo make install + +# Command-line usage + + ntc arg1=val1 arg2=val2 ... argn=valn + +# Valid arguments + +This will be ported into a man page later: + + (TBA) x86_target: Target processor feature set (0 for 8086, 3 for 80386, m for generic x86_64) + (TBA) x86_mode: Target operating mode (16 for 16-bit real mode, 32 for 32-bit protected mode or long mode, 64 for 64-bit long mode) + in: Input Nectar source file + +Unknown arguments are ignored. 
diff --git a/src/ast.c b/src/ast.c new file mode 100644 index 0000000..7f152c5 --- /dev/null +++ b/src/ast.c @@ -0,0 +1,33 @@ +#include"ast.h" + +#include +#include +#include + +int BINOP_COMMUTATIVE[] = { + [BINOP_ADD] = 1, + [BINOP_SUB] = 0, + [BINOP_MUL] = 1, + [BINOP_DIV] = 0 +}; + + +AST *ast_expression_optimize(AST *ast) { + return ast; +} + +int ast_expression_equal(AST *a, AST *b) { + if(a->nodeKind != b->nodeKind) return 0; + + if(a->nodeKind == AST_EXPR_PRIMITIVE) { + return a->exprPrim.val == b->exprPrim.val; + } else if(a->nodeKind == AST_EXPR_VAR) { + return a->exprVar.thing == b->exprVar.thing; + } else if(a->nodeKind == AST_EXPR_UNARY_OP) { + return a->exprUnOp.operator == b->exprUnOp.operator && ast_expression_equal(a->exprUnOp.operand, b->exprUnOp.operand); + } else if(a->nodeKind == AST_EXPR_BINARY_OP) { + return a->exprBinOp.operator == b->exprBinOp.operator && ast_expression_equal(a->exprBinOp.operands[0], b->exprBinOp.operands[0]) && ast_expression_equal(a->exprBinOp.operands[1], b->exprBinOp.operands[1]); + } + + return 0; +} \ No newline at end of file diff --git a/src/ast.h b/src/ast.h new file mode 100644 index 0000000..c7ae211 --- /dev/null +++ b/src/ast.h @@ -0,0 +1,264 @@ +#ifndef NCTREF_AST_H +#define NCTREF_AST_H + +#include"types.h" +#include"lexer.h" +#include"vartable.h" + +typedef enum { + AST_CHUNK, + AST_STMT_DECL, + AST_TYPE_IDENTIFIER, + AST_EXPR_PRIMITIVE, + AST_STMT_IF, + AST_EXPR_BINARY_OP, + AST_EXPR_VAR, + AST_TYPE_POINTER, + AST_EXPR_UNARY_OP, + AST_STMT_LOOP, + AST_STMT_BREAK, + AST_STMT_CONTINUE, + AST_EXPR_CALL, + AST_STMT_EXPR, + AST_STMT_ASSIGN, + AST_STMT_EXT_ALIGN, + AST_EXPR_STRING_LITERAL, + AST_EXPR_CAST, + AST_EXPR_ARRAY, + AST_STMT_EXT_ORG, + AST_STMT_EXT_SECTION, +} ASTKind; + +typedef enum { + BINOP_ADD = 0, + BINOP_SUB = 1, + BINOP_BITWISE_AND = 2, + BINOP_BITWISE_OR = 3, + BINOP_BITWISE_XOR = 4, + BINOP_SIMPLES = 5, + BINOP_MUL = 5, + BINOP_DIV = 6, + + BINOP_EQUAL = 7, + BINOP_NEQUAL = 8, + + BINOP_WTF = 
999, +} BinaryOp; +extern int BINOP_COMMUTATIVE[]; + +static inline int binop_is_comparison(BinaryOp op) { + return op == BINOP_EQUAL || op == BINOP_NEQUAL; +} + +static inline BinaryOp binop_comp_opposite(BinaryOp op) { + if(op == BINOP_EQUAL) { + return BINOP_NEQUAL; + } else if(op == BINOP_NEQUAL) { + return BINOP_EQUAL; + } + return BINOP_WTF; +} + +typedef enum { + UNOP_DEREF = 0, + UNOP_NEGATE = 1, + UNOP_BITWISE_NOT = 2, + UNOP_REF = 3, +} UnaryOp; + +typedef enum { + EXPRESSION_CONSTANT_TRUTHY, + EXPRESSION_CONSTANT_FALSY, + EXPRESSION_NOT_CONSTANT +} ASTExprConstantType; + +union AST; + +typedef struct { + ASTKind nodeKind; + Type *type; + ASTExprConstantType constantType; +} ASTExpr; + +typedef struct { + ASTExpr; + + int val; +} ASTExprPrimitive; + +typedef struct { + ASTExpr; + + union AST *operands[2]; + BinaryOp operator; +} ASTExprBinaryOp; + +typedef struct { + ASTExpr; + + UnaryOp operator; + union AST *operand; +} ASTExprUnaryOp; + +typedef struct { + ASTExpr; + + VarTableEntry *thing; +} ASTExprVar; + +typedef struct { + ASTExpr; + + union AST *what; + + union AST **args; +} ASTExprCall; + +typedef struct { + ASTExpr; + + size_t length; + char *data; +} ASTExprStringLiteral; + +typedef struct { + ASTKind nodeKind; + + size_t size; +} ASTType; + +typedef struct { + ASTType; + + Token identifier; +} ASTTypeIdentifier; + +typedef struct { + ASTType; + + union AST *child; + int levels; +} ASTTypePointer; + +typedef struct { + ASTKind nodeKind; + union AST *next; +} ASTStmt; + +typedef struct { + ASTStmt; + + VarTableEntry *thing; + + union AST *expression; +} ASTStmtDecl; + +typedef struct { + ASTKind nodeKind; + + /* Flattened variable array for global register allocation */ + size_t varCount; + VarTableEntry **vars; + + union AST *statementFirst; + union AST *statementLast; +} ASTChunk; + +typedef struct { + ASTStmt; + + union AST *expression; + + union AST *then; +} ASTStmtIf; + +typedef struct { + ASTStmt; + + ASTChunk *body; +} ASTStmtLoop; + 
/*
 * Register name table, indexed by [colour][operand width]:
 * column 0 = 8-bit, column 1 = 16-bit, column 2 = 32-bit name.
 *
 * The table previously listed six rows while being declared with REGS (4)
 * rows; the two excess initialisers (esi/edi) were never part of the array.
 * They are kept here as a note only:
 *   {"sil", "si", "esi"}, {"dil", "di", "edi"}
 * "sil"/"dil" are only encodable in 64-bit mode, so enabling those rows
 * needs more than bumping REGS.
 */
#define REGS 4
static const char *regs[REGS][3] = {
	{"al", "ax", "eax"},
	{"bl", "bx", "ebx"},
	{"cl", "cx", "ecx"},
	{"dl", "dx", "edx"}
};
+static size_t loopStackEnd[LOOPSTACKSIZE]; +static size_t loopStackIdx; + +static const char *direct(int size) { + switch(size) { + case 1: return "db"; + case 2: return "dw"; + case 4: return "dd"; + case 8: return "dq"; + } + abort(); +} + +static const char *spec(int size) { + switch(size) { + case 1: return "byte"; + case 2: return "word"; + case 4: return "dword"; + case 8: return "qword"; + } + abort(); +} + +static const char *specexpr(AST *e) { + return spec(type_size(e->expression.type)); +} + +static const char *xv(VarTableEntry *v) { + assert(v->kind == VARTABLEENTRY_VAR); + +#define XVBUFS 8 +#define XVBUFSZ 8 + static char bufs[XVBUFS][XVBUFSZ]; + static int bufidx = 0; + + char *ret = bufs[bufidx]; + +#ifdef DEBUG + snprintf(ret, XVBUFSZ, "@%i", v->data.var.color); +#else + snprintf(ret, XVBUFSZ, "%s", regs[v->data.var.color][2]); +#endif + + bufidx = (bufidx + 1) % XVBUFS; + + return ret; +} + +static const char *xj(BinaryOp op) { + switch(op) { + case BINOP_EQUAL: return "e"; + case BINOP_NEQUAL: return "ne"; + default: return "wtf"; + } +} + +static const char *xop(AST *e) { +#define XOPBUFS 16 +#define XOPBUFSZ 24 + static char bufs[XOPBUFS][XOPBUFSZ]; + static int bufidx = 0; + + char *ret = bufs[bufidx]; + + if(e->nodeKind == AST_EXPR_VAR) { + VarTableEntry *v = e->exprVar.thing; + + if(v->kind == VARTABLEENTRY_VAR) { + return xv(v); + } else if(v->kind == VARTABLEENTRY_SYMBOL) { + snprintf(ret, XOPBUFSZ, "[%s]", v->data.symbol.name); + } else abort(); + } else if(e->nodeKind == AST_EXPR_PRIMITIVE) { + snprintf(ret, XOPBUFSZ, "%i", e->exprPrim.val); + } else if(e->nodeKind == AST_EXPR_UNARY_OP && e->exprUnOp.operator == UNOP_DEREF && e->exprUnOp.operand->nodeKind == AST_EXPR_BINARY_OP && e->exprUnOp.operand->exprBinOp.operator == BINOP_ADD && e->exprUnOp.operand->exprBinOp.operands[0]->nodeKind == AST_EXPR_UNARY_OP && e->exprUnOp.operand->exprBinOp.operands[1]->nodeKind == AST_EXPR_VAR && 
e->exprUnOp.operand->exprBinOp.operands[0]->exprUnOp.operator == UNOP_REF && e->exprUnOp.operand->exprBinOp.operands[0]->exprUnOp.operand->nodeKind == AST_EXPR_VAR && e->exprUnOp.operand->exprBinOp.operands[0]->exprUnOp.operand->exprVar.thing->kind == VARTABLEENTRY_SYMBOL && e->exprUnOp.operand->exprBinOp.operands[1]->exprVar.thing->kind == VARTABLEENTRY_VAR) { + snprintf(ret, XOPBUFSZ, "[%s + %s]", + e->exprUnOp.operand->exprBinOp.operands[0]->exprUnOp.operand->exprVar.thing->data.symbol.name, + xv(e->exprUnOp.operand->exprBinOp.operands[1]->exprVar.thing)); + } else if(e->nodeKind == AST_EXPR_UNARY_OP && e->exprUnOp.operator == UNOP_DEREF && e->exprUnOp.operand->nodeKind == AST_EXPR_BINARY_OP && e->exprUnOp.operand->exprBinOp.operator == BINOP_ADD && e->exprUnOp.operand->exprBinOp.operands[0]->nodeKind == AST_EXPR_UNARY_OP && e->exprUnOp.operand->exprBinOp.operands[1]->nodeKind == AST_EXPR_BINARY_OP && e->exprUnOp.operand->exprBinOp.operands[0]->exprUnOp.operator == UNOP_REF && e->exprUnOp.operand->exprBinOp.operands[0]->exprUnOp.operand->nodeKind == AST_EXPR_VAR && e->exprUnOp.operand->exprBinOp.operands[0]->exprUnOp.operand->exprVar.thing->kind == VARTABLEENTRY_SYMBOL && e->exprUnOp.operand->exprBinOp.operands[1]->exprBinOp.operator == BINOP_MUL && e->exprUnOp.operand->exprBinOp.operands[1]->exprBinOp.operands[1]->nodeKind == AST_EXPR_VAR && e->exprUnOp.operand->exprBinOp.operands[1]->exprBinOp.operands[0]->nodeKind == AST_EXPR_PRIMITIVE && e->exprUnOp.operand->exprBinOp.operands[1]->exprBinOp.operands[1]->exprVar.thing->kind == VARTABLEENTRY_VAR) { + snprintf(ret, XOPBUFSZ, "[%s + %i * %s]", + e->exprUnOp.operand->exprBinOp.operands[0]->exprUnOp.operand->exprVar.thing->data.symbol.name, + e->exprUnOp.operand->exprBinOp.operands[1]->exprBinOp.operands[0]->exprPrim.val, + xv(e->exprUnOp.operand->exprBinOp.operands[1]->exprBinOp.operands[1]->exprVar.thing)); + } else if(e->nodeKind == AST_EXPR_UNARY_OP && e->exprUnOp.operator == UNOP_REF && 
e->exprUnOp.operand->nodeKind == AST_EXPR_VAR && e->exprUnOp.operand->exprVar.thing->kind == VARTABLEENTRY_SYMBOL) { + snprintf(ret, XOPBUFSZ, "%s", e->exprUnOp.operand->exprVar.thing->data.symbol.name); + } else { + return NULL; + } + + bufidx = (bufidx + 1) % XOPBUFS; + + return ret; +} + +void cg_chunk(AST *a) { + AST *s = a->chunk.statementFirst; + + // Potentially complex pattern matching + while(s) { + if(s->nodeKind == AST_STMT_EXT_SECTION) { + + Token t = s->stmtExtSection.name; + printf("section %.*s\n", (int) t.length, t.content); + + } else if(s->nodeKind == AST_STMT_EXT_ORG) { + + printf("org %lu\n", s->stmtExtOrg.val); + + } else if(s->nodeKind == AST_STMT_DECL && s->stmtDecl.thing->kind == VARTABLEENTRY_SYMBOL) { + VarTableEntry *v = s->stmtDecl.thing; + + if(v->data.symbol.isExternal) { + printf("extern %s\n", v->data.symbol.name); + } else { + if(!v->data.symbol.isLocal) { + printf("global %s\n", v->data.symbol.name); + } + + if(s->stmtDecl.expression) { + puts("A"); + } else { + printf("%s resb %lu\n", v->data.symbol.name, type_size(s->stmtDecl.thing->type)); + } + } + } else if(s->nodeKind == AST_STMT_ASSIGN) { + + if(s->stmtAssign.to->nodeKind == AST_EXPR_BINARY_OP && ast_expression_equal(s->stmtAssign.what, s->stmtAssign.to->exprBinOp.operands[0]) && (s->stmtAssign.to->exprBinOp.operator == BINOP_ADD || s->stmtAssign.to->exprBinOp.operator == BINOP_SUB) && s->stmtAssign.to->exprBinOp.operands[1]->nodeKind == AST_EXPR_PRIMITIVE && s->stmtAssign.to->exprBinOp.operands[1]->exprPrim.val == 1) { + + // inc or dec + + static const char *instrs[] = {"inc", "dec"}; + printf("%s %s %s\n", instrs[s->stmtAssign.to->exprBinOp.operator == BINOP_SUB], specexpr(s->stmtAssign.what), xop(s->stmtAssign.what)); + + } else if(s->stmtAssign.what->nodeKind == AST_EXPR_VAR && s->stmtAssign.to->nodeKind == AST_EXPR_BINARY_OP && s->stmtAssign.to->exprBinOp.operator == BINOP_ADD && s->stmtAssign.to->exprBinOp.operands[0]->nodeKind == AST_EXPR_VAR && 
s->stmtAssign.to->exprBinOp.operands[1]->nodeKind == AST_EXPR_VAR && s->stmtAssign.to->exprBinOp.operands[0]->exprVar.thing->kind == VARTABLEENTRY_VAR && s->stmtAssign.to->exprBinOp.operands[1]->exprVar.thing->kind == VARTABLEENTRY_VAR) { + + printf("lea %s, [%s + %s]\n", + xv(s->stmtAssign.what->exprVar.thing), + xv(s->stmtAssign.to->exprBinOp.operands[0]->exprVar.thing), + xv(s->stmtAssign.to->exprBinOp.operands[1]->exprVar.thing)); + + } else if(s->stmtAssign.what->nodeKind == AST_EXPR_VAR && s->stmtAssign.to->nodeKind == AST_EXPR_BINARY_OP && s->stmtAssign.to->exprBinOp.operator == BINOP_ADD && s->stmtAssign.to->exprBinOp.operands[0]->nodeKind == AST_EXPR_UNARY_OP && s->stmtAssign.to->exprBinOp.operands[0]->exprUnOp.operator == UNOP_REF && s->stmtAssign.to->exprBinOp.operands[0]->exprUnOp.operand->nodeKind == AST_EXPR_VAR && s->stmtAssign.to->exprBinOp.operands[1]->nodeKind == AST_EXPR_VAR && s->stmtAssign.to->exprBinOp.operands[0]->exprUnOp.operand->exprVar.thing->kind == VARTABLEENTRY_SYMBOL && s->stmtAssign.to->exprBinOp.operands[1]->exprVar.thing->kind == VARTABLEENTRY_VAR) { + + printf("lea %s, [%s + %s]\n", + xv(s->stmtAssign.what->exprVar.thing), + s->stmtAssign.to->exprBinOp.operands[0]->exprUnOp.operand->exprVar.thing->data.symbol.name, + xv(s->stmtAssign.to->exprBinOp.operands[1]->exprVar.thing)); + + } else { + + printf("mov %s, %s\n", xop(s->stmtAssign.what), xop(s->stmtAssign.to)); + + } + + } else if(s->nodeKind == AST_STMT_LOOP) { + + size_t lbl0 = nextLocalLabel++; + size_t lbl1 = nextLocalLabel++; + + loopStackStart[loopStackIdx] = lbl0; + loopStackEnd[loopStackIdx] = lbl1; + loopStackIdx++; + + printf(".L%lu:\n", lbl0); + + cg_chunk(s->stmtLoop.body); + + printf("jmp .L%lu\n", lbl0); + + printf(".L%lu:\n", lbl1); + + loopStackIdx--; + + } else if(s->nodeKind == AST_STMT_BREAK) { + + printf("jmp .L%lu\n", loopStackEnd[loopStackIdx - 1]); + + } else if(s->nodeKind == AST_STMT_CONTINUE) { + + printf("jmp .L%lu\n", loopStackStart[loopStackIdx - 
1]); + + } else if(s->nodeKind == AST_STMT_IF) { + + assert(s->stmtIf.expression->nodeKind == AST_EXPR_BINARY_OP && binop_is_comparison(s->stmtIf.expression->exprBinOp.operator)); + + size_t lbl = nextLocalLabel++; + + printf("cmp %s %s, %s\n", specexpr(s->stmtIf.expression->exprBinOp.operands[0]), xop(s->stmtIf.expression->exprBinOp.operands[0]), xop(s->stmtIf.expression->exprBinOp.operands[1])); + printf("j%s .L%lu\n", xj(binop_comp_opposite(s->stmtIf.expression->exprBinOp.operator)), lbl); + + cg_chunk(s->stmtIf.then); + + printf(".L%lu:\n", lbl); + + } else if(s->nodeKind == AST_STMT_EXPR) { + + AST *e = s->stmtExpr.expr; + + if(e->nodeKind == AST_EXPR_CALL) { + puts("push eax"); + puts("push ecx"); + puts("push edx"); + + int argCount = e->exprCall.what->expression.type->function.argCount; + + size_t argSize = 0; + + for(int i = argCount - 1; i >= 0; i--) { + printf("push %s\n", xop(e->exprCall.args[i])); + + argSize += (type_size(e->exprCall.args[i]->expression.type) + 3) & ~3; + } + + assert(e->exprCall.what->nodeKind == AST_EXPR_VAR && e->exprCall.what->exprVar.thing->kind == VARTABLEENTRY_SYMBOL); + + printf("call %s\n", e->exprCall.what->exprVar.thing->data.symbol.name); + + printf("add esp, %lu\n", argSize); + + puts("pop edx"); + puts("pop ecx"); + puts("pop eax"); + } + + } + + s = s->statement.next; + } +} + +/* Welsh-Powell graph coloring */ +static int comparator(const void *A, const void *B) { + VarTableEntry *const *a = A; + VarTableEntry *const *b = B; + return ((*a)->data.var.degree * (*a)->data.var.priority) - ((*b)->data.var.degree * (*b)->data.var.priority); +} +void cg_go(AST *a) { + typedef VarTableEntry *Adjacency[2]; + + size_t adjCount = 0; + Adjacency *adjs = malloc(sizeof(*adjs) * adjCount); + + VarTableEntry **vars = a->chunk.vars; + + for(size_t v1i = 0; v1i < a->chunk.varCount; v1i++) { + for(size_t v2i = 0; v2i < a->chunk.varCount; v2i++) { + if(v1i == v2i) continue; + + VarTableEntry *v1 = vars[v1i]; + VarTableEntry *v2 = 
/*
 * Number of characters "%i" produces for i, including a leading '-' for
 * negative values.
 *
 * The digit count is taken from the unsigned magnitude: negating INT_MIN as
 * an int overflows, and the old code then reported a length of 1 for it,
 * which made dstrfmt under-allocate.
 */
static int ilen(int i) {
	unsigned int magnitude = i < 0 ? 0u - (unsigned int) i : (unsigned int) i;
	int ret = i < 0 ? 1 : 0;

	do {
		ret++;
		magnitude /= 10;
	} while(magnitude > 0);

	return ret;
}

/*
 * Formats src in decimal into a static buffer and returns it.
 * The result is overwritten by the next call.
 */
static char *myitoa(int src) {
	static char ret[12];
	snprintf(ret, sizeof(ret), "%i", src);
	return ret;
}
malloc(sizeof(dstrInternal) + 1); + i->length = 0; + i->data[0] = '\0'; + return (dstr) i + sizeof(dstrInternal); +} + +dstr dstrz(const char *src) { + size_t len = strlen(src); + dstrInternal *i = malloc(sizeof(dstrInternal) + len + 1); + i->length = len; + memcpy(i->data, src, len + 1); + return (dstr) i + sizeof(dstrInternal); +} + +dstr dstrfmt(dstr original, const char *fmt, ...) { + dstrInternal *originalInternal = (dstrInternal*) (original - sizeof(dstrInternal)); + + const char *start = fmt; + + va_list list; + va_start(list, fmt); + + size_t totalLength = 0; + + while(*fmt) { + if(*fmt == '%') { + switch(*++fmt) { + case 's': + totalLength += strlen(va_arg(list, char*)); + break; + case 'c': + if(va_arg(list, int)) totalLength++; + break; + case 'S': { + dstrInternal *i = (dstrInternal*) (va_arg(list, dstr) - sizeof(dstrInternal)); + totalLength += i->length; + break; + } + case 'i': + totalLength += ilen(va_arg(list, int)); + break; + default: { + totalLength++; + } + } + } else totalLength++; + + fmt++; + } + + va_end(list); + + fmt = start; + + originalInternal = realloc(originalInternal, sizeof(dstrInternal) + originalInternal->length + totalLength + 1); + + va_start(list, fmt); + + char *dst = originalInternal->data + originalInternal->length; + originalInternal->length += totalLength; + originalInternal->data[originalInternal->length] = 0; + + while(*fmt) { + if(*fmt == '%') { + switch(*++fmt) { + case 's': { + char *asdfasdf = va_arg(list, char*); + strcpy(dst, asdfasdf); + dst += strlen(asdfasdf); + break; + } + case 'c': { + int c = va_arg(list, int); + if(c) { + *(dst++) = c; + } + break; + } + case 'S': { + dstrInternal *i = (dstrInternal*) (va_arg(list, dstr) - sizeof(dstrInternal)); + memcpy(dst, i->data, i->length); + dst += i->length; + break; + } + case 'i': { + const char *asdf = myitoa(va_arg(list, int)); + strcpy(dst, asdf); + dst += strlen(asdf); + break; + } + default: { + *(dst++) = *fmt; + } + } + } else { + *(dst++) = *fmt; + } + 
// Comply to same order as in the TokenKind enum from src/lexer.h
//
// One entry per TokenKind value, used when reporting "Expected X, got Y".
// A missing comma after "string" used to fuse it with "'!='", shifting the
// following names by one, and TOKEN_CONTINUE had no entry at all.
char *TOKEN_NAMES[] = {
	"identifier", /*  0 TOKEN_IDENTIFIER */
	"'local'",    /*  1 TOKEN_LOCAL */
	"EOF",        /*  2 TOKEN_EOF */
	"number",     /*  3 TOKEN_NUMBER */
	"';'",        /*  4 TOKEN_SEMICOLON */
	"':'",        /*  5 TOKEN_COLON */
	"'if'",       /*  6 TOKEN_IF */
	"'('",        /*  7 TOKEN_PAREN_L */
	"')'",        /*  8 TOKEN_PAREN_R */
	"'{'",        /*  9 TOKEN_SQUIGGLY_L */
	"'}'",        /* 10 TOKEN_SQUIGGLY_R */
	"'='",        /* 11 TOKEN_EQUALS */
	"'+'",        /* 12 TOKEN_PLUS */
	"'-'",        /* 13 TOKEN_MINUS */
	"'*'",        /* 14 TOKEN_STAR */
	"'/'",        /* 15 TOKEN_SLASH */
	"'extern'",   /* 16 TOKEN_EXTERN */
	"'loop'",     /* 17 TOKEN_LOOP */
	"'break'",    /* 18 TOKEN_BREAK */
	"','",        /* 19 TOKEN_COMMA */
	"'&'",        /* 20 TOKEN_AMPERSAND */
	"'|'",        /* 21 TOKEN_VERTICAL_BAR */
	"'^'",        /* 22 TOKEN_CARET */
	"'~'",        /* 23 TOKEN_TILDE */
	"'=='",       /* 24 TOKEN_DOUBLE_EQUALS */
	"'['",        /* 25 TOKEN_SQUAREN_L */
	"']'",        /* 26 TOKEN_SQUAREN_R */
	"'?'",        /* 27 TOKEN_QUESTION_MARK */
	"string",     /* 28 TOKEN_STRING */
	"'!='",       /* 29 TOKEN_EXCLAMATION_EQUALS */
	"'!'",        /* 30 TOKEN_EXCLAMATION */
	"'continue'"  /* 31 TOKEN_CONTINUE */
};
currentRow; + tok.column = currentColumn; + + int c = nextc(f); + + if(c == EOF) { + tok.type = TOKEN_EOF; + return tok; + } + + if(c == ';') { + tok.type = TOKEN_SEMICOLON; + return tok; + } else if(c == ':') { + tok.type = TOKEN_COLON; + return tok; + } else if(c == '(') { + tok.type = TOKEN_PAREN_L; + return tok; + } else if(c == ')') { + tok.type = TOKEN_PAREN_R; + return tok; + } else if(c == '{') { + tok.type = TOKEN_SQUIGGLY_L; + return tok; + } else if(c == '}') { + tok.type = TOKEN_SQUIGGLY_R; + return tok; + } else if(c == '+') { + tok.type = TOKEN_PLUS; + return tok; + } else if(c == '-') { + tok.type = TOKEN_MINUS; + return tok; + } else if(c == '*') { + tok.type = TOKEN_STAR; + return tok; + } else if(c == '&') { + tok.type = TOKEN_AMPERSAND; + return tok; + } else if(c == '|') { + tok.type = TOKEN_VERTICAL_BAR; + return tok; + } else if(c == '^') { + tok.type = TOKEN_CARET; + return tok; + } else if(c == '~') { + tok.type = TOKEN_TILDE; + return tok; + } else if(c == '[') { + tok.type = TOKEN_SQUAREN_L; + return tok; + } else if(c == ']') { + tok.type = TOKEN_SQUAREN_R; + return tok; + } else if(c == '?') { + tok.type = TOKEN_QUESTION_MARK; + return tok; + } else if(c == '!') { + tok.type = TOKEN_EXCLAMATION; + int c = nextc(f); + if(c == '=') { + tok.type = TOKEN_EXCLAMATION_EQUALS; + } else ungetc(c, f); + return tok; + } else if(c == '/') { + int c = nextc(f); + if(c == '*') { /* This is a comment; skip. 
*/ + while(1) { + while((c = nextc(f)) != '*'); + if(nextc(f) == '/') { + return nct_tokenize(f); + } + } + } else { + ungetc(c, f); + tok.type = TOKEN_SLASH; + return tok; + } + } else if(c == '=') { + tok.type = TOKEN_EQUALS; + int c = nextc(f); + if(c == '=') { + tok.type = TOKEN_DOUBLE_EQUALS; + } else ungetc(c, f); + return tok; + } else if(c == ',') { + tok.type = TOKEN_COMMA; + return tok; + } else if(c == '"') { + int capacity = 5; + char *content = malloc(capacity); + + size_t i = 0; + int c; + while(c = nextc(f), c != '"') { + if(i == capacity - 1) { + content = realloc(content, capacity += 4); + } + + if(c == '\\') { + c = nextc(f); + + if(c == '0') c = 0; + else if(c == 'n') c = '\n'; + else if(c == 't') c = '\t'; + } + + content[i++] = c; + } + + content[i] = 0; + + tok.type = TOKEN_STRING; + tok.content = content; + tok.length = i; + return tok; + } else if(isAlpha(c) || c == '@') { + int capacity = 5; + char *content = malloc(capacity); + + size_t i = 0; + content[i++] = c; + + while(c = nextc(f), (isAlphanum(c) || c == '@')) { + if(i == capacity - 1) { + content = realloc(content, capacity += 4); + } + + content[i++] = c; + } + + pushc(c, f); + + content[i] = 0; + if(!strcmp(content, "local")) { + free(content); + tok.type = TOKEN_LOCAL; + return tok; + } else if(!strcmp(content, "if")) { + free(content); + tok.type = TOKEN_IF; + return tok; + } else if(!strcmp(content, "extern")) { + free(content); + tok.type = TOKEN_EXTERN; + return tok; + } else if(!strcmp(content, "loop")) { + free(content); + tok.type = TOKEN_LOOP; + return tok; + } else if(!strcmp(content, "break")) { + free(content); + tok.type = TOKEN_BREAK; + return tok; + } else if(!strcmp(content, "continue")) { + free(content); + tok.type = TOKEN_CONTINUE; + return tok; + } + + tok.type = TOKEN_IDENTIFIER; + tok.content = content; + return tok; + } else if(isNum(c)) { + int capacity = 32; + char *content = malloc(capacity); + + size_t i = 0; + content[i++] = c; + + while(c = nextc(f), 
isNum(c)) { + if(i == capacity - 1) { + content = realloc(content, capacity += 4); + } + + content[i++] = c; + } + + content[i] = 0; + + int base = strtol(content, NULL, 10); + + if(c == 'r') { + content[i++] = c; + + while(c = nextc(f), (isNum(c) || (base > 10 && c >= 'A' && c < ('A' + base - 10)))) { + if(i == 31) { + stahp(1, 6180, "Numbers have a maximum size of 31."); + } + + content[i++] = c; + } + } + + pushc(c, f); + + tok.type = TOKEN_NUMBER; + tok.content = content; + return tok; + } else if(isWS(c)) { + int c; + + while(c = nextc(f), isWS(c)) { + } + + pushc(c, f); + + return nct_tokenize(f); + } + + stahp(currentRow, currentColumn, "Invalid character '%c' (byte %i)", c, c); +} + +Token *nct_lex(FILE *f) { + size_t length = 8, index = 0; + Token *list = malloc(sizeof(*list) * length); + + while(1) { + list[index] = nct_tokenize(f); + + if(list[index].type == TOKEN_EOF) { + return list; + } + + index++; + + if(index == length) { + length *= 2; + list = realloc(list, sizeof(*list) * length); + } + } + + return NULL; /* Doesn't reach here. */ +} diff --git a/src/lexer.h b/src/lexer.h new file mode 100644 index 0000000..c54fcb3 --- /dev/null +++ b/src/lexer.h @@ -0,0 +1,54 @@ +#ifndef NCTREF_LEXER_H +#define NCTREF_LEXER_H + +#include + +extern char *TOKEN_NAMES[]; + +typedef enum { + TOKEN_IDENTIFIER, + TOKEN_LOCAL, + TOKEN_EOF, + TOKEN_NUMBER, + TOKEN_SEMICOLON, + TOKEN_COLON, + TOKEN_IF, + TOKEN_PAREN_L, + TOKEN_PAREN_R, + TOKEN_SQUIGGLY_L, + TOKEN_SQUIGGLY_R, + TOKEN_EQUALS, + TOKEN_PLUS, + TOKEN_MINUS, + TOKEN_STAR, + TOKEN_SLASH, + TOKEN_EXTERN, + TOKEN_LOOP, + TOKEN_BREAK, + TOKEN_COMMA, + TOKEN_AMPERSAND, + TOKEN_VERTICAL_BAR, + TOKEN_CARET, + TOKEN_TILDE, + TOKEN_DOUBLE_EQUALS, + TOKEN_SQUAREN_L, + TOKEN_SQUAREN_R, + TOKEN_QUESTION_MARK, + TOKEN_STRING, + TOKEN_EXCLAMATION_EQUALS, + TOKEN_EXCLAMATION, + TOKEN_CONTINUE, +} TokenKind; + +typedef struct { + TokenKind type; + int row, column; + + char *content; /* NULL for keywords. 
/* Command line as received by main(), kept for ntc_get_arg(). */
static int argc;
static char **argv;

/*
 * Looks up a `name=value` command-line argument.
 *
 * Returns a pointer to the value part of the first argument whose key is
 * exactly `name`, or NULL when there is none. The pointer refers into argv.
 *
 * The key must be followed by '='. A bare prefix match used to accept
 * `input=x` for the key `in`, and an argument consisting of just `in`
 * returned a pointer past its terminator.
 */
const char* ntc_get_arg(const char *name) {
	const size_t nameLen = strlen(name);

	for(int i = 1; i < argc; i++) {
		const char *arg = argv[i];

		if(strncmp(arg, name, nameLen) == 0 && arg[nameLen] == '=') {
			return arg + nameLen + 1;
		}
	}
	return NULL;
}
+// But CP is NECESSARY, otherwise it creates too many variables +// that are unable to be coalesced by the regallocator + +static void recalc_lifespan(VarTableEntry *vte) { + assert(vte->kind == VARTABLEENTRY_VAR); + + size_t start = 0xFFFFFFFF, end = 0; + + UseDef *ud = vte->data.var.usedefFirst; + while(ud) { + if(ud->t < start) start = ud->t; + + if(ud->t > end) end = ud->t; + + ud = ud->next; + } + + vte->data.var.start = start; + vte->data.var.end = end; +} + +void optim_chunk(ASTChunk *chu) { + AST *s = chu->statementFirst, *sPrev = NULL; + while(s) { + if(s->nodeKind == AST_STMT_ASSIGN && s->stmtAssign.what->nodeKind == AST_EXPR_VAR && s->stmtAssign.to->nodeKind == AST_EXPR_VAR) { + VarTableEntry *dst = ((AST*) s->stmtAssign.what)->exprVar.thing; + VarTableEntry *src = ((AST*) s->stmtAssign.to)->exprVar.thing; + + if(dst->kind == VARTABLEENTRY_VAR && src->kind == VARTABLEENTRY_VAR) { + // Find reaching source definition + + UseDef *srcUD = src->data.var.usedefFirst; + while(srcUD && srcUD->use != s->stmtAssign.to) { + srcUD = srcUD->next; + } + + if(!srcUD) { + goto copypropfail; + } + + // Find first use of this def + + UseDef *dstUDPrev = NULL; + UseDef *dstUD = dst->data.var.usedefFirst; + while(dstUD->def != s) { + dstUDPrev = dstUD; + dstUD = dstUD->next; + } + + // Update all definitions + + while(dstUD && dstUD->def == s) { + ((AST*) dstUD->use)->exprVar.thing = src; + + UseDef *next = dstUD->next; + + dstUD->def = srcUD->def; + dstUD->next = srcUD->next; + srcUD->next = dstUD; + + dstUD = next; + + if(dstUDPrev) { + dstUDPrev->next = dstUD; + } else { + dst->data.var.usedefFirst = dstUD; + } + } + + if(!dstUD) { + // dst was never used again -> DELETE ASSIGNMENT COMPLETELY + + if(sPrev) { + sPrev->statement.next = s->statement.next; + // TODO: free + } + } + + recalc_lifespan(dst); + recalc_lifespan(src); + } + } + +copypropfail: + sPrev = s; + s = s->statement.next; + } +} \ No newline at end of file diff --git a/src/optims.h b/src/optims.h new 
file mode 100644 index 0000000..c1247fe --- /dev/null +++ b/src/optims.h @@ -0,0 +1,5 @@ +#pragma once + +#include"ast.h" + +void optim_chunk(ASTChunk*); \ No newline at end of file diff --git a/src/parse.c b/src/parse.c new file mode 100644 index 0000000..76f2942 --- /dev/null +++ b/src/parse.c @@ -0,0 +1,867 @@ +#include"parse.h" + +#include +#include +#include +#include"utils.h" +#include"vartable.h" +#include"reporting.h" +#include +#include + +typedef struct { + Token *tokens; + ssize_t i; + + size_t t; + + VarTable *scope; + + ASTChunk *currentChunk; + + ASTChunk *topLevel; +} Parser; + +static Token get(Parser *P) { + if(P->tokens[P->i].type == TOKEN_EOF) { + return P->tokens[P->i]; + } else { + return P->tokens[P->i++]; + } +} + +static Token expect(Parser *P, TokenKind t) { + Token tok = get(P); + + if(tok.type != t) { + stahp(tok.row, tok.column, "Expected %s, got %s.", TOKEN_NAMES[t], TOKEN_NAMES[tok.type]); + } + + return tok; +} + +static Token peek(Parser *P, int depth) { + int j = 0; + for(; j < depth; j++) { + if(P->tokens[P->i + j].type == TOKEN_EOF) { + break; + } + } + return P->tokens[P->i + j]; +} + +static int maybe(Parser *P, TokenKind t) { + if(peek(P, 0).type == t) { + get(P); + return 1; + } + return 0; +} + +static void pushstat(Parser *P, void *a) { + if(P->currentChunk->statementFirst) { + P->currentChunk->statementLast->statement.next = a; + P->currentChunk->statementLast = a; + } else { + P->currentChunk->statementFirst = P->currentChunk->statementLast = a; + } +} + +static ASTExprPrimitive *parse_prim(Parser *P) { + ASTExprPrimitive *ret = malloc(sizeof(*ret)); + ret->nodeKind = AST_EXPR_PRIMITIVE; + ret->type = (Type*) primitive_parse("s16"); + + Token tok = get(P); + + const char *str = tok.content; + int base = 10; + if(strchr(str, 'r')) { + base = strtol(str, (char**) &str, 10); + str++; /* Go past the r. */ + } + + ret->val = strtol(str, NULL, base); + + ret->constantType = ret->val == 0 ? 
EXPRESSION_CONSTANT_FALSY : EXPRESSION_CONSTANT_TRUTHY; + + return ret; +} + +static AST *exprvar(Parser *P, VarTableEntry *v) { + if(v->kind == VARTABLEENTRY_VAR) { + v->data.var.end = P->t; + } + + AST *a = malloc(sizeof(ASTExprVar)); + a->nodeKind = AST_EXPR_VAR; + a->exprVar.type = v->type; + a->exprVar.thing = v; + return a; +} + +AST *nct_cast_expr(AST *what, Type *to) { + if(what == NULL) return NULL; + + /* Only exists at parse-time, hence not part of type system and is handled separately */ + if(what->nodeKind == AST_EXPR_STRING_LITERAL) { + if(to->type == TYPE_TYPE_ARRAY && type_equal(primitive_parse("u8"), to->array.of) && to->array.length == what->exprStrLit.length) { + ASTExprArray *ret = malloc(sizeof(*ret)); + ret->nodeKind = AST_EXPR_ARRAY; + ret->constantType = what->expression.constantType; + ret->items = malloc(sizeof(*ret->items) * to->array.length); + ret->type = to; + + for(int i = 0; i < to->array.length; i++) { + uint8_t bajt = what->exprStrLit.data[i]; + + ASTExprPrimitive *item = malloc(sizeof(*item)); + item->nodeKind = AST_EXPR_PRIMITIVE; + item->constantType = bajt == 0 ? EXPRESSION_CONSTANT_FALSY : EXPRESSION_CONSTANT_TRUTHY; + item->type = to->array.of; + item->val = bajt; + + ret->items[i] = (AST*) item; + } + + return (AST*) ret; + } else if(to->type == TYPE_TYPE_PRIMITIVE && to->primitive.width == 32) { + ASTExprPrimitive *ret = malloc(sizeof(*ret)); + ret->nodeKind = AST_EXPR_PRIMITIVE; + ret->type = primitive_parse("u32"); + memcpy(&ret->val, what->exprStrLit.data, sizeof(ret->val)); + ret->constantType = what->expression.constantType; + return (AST*) ret; + } else abort(); + } + + if(type_equal(what->expression.type, to)) return what; + + if(!type_is_castable(what->expression.type, to)) { + return NULL; + } + + if(what->nodeKind == AST_EXPR_PRIMITIVE && to->type == TYPE_TYPE_PRIMITIVE) { + ASTExprPrimitive *ret = malloc(sizeof(*ret)); + ret->nodeKind = AST_EXPR_PRIMITIVE; + ret->constantType = what->exprPrim.val == 0 ? 
EXPRESSION_CONSTANT_FALSY : EXPRESSION_CONSTANT_FALSY; + ret->type = to; + ret->val = what->exprPrim.val & (((int64_t) 1 << to->primitive.width) - 1); + return (AST*) ret; + } else { + ASTExprCast *ret = malloc(sizeof(*ret)); + ret->nodeKind = AST_EXPR_CAST; + ret->constantType = EXPRESSION_CONSTANT_TRUTHY; + ret->type = to; + ret->what = what; + ret->to = to; + return (AST*) ret; + } + + abort(); +} + +AST *nct_parse_expression(Parser *P, int lOP) { + if(lOP == 5) { + if(peek(P, 0).type == TOKEN_NUMBER) { + return (AST*) parse_prim(P); + } else if(peek(P, 0).type == TOKEN_IDENTIFIER) { + return exprvar(P, vartable_find(P->scope, get(P).content)); + } else if(peek(P, 0).type == TOKEN_STRING) { + ASTExprStringLiteral *ret = malloc(sizeof(*ret)); + ret->nodeKind = AST_EXPR_STRING_LITERAL; + + Token tok = get(P); + + ret->type = &TYPE_ERROR; + ret->data = tok.content; + ret->length = tok.length; + + ret->constantType = EXPRESSION_CONSTANT_FALSY; + + for(size_t i = 0; i < tok.length; i++) { + if(tok.content[i]) { + ret->constantType = EXPRESSION_CONSTANT_TRUTHY; + break; + } + } + + return (AST*) ret; + } + } else if(lOP == 4) { + if(maybe(P, TOKEN_STAR)) { + ASTExprUnaryOp *astop = malloc(sizeof(*astop)); + astop->nodeKind = AST_EXPR_UNARY_OP; + astop->constantType = EXPRESSION_NOT_CONSTANT; + astop->operator = UNOP_DEREF; + astop->operand = nct_parse_expression(P, lOP); /* Not +1! 
*/ + astop->type = astop->operand->expression.type->pointer.of; + + return (AST*) astop; + } else if(maybe(P, TOKEN_AMPERSAND)) { + ASTExprUnaryOp *astop = malloc(sizeof(*astop)); + astop->nodeKind = AST_EXPR_UNARY_OP; + astop->constantType = EXPRESSION_NOT_CONSTANT; + astop->operator = UNOP_REF; + astop->operand = nct_parse_expression(P, lOP); + astop->type = type_pointer_wrap(astop->operand->expression.type); + + return (AST*) astop; + } else if(maybe(P, TOKEN_MINUS)) { + AST *operand = nct_parse_expression(P, lOP); + + if(operand->nodeKind == AST_EXPR_PRIMITIVE) { + operand->exprPrim.val *= -1; + return operand; + } else { + ASTExprUnaryOp *astop = malloc(sizeof(*astop)); + astop->nodeKind = AST_EXPR_UNARY_OP; + astop->constantType = EXPRESSION_NOT_CONSTANT; + astop->operator = UNOP_NEGATE; + astop->operand = operand; + astop->type = operand->expression.type; + + return (AST*) astop; + } + } else if(maybe(P, TOKEN_TILDE)) { + AST *child = nct_parse_expression(P, lOP); + + if(child->nodeKind == AST_EXPR_PRIMITIVE) { + child->exprPrim.val = \ + ~child->exprPrim.val; + return child; + } else { + ASTExprUnaryOp *astop = malloc(sizeof(*astop)); + astop->nodeKind = AST_EXPR_UNARY_OP; + astop->constantType = EXPRESSION_NOT_CONSTANT; + astop->operator = UNOP_BITWISE_NOT; + astop->operand = child; + astop->type = child->expression.type; + + return (AST *) astop; + } + } else return nct_parse_expression(P, lOP + 1); + } else if(lOP == 3) { + AST *ret = nct_parse_expression(P, lOP + 1); + + while(peek(P, 0).type == TOKEN_PAREN_L || peek(P, 0).type == TOKEN_SQUAREN_L) { + if(maybe(P, TOKEN_PAREN_L)) { + if(ret->expression.type->type != TYPE_TYPE_FUNCTION) { + stahp(P->tokens[P->i].row, P->tokens[P->i].column, "Only function types may be called."); + } + + ASTExprCall *call = malloc(sizeof(*call)); + call->nodeKind = AST_EXPR_CALL; + call->constantType = EXPRESSION_NOT_CONSTANT; + call->type = ret->expression.type->function.ret; + call->what = ret; + call->args = NULL; + ret 
= (AST*) call; + + int argCount = 0; + + if(!maybe(P, TOKEN_PAREN_R)) { + while(peek(P, 0).type != TOKEN_PAREN_R && peek(P, 0).type != TOKEN_COMMA) { + call->args = realloc(call->args, (argCount + 1) * sizeof(AST*)); + call->args[argCount] = nct_parse_expression(P, 0); + + argCount++; + + if(maybe(P, TOKEN_PAREN_R)) { + break; + } else expect(P, TOKEN_COMMA); + } + } + + /* TODO: Check argument count. */ + } else if(maybe(P, TOKEN_SQUAREN_L)) { + ASTExprUnaryOp *ref = malloc(sizeof(*ref)); + ref->nodeKind = AST_EXPR_UNARY_OP; + ref->constantType = EXPRESSION_NOT_CONSTANT; + ref->operator = UNOP_REF; + ref->operand = ret; + ref->type = type_pointer_wrap(ret->expression.type->array.of); + + ASTExprBinaryOp *child = malloc(sizeof(*child)); + child->nodeKind = AST_EXPR_BINARY_OP; + child->constantType = EXPRESSION_NOT_CONSTANT; + child->operands[0] = (AST*) ref; + child->operands[1] = nct_parse_expression(P, 0); + child->operator = BINOP_ADD; + child->type = ref->type; + + int typesize = type_size(ret->expression.type->array.of); + if(typesize != 1) { + ASTExprPrimitive *scale = malloc(sizeof(*scale)); + scale->nodeKind = AST_EXPR_PRIMITIVE; + scale->type = primitive_parse("u16"); + scale->val = typesize; + + ASTExprBinaryOp *mul = malloc(sizeof(*mul)); + mul->nodeKind = AST_EXPR_BINARY_OP; + mul->operator = BINOP_MUL; + mul->operands[0] = scale; + mul->operands[1] = child->operands[1]; + + child->operands[1] = mul; + } + + ASTExprUnaryOp *unop = malloc(sizeof(*unop)); + unop->nodeKind = AST_EXPR_UNARY_OP; + unop->constantType = EXPRESSION_NOT_CONSTANT; + unop->type = ret->expression.type->array.of; + unop->operator = UNOP_DEREF; + unop->operand = (AST*) child; + + ret = (AST*) unop; + + expect(P, TOKEN_SQUAREN_R); + } else abort(); + } + + return ret; + } else if(lOP == 2) { + AST *ret = nct_parse_expression(P, lOP + 1); + + if(peek(P, 0).type == TOKEN_STAR || peek(P, 0).type == TOKEN_SLASH) { + while(1) { + BinaryOp op; + if(maybe(P, TOKEN_STAR)) op = BINOP_MUL; + 
else if(maybe(P, TOKEN_SLASH)) op = BINOP_DIV; + else break; + + ASTExprBinaryOp *astop = malloc(sizeof(*astop)); + astop->nodeKind = AST_EXPR_BINARY_OP; + astop->constantType = EXPRESSION_NOT_CONSTANT; + astop->type = ret->expression.type; + astop->operator = op; + astop->operands[0] = ret; + + AST *operand = nct_parse_expression(P, lOP + 1); + + if(operand->expression.type->type != TYPE_TYPE_PRIMITIVE) { + stahp(P->tokens[P->i].row, P->tokens[P->i].column, "Invalid combination of operator and operand types."); + } + + if(0) if(operand->nodeKind == AST_EXPR_PRIMITIVE) { + VarTableEntry *thing = malloc(sizeof(*thing)); + thing->type = operand->expression.type; + thing->kind = VARTABLEENTRY_VAR; + thing->data.var.start = thing->data.var.end = P->t; + + P->topLevel->vars = realloc(P->topLevel->vars, sizeof(*P->topLevel->vars) * (P->topLevel->varCount + 1)); + P->topLevel->vars[P->topLevel->varCount++] = thing; + + AST *decl = malloc(sizeof(ASTStmtDecl)); + decl->nodeKind = AST_STMT_DECL; + decl->stmtDecl.thing = thing; + decl->stmtDecl.expression = operand; + pushstat(P, decl); + + AST *operand2 = malloc(sizeof(ASTExprVar)); + operand2->nodeKind = AST_EXPR_VAR; + operand2->expression.type = operand->expression.type; + operand2->expression.constantType = EXPRESSION_NOT_CONSTANT; + operand2->exprVar.thing = thing; + operand = operand2; + } + + astop->operands[1] = operand; + + if(!astop->type) { + astop->type = operand->expression.type; + } else { + if(type_size(operand->expression.type) > type_size(astop->type)) { + astop->type = operand->expression.type; + } + } + + ret = (AST*) astop; + } + } + + return ret; + } else if(lOP == 1) { + AST *ret = nct_parse_expression(P, lOP + 1); + + if( + peek(P, 0).type == TOKEN_PLUS + || peek(P, 0).type == TOKEN_MINUS + || peek(P, 0).type == TOKEN_AMPERSAND + || peek(P, 0).type == TOKEN_VERTICAL_BAR + || peek(P, 0).type == TOKEN_CARET + ) { + while(1) { + BinaryOp op; + if(maybe(P, TOKEN_PLUS)) op = BINOP_ADD; + else if(maybe(P, 
TOKEN_MINUS)) op = BINOP_SUB; + else if(maybe(P, TOKEN_AMPERSAND)) op = BINOP_BITWISE_AND; + else if(maybe(P, TOKEN_VERTICAL_BAR)) op = BINOP_BITWISE_OR; + else if(maybe(P, TOKEN_CARET)) op = BINOP_BITWISE_XOR; + else break; + + ASTExprBinaryOp *astop = malloc(sizeof(*astop)); + astop->nodeKind = AST_EXPR_BINARY_OP; + astop->constantType = EXPRESSION_NOT_CONSTANT; + astop->type = ret->expression.type; + astop->operator = op; + astop->operands[0] = ret; + + ASTExpr *operand = &(astop->operands[1] = nct_parse_expression(P, lOP + 1))->expression; + + if(operand->type->type != TYPE_TYPE_PRIMITIVE) { + stahp(P->tokens[P->i].row, P->tokens[P->i].column, "Invalid combination of operator and operand types."); + } + + if(!astop->type) { + astop->type = operand->type; + } else { + if(type_size(operand->type) > type_size(astop->type)) { + astop->type = operand->type; + } + } + + ret = (AST*) astop; + } + } + + return ret; + } else if(lOP == 0) { + AST *ret = nct_parse_expression(P, lOP + 1); + + if(peek(P, 0).type == TOKEN_DOUBLE_EQUALS || peek(P, 0).type == TOKEN_EXCLAMATION_EQUALS) { + while(1) { + BinaryOp op; + if(maybe(P, TOKEN_DOUBLE_EQUALS)) op = BINOP_EQUAL; + else if(maybe(P, TOKEN_EXCLAMATION_EQUALS)) op = BINOP_NEQUAL; + else break; + + ASTExprBinaryOp *astop = malloc(sizeof(*astop)); + astop->nodeKind = AST_EXPR_BINARY_OP; + astop->constantType = EXPRESSION_NOT_CONSTANT; + astop->type = NULL; + astop->operator = op; + astop->operands[0] = ret; + + ASTExpr *operand = &(astop->operands[1] = nct_parse_expression(P, lOP + 1))->expression; + + if(operand->type->type != TYPE_TYPE_PRIMITIVE) { + stahp(P->tokens[P->i].row, P->tokens[P->i].column, "Invalid combination of operator and operand types."); + } + + if(!astop->type) { + astop->type = operand->type; + } else { + if(type_size(operand->type) > type_size(astop->type)) { + astop->type = operand->type; + } + } + + ret = (AST*) astop; + } + } + + ret = ast_expression_optimize(ret); + + return ret; + } +#ifdef DEBUG + 
else abort(); +#endif + + return NULL; +} + +/* Since this function backtracks, don't use aborting functions like expect. */ +Type *nct_parse_typename(Parser *P) { + int oldIdx = P->i; + + if(peek(P, 0).type != TOKEN_IDENTIFIER) { + goto backtrack; + } + + Type *ret = (Type*) primitive_parse(expect(P, TOKEN_IDENTIFIER).content); + + if(!ret) { + goto backtrack; + } + + while(peek(P, 0).type == TOKEN_PAREN_L || peek(P, 0).type == TOKEN_STAR || peek(P, 0).type == TOKEN_SQUAREN_L) { + if(maybe(P, TOKEN_STAR)) { + TypePointer *ptr = malloc(sizeof(*ptr)); + ptr->type = TYPE_TYPE_POINTER; + ptr->of = ret; + + ret = (Type*) ptr; + } else if(maybe(P, TOKEN_PAREN_L)) { + TypeFunction *fun = malloc(sizeof(*fun)); + fun->type = TYPE_TYPE_FUNCTION; + fun->ret = ret; + fun->argCount = 0; + fun->args = malloc(0); + + if(!maybe(P, TOKEN_PAREN_R)) { + while(1) { + fun->argCount++; + fun->args = realloc(fun->args, sizeof(Type*) * fun->argCount); + if((fun->args[fun->argCount - 1] = nct_parse_typename(P)) == NULL) { + free(fun); + goto backtrack; + } + + if(maybe(P, TOKEN_PAREN_R)) { + break; + } else expect(P, TOKEN_COMMA); + } + } + + ret = (Type*) fun; + } else if(maybe(P, TOKEN_SQUAREN_L)) { + TypeArray *arr = malloc(sizeof(*arr)); + arr->type = TYPE_TYPE_ARRAY; + arr->of = ret; + + if(peek(P, 0).type == TOKEN_NUMBER) { + ASTExprPrimitive *prim = parse_prim(P); + + arr->length = prim->val; + + free(prim); + } else if(maybe(P, TOKEN_QUESTION_MARK)) { + arr->length = 0; + } else { + //stahp(P->tokens[P->i].row, P->tokens[P->i].column, "Array size must be either constant or '?'."); + goto backtrack; + } + + expect(P, TOKEN_SQUAREN_R); + + ret = (Type*) arr; + } + } + + return ret; +backtrack: + P->i = oldIdx; + return NULL; +} + +/* Potentially backtracking. Returns NULL upon failure. 
*/ +static AST *parse_declaration(Parser *P) { + int oldIdx = P->i; + + int isLocal = maybe(P, TOKEN_LOCAL); + int isExternal = 0; + if(!isLocal) { + isExternal = maybe(P, TOKEN_EXTERN); + } + + Type *type = nct_parse_typename(P); + + if(!type) goto backtrack; + if(peek(P, 0).type != TOKEN_IDENTIFIER) goto backtrack; + + Token name = expect(P, TOKEN_IDENTIFIER); + + VarTableEntry *entry; + if(peek(P, 0).type == TOKEN_COLON && (entry = vartable_get(P->scope, name.content))) { + /* Forward declared. */ + } else { + entry = calloc(sizeof(*entry), 1); + entry->type = type; + vartable_set(P->scope, name.content, entry); + } + + ASTStmtDecl *ret = malloc(sizeof(*ret)); + ret->nodeKind = AST_STMT_DECL; + ret->thing = entry; + ret->next = NULL; + + if(maybe(P, TOKEN_EQUALS)) { + if(isLocal || isExternal) { /* Impossible, error. */ + fputs("'local' and 'extern' keywords are to be used for symbol declaration only.\n", stderr); + abort(); + return NULL; + } + + entry->kind = VARTABLEENTRY_VAR; + entry->data.var.start = entry->data.var.end = P->t; + entry->data.var.priority = 1; + + ret->expression = NULL; + + pushstat(P, ret); + + AST *assign = malloc(sizeof(ASTStmtAssign)); + assign->nodeKind = AST_STMT_ASSIGN; + assign->stmtAssign.what = exprvar(P, entry); + assign->stmtAssign.to = nct_parse_expression(P, 0); + + ret = assign; + } else if(maybe(P, TOKEN_COLON)) { + if(isExternal) { + fputs("External symbols may not be defined.\n", stderr); + abort(); + return NULL; + } + + entry->kind = VARTABLEENTRY_SYMBOL; + entry->data.symbol.isLocal = isLocal; + entry->data.symbol.isExternal = isExternal; + entry->data.symbol.name = name.content; + + ret->expression = nct_cast_expr(nct_parse_expression(P, 0), type); + + if(ret->expression) { + if(ret->expression->expression.constantType == EXPRESSION_NOT_CONSTANT) { + stahp(1, 4142, "Symbol declaration may contain constant expressions only."); + } + } + } else if(isExternal) { + entry->kind = VARTABLEENTRY_SYMBOL; + 
entry->data.symbol.isLocal = isLocal; + entry->data.symbol.isExternal = isExternal; + entry->data.symbol.name = name.content; + } else { + entry->kind = VARTABLEENTRY_VAR; + entry->data.var.start = entry->data.var.end = P->t; + entry->data.var.priority = 1; + + ret->expression = NULL; + } + + expect(P, TOKEN_SEMICOLON); + + return (AST*) ret; +backtrack: + P->i = oldIdx; + return NULL; +} + +ASTChunk *nct_parse_chunk(Parser*, int, int); +void nct_parse_statement(Parser *P) { + P->t++; + + if(maybe(P, TOKEN_IF)) { + expect(P, TOKEN_PAREN_L); + AST *e = nct_parse_expression(P, 0); + expect(P, TOKEN_PAREN_R); + + ASTStmtIf *ret = malloc(sizeof(*ret)); + ret->nodeKind = AST_STMT_IF; + ret->next = NULL; + + ret->expression = e; + + expect(P, TOKEN_SQUIGGLY_L); + ret->then = (AST*) nct_parse_chunk(P, 0, 0); + expect(P, TOKEN_SQUIGGLY_R); + + pushstat(P, ret); + return; + } else if(maybe(P, TOKEN_LOOP)) { + ASTStmtLoop *ret = malloc(sizeof(*ret)); + ret->nodeKind = AST_STMT_LOOP; + ret->next = NULL; + + expect(P, TOKEN_SQUIGGLY_L); + ret->body = nct_parse_chunk(P, 0, 1); + expect(P, TOKEN_SQUIGGLY_R); + + pushstat(P, ret); + return; + } else if(maybe(P, TOKEN_BREAK)) { + ASTStmtBreak *ret = malloc(sizeof(*ret)); + ret->nodeKind = AST_STMT_BREAK; + ret->next = NULL; + + expect(P, TOKEN_SEMICOLON); + + pushstat(P, ret); + return; + } else if(maybe(P, TOKEN_CONTINUE)) { + ASTStmtContinue *ret = malloc(sizeof(*ret)); + ret->nodeKind = AST_STMT_CONTINUE; + ret->next = NULL; + + expect(P, TOKEN_SEMICOLON); + + pushstat(P, ret); + return; + } else if(peek(P, 0).type == TOKEN_IDENTIFIER) { + if(!strcmp(peek(P, 0).content, "@align")) { + ASTStmtExtAlign *ret = malloc(sizeof(*ret)); + ret->nodeKind = AST_STMT_EXT_ALIGN; + ret->next = NULL; + + get(P); + + expect(P, TOKEN_PAREN_L); + ASTExprPrimitive *val = parse_prim(P); + ret->val = val->val; + free(val); + expect(P, TOKEN_PAREN_R); + + expect(P, TOKEN_SEMICOLON); + + pushstat(P, ret); + return; + } else if(!strcmp(peek(P, 
0).content, "@org")) { + ASTStmtExtOrg *ret = malloc(sizeof(*ret)); + ret->nodeKind = AST_STMT_EXT_ORG; + ret->next = NULL; + + get(P); + + expect(P, TOKEN_PAREN_L); + ASTExprPrimitive *val = parse_prim(P); + ret->val = val->val; + free(val); + expect(P, TOKEN_PAREN_R); + + expect(P, TOKEN_SEMICOLON); + + pushstat(P, ret); + return; + } else if(!strcmp(peek(P, 0).content, "@section")) { + ASTStmtExtSection *ret = malloc(sizeof(*ret)); + ret->nodeKind = AST_STMT_EXT_SECTION; + ret->next = NULL; + + get(P); + + expect(P, TOKEN_PAREN_L); + ret->name = expect(P, TOKEN_STRING); + expect(P, TOKEN_PAREN_R); + + expect(P, TOKEN_SEMICOLON); + + pushstat(P, ret); + return; + } + } + + { + AST *decl = parse_declaration(P); + if(decl) { + pushstat(P, decl); + return; + } + } + + AST *e = nct_parse_expression(P, 0); + + if(maybe(P, TOKEN_EQUALS)) { + ASTStmtAssign *ret = malloc(sizeof(*ret)); + ret->nodeKind = AST_STMT_ASSIGN; + ret->next = NULL; + ret->what = e; + ret->to = nct_parse_expression(P, 0);//nct_cast_expr(nct_parse_expression(P, 0), ret->what->expression.type); + + expect(P, TOKEN_SEMICOLON); + + pushstat(P, ret); + return; + } else { + ASTStmtExpr *ret = malloc(sizeof(*ret)); + ret->nodeKind = AST_STMT_EXPR; + ret->next = NULL; + ret->expr = e; + + expect(P, TOKEN_SEMICOLON); + + pushstat(P, ret); + return; + } +} + +ASTChunk *nct_parse_chunk(Parser *P, int isTopLevel, int varPrioritize) { + AST *ret = malloc(sizeof(ASTChunk)); + ret->nodeKind = AST_CHUNK; + ret->chunk.statementFirst = ret->chunk.statementLast = NULL; + ret->chunk.varCount = 0; + ret->chunk.vars = NULL; + + AST *oldChunk = P->currentChunk; + P->currentChunk = (AST*) ret; + + P->scope = vartable_new(P->scope); + + if(isTopLevel) { + P->topLevel = &ret->chunk; + } + + /* Find all symbol names and struct types ahead of time. Searches for colons as those can only mean symbol declarations */ + + { + size_t oldIdx = P->i; + + while(1) { + TokenKind k = get(P).type; + if(k == (isTopLevel ? 
TOKEN_EOF : TOKEN_SQUIGGLY_R)) { + break; + } else if(k == TOKEN_SQUIGGLY_L) { /* Don't enter deeper scopes. */ + int depth = 0; + + while(1) { + switch(get(P).type) { + case TOKEN_SQUIGGLY_L: depth++; break; + case TOKEN_SQUIGGLY_R: if(depth-- == 0) goto stomp; break; + default:; + } + } + stomp:; + } else if(k == TOKEN_COLON) { + /* Move back to beginning of declaration. */ + do { + P->i--; + } while(P->i >= 0 && P->tokens[P->i].type != TOKEN_SEMICOLON && P->tokens[P->i].type != TOKEN_SQUIGGLY_R && P->tokens[P->i].type != TOKEN_PAREN_R); + P->i++; + + ASTStmtDecl *d = &parse_declaration(P)->stmtDecl; + if(!d) abort(); + + free(d); /* We don't need it. */ + } + } + P->i = oldIdx; + } + + /* Now actual parsing. */ + + while(peek(P, 0).type != (isTopLevel ? TOKEN_EOF : TOKEN_SQUIGGLY_R)) { + nct_parse_statement(P); + } + + size_t nonSymbols = 0; + for(size_t i = 0; i < P->scope->count; i++) { + if(P->scope->data[i]->kind == VARTABLEENTRY_VAR) { + nonSymbols++; + + if(varPrioritize) { + P->scope->data[i]->data.var.priority++; + } + } + } + P->topLevel->vars = realloc(P->topLevel->vars, sizeof(*P->topLevel->vars) * (P->topLevel->varCount + nonSymbols)); + for(size_t i = 0; i < P->scope->count; i++) { + if(P->scope->data[i]->kind == VARTABLEENTRY_VAR) { + P->topLevel->vars[P->topLevel->varCount++] = P->scope->data[i]; + } + } + + P->scope = P->scope->parent; + + P->currentChunk = oldChunk; + + return &ret->chunk; +} + +AST *nct_parse(Token *tokens) { + Parser P; + P.tokens = tokens; + P.t = 0; + P.i = 0; + P.scope = NULL; + return (AST*) nct_parse_chunk(&P, 1, 0); +} diff --git a/src/parse.h b/src/parse.h new file mode 100644 index 0000000..c673b49 --- /dev/null +++ b/src/parse.h @@ -0,0 +1,8 @@ +#ifndef NCTREF_PARSE_H +#define NCTREF_PARSE_H + +#include"ast.h" + +AST *nct_parse(Token*); + +#endif diff --git a/src/reporting.c b/src/reporting.c new file mode 100644 index 0000000..951ed82 --- /dev/null +++ b/src/reporting.c @@ -0,0 +1,19 @@ +#include"reporting.h" + 
+#include +#include +#include + +/* Abort immediately on first error (for now) */ +void stahp(int row, int column, const char *error, ...) { + va_list l; + va_start(l, error); + + fprintf(stderr, "error %i:%i: ", row, column); + vfprintf(stderr, error, l); + fputc('\n', stderr); + + va_end(l); + + exit(1); +} \ No newline at end of file diff --git a/src/reporting.h b/src/reporting.h new file mode 100644 index 0000000..5aed95c --- /dev/null +++ b/src/reporting.h @@ -0,0 +1,10 @@ +#ifndef NCTREF_REPORTING_H +#define NCTREF_REPORTING_H + +#ifndef _GNUC +#define __attribute__(x) +#endif + +void __attribute__((noreturn)) stahp(int, int, const char*, ...); + +#endif diff --git a/src/types.c b/src/types.c new file mode 100644 index 0000000..fd4fc59 --- /dev/null +++ b/src/types.c @@ -0,0 +1,155 @@ +#include"types.h" + +#include"utils.h" +#include +#include +#include + +#include"ntc.h" + +Type TYPE_ERROR = {.type = TYPE_TYPE_ERROR}; + +static TypePrimitive *primitiveDatabase[128]; + +Type *primitive_parse(const char *src) { + size_t hash = djb2(src) % 128; + + for(TypePrimitive *t = primitiveDatabase[hash]; t; t = t->next) { + if(!strcmp(src, t->src)) { + return (Type*) t; + } + } + + TypePrimitive *ret = malloc(sizeof(*ret)); + ret->type = TYPE_TYPE_PRIMITIVE; + ret->src = src; + + if(*src == 'n') { + src++; + ret->isNative = 1; + } else { + ret->isNative = 0; + } + + if(*src == 'u') { + src++; + ret->isUnsigned = 1; + } else if(*src == 's') { + src++; + ret->isUnsigned = 0; + } else { + free(ret); + return NULL; + } + + if(*src == 'm') { + src++; + ret->isMinimum = 1; + } else { + ret->isMinimum = 0; + } + + if(*src == 'f') { + src++; + ret->isFloat = 1; + } else { + ret->isFloat = 0; + } + + ret->width = strtol(src, (char**) &src, 10); + + if(*src == 'b') { + src++; + ret->base = strtol(src, (char**) &src, 10); + } else { + ret->base = 2; + } + + if(*src == 'v') { + src++; + ret->vector = strtol(src, (char**) &src, 10); + } else { + ret->vector = 1; + } + + ret->next = 
primitiveDatabase[hash]; + primitiveDatabase[hash] = ret; + + return (Type*) ret; +} + +size_t type_size(Type *t) { + if(t->type == TYPE_TYPE_PRIMITIVE) { + /* Round to nearest highest power of two. */ + uint16_t w = (t->primitive.width + 7) / 8; + w--; + w |= w >> 1; + w |= w >> 2; + w |= w >> 4; + w |= w >> 8; + w++; + + return w; + } else if(t->type == TYPE_TYPE_POINTER || t->type == TYPE_TYPE_FUNCTION) { + return 4; + } else if(t->type == TYPE_TYPE_ARRAY) { + return type_size(t->array.of) * t->array.length; + } + + abort(); + return -1; +} + +int type_equal(Type *O, Type *T) { + if(O == T) return 1; + if(O->type != T->type) return 0; + + if(O->type == TYPE_TYPE_PRIMITIVE) { + TypePrimitive *o = &O->primitive, *t = &T->primitive; + return o->width == t->width \ + && o->base == t->base \ + && o->isFloat == t->isFloat \ + && o->isUnsigned == t->isUnsigned \ + && o->isNative == t->isNative \ + && o->isMinimum == t->isMinimum \ + && o->vector == t->vector; + } else if(O->type == TYPE_TYPE_POINTER) { + return type_equal(O->pointer.of, T->pointer.of); + } else if(O->type == TYPE_TYPE_ARRAY) { + return type_equal(O->array.of, T->array.of) && O->array.length == T->array.length; + } + + /* Consider nominal typing. 
*/ + + return 0; +} + +/* TODO: cache */ +Type *type_pointer_wrap(Type *t) { + TypePointer *ret = malloc(sizeof(*ret)); + ret->type = TYPE_TYPE_POINTER; + ret->of = t; + return (Type*) ret; +} + +int type_is_castable(Type *from, Type *to) { + if(type_equal(from, to)) return 2; + + if(from->type == TYPE_TYPE_POINTER && to->type == TYPE_TYPE_POINTER) { + return 2; + } + + if(from->type == TYPE_TYPE_PRIMITIVE && to->type == TYPE_TYPE_PRIMITIVE) { + if(from->primitive.width > to->primitive.width) { + return 1; + } else { + return 2; + } + } + + if(from->type == TYPE_TYPE_PRIMITIVE && to->type == TYPE_TYPE_POINTER) { + return 2; + } + + return 0; +} \ No newline at end of file diff --git a/src/types.h b/src/types.h new file mode 100644 index 0000000..ba3170a --- /dev/null +++ b/src/types.h @@ -0,0 +1,73 @@ +#ifndef NCTREF_TYPES_H +#define NCTREF_TYPES_H + +#include +#include + +typedef enum { + TYPE_TYPE_PRIMITIVE, TYPE_TYPE_COMPOUND, TYPE_TYPE_POINTER, TYPE_TYPE_FUNCTION, TYPE_TYPE_ARRAY, TYPE_TYPE_ERROR +} TypeType; + +union Type; + +typedef struct TypePrimitive { + TypeType type; + + const char *src; + + uint16_t width; + int base; + + int isFloat; + int isUnsigned; + int isNative; + int isMinimum; + + int vector; /* 1 for no vector. 
*/ + + struct TypePrimitive *next; +} TypePrimitive; + +typedef struct TypePointer { + TypeType type; + + union Type *of; +} TypePointer; + +typedef struct TypeFunction { + TypeType type; + + union Type *ret; + + union Type **args; + size_t argCount; +} TypeFunction; + +typedef struct TypeArray { + TypeType type; + + union Type *of; + size_t length; /* 0 means unknown */ +} TypeArray; + +typedef union Type { + TypeType type; + + TypePrimitive primitive; + TypePointer pointer; + TypeFunction function; + TypeArray array; +} Type; + +extern Type TYPE_ERROR; + +Type *primitive_parse(const char*); + +size_t type_size(Type*); +int type_equal(Type*, Type*); +Type *type_pointer_wrap(Type*); + +/* 0 = not castable, 1 = explicitly castable, 2 = implicitly castable */ +int type_is_castable(Type *from, Type *to); + +#endif diff --git a/src/utils.h b/src/utils.h new file mode 100644 index 0000000..7b41d05 --- /dev/null +++ b/src/utils.h @@ -0,0 +1,17 @@ +#ifndef NCTREF_UTILS_H +#define NCTREF_UTILS_H + +#include + +inline static size_t djb2(const char *str) { + size_t hash = 5381; + + int c; + while((c = *str++)) { + hash = ((hash << 5) + hash) + c; /* hash * 33 + c */ + } + + return hash; +} + +#endif diff --git a/src/vartable.c b/src/vartable.c new file mode 100644 index 0000000..8127ec5 --- /dev/null +++ b/src/vartable.c @@ -0,0 +1,43 @@ +#include"vartable.h" + +#include"utils.h" +#include +#include + +VarTable *vartable_new(VarTable *parent) { + VarTable *ret = malloc(sizeof(*ret)); + ret->parent = parent; + ret->count = 0; + ret->names = NULL; + ret->data = NULL; + + return ret; +} + +VarTableEntry *vartable_get(VarTable *this, const char *name) { + for(size_t v = 0; v < this->count; v++) { + if(!strcmp(name, this->names[v])) return this->data[v]; + } + return NULL; +} + +VarTableEntry *vartable_find(VarTable *this, const char *name) { + VarTable *tbl = this; + while(tbl) { + VarTableEntry *entry = vartable_get(tbl, name); + if(entry) { + return entry; + } + tbl = 
tbl->parent; + } + return NULL; +} + +VarTableEntry *vartable_set(VarTable *this, const char *name, VarTableEntry *e) { + this->names = realloc(this->names, sizeof(*this->names) * (this->count + 1)); + this->data = realloc(this->data, sizeof(*this->data) * (this->count + 1)); + this->names[this->count] = name; + this->data[this->count] = e; + this->count++; + return e; +} diff --git a/src/vartable.h b/src/vartable.h new file mode 100644 index 0000000..c7bed10 --- /dev/null +++ b/src/vartable.h @@ -0,0 +1,56 @@ +#ifndef NCTREF_VARTABLE_H +#define NCTREF_VARTABLE_H + +#include"types.h" + +typedef enum { + VARTABLEENTRY_SYMBOL, VARTABLEENTRY_TYPE, VARTABLEENTRY_VAR +} VarTableEntryKind; + +typedef struct UseDef { + union AST *use; //expr + union AST *def; //assign stmt + size_t t; + struct UseDef *next; +} UseDef; + +typedef struct VarTableEntry { + Type *type; + + VarTableEntryKind kind; + struct { + union { + struct { + char isLocal; + char isExternal; + const char *name; + } symbol; + struct { + uint16_t color, degree; + size_t start, end; + uint8_t priority; + + UseDef *usedefFirst; + UseDef *usedefLast; + } var; + }; + struct VarTableEntry *offset; + } data; + + void *userdata; +} VarTableEntry; + +typedef struct VarTable { + struct VarTable *parent; + + size_t count; + const char **names; + VarTableEntry **data; +} VarTable; + +VarTable *vartable_new(VarTable*); +VarTableEntry *vartable_get(VarTable*, const char*); +VarTableEntry *vartable_find(VarTable*, const char*); +VarTableEntry *vartable_set(VarTable*, const char*, VarTableEntry*); + +#endif diff --git a/tests/arrays.nct b/tests/arrays.nct new file mode 100644 index 0000000..a747577 --- /dev/null +++ b/tests/arrays.nct @@ -0,0 +1,2 @@ +u32[5] arr; +arr[0] = 0; \ No newline at end of file diff --git a/tests/bf.nct b/tests/bf.nct new file mode 100644 index 0000000..96e965c --- /dev/null +++ b/tests/bf.nct @@ -0,0 +1,81 @@ +@section(".data"); + +local u8[16384] data:; +local u8[16384] code:; + +local u32[64] 
stck:; + +@section(".text"); + +extern u32(u32, u8*, u32) write; +extern u32(u32, u8*, u32) read; + +read(0, &code, 16384); /* Fill code with the program text; presumably a POSIX-style read from descriptor 0. */ + +u32 codePtr = 0; +u32 dataPtr = 0; +u32 stckPtr = 16rFFFFFFFF; /* All ones, so the first increment presumably wraps to index 0. */ + +loop { + if(code[codePtr] == 62) { + dataPtr = dataPtr + 1; /* 62 is ASCII greater-than: next cell. */ + } + if(code[codePtr] == 60) { + dataPtr = dataPtr - 1; /* 60 is ASCII less-than: previous cell. */ + } + if(code[codePtr] == 43) { + data[dataPtr] = data[dataPtr] + 1; /* 43 is ASCII plus: increment the cell. */ + } + if(code[codePtr] == 45) { + data[dataPtr] = data[dataPtr] - 1; /* 45 is ASCII minus: decrement the cell. */ + } + if(code[codePtr] == 46) { + u32 z = &data + dataPtr; + write(1, z, 1); /* 46 is ASCII period: emit the current cell. */ + } + if(code[codePtr] == 44) { + u32 z = &data + dataPtr; + read(0, z, 1); /* 44 is ASCII comma: read one byte into the current cell. */ + } + if(code[codePtr] == 91) { + if(data[dataPtr] == 0) { + u32 depth = 0; /* 91 is ASCII left bracket, 93 is right bracket: the cell is zero, so scan forward until the nesting depth returns to zero. */ + loop { + if(code[codePtr] == 91) { + depth = depth + 1; + } + if(code[codePtr] == 93) { + depth = depth - 1; + } + if(depth == 0) { + break; + } + codePtr = codePtr + 1; + } + } + if(data[dataPtr] != 0) { + stckPtr = stckPtr + 1; + stck[stckPtr] = codePtr; /* The cell is nonzero: remember the position of this left bracket. */ + } + + codePtr = codePtr + 1; + continue; + } + if(code[codePtr] == 93) { + if(data[dataPtr] == 0) { + stckPtr = stckPtr - 1; /* Loop finished: drop the saved left bracket position. */ + } + if(data[dataPtr] != 0) { + codePtr = stck[stckPtr]; /* Loop continues: go back to the saved left bracket; the increment below then steps past it. */ + } + } + if(code[codePtr] == 0) { + loop {} + } + + codePtr = codePtr + 1; +} + +codePtr; +dataPtr; +stckPtr; \ No newline at end of file diff --git a/tests/bit-rounding.nct b/tests/bit-rounding.nct new file mode 100644 index 0000000..f90f904 --- /dev/null +++ b/tests/bit-rounding.nct @@ -0,0 +1,2 @@ +u32 x: 123; +u33 y: 5; \ No newline at end of file diff --git a/tests/cat.nct b/tests/cat.nct new file mode 100644 index 0000000..6f2b450 --- /dev/null +++ b/tests/cat.nct @@ -0,0 +1,10 @@ +extern u8(u8) putchar; +extern u32() getchar; + +loop { + u32 a = getchar(); + if(a == -1) { + break; /* getchar returned -1, presumably end of input. */ + } + putchar(a); +} \ No newline at end of file diff --git a/tests/functions.nct b/tests/functions.nct new file mode 100644 index 0000000..645847d --- /dev/null +++ b/tests/functions.nct @@ -0,0 +1,9 @@ +extern u32() getchar; +extern void(u32) putchar; + +loop { + u8 a = 
getchar(); + if(a - 48) { + putchar(a); /* Presumably reached only when a differs from 48, the ASCII code of the digit zero. */ + } +} \ No newline at end of file diff --git a/tests/if.nct b/tests/if.nct new file mode 100644 index 0000000..e588179 --- /dev/null +++ b/tests/if.nct @@ -0,0 +1,11 @@ +u16 x: 5; +loop { + u16* y = 257; + u9 w = -4; + u4 z = 3 + *y; + u2 o = -w; + + if(x != 0) { + break; + } +} \ No newline at end of file diff --git a/tests/mbr.nct b/tests/mbr.nct new file mode 100644 index 0000000..2b06acb --- /dev/null +++ b/tests/mbr.nct @@ -0,0 +1,22 @@ +@org(16r7C00); /* 7C00 hex is the address where a PC BIOS places the boot sector. */ + +u8* dest = 16rB8000; /* B8000 hex is the start of the VGA colour text buffer. */ +u8* src = &string; + +loop { + if(*src == 0) { + break; /* Stop at the terminating zero byte of string. */ + } + *dest = *src; + dest = dest + 1; + *dest = "_"; /* In VGA text mode the byte after each character is its attribute byte. */ + dest = dest + 1; + src = src + 1; +} + +loop {} + +u8[19] string: "Hello from Nectar!\0"; + +@align(510); /* The two-byte boot signature that follows sits at offsets 510 and 511 of the sector. */ +u16 bootsig: 16rAA55; \ No newline at end of file diff --git a/tests/ops.nct b/tests/ops.nct new file mode 100644 index 0000000..713aced --- /dev/null +++ b/tests/ops.nct @@ -0,0 +1,8 @@ +u16 a = 12; +u16 b = a & 6; +u16 c = b ^ a | 3; +u16 d = 11 * c; + +if(a) { + u16 e = b + c + d; +} \ No newline at end of file diff --git a/tests/scoping.nct b/tests/scoping.nct new file mode 100644 index 0000000..df82ea2 --- /dev/null +++ b/tests/scoping.nct @@ -0,0 +1,5 @@ +u8 a = 5; +if(a) { + u8 a = 10; /* Should not cause scoping errors. */ +} +u8 b = 15; /* `a` in the if statement scope should be free'd. */ \ No newline at end of file