Initial commit

This commit is contained in:
Mid 2023-08-27 19:48:06 +03:00
commit 945bb2a672
33 changed files with 2751 additions and 0 deletions

9
.gitignore vendored Normal file
View File

@ -0,0 +1,9 @@
a.out
tests/*.o
tests/*.asm
ntc
ntc.exe
*.o
*.err
dos4gw.exe
massif.out.*

21
Makefile Normal file
View File

@ -0,0 +1,21 @@
# rwildcard(dir, pattern): recursively collects the files below dir that match pattern.
rwildcard=$(wildcard $1$2) $(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2))
# Every .c and .h file under src/ is a prerequisite of the ntc target.
SOURCES := $(call rwildcard,src/,*.c)
HEADERS := $(call rwildcard,src/,*.h)
# Installation prefix; the binary is moved to $(PREFIX)/bin.
PREFIX = /usr/local
.PHONY: install clean
# Optional variables read below: OW (build with wcl into ntc.exe),
# GAS (defines SYNTAX_GAS=1), DEBUG (defines DEBUG=1 and disables optimisation).
ntc: $(SOURCES) $(HEADERS)
ifdef OW
wcl $(if $(GAS),-DSYNTAX_GAS=1,) $(if $(DEBUG),-DDEBUG=1,) -fe="ntc.exe" -0 -bcl=dos -mt $(if $(DEBUG),,-d0 -os -om -ob -oi -ol -ox) -lr -za99 -i=src $(SOURCES)
else
cc $(if $(GAS),-DSYNTAX_GAS=1,) $(if $(DEBUG),-DDEBUG=1,) -Wall -o ntc -fno-PIE -no-pie -std=gnu11 $(if $(DEBUG),-O0 -g,-Os -s) -fms-extensions -Isrc $(SOURCES)
endif
install: ntc
mv ./ntc $(PREFIX)/bin
clean:
rm ./ntc

23
README.md Normal file
View File

@ -0,0 +1,23 @@
# N19 Reference Compiler
Made to compile fast and to produce acceptable, if not great, output. Currently only 386 output is supported (protected mode and, partially, real mode).
# Installation
make
sudo make install
# Command-line usage
ntc arg1=val1 arg2=val2 ... argn=valn
# Valid arguments
This will be ported into a man page later:
(TBA) x86_target: Target processor feature set (0 for 8086, 3 for 80386, m for generic x86_64)
(TBA) x86_mode: Target operating mode (16 for 16-bit real mode, 32 for 32-bit protected mode or long mode, 64 for 64-bit long mode)
in: Input Nectar source file
Unknown arguments are ignored.

33
src/ast.c Normal file
View File

@ -0,0 +1,33 @@
#include"ast.h"
#include<stdint.h>
#include<string.h>
#include<stdlib.h>
int BINOP_COMMUTATIVE[] = {
[BINOP_ADD] = 1,
[BINOP_SUB] = 0,
[BINOP_MUL] = 1,
[BINOP_DIV] = 0
};
/*
 * Expression optimisation hook. No rewriting is implemented: the node that
 * is passed in is handed straight back to the caller.
 */
AST *ast_expression_optimize(AST *ast) {
	AST *result = ast;
	return result;
}
/*
 * Structural equality of two expression trees.
 *
 * Returns 1 when both nodes have the same kind and
 *  - primitives carry the same value,
 *  - variables refer to the same VarTableEntry,
 *  - unary/binary operations use the same operator and have pairwise
 *    equal operands (compared recursively, in order).
 * Any other node kind is never considered equal and yields 0.
 */
int ast_expression_equal(AST *a, AST *b) {
	if(a->nodeKind != b->nodeKind) {
		return 0;
	}

	switch(a->nodeKind) {
	case AST_EXPR_PRIMITIVE:
		return a->exprPrim.val == b->exprPrim.val;
	case AST_EXPR_VAR:
		return a->exprVar.thing == b->exprVar.thing;
	case AST_EXPR_UNARY_OP:
		if(a->exprUnOp.operator != b->exprUnOp.operator) {
			return 0;
		}
		return ast_expression_equal(a->exprUnOp.operand, b->exprUnOp.operand);
	case AST_EXPR_BINARY_OP:
		if(a->exprBinOp.operator != b->exprBinOp.operator) {
			return 0;
		}
		if(!ast_expression_equal(a->exprBinOp.operands[0], b->exprBinOp.operands[0])) {
			return 0;
		}
		return ast_expression_equal(a->exprBinOp.operands[1], b->exprBinOp.operands[1]);
	default:
		return 0;
	}
}

264
src/ast.h Normal file
View File

@ -0,0 +1,264 @@
#ifndef NCTREF_AST_H
#define NCTREF_AST_H
#include"types.h"
#include"lexer.h"
#include"vartable.h"
/*
 * Discriminator stored in the nodeKind field that starts every AST node
 * struct in this header; it tells which member of union AST is in use.
 * The names are prefixed by category: AST_STMT_* for statements,
 * AST_EXPR_* for expressions, AST_TYPE_* for type nodes, plus AST_CHUNK.
 */
typedef enum {
AST_CHUNK,
AST_STMT_DECL,
AST_TYPE_IDENTIFIER,
AST_EXPR_PRIMITIVE,
AST_STMT_IF,
AST_EXPR_BINARY_OP,
AST_EXPR_VAR,
AST_TYPE_POINTER,
AST_EXPR_UNARY_OP,
AST_STMT_LOOP,
AST_STMT_BREAK,
AST_STMT_CONTINUE,
AST_EXPR_CALL,
AST_STMT_EXPR,
AST_STMT_ASSIGN,
AST_STMT_EXT_ALIGN,
AST_EXPR_STRING_LITERAL,
AST_EXPR_CAST,
AST_EXPR_ARRAY,
AST_STMT_EXT_ORG,
AST_STMT_EXT_SECTION,
} ASTKind;
/*
 * Binary operators. The numeric values are fixed because tables are indexed
 * by them. BINOP_SIMPLES shares the value of BINOP_MUL; it appears to mark
 * the end of the add/sub/and/or/xor group (TODO confirm intended use).
 * BINOP_WTF is the "no such operator" sentinel.
 */
typedef enum {
	BINOP_ADD = 0,
	BINOP_SUB = 1,
	BINOP_BITWISE_AND = 2,
	BINOP_BITWISE_OR = 3,
	BINOP_BITWISE_XOR = 4,
	BINOP_SIMPLES = 5,
	BINOP_MUL = 5,
	BINOP_DIV = 6,
	BINOP_EQUAL = 7,
	BINOP_NEQUAL = 8,
	BINOP_WTF = 999,
} BinaryOp;

/* Commutativity table indexed by BinaryOp; defined in ast.c. */
extern int BINOP_COMMUTATIVE[];

/* Returns 1 for the comparison operators (== and !=), 0 for everything else. */
static inline int binop_is_comparison(BinaryOp op) {
	switch(op) {
	case BINOP_EQUAL:
	case BINOP_NEQUAL:
		return 1;
	default:
		return 0;
	}
}

/*
 * Returns the comparison that is true exactly when `op` is false
 * (== <-> !=). Non-comparison operators map to BINOP_WTF.
 */
static inline BinaryOp binop_comp_opposite(BinaryOp op) {
	switch(op) {
	case BINOP_EQUAL:
		return BINOP_NEQUAL;
	case BINOP_NEQUAL:
		return BINOP_EQUAL;
	default:
		return BINOP_WTF;
	}
}
/* Unary operators stored in ASTExprUnaryOp.operator. */
typedef enum {
UNOP_DEREF = 0,
UNOP_NEGATE = 1,
UNOP_BITWISE_NOT = 2,
UNOP_REF = 3,
} UnaryOp;
/*
 * Compile-time knowledge about an expression's truth value, stored in
 * ASTExpr.constantType.
 */
typedef enum {
EXPRESSION_CONSTANT_TRUTHY,
EXPRESSION_CONSTANT_FALSY,
EXPRESSION_NOT_CONSTANT
} ASTExprConstantType;
/* Forward declaration so node structs can point at arbitrary child nodes. */
union AST;
/*
 * Common header of every expression node. The other expression structs
 * begin with an unnamed `ASTExpr;` member, which makes these fields directly
 * accessible on them (an extension: the build passes -fms-extensions).
 */
typedef struct {
ASTKind nodeKind;
Type *type;
ASTExprConstantType constantType;
} ASTExpr;
/* Integer literal. */
typedef struct {
ASTExpr;
int val;
} ASTExprPrimitive;
/* Binary operation on operands[0] and operands[1]. */
typedef struct {
ASTExpr;
union AST *operands[2];
BinaryOp operator;
} ASTExprBinaryOp;
/* Unary operation applied to operand. */
typedef struct {
ASTExpr;
UnaryOp operator;
union AST *operand;
} ASTExprUnaryOp;
/* Reference to a variable-table entry. */
typedef struct {
ASTExpr;
VarTableEntry *thing;
} ASTExprVar;
/*
 * Call of `what` with the argument expressions in `args`. No count is stored
 * here; the code generator reads it from the callee's function type.
 */
typedef struct {
ASTExpr;
union AST *what;
union AST **args;
} ASTExprCall;
/* String literal: `length` bytes at `data`. */
typedef struct {
ASTExpr;
size_t length;
char *data;
} ASTExprStringLiteral;
/* Common header of type nodes; embedded unnamed in the structs below. */
typedef struct {
ASTKind nodeKind;
size_t size;
} ASTType;
/* Type referred to by name. */
typedef struct {
ASTType;
Token identifier;
} ASTTypeIdentifier;
/* Pointer type: `levels` of indirection applied to `child`. */
typedef struct {
ASTType;
union AST *child;
int levels;
} ASTTypePointer;
/*
 * Common header of statement nodes. Statements form a singly linked list
 * through `next`; the statement structs below embed this header unnamed.
 */
typedef struct {
ASTKind nodeKind;
union AST *next;
} ASTStmt;
/* Declaration of `thing`, optionally with an initialiser expression. */
typedef struct {
ASTStmt;
VarTableEntry *thing;
union AST *expression;
} ASTStmtDecl;
/* A sequence of statements (statementFirst .. statementLast). */
typedef struct {
ASTKind nodeKind;
/* Flattened variable array for global register allocation */
size_t varCount;
VarTableEntry **vars;
union AST *statementFirst;
union AST *statementLast;
} ASTChunk;
/* Conditional: `then` is generated when `expression` holds. */
typedef struct {
ASTStmt;
union AST *expression;
union AST *then;
} ASTStmtIf;
/* Unconditional loop around `body`. */
typedef struct {
ASTStmt;
ASTChunk *body;
} ASTStmtLoop;
/* `break` statement; carries no data beyond the statement header. */
typedef struct {
ASTStmt;
} ASTStmtBreak;
/* `continue` statement; carries no data beyond the statement header. */
typedef struct {
ASTStmt;
} ASTStmtContinue;
/* Expression evaluated as a statement. */
typedef struct {
ASTStmt;
union AST *expr;
} ASTStmtExpr;
/* Assignment: `what` is the destination, `to` is the value assigned. */
typedef struct {
ASTStmt;
union AST *what;
union AST *to;
} ASTStmtAssign;
/* Alignment extension statement with its numeric argument. */
typedef struct {
ASTStmt;
int val;
} ASTStmtExtAlign;
/* Cast of `what` to the type `to`. */
typedef struct {
ASTExpr;
union AST *what;
Type *to;
char reinterpretation; /* 1 = as, 0 = to */
} ASTExprCast;
/* Array expression; the item count is not stored in this struct. */
typedef struct {
ASTExpr;
union AST **items;
} ASTExprArray;
/* Origin extension statement; emitted as an `org` directive. */
typedef struct {
ASTStmt;
size_t val;
} ASTStmtExtOrg;
/* Section extension statement; emitted as a `section` directive. */
typedef struct {
ASTStmt;
Token name;
} ASTStmtExtSection;
/*
 * A node of the syntax tree. Every member starts with an ASTKind, so
 * `nodeKind` can always be read to find out which member is valid.
 */
typedef union AST {
ASTKind nodeKind;
ASTChunk chunk;
ASTStmt statement;
ASTStmtDecl stmtDecl;
ASTStmtIf stmtIf;
ASTStmtLoop stmtLoop;
ASTStmtBreak stmtBreak;
ASTStmtContinue stmtContinue;
ASTStmtExpr stmtExpr;
ASTStmtAssign stmtAssign;
ASTExpr expression;
ASTExprPrimitive exprPrim;
ASTExprBinaryOp exprBinOp;
ASTExprUnaryOp exprUnOp;
ASTExprVar exprVar;
ASTExprCall exprCall;
ASTStmtExtAlign stmtExtAlign;
ASTExprStringLiteral exprStrLit;
ASTExprCast exprCast;
ASTExprArray exprArray;
ASTStmtExtOrg stmtExtOrg;
ASTStmtExtSection stmtExtSection;
} AST;
/* Optimisation hook for an expression tree; returns the tree to use. */
AST *ast_expression_optimize(AST*);
/* Structural comparison of two expression trees; non-zero when equal. */
int ast_expression_equal(AST*, AST*);
#endif

319
src/cg.c Normal file
View File

@ -0,0 +1,319 @@
#include"cg.h"
#include<stdlib.h>
#include<signal.h>
#include<string.h>
#include<assert.h>
/*
 * Number of allocatable registers. Must equal the number of rows in regs:
 * with a smaller value the extra initialiser rows are excess elements, which
 * the compiler rejects or silently discards.
 */
#define REGS 6
/* Register names per allocator colour, as {8-bit, 16-bit, 32-bit} spellings. */
static const char *regs[REGS][3] = {
	{"al", "ax", "eax"},
	{"bl", "bx", "ebx"},
	{"cl", "cx", "ecx"},
	{"dl", "dx", "edx"},
	{"sil", "si", "esi"},
	{"dil", "di", "edi"}
};
/* Instruction mnemonic for each of the add/sub/and/or/xor operators, indexed by BinaryOp. */
static const char *BINOP_SIMPLE_INSTRS[] = {[BINOP_ADD] = "add", [BINOP_SUB] = "sub", [BINOP_BITWISE_AND] = "and", [BINOP_BITWISE_OR] = "or", [BINOP_BITWISE_XOR] = "xor"};
/* Counter used to number the `.L<n>` local labels that are emitted. */
static size_t nextLocalLabel = 0;
/*
 * Labels of the loops currently being generated, innermost last:
 * loopStackStart holds the label `continue` jumps to, loopStackEnd the
 * label `break` jumps to. At most LOOPSTACKSIZE loops can be nested.
 */
#define LOOPSTACKSIZE 64
static size_t loopStackStart[LOOPSTACKSIZE];
static size_t loopStackEnd[LOOPSTACKSIZE];
static size_t loopStackIdx;
/*
 * Maps a size in bytes (1, 2, 4 or 8) to the matching data-definition
 * directive. Any other size aborts the program.
 */
static const char *direct(int size) {
	if(size == 1) return "db";
	if(size == 2) return "dw";
	if(size == 4) return "dd";
	if(size == 8) return "dq";
	abort();
}
/*
 * Maps a size in bytes (1, 2, 4 or 8) to the matching operand-size
 * specifier. Any other size aborts the program.
 */
static const char *spec(int size) {
	if(size == 1) return "byte";
	if(size == 2) return "word";
	if(size == 4) return "dword";
	if(size == 8) return "qword";
	abort();
}
/* Operand-size specifier for the type of expression `e`. */
static const char *specexpr(AST *e) {
	Type *exprType = e->expression.type;
	return spec(type_size(exprType));
}
/*
 * Returns the textual operand for register-allocated variable `v`.
 *
 * In DEBUG builds this is "@<colour>"; otherwise it is the 32-bit name of
 * the register selected by the variable's colour. The program aborts when
 * the colour does not correspond to a row of `regs`, instead of reading
 * outside the table.
 *
 * The result lives in one of XVBUFS rotating static buffers, so it stays
 * valid only until xv has been called XVBUFS more times.
 */
static const char *xv(VarTableEntry *v) {
	assert(v->kind == VARTABLEENTRY_VAR);
#define XVBUFS 8
#define XVBUFSZ 8
	static char bufs[XVBUFS][XVBUFSZ];
	static int bufidx = 0;
	char *ret = bufs[bufidx];
#ifdef DEBUG
	snprintf(ret, XVBUFSZ, "@%i", v->data.var.color);
#else
	/* The colouring pass places no upper bound on colours. */
	if(v->data.var.color < 0 || v->data.var.color >= REGS) {
		abort();
	}
	snprintf(ret, XVBUFSZ, "%s", regs[v->data.var.color][2]);
#endif
	bufidx = (bufidx + 1) % XVBUFS;
	return ret;
}
/*
 * Condition-code suffix for a conditional jump on comparison `op`:
 * "e" for ==, "ne" for !=, and the marker "wtf" for anything else.
 */
static const char *xj(BinaryOp op) {
	if(op == BINOP_EQUAL) {
		return "e";
	}
	if(op == BINOP_NEQUAL) {
		return "ne";
	}
	return "wtf";
}
static const char *xop(AST *e) {
#define XOPBUFS 16
#define XOPBUFSZ 24
static char bufs[XOPBUFS][XOPBUFSZ];
static int bufidx = 0;
char *ret = bufs[bufidx];
if(e->nodeKind == AST_EXPR_VAR) {
VarTableEntry *v = e->exprVar.thing;
if(v->kind == VARTABLEENTRY_VAR) {
return xv(v);
} else if(v->kind == VARTABLEENTRY_SYMBOL) {
snprintf(ret, XOPBUFSZ, "[%s]", v->data.symbol.name);
} else abort();
} else if(e->nodeKind == AST_EXPR_PRIMITIVE) {
snprintf(ret, XOPBUFSZ, "%i", e->exprPrim.val);
} else if(e->nodeKind == AST_EXPR_UNARY_OP && e->exprUnOp.operator == UNOP_DEREF && e->exprUnOp.operand->nodeKind == AST_EXPR_BINARY_OP && e->exprUnOp.operand->exprBinOp.operator == BINOP_ADD && e->exprUnOp.operand->exprBinOp.operands[0]->nodeKind == AST_EXPR_UNARY_OP && e->exprUnOp.operand->exprBinOp.operands[1]->nodeKind == AST_EXPR_VAR && e->exprUnOp.operand->exprBinOp.operands[0]->exprUnOp.operator == UNOP_REF && e->exprUnOp.operand->exprBinOp.operands[0]->exprUnOp.operand->nodeKind == AST_EXPR_VAR && e->exprUnOp.operand->exprBinOp.operands[0]->exprUnOp.operand->exprVar.thing->kind == VARTABLEENTRY_SYMBOL && e->exprUnOp.operand->exprBinOp.operands[1]->exprVar.thing->kind == VARTABLEENTRY_VAR) {
snprintf(ret, XOPBUFSZ, "[%s + %s]",
e->exprUnOp.operand->exprBinOp.operands[0]->exprUnOp.operand->exprVar.thing->data.symbol.name,
xv(e->exprUnOp.operand->exprBinOp.operands[1]->exprVar.thing));
} else if(e->nodeKind == AST_EXPR_UNARY_OP && e->exprUnOp.operator == UNOP_DEREF && e->exprUnOp.operand->nodeKind == AST_EXPR_BINARY_OP && e->exprUnOp.operand->exprBinOp.operator == BINOP_ADD && e->exprUnOp.operand->exprBinOp.operands[0]->nodeKind == AST_EXPR_UNARY_OP && e->exprUnOp.operand->exprBinOp.operands[1]->nodeKind == AST_EXPR_BINARY_OP && e->exprUnOp.operand->exprBinOp.operands[0]->exprUnOp.operator == UNOP_REF && e->exprUnOp.operand->exprBinOp.operands[0]->exprUnOp.operand->nodeKind == AST_EXPR_VAR && e->exprUnOp.operand->exprBinOp.operands[0]->exprUnOp.operand->exprVar.thing->kind == VARTABLEENTRY_SYMBOL && e->exprUnOp.operand->exprBinOp.operands[1]->exprBinOp.operator == BINOP_MUL && e->exprUnOp.operand->exprBinOp.operands[1]->exprBinOp.operands[1]->nodeKind == AST_EXPR_VAR && e->exprUnOp.operand->exprBinOp.operands[1]->exprBinOp.operands[0]->nodeKind == AST_EXPR_PRIMITIVE && e->exprUnOp.operand->exprBinOp.operands[1]->exprBinOp.operands[1]->exprVar.thing->kind == VARTABLEENTRY_VAR) {
snprintf(ret, XOPBUFSZ, "[%s + %i * %s]",
e->exprUnOp.operand->exprBinOp.operands[0]->exprUnOp.operand->exprVar.thing->data.symbol.name,
e->exprUnOp.operand->exprBinOp.operands[1]->exprBinOp.operands[0]->exprPrim.val,
xv(e->exprUnOp.operand->exprBinOp.operands[1]->exprBinOp.operands[1]->exprVar.thing));
} else if(e->nodeKind == AST_EXPR_UNARY_OP && e->exprUnOp.operator == UNOP_REF && e->exprUnOp.operand->nodeKind == AST_EXPR_VAR && e->exprUnOp.operand->exprVar.thing->kind == VARTABLEENTRY_SYMBOL) {
snprintf(ret, XOPBUFSZ, "%s", e->exprUnOp.operand->exprVar.thing->data.symbol.name);
} else {
return NULL;
}
bufidx = (bufidx + 1) % XOPBUFS;
return ret;
}
/*
 * Emits assembly on stdout for every statement of chunk `a`, in order.
 *
 * Statements are matched against a fixed set of shapes (section/org
 * directives, symbol declarations, assignments with inc/dec/lea/mov
 * selection, loops, break/continue, if, and call expression statements).
 * Statements of any other shape produce no output.
 *
 * Aborts when loops are nested deeper than LOOPSTACKSIZE or when
 * break/continue occurs with no enclosing loop being generated.
 *
 * NOTE(review): xop() returns NULL for operands it cannot format and the
 * results are passed to printf("%s") unchecked.
 * NOTE(review): a symbol declaration with an initialiser only emits the
 * placeholder line "A".
 */
void cg_chunk(AST *a) {
	AST *s = a->chunk.statementFirst;
	// Potentially complex pattern matching
	while(s) {
		if(s->nodeKind == AST_STMT_EXT_SECTION) {
			Token t = s->stmtExtSection.name;
			printf("section %.*s\n", (int) t.length, t.content);
		} else if(s->nodeKind == AST_STMT_EXT_ORG) {
			/* size_t is not necessarily unsigned long; convert for %lu. */
			printf("org %lu\n", (unsigned long) s->stmtExtOrg.val);
		} else if(s->nodeKind == AST_STMT_DECL && s->stmtDecl.thing->kind == VARTABLEENTRY_SYMBOL) {
			VarTableEntry *v = s->stmtDecl.thing;
			if(v->data.symbol.isExternal) {
				printf("extern %s\n", v->data.symbol.name);
			} else {
				if(!v->data.symbol.isLocal) {
					printf("global %s\n", v->data.symbol.name);
				}
				if(s->stmtDecl.expression) {
					puts("A");
				} else {
					printf("%s resb %lu\n", v->data.symbol.name, (unsigned long) type_size(s->stmtDecl.thing->type));
				}
			}
		} else if(s->nodeKind == AST_STMT_ASSIGN) {
			AST *what = s->stmtAssign.what;
			AST *to = s->stmtAssign.to;
			if(to->nodeKind == AST_EXPR_BINARY_OP && ast_expression_equal(what, to->exprBinOp.operands[0]) && (to->exprBinOp.operator == BINOP_ADD || to->exprBinOp.operator == BINOP_SUB) && to->exprBinOp.operands[1]->nodeKind == AST_EXPR_PRIMITIVE && to->exprBinOp.operands[1]->exprPrim.val == 1) {
				// inc or dec
				static const char *instrs[] = {"inc", "dec"};
				printf("%s %s %s\n", instrs[to->exprBinOp.operator == BINOP_SUB], specexpr(what), xop(what));
			} else if(what->nodeKind == AST_EXPR_VAR && to->nodeKind == AST_EXPR_BINARY_OP && to->exprBinOp.operator == BINOP_ADD && to->exprBinOp.operands[0]->nodeKind == AST_EXPR_VAR && to->exprBinOp.operands[1]->nodeKind == AST_EXPR_VAR && to->exprBinOp.operands[0]->exprVar.thing->kind == VARTABLEENTRY_VAR && to->exprBinOp.operands[1]->exprVar.thing->kind == VARTABLEENTRY_VAR) {
				/* var = var + var */
				printf("lea %s, [%s + %s]\n",
					xv(what->exprVar.thing),
					xv(to->exprBinOp.operands[0]->exprVar.thing),
					xv(to->exprBinOp.operands[1]->exprVar.thing));
			} else if(what->nodeKind == AST_EXPR_VAR && to->nodeKind == AST_EXPR_BINARY_OP && to->exprBinOp.operator == BINOP_ADD && to->exprBinOp.operands[0]->nodeKind == AST_EXPR_UNARY_OP && to->exprBinOp.operands[0]->exprUnOp.operator == UNOP_REF && to->exprBinOp.operands[0]->exprUnOp.operand->nodeKind == AST_EXPR_VAR && to->exprBinOp.operands[1]->nodeKind == AST_EXPR_VAR && to->exprBinOp.operands[0]->exprUnOp.operand->exprVar.thing->kind == VARTABLEENTRY_SYMBOL && to->exprBinOp.operands[1]->exprVar.thing->kind == VARTABLEENTRY_VAR) {
				/* var = &symbol + var */
				printf("lea %s, [%s + %s]\n",
					xv(what->exprVar.thing),
					to->exprBinOp.operands[0]->exprUnOp.operand->exprVar.thing->data.symbol.name,
					xv(to->exprBinOp.operands[1]->exprVar.thing));
			} else {
				printf("mov %s, %s\n", xop(what), xop(to));
			}
		} else if(s->nodeKind == AST_STMT_LOOP) {
			if(loopStackIdx >= LOOPSTACKSIZE) {
				/* Loop nesting exceeds the fixed-size label stack. */
				abort();
			}
			size_t lbl0 = nextLocalLabel++;
			size_t lbl1 = nextLocalLabel++;
			loopStackStart[loopStackIdx] = lbl0;
			loopStackEnd[loopStackIdx] = lbl1;
			loopStackIdx++;
			printf(".L%lu:\n", (unsigned long) lbl0);
			/* ASTChunk is a member of union AST, so the conversion is valid. */
			cg_chunk((AST*) s->stmtLoop.body);
			printf("jmp .L%lu\n", (unsigned long) lbl0);
			printf(".L%lu:\n", (unsigned long) lbl1);
			loopStackIdx--;
		} else if(s->nodeKind == AST_STMT_BREAK) {
			if(loopStackIdx == 0) {
				abort();
			}
			printf("jmp .L%lu\n", (unsigned long) loopStackEnd[loopStackIdx - 1]);
		} else if(s->nodeKind == AST_STMT_CONTINUE) {
			if(loopStackIdx == 0) {
				abort();
			}
			printf("jmp .L%lu\n", (unsigned long) loopStackStart[loopStackIdx - 1]);
		} else if(s->nodeKind == AST_STMT_IF) {
			assert(s->stmtIf.expression->nodeKind == AST_EXPR_BINARY_OP && binop_is_comparison(s->stmtIf.expression->exprBinOp.operator));
			size_t lbl = nextLocalLabel++;
			printf("cmp %s %s, %s\n", specexpr(s->stmtIf.expression->exprBinOp.operands[0]), xop(s->stmtIf.expression->exprBinOp.operands[0]), xop(s->stmtIf.expression->exprBinOp.operands[1]));
			/* Skip the body when the opposite comparison holds. */
			printf("j%s .L%lu\n", xj(binop_comp_opposite(s->stmtIf.expression->exprBinOp.operator)), (unsigned long) lbl);
			cg_chunk(s->stmtIf.then);
			printf(".L%lu:\n", (unsigned long) lbl);
		} else if(s->nodeKind == AST_STMT_EXPR) {
			AST *e = s->stmtExpr.expr;
			if(e->nodeKind == AST_EXPR_CALL) {
				/* Preserve eax, ecx and edx around the call. */
				puts("push eax");
				puts("push ecx");
				puts("push edx");
				int argCount = e->exprCall.what->expression.type->function.argCount;
				size_t argSize = 0;
				/* Arguments are pushed last-to-first; each occupies a multiple of 4 bytes. */
				for(int i = argCount - 1; i >= 0; i--) {
					printf("push %s\n", xop(e->exprCall.args[i]));
					argSize += (type_size(e->exprCall.args[i]->expression.type) + 3) & ~3;
				}
				assert(e->exprCall.what->nodeKind == AST_EXPR_VAR && e->exprCall.what->exprVar.thing->kind == VARTABLEENTRY_SYMBOL);
				printf("call %s\n", e->exprCall.what->exprVar.thing->data.symbol.name);
				/* The caller removes the arguments from the stack. */
				printf("add esp, %lu\n", (unsigned long) argSize);
				puts("pop edx");
				puts("pop ecx");
				puts("pop eax");
			}
		}
		s = s->statement.next;
	}
}
/* Welsh-Powell graph coloring */
static int comparator(const void *A, const void *B) {
VarTableEntry *const *a = A;
VarTableEntry *const *b = B;
return ((*a)->data.var.degree * (*a)->data.var.priority) - ((*b)->data.var.degree * (*b)->data.var.priority);
}
void cg_go(AST *a) {
typedef VarTableEntry *Adjacency[2];
size_t adjCount = 0;
Adjacency *adjs = malloc(sizeof(*adjs) * adjCount);
VarTableEntry **vars = a->chunk.vars;
for(size_t v1i = 0; v1i < a->chunk.varCount; v1i++) {
for(size_t v2i = 0; v2i < a->chunk.varCount; v2i++) {
if(v1i == v2i) continue;
VarTableEntry *v1 = vars[v1i];
VarTableEntry *v2 = vars[v2i];
/* 1D intersection test */
if((v1->data.var.start >= v2->data.var.start && v1->data.var.start <= v2->data.var.end)
|| (v1->data.var.end >= v2->data.var.start && v1->data.var.end <= v2->data.var.end)) {
VarTableEntry *min = v1 < v2 ? v1 : v2;
VarTableEntry *max = v1 < v2 ? v2 : v1;
for(size_t a = 0; a < adjCount; a++) {
if(adjs[a][0] == min && adjs[a][1] == max) {
goto cont;
}
}
adjs = realloc(adjs, sizeof(*adjs) * ++adjCount);
adjs[adjCount - 1][0] = min;
adjs[adjCount - 1][1] = max;
cont:;
}
}
}
for(size_t a = 0; a < adjCount; a++) {
adjs[a][0]->data.var.degree++;
adjs[a][1]->data.var.degree++;
}
qsort(vars, a->chunk.varCount, sizeof(*vars), comparator);
/* Welsh plow my ass */
for(int v = 0; v < a->chunk.varCount; v++) {
for(int c = 0;; c++) {
for(int a = 0; a < adjCount; a++) {
if(adjs[a][0] == vars[v] && adjs[a][1]->data.var.color == c) {
goto nextColor;
} else if(adjs[a][1] == vars[v] && adjs[a][0]->data.var.color == c) {
goto nextColor;
}
}
vars[v]->data.var.color = c;
break;
nextColor:;
}
}
free(adjs);
cg_chunk(a);
free(vars);
}

8
src/cg.h Normal file
View File

@ -0,0 +1,8 @@
#ifndef H_CG
#define H_CG
#include"ast.h"
void cg_go(union AST*);
#endif

141
src/dstr.c Normal file
View File

@ -0,0 +1,141 @@
#include"dstr.h"
#include<stdarg.h>
#include<stdlib.h>
#include<string.h>
#include<stdio.h>
/*
 * Number of characters needed to print `i` in decimal, including the
 * leading '-' for negative values.
 *
 * The magnitude is computed in unsigned arithmetic so that INT_MIN, whose
 * negation does not fit in an int, is measured correctly.
 */
static int ilen(int i) {
	if(i == 0) return 1;
	int ret = 0;
	unsigned int magnitude;
	if(i < 0) {
		ret = 1;
		magnitude = 0u - (unsigned int) i;
	} else {
		magnitude = (unsigned int) i;
	}
	while(magnitude > 0) {
		ret++;
		magnitude /= 10;
	}
	return ret;
}
/*
 * Formats `src` in decimal into a static buffer and returns that buffer.
 * The contents are overwritten by the next call.
 */
static char *myitoa(int src) {
	static char digits[12];
	snprintf(digits, sizeof(digits), "%i", src);
	return digits;
}
/*
 * Allocation header of a dstr. A dstr handed to callers points
 * sizeof(dstrInternal) bytes past the start of this header; `length` is the
 * string length without the terminating NUL.
 */
typedef struct {
size_t length;
char data[];
} dstrInternal;
/* Allocates a new dstr holding the empty string. Release it with dstrfree(). */
dstr dstrempty() {
	dstrInternal *header = malloc(sizeof(dstrInternal) + 1);
	header->data[0] = '\0';
	header->length = 0;
	char *base = (char*) header;
	return base + sizeof(dstrInternal);
}
/*
 * Allocates a new dstr holding a copy of the NUL-terminated string `src`.
 * Release it with dstrfree().
 */
dstr dstrz(const char *src) {
	size_t srcLength = strlen(src);
	dstrInternal *header = malloc(sizeof(dstrInternal) + srcLength + 1);
	header->length = srcLength;
	memcpy(header->data, src, srcLength);
	header->data[srcLength] = '\0';
	char *base = (char*) header;
	return base + sizeof(dstrInternal);
}
/*
 * Appends formatted text to `original` and returns the (possibly moved)
 * string; `original` must not be used afterwards.
 *
 * Supported directives:
 *   %s  NUL-terminated C string
 *   %c  single character (a zero character appends nothing)
 *   %S  another dstr
 *   %i  int in decimal
 * Any other character after '%' is appended as-is (so "%%" yields "%"),
 * and a '%' that ends the format string is appended literally.
 *
 * The arguments are walked twice: once to measure, once to copy. Aborts if
 * the string cannot be grown.
 *
 * NOTE(review): passing `original` itself as a %S argument is not safe,
 * because the buffer may be moved before the argument is copied.
 */
dstr dstrfmt(dstr original, const char *fmt, ...) {
	dstrInternal *originalInternal = (dstrInternal*) (original - sizeof(dstrInternal));
	va_list list;
	size_t totalLength = 0;

	/* Pass 1: measure how many bytes will be appended. */
	va_start(list, fmt);
	for(const char *p = fmt; *p; p++) {
		if(*p != '%' || p[1] == '\0') {
			totalLength++;
			continue;
		}
		switch(*++p) {
		case 's':
			totalLength += strlen(va_arg(list, char*));
			break;
		case 'c':
			if(va_arg(list, int)) totalLength++;
			break;
		case 'S': {
			dstrInternal *i = (dstrInternal*) (va_arg(list, dstr) - sizeof(dstrInternal));
			totalLength += i->length;
			break;
		}
		case 'i':
			totalLength += ilen(va_arg(list, int));
			break;
		default:
			totalLength++;
		}
	}
	va_end(list);

	dstrInternal *grown = realloc(originalInternal, sizeof(dstrInternal) + originalInternal->length + totalLength + 1);
	if(!grown) {
		abort();
	}
	originalInternal = grown;

	/* Pass 2: copy the pieces behind the existing contents. */
	char *dst = originalInternal->data + originalInternal->length;
	originalInternal->length += totalLength;
	originalInternal->data[originalInternal->length] = 0;

	va_start(list, fmt);
	for(const char *p = fmt; *p; p++) {
		if(*p != '%' || p[1] == '\0') {
			*(dst++) = *p;
			continue;
		}
		switch(*++p) {
		case 's': {
			const char *str = va_arg(list, char*);
			size_t strLength = strlen(str);
			memcpy(dst, str, strLength);
			dst += strLength;
			break;
		}
		case 'c': {
			int c = va_arg(list, int);
			if(c) {
				*(dst++) = c;
			}
			break;
		}
		case 'S': {
			dstrInternal *i = (dstrInternal*) (va_arg(list, dstr) - sizeof(dstrInternal));
			memcpy(dst, i->data, i->length);
			dst += i->length;
			break;
		}
		case 'i': {
			const char *digits = myitoa(va_arg(list, int));
			size_t digitCount = strlen(digits);
			memcpy(dst, digits, digitCount);
			dst += digitCount;
			break;
		}
		default:
			*(dst++) = *p;
		}
	}
	va_end(list);

	return (dstr) originalInternal + sizeof(dstrInternal);
}
/*
 * Releases a dstr obtained from dstrempty(), dstrz() or dstrfmt().
 * Passing NULL is a no-op, mirroring free().
 */
void dstrfree(dstr s) {
	if(!s) {
		return;
	}
	free(s - sizeof(dstrInternal));
}

15
src/dstr.h Normal file
View File

@ -0,0 +1,15 @@
#ifndef DSTR_H
#define DSTR_H
#include<stddef.h>
/* Originally used sds, but it didn't support OpenWatcom. This isn't as optimized, but it's good enough. */
/*
 * A dstr is a pointer to the characters of a heap-allocated, NUL-terminated
 * string; it can be passed wherever a char* is read. Its length is stored in
 * a header in front of the characters, so a dstr must only be created by
 * the functions below and released with dstrfree().
 */
typedef char *dstr;
/* New empty string. */
dstr dstrempty();
/* New string holding a copy of a NUL-terminated C string. */
dstr dstrz(const char*);
/* Kept for source compatibility; dstr.c provides dstrz(), not dstrraw(). */
dstr dstrraw(const char*);
/* Appends formatted text (%s, %c, %S, %i); returns the possibly moved string. */
dstr dstrfmt(dstr, const char*, ...);
/* Releases a string. */
void dstrfree(dstr);
#endif

320
src/lexer.c Normal file
View File

@ -0,0 +1,320 @@
#include"lexer.h"
#include<stdlib.h>
#include<assert.h>
#include<string.h>
#include"reporting.h"
// Comply to same order as in the TokenKind enum from src/lexer.h
// Human-readable token names for diagnostics, indexed by TokenKind.
// Every entry needs its own trailing comma: adjacent string literals are
// concatenated, which silently shifts all following names by one.
char *TOKEN_NAMES[] = {
	"identifier",
	"'local'",
	"EOF",
	"number",
	"';'",
	"':'",
	"'if'",
	"'('",
	"')'",
	"'{'",
	"'}'",
	"'='",
	"'+'",
	"'-'",
	"'*'",
	"'/'",
	"'extern'",
	"'loop'",
	"'break'",
	"','",
	"'&'",
	"'|'",
	"'^'",
	"'~'",
	"'=='",
	"'['",
	"']'",
	"'?'",
	"string",
	"'!='",
	"'!'",
	"'continue'"
};
/* 1 when c is an ASCII letter (a-z or A-Z), otherwise 0. */
static int isAlpha(int c) {
	int lower = c >= 'a' && c <= 'z';
	int upper = c >= 'A' && c <= 'Z';
	return lower || upper;
}
/* 1 when c is an ASCII decimal digit, otherwise 0. */
static int isNum(int c) {
	if(c < '0') {
		return 0;
	}
	return c <= '9';
}
/* 1 when c is an ASCII letter or decimal digit, otherwise 0. */
static int isAlphanum(int c) {
	if(isAlpha(c)) {
		return 1;
	}
	return isNum(c);
}
/*
 * 1 when c is skipped as whitespace by the lexer: space, newline, carriage
 * return, backspace or tab. Otherwise 0.
 */
static int isWS(int c) {
	switch(c) {
	case ' ':
	case '\n':
	case '\r':
	case '\b':
	case '\t':
		return 1;
	default:
		return 0;
	}
}
/* Position bookkeeping: updated for every character actually read from the file. */
static size_t currentRow = 0;
static size_t currentColumn = 0;
/* One-character pushback slot; EOF means the slot is empty. */
static int ungetted = EOF;

/*
 * Returns the next input character. A character stored by pushc() is
 * returned first (without touching the position counters); otherwise a
 * character is read from `f`. A newline advances the row and resets the
 * column; any other character except EOF advances the column.
 */
int nextc(FILE *f) {
	int c;
	if(ungetted == EOF) {
		c = fgetc(f);
		if(c == '\n') {
			currentRow++;
			currentColumn = 0;
		} else if(c != EOF) {
			currentColumn++;
		}
	} else {
		c = ungetted;
		ungetted = EOF;
	}
	return c;
}

/*
 * Stores `c` so that the next nextc() call returns it. Only one character
 * can be held; `f` is not used.
 */
void pushc(int c, FILE *f) {
	(void) f;
	ungetted = c;
}
Token nct_tokenize(FILE *f) {
Token tok;
tok.content = NULL;
tok.row = currentRow;
tok.column = currentColumn;
int c = nextc(f);
if(c == EOF) {
tok.type = TOKEN_EOF;
return tok;
}
if(c == ';') {
tok.type = TOKEN_SEMICOLON;
return tok;
} else if(c == ':') {
tok.type = TOKEN_COLON;
return tok;
} else if(c == '(') {
tok.type = TOKEN_PAREN_L;
return tok;
} else if(c == ')') {
tok.type = TOKEN_PAREN_R;
return tok;
} else if(c == '{') {
tok.type = TOKEN_SQUIGGLY_L;
return tok;
} else if(c == '}') {
tok.type = TOKEN_SQUIGGLY_R;
return tok;
} else if(c == '+') {
tok.type = TOKEN_PLUS;
return tok;
} else if(c == '-') {
tok.type = TOKEN_MINUS;
return tok;
} else if(c == '*') {
tok.type = TOKEN_STAR;
return tok;
} else if(c == '&') {
tok.type = TOKEN_AMPERSAND;
return tok;
} else if(c == '|') {
tok.type = TOKEN_VERTICAL_BAR;
return tok;
} else if(c == '^') {
tok.type = TOKEN_CARET;
return tok;
} else if(c == '~') {
tok.type = TOKEN_TILDE;
return tok;
} else if(c == '[') {
tok.type = TOKEN_SQUAREN_L;
return tok;
} else if(c == ']') {
tok.type = TOKEN_SQUAREN_R;
return tok;
} else if(c == '?') {
tok.type = TOKEN_QUESTION_MARK;
return tok;
} else if(c == '!') {
tok.type = TOKEN_EXCLAMATION;
int c = nextc(f);
if(c == '=') {
tok.type = TOKEN_EXCLAMATION_EQUALS;
} else ungetc(c, f);
return tok;
} else if(c == '/') {
int c = nextc(f);
if(c == '*') { /* This is a comment; skip. */
while(1) {
while((c = nextc(f)) != '*');
if(nextc(f) == '/') {
return nct_tokenize(f);
}
}
} else {
ungetc(c, f);
tok.type = TOKEN_SLASH;
return tok;
}
} else if(c == '=') {
tok.type = TOKEN_EQUALS;
int c = nextc(f);
if(c == '=') {
tok.type = TOKEN_DOUBLE_EQUALS;
} else ungetc(c, f);
return tok;
} else if(c == ',') {
tok.type = TOKEN_COMMA;
return tok;
} else if(c == '"') {
int capacity = 5;
char *content = malloc(capacity);
size_t i = 0;
int c;
while(c = nextc(f), c != '"') {
if(i == capacity - 1) {
content = realloc(content, capacity += 4);
}
if(c == '\\') {
c = nextc(f);
if(c == '0') c = 0;
else if(c == 'n') c = '\n';
else if(c == 't') c = '\t';
}
content[i++] = c;
}
content[i] = 0;
tok.type = TOKEN_STRING;
tok.content = content;
tok.length = i;
return tok;
} else if(isAlpha(c) || c == '@') {
int capacity = 5;
char *content = malloc(capacity);
size_t i = 0;
content[i++] = c;
while(c = nextc(f), (isAlphanum(c) || c == '@')) {
if(i == capacity - 1) {
content = realloc(content, capacity += 4);
}
content[i++] = c;
}
pushc(c, f);
content[i] = 0;
if(!strcmp(content, "local")) {
free(content);
tok.type = TOKEN_LOCAL;
return tok;
} else if(!strcmp(content, "if")) {
free(content);
tok.type = TOKEN_IF;
return tok;
} else if(!strcmp(content, "extern")) {
free(content);
tok.type = TOKEN_EXTERN;
return tok;
} else if(!strcmp(content, "loop")) {
free(content);
tok.type = TOKEN_LOOP;
return tok;
} else if(!strcmp(content, "break")) {
free(content);
tok.type = TOKEN_BREAK;
return tok;
} else if(!strcmp(content, "continue")) {
free(content);
tok.type = TOKEN_CONTINUE;
return tok;
}
tok.type = TOKEN_IDENTIFIER;
tok.content = content;
return tok;
} else if(isNum(c)) {
int capacity = 32;
char *content = malloc(capacity);
size_t i = 0;
content[i++] = c;
while(c = nextc(f), isNum(c)) {
if(i == capacity - 1) {
content = realloc(content, capacity += 4);
}
content[i++] = c;
}
content[i] = 0;
int base = strtol(content, NULL, 10);
if(c == 'r') {
content[i++] = c;
while(c = nextc(f), (isNum(c) || (base > 10 && c >= 'A' && c < ('A' + base - 10)))) {
if(i == 31) {
stahp(1, 6180, "Numbers have a maximum size of 31.");
}
content[i++] = c;
}
}
pushc(c, f);
tok.type = TOKEN_NUMBER;
tok.content = content;
return tok;
} else if(isWS(c)) {
int c;
while(c = nextc(f), isWS(c)) {
}
pushc(c, f);
return nct_tokenize(f);
}
stahp(currentRow, currentColumn, "Invalid character '%c' (byte %i)", c, c);
}
/*
 * Tokenizes all of `f` and returns a heap-allocated array of tokens whose
 * last element is the TOKEN_EOF token. The array starts with room for
 * 8 tokens and doubles whenever it is full.
 */
Token *nct_lex(FILE *f) {
	size_t capacity = 8;
	size_t count = 0;
	Token *tokens = malloc(sizeof(*tokens) * capacity);
	for(;;) {
		tokens[count] = nct_tokenize(f);
		if(tokens[count].type == TOKEN_EOF) {
			break;
		}
		if(++count == capacity) {
			capacity *= 2;
			tokens = realloc(tokens, sizeof(*tokens) * capacity);
		}
	}
	return tokens;
}

54
src/lexer.h Normal file
View File

@ -0,0 +1,54 @@
#ifndef NCTREF_LEXER_H
#define NCTREF_LEXER_H
#include<stdio.h>
extern char *TOKEN_NAMES[];
/*
 * Kinds of tokens produced by the lexer. The order matters: TOKEN_NAMES in
 * lexer.c is indexed by these values, so both lists must be kept in sync.
 */
typedef enum {
TOKEN_IDENTIFIER,
TOKEN_LOCAL,
TOKEN_EOF,
TOKEN_NUMBER,
TOKEN_SEMICOLON,
TOKEN_COLON,
TOKEN_IF,
TOKEN_PAREN_L,
TOKEN_PAREN_R,
TOKEN_SQUIGGLY_L,
TOKEN_SQUIGGLY_R,
TOKEN_EQUALS,
TOKEN_PLUS,
TOKEN_MINUS,
TOKEN_STAR,
TOKEN_SLASH,
TOKEN_EXTERN,
TOKEN_LOOP,
TOKEN_BREAK,
TOKEN_COMMA,
TOKEN_AMPERSAND,
TOKEN_VERTICAL_BAR,
TOKEN_CARET,
TOKEN_TILDE,
TOKEN_DOUBLE_EQUALS,
TOKEN_SQUAREN_L,
TOKEN_SQUAREN_R,
TOKEN_QUESTION_MARK,
TOKEN_STRING,
TOKEN_EXCLAMATION_EQUALS,
TOKEN_EXCLAMATION,
TOKEN_CONTINUE,
} TokenKind;
/*
 * One lexical token. `row` and `column` are taken from the lexer's position
 * counters just before the token is read.
 */
typedef struct {
TokenKind type;
int row, column;
char *content; /* NULL for keywords. */
size_t length; /* Not valid for everything. */
} Token;
Token nct_tokenize(FILE*);
Token *nct_lex(FILE*);
#endif

42
src/ntc.c Normal file
View File

@ -0,0 +1,42 @@
#include<errno.h>
#include<string.h>
#include<stdlib.h>
#include"lexer.h"
#include"parse.h"
#include"ntc.h"
#include"reporting.h"
#include"cg.h"
/* Copy of the command line, stored by main() for ntc_get_arg(). */
static int argc;
static char **argv;

/*
 * Looks up a command-line argument of the form `name=value`.
 *
 * Returns a pointer to the value (which may be the empty string) of the
 * first argument whose text before '=' is exactly `name`, or NULL when
 * there is none. Arguments that merely start with `name` (for example
 * "input=x" when asking for "in") or that lack the '=' do not match.
 */
const char* ntc_get_arg(const char *name) {
	size_t nameLength = strlen(name);
	for(int i = 1; i < argc; i++) {
		if(strncmp(argv[i], name, nameLength) == 0 && argv[i][nameLength] == '=') {
			return argv[i] + nameLength + 1;
		}
	}
	return NULL;
}
/*
 * Compiler driver: lexes the input (the file named by the `in=` argument,
 * or standard input when it is absent), parses the tokens and runs code
 * generation, which prints to standard output.
 *
 * Returns 0 on success and EXIT_FAILURE when the input file cannot be
 * opened.
 */
int main(int argc_, char **argv_) {
	argc = argc_;
	argv = argv_;
	const char *in = ntc_get_arg("in");
	FILE *f = in ? fopen(in, "rb") : stdin;
	if(!f) {
		fprintf(stderr, "ntc: cannot open '%s': %s\n", in, strerror(errno));
		return EXIT_FAILURE;
	}
	Token *tokens = nct_lex(f);
	if(in) fclose(f);
	AST *chunk = nct_parse(tokens);
	free(tokens);
	cg_go(chunk);
	return 0;
}

6
src/ntc.h Normal file
View File

@ -0,0 +1,6 @@
#ifndef NTC_H
#define NTC_H
const char* ntc_get_arg(const char *name);
#endif

93
src/optims.c Normal file
View File

@ -0,0 +1,93 @@
#include"optims.h"
#include<assert.h>
// Currently performs only copy propagation.
// But CP is NECESSARY, otherwise it creates too many variables
// that are unable to be coalesced by the regallocator
/*
 * Recomputes the lifespan of variable `vte` from its use-def chain: `start`
 * becomes the smallest and `end` the largest `t` found in the chain. With an
 * empty chain the result is start = 0xFFFFFFFF and end = 0.
 */
static void recalc_lifespan(VarTableEntry *vte) {
	assert(vte->kind == VARTABLEENTRY_VAR);
	size_t first = 0xFFFFFFFF;
	size_t last = 0;
	for(UseDef *node = vte->data.var.usedefFirst; node; node = node->next) {
		if(node->t < first) {
			first = node->t;
		}
		if(node->t > last) {
			last = node->t;
		}
	}
	vte->data.var.start = first;
	vte->data.var.end = last;
}
/*
 * Copy propagation over the statements of `chu`.
 *
 * For every assignment `dst = src` where both sides are variable
 * expressions bound to VARTABLEENTRY_VAR entries, the uses of dst that are
 * reached by this assignment are redirected to src: each matching UseDef
 * record is moved from dst's chain into src's chain (right behind the record
 * describing this read of src) and inherits that record's definition. When
 * dst's chain ends after the moved records, the assignment statement is
 * unlinked from the list. The lifespans of both variables are then
 * recomputed.
 *
 * NOTE(review): the search for the first record with def == s has no NULL
 * check, so it presumably relies on such a record always existing.
 * NOTE(review): an assignment that is the first statement (sPrev == NULL)
 * is never unlinked, and statementLast is not updated on unlink.
 * NOTE(review): after an unlink, sPrev is still set to the removed
 * statement at the label below — confirm this is intended.
 */
void optim_chunk(ASTChunk *chu) {
AST *s = chu->statementFirst, *sPrev = NULL;
while(s) {
if(s->nodeKind == AST_STMT_ASSIGN && s->stmtAssign.what->nodeKind == AST_EXPR_VAR && s->stmtAssign.to->nodeKind == AST_EXPR_VAR) {
VarTableEntry *dst = ((AST*) s->stmtAssign.what)->exprVar.thing;
VarTableEntry *src = ((AST*) s->stmtAssign.to)->exprVar.thing;
if(dst->kind == VARTABLEENTRY_VAR && src->kind == VARTABLEENTRY_VAR) {
// Find reaching source definition
UseDef *srcUD = src->data.var.usedefFirst;
while(srcUD && srcUD->use != s->stmtAssign.to) {
srcUD = srcUD->next;
}
if(!srcUD) {
goto copypropfail;
}
// Find first use of this def
UseDef *dstUDPrev = NULL;
UseDef *dstUD = dst->data.var.usedefFirst;
while(dstUD->def != s) {
dstUDPrev = dstUD;
dstUD = dstUD->next;
}
// Update all definitions
while(dstUD && dstUD->def == s) {
/* Make the using expression read src instead of dst. */
((AST*) dstUD->use)->exprVar.thing = src;
UseDef *next = dstUD->next;
/* Splice the record into src's chain, directly after srcUD. */
dstUD->def = srcUD->def;
dstUD->next = srcUD->next;
srcUD->next = dstUD;
dstUD = next;
/* Close the gap the record left in dst's chain. */
if(dstUDPrev) {
dstUDPrev->next = dstUD;
} else {
dst->data.var.usedefFirst = dstUD;
}
}
if(!dstUD) {
// dst was never used again -> DELETE ASSIGNMENT COMPLETELY
if(sPrev) {
sPrev->statement.next = s->statement.next;
// TODO: free
}
}
recalc_lifespan(dst);
recalc_lifespan(src);
}
}
copypropfail:
sPrev = s;
s = s->statement.next;
}
}

5
src/optims.h Normal file
View File

@ -0,0 +1,5 @@
#pragma once
#include"ast.h"
void optim_chunk(ASTChunk*);

867
src/parse.c Normal file
View File

@ -0,0 +1,867 @@
#include"parse.h"
#include<assert.h>
#include<stdlib.h>
#include<string.h>
#include"utils.h"
#include"vartable.h"
#include"reporting.h"
#include<stdint.h>
#include<signal.h>
/*
 * Parser state. `tokens` is the token array being read and `i` the index of
 * the next token to consume; statements are appended to `currentChunk`.
 * NOTE(review): the roles of `t`, `scope` and `topLevel` are not visible in
 * this part of the file.
 */
typedef struct {
Token *tokens;
ssize_t i;
size_t t;
VarTable *scope;
ASTChunk *currentChunk;
ASTChunk *topLevel;
} Parser;
/*
 * Returns the current token and advances past it. The TOKEN_EOF token is
 * never consumed, so it is returned again on every further call.
 */
static Token get(Parser *P) {
	Token current = P->tokens[P->i];
	if(current.type != TOKEN_EOF) {
		P->i++;
	}
	return current;
}
/*
 * Consumes the next token and returns it; reports an error through stahp()
 * when it is not of kind `t`.
 */
static Token expect(Parser *P, TokenKind t) {
	Token got = get(P);
	if(got.type == t) {
		return got;
	}
	stahp(got.row, got.column, "Expected %s, got %s.", TOKEN_NAMES[t], TOKEN_NAMES[got.type]);
	return got;
}
/*
 * Returns the token `depth` positions ahead of the current one without
 * consuming anything (depth 0 is the current token). Looking ahead stops at
 * the first TOKEN_EOF, which is returned instead.
 */
static Token peek(Parser *P, int depth) {
	int offset = 0;
	while(offset < depth && P->tokens[P->i + offset].type != TOKEN_EOF) {
		offset++;
	}
	return P->tokens[P->i + offset];
}
/*
 * Consumes the current token when it is of kind `t`.
 * Returns 1 if a token was consumed, 0 otherwise.
 */
static int maybe(Parser *P, TokenKind t) {
	if(peek(P, 0).type != t) {
		return 0;
	}
	get(P);
	return 1;
}
/*
 * Appends statement node `a` to the end of the chunk currently being
 * parsed. The node's own `next` field is left untouched.
 */
static void pushstat(Parser *P, void *a) {
	ASTChunk *chunk = P->currentChunk;
	if(!chunk->statementFirst) {
		/* First statement of the chunk: it is both head and tail. */
		chunk->statementLast = a;
		chunk->statementFirst = a;
		return;
	}
	chunk->statementLast->statement.next = a;
	chunk->statementLast = a;
}
static ASTExprPrimitive *parse_prim(Parser *P) {
ASTExprPrimitive *ret = malloc(sizeof(*ret));