IR linearization

This commit is contained in:
Mid 2025-08-13 18:27:52 +03:00
parent 67fb9e5b52
commit 5755e243a9
11 changed files with 388 additions and 329 deletions

View File

@ -91,6 +91,8 @@ NOTE: Later someone called this normalization, which is a much less stupid word
## Use-def chain ## Use-def chain
***WARNING: THIS ENTIRE SECTION HAS BECOME OUTDATED***
I hate these things. Another is def-use chains, but both are horribly underdocumented. Their only use in most literature is so the author can immediately move to SSA form. I hate these things. Another is def-use chains, but both are horribly underdocumented. Their only use in most literature is so the author can immediately move to SSA form.
For each variable, its UD chain is a list of each usage in the AST, with the corresponding potential definition of the variable at that use. For each potential definition that exists at that point, there is one UD element in the chain. If there's only one potential definition at a point, then it's definitely the true one. Users of UD chains include optimizers and codegen. UD chains are always regenerated for use between passes by using the UD visitor on the top-level chunk. For each variable, its UD chain is a list of each usage in the AST, with the corresponding potential definition of the variable at that use. For each potential definition that exists at that point, there is one UD element in the chain. If there's only one potential definition at a point, then it's definitely the true one. Users of UD chains include optimizers and codegen. UD chains are always regenerated for use between passes by using the UD visitor on the top-level chunk.
@ -360,7 +362,7 @@ The code generator failed to accept this, because the `as T*` cast meant that it
What was my solution? IGNORE ALL POINTER CASTS! As I wrote above, the Nectar AST does not support pointer arithmetic like that of C. By this point, all complex types should have already been converted into integers. Therefore, it does not even matter. What was my solution? IGNORE ALL POINTER CASTS! As I wrote above, the Nectar AST does not support pointer arithmetic like that of C. By this point, all complex types should have already been converted into integers. Therefore, it does not even matter.
By adding the rule (`x as A*` -> `x` *only* if x's type is a pointer), we obtain the following after denooping: By adding the rule (`x as A*` -> `x` if x is of a pointer type), we obtain the following after denooping:
T* data = this + 4; T* data = this + 4;

View File

@ -0,0 +1,9 @@
u16 x = 0;
x = 5;
if(x == 2) {
x = 3;
}
x = 4;

View File

@ -1,12 +1,5 @@
u16 x: 5; u16 x: 5;
loop {
u16* y = 257;
u9 w = -4;
u16 p = *y;
u4 z = p + 3;
u2 o = -w;
if(x != 0) { if(x != 0) {
break; x = 2;
}
} }

578
src/ast.c
View File

@ -84,6 +84,11 @@ void generic_visitor(AST **nptr, AST *stmt, AST *stmtPrev, AST *chu, AST *tlc, v
if(n->exprExtSizeOf.ofExpr) { if(n->exprExtSizeOf.ofExpr) {
generic_visitor(&n->exprExtSizeOf.ofExpr, stmt, stmtPrev, chu, tlc, ud, preHandler, postHandler); generic_visitor(&n->exprExtSizeOf.ofExpr, stmt, stmtPrev, chu, tlc, ud, preHandler, postHandler);
} }
} else if(n->nodeKind == AST_STMT_JUMP) {
if(n->stmtJump.condition) {
generic_visitor(&n->stmtJump.condition, stmt, stmtPrev, chu, tlc, ud, preHandler, postHandler);
}
} else if(n->nodeKind == AST_STMT_LABEL) {
} else { } else {
stahp_node(n, "generic_visitor: unhandled %s", AST_KIND_STR[n->nodeKind]); stahp_node(n, "generic_visitor: unhandled %s", AST_KIND_STR[n->nodeKind]);
} }
@ -146,334 +151,190 @@ int ast_stmt_is_after(const AST *chunk, const AST *s1, const AST *s2) {
return -1; return -1;
} }
/* AST *ast_get_label_by_name(AST *tlc, const char *name) {
* This pass is necessary for the purposes of optimization and regalloc. for(AST *s = tlc->chunk.statementFirst; s; s = s->statement.next) {
* Because an AST may hold outdated UD-chains, this pass MUST be called if(s->nodeKind == AST_STMT_LABEL && !strcmp(s->stmtLabel.name, name)) {
* before using the UD-chains to make sure they are valid. return s;
* }
* Each local var (VTE of kind SCOPEITEM_VAR) holds its own UD-chain }
* that specifies the exact nodes in the AST where:
* 1. It is used
* 2. The whole statement in which it is used
* 3. The definition that *might* be in use
*
* Because multiple definitions may be in use (reachable) at the point
* of the use, a unique UseDef for each possible definition is appended
* to the chain.
*
* Reachable definitions are kept track in a ReachingDefs, also held by
* each VTE. In the case of a single, simple block of code, we know
* exactly one definition (including undefined) can reach each variable,
* which would simplify the ReachingDefs structure to a single
* definition pointer.
*
* Unfortunately, conditional blocks and loops ruin this simplicity.
* If you have code like
* x = A
* if B {
* x = C
* }
* then afterward two definitions may apply to x.
*
* A solution here is to lay ReachingDefs as a graph, with each
* ReachingDefs having an optional parent. When we enter a new block of
* code, we create an empty ReachingDefs with the previous block as its
* parent. Any definitions replace the ones in the deepest
* ReachingDefs only.
*
* How we exit a block depends on its type. If it is conditional,
* the reaching definitions should join the parent (mergedefs).
* If the block is a loop, it is even worse. Given
* x = A
* loop {
* use x
* x = B
* }
* definitions can apply to uses that come before it!
*
* Also, a different case:
* x = A
* loop {
* use x
* y = B
* }
* Because, technically, the last use of x is before y = B, y and x may
* be assigned the same physical location, corrupting data as a result.
* To fix this, fake, "useless" statements are inserted during parsing
* that make the AST look as such:
* x = A
* loop {
* use x
* y = B
* }
* x;
* Until dead code removal is implemented, this will not be a problem.
*/
static void rawadduse(ScopeItem *vte, UseDef *ud) { stahp(0, 0, "Label %s not found", name);
assert(vte->kind == SCOPEITEM_VAR); }
assert(ud->next == NULL); static void rd_kill(ReachingDefs *a, ScopeItem *var) {
for(size_t i = a->defCount; i --> 0;) {
AST *def = a->defs[i];
assert(!!vte->data.var.usedefFirst == !!vte->data.var.usedefLast); assert(def->nodeKind == AST_STMT_ASSIGN);
assert(def->stmtAssign.what->nodeKind == AST_EXPR_VAR);
assert(def->stmtAssign.what->exprVar.thing->kind == SCOPEITEM_VAR);
if(!vte->data.var.usedefFirst) { if(def->stmtAssign.what->exprVar.thing == var) {
vte->data.var.usedefFirst = vte->data.var.usedefLast = ud; memmove(&a->defs[i], &a->defs[i + 1], sizeof(*a->defs) * (a->defCount - i - 1));
} else { a->defCount--;
vte->data.var.usedefLast->next = ud; }
vte->data.var.usedefLast = ud;
} }
} }
static void adduse(ScopeItem *vte, AST *use, AST *whole) { static bool rd_equal(ReachingDefs *a, ReachingDefs *b) {
assert(vte->kind == SCOPEITEM_VAR); if(a->defCount != b->defCount) {
return false;
assert(vte->data.var.reachingDefs != NULL);
ReachingDefs *rd = vte->data.var.reachingDefs;
while(rd && rd->defCount == 0) rd = rd->parent;
if(rd) {
for(size_t d = 0; d < rd->defCount; d++) {
UseDef *ud = calloc(1, sizeof(*ud));
ud->def = rd->defs[d];
ud->use = use;
ud->stmt = whole;
ud->next = NULL;
rawadduse(vte, ud);
} }
} else {
UseDef *ud = calloc(1, sizeof(*ud));
ud->def = NULL;
ud->use = use;
ud->stmt = whole;
ud->next = NULL;
rawadduse(vte, ud); for(size_t i = 0; i < a->defCount; i++) {
if(a->defs[i] != b->defs[i]) {
return false;
}
}
return true;
}
/*
 * Comparator for qsort()/bsearch() over an array of pointer-sized elements,
 * ordering them by their numeric address value.
 *
 * a, b: pointers to two array elements, each read as a uintptr_t.
 * Returns a negative value, zero, or a positive value when *a is less than,
 * equal to, or greater than *b.
 *
 * The result is built from comparisons instead of a subtraction: the
 * difference of two uintptr_t values does not fit in an int, so converting
 * it drops the high bits and can yield the wrong sign (or a false "equal"),
 * which breaks the ordering qsort() and bsearch() depend on.
 */
static int compar_ptr(const void *a, const void *b) {
	const uintptr_t lhs = *(const uintptr_t*) a;
	const uintptr_t rhs = *(const uintptr_t*) b;
	return (lhs > rhs) - (lhs < rhs);
}
/*
 * Reports whether the definition `ast` is already a member of the set `dest`.
 *
 * dest->defs is searched with bsearch() using compar_ptr, so it must be
 * sorted by that comparator; rd_add() re-sorts the array after every
 * insertion.
 *
 * Returns true if `ast` is present, false otherwise.
 */
static bool rd_find(ReachingDefs *dest, union AST *ast) {
	// A freshly zero-initialized set has defs == NULL. bsearch() requires a
	// valid base pointer even when the element count is zero, so answer the
	// empty case here instead of passing NULL through.
	if(dest->defCount == 0) {
		return false;
	}
	return bsearch(&ast, dest->defs, dest->defCount, sizeof(*dest->defs), compar_ptr) != NULL;
}
/*
 * Inserts the definition `ast` into the set `dest` unless it is already
 * present. The array grows by one element and is then re-sorted with
 * compar_ptr so that rd_find() can keep using bsearch() on it.
 */
static void rd_add(ReachingDefs *dest, union AST *ast) {
	if(!rd_find(dest, ast)) {
		// Index the new element will occupy before sorting.
		size_t slot = dest->defCount;

		dest->defCount = slot + 1;
		dest->defs = realloc(dest->defs, dest->defCount * sizeof(*dest->defs));
		dest->defs[slot] = ast;

		qsort(dest->defs, dest->defCount, sizeof(*dest->defs), compar_ptr);
	}
}
static void rd_union(ReachingDefs *dest, ReachingDefs *src) {
for(size_t i = 0; i < src->defCount; i++) {
rd_add(dest, src->defs[i]);
} }
} }
static void overwritedefs(ScopeItem *vte, AST *def) { static ReachingDefs rd_compute_in(AST *tlc, AST *stmt, AST *stmtPrev) {
assert(vte->kind == SCOPEITEM_VAR); ReachingDefs rd = {};
if(!vte->data.var.reachingDefs) { // The previous statement is a predecessor unless it's an unconditional jump statement
vte->data.var.reachingDefs = calloc(1, sizeof(*vte->data.var.reachingDefs)); if(stmtPrev && (stmtPrev->nodeKind != AST_STMT_JUMP || stmtPrev->stmtJump.condition)) {
rd_union(&rd, &stmtPrev->statement.rd);
} }
vte->data.var.reachingDefs->defCount = 1; // If this is a label statement, then all jumps to this statement are predecessors
if(stmt->nodeKind == AST_STMT_LABEL) {
for(AST *s = tlc->chunk.statementFirst; s; s = s->statement.next) {
if(s->nodeKind == AST_STMT_JUMP && !strcmp(s->stmtJump.label, stmt->stmtLabel.name)) {
rd_union(&rd, &s->statement.rd);
}
}
}
vte->data.var.reachingDefs->defs = realloc(vte->data.var.reachingDefs->defs, sizeof(*vte->data.var.reachingDefs->defs)); return rd;
vte->data.var.reachingDefs->defs[0] = def;
} }
static void mergedefs(ScopeItem *vte) { static void rd_step(AST *tlc, AST *stmt, AST *stmtPrev) {
assert(vte->kind == SCOPEITEM_VAR); stmt->statement.dirty = false;
ReachingDefs *rdefs = vte->data.var.reachingDefs; ReachingDefs rd = rd_compute_in(tlc, stmt, stmtPrev);
assert(rdefs != NULL); if(stmt->nodeKind == AST_STMT_ASSIGN && stmt->stmtAssign.what->nodeKind == AST_EXPR_VAR && stmt->stmtAssign.what->exprVar.thing->kind == SCOPEITEM_VAR) {
assert(rdefs->parent != NULL); rd_kill(&rd, stmt->stmtAssign.what->exprVar.thing);
rd_add(&rd, stmt);
rdefs->parent->defs = realloc(rdefs->parent->defs, sizeof(*rdefs->parent->defs) * (rdefs->parent->defCount + rdefs->defCount));
memcpy(rdefs->parent->defs + rdefs->parent->defCount, rdefs->defs, rdefs->defCount * sizeof(*rdefs->defs));
vte->data.var.reachingDefs = rdefs->parent;
free(rdefs->defs);
free(rdefs);
}
static void pushdefs(ScopeItem *vte) {
assert(vte->kind == SCOPEITEM_VAR);
ReachingDefs *rdefs = calloc(1, sizeof(*rdefs));
rdefs->defCount = 0;
rdefs->defs = NULL;
rdefs->excludeParent = 0;
rdefs->parent = vte->data.var.reachingDefs;
vte->data.var.reachingDefs = rdefs;
}
static void pushdefsall(AST *tlc) {
for(size_t i = 0; i < tlc->chunk.varCount; i++) {
pushdefs(tlc->chunk.vars[i]);
}
}
static void mergedefsall(AST *tlc) {
for(size_t i = 0; i < tlc->chunk.varCount; i++) {
mergedefs(tlc->chunk.vars[i]);
}
}
static void mergedefsloop(AST *tlc, ScopeItem *vte, AST *daLoopStmt) {
assert(vte->kind == SCOPEITEM_VAR);
for(size_t d = 0; d < vte->data.var.reachingDefs->defCount; d++) {
UseDef *ud = vte->data.var.usedefFirst;
while(ud) {
if(ast_stmt_is_after(daLoopStmt->stmtLoop.body, NULL, ud->stmt) == 1 && ud->def != vte->data.var.reachingDefs->defs[d]) {
UseDef *udnew = calloc(1, sizeof(*udnew));
udnew->next = ud->next;
ud->next = udnew;
udnew->def = vte->data.var.reachingDefs->defs[d];
udnew->use = ud->use;
udnew->stmt = ud->stmt;
/*if(udnew->next == NULL) {
vte->data.var.usedefLast = udnew;
}*/
} }
ud = ud->next; if(!rd_equal(&rd, &stmt->statement.rd)) {
} // Set dirty flag on all successors
// The next statement is a successor unless it's an unconditional jump statement
if(stmt->statement.next && (stmt->nodeKind != AST_STMT_JUMP || stmt->stmtJump.condition)) {
stmt->statement.next->statement.dirty = true;
} }
mergedefs(vte); // If this is a jump statement, the target label is a successor
} if(stmt->nodeKind == AST_STMT_JUMP) {
AST *label = ast_get_label_by_name(tlc, stmt->stmtJump.label);
static void mergedefsloopall(AST *tlc, AST *daLoopStmt) { label->statement.dirty = true;
for(size_t i = 0; i < tlc->chunk.varCount; i++) {
mergedefsloop(tlc, tlc->chunk.vars[i], daLoopStmt);
}
}
struct UsedefPassState {
size_t loopDepth;
};
static void ast_usedef_pass(struct UsedefPassState *state, AST *tlc, AST *a, AST *wholestmt) {
if(a->nodeKind == AST_CHUNK) {
for(AST *s = a->chunk.statementFirst; s; s = s->statement.next) {
ast_usedef_pass(state, tlc, s, s);
}
} else if(a->nodeKind == AST_STMT_IF) {
pushdefsall(tlc);
ast_usedef_pass(state, tlc, a->stmtIf.expression, wholestmt);
ast_usedef_pass(state, tlc, a->stmtIf.then, wholestmt);
mergedefsall(tlc);
} else if(a->nodeKind == AST_STMT_LOOP) {
pushdefsall(tlc);
state->loopDepth++;
ast_usedef_pass(state, tlc, a->stmtLoop.body, wholestmt);
state->loopDepth--;
if(state->loopDepth == 0) {
for(size_t vi = 0; vi < tlc->chunk.varCount; vi++) {
ScopeItem *si = tlc->chunk.vars[vi];
if(si->data.var.usedInLoop) {
if(ast_stmt_is_after(a->stmtLoop.body, si->data.var.declaration, si->data.var.declaration) == -1) {
if(ast_stmt_is_after(tlc, a, si->data.var.liveRangeStart) == 0) {
si->data.var.liveRangeStart = a;
}
si->data.var.liveRangeEnd = a;
}
} }
si->data.var.usedInLoop = false; stmt->statement.rd = rd;
}
}
mergedefsloopall(tlc, a);
} else if(a->nodeKind == AST_STMT_ASSIGN) {
if(a->stmtAssign.what->nodeKind == AST_EXPR_VAR && a->stmtAssign.what->exprVar.thing->kind == SCOPEITEM_VAR) {
overwritedefs(a->stmtAssign.what->exprVar.thing, a);
}
ast_usedef_pass(state, tlc, a->stmtAssign.what, wholestmt);
if(a->stmtAssign.to) {
ast_usedef_pass(state, tlc, a->stmtAssign.to, wholestmt);
}
} else if(a->nodeKind == AST_STMT_EXPR) {
ast_usedef_pass(state, tlc, a->stmtExpr.expr, wholestmt);
} else if(a->nodeKind == AST_EXPR_VAR) {
ScopeItem *si = a->exprVar.thing;
if(si->kind == SCOPEITEM_VAR) {
if(state->loopDepth > 0) {
si->data.var.usedInLoop = true;
}
adduse(si, a, wholestmt);
if(si->data.var.liveRangeStart == NULL) {
si->data.var.liveRangeStart = wholestmt;
}
si->data.var.liveRangeEnd = wholestmt;
}
} else if(a->nodeKind == AST_EXPR_BINARY_OP) {
ast_usedef_pass(state, tlc, a->exprBinOp.operands[0], wholestmt);
ast_usedef_pass(state, tlc, a->exprBinOp.operands[1], wholestmt);
} else if(a->nodeKind == AST_EXPR_UNARY_OP) {
ast_usedef_pass(state, tlc, a->exprUnOp.operand, wholestmt);
} else if(a->nodeKind == AST_EXPR_CALL) {
ast_usedef_pass(state, tlc, a->exprCall.what, wholestmt);
for(size_t p = 0; p < a->exprCall.what->expression.type->pointer.of->function.argCount; p++) {
ast_usedef_pass(state, tlc, a->exprCall.args[p], wholestmt);
}
} else if(a->nodeKind == AST_EXPR_PRIMITIVE) {
} else if(a->nodeKind == AST_EXPR_STRING_LITERAL) {
} else if(a->nodeKind == AST_EXPR_CAST) {
ast_usedef_pass(state, tlc, a->exprCast.what, wholestmt);
} else if(a->nodeKind == AST_EXPR_STACK_POINTER) {
} else if(a->nodeKind == AST_EXPR_EXT_SALLOC) {
} else if(a->nodeKind == AST_STMT_BREAK) {
} else if(a->nodeKind == AST_STMT_CONTINUE) {
} else if(a->nodeKind == AST_STMT_EXT_ALIGN) {
} else if(a->nodeKind == AST_STMT_EXT_ORG) {
} else if(a->nodeKind == AST_STMT_EXT_SECTION) {
} else if(a->nodeKind == AST_STMT_DECL) {
ScopeItem *si = a->stmtDecl.thing;
//assert(si->kind != SCOPEITEM_VAR || a->stmtDecl.expression);
if(si->kind == SCOPEITEM_VAR) {
assert(!si->data.var.declaration || si->data.var.declaration == a);
si->data.var.declaration = a;
}
} else if(a->nodeKind == AST_STMT_RETURN) {
if(a->stmtReturn.val) {
ast_usedef_pass(state, tlc, a->stmtReturn.val, wholestmt);
}
} else {
stahp_node(a, "ast_usedef_pass: unhandled %s", AST_KIND_STR[a->nodeKind]);
} }
} }
char *ast_dump(AST *tlc); char *ast_dump(AST *tlc);
char *ast_dumpc(AST *tlc, AST *chu); char *ast_dumpc(AST *tlc, AST *chu);
/*
 * Visitor that builds use-def chains from the per-statement reaching
 * definition sets.
 *
 * For every AST_EXPR_VAR node it walks the definitions stored in the
 * enclosing statement's `statement.rd`, and for each one that assigns to the
 * same scope item it appends a UseDef (definition, use node, whole statement)
 * to the end of that scope item's chain.
 *
 * Every entry of the set is read through stmtAssign.what->exprVar.thing, so
 * the set is assumed to hold only assignments to plain variables.
 * NOTE(review): the set read here is whatever was last stored in
 * stmt->statement.rd; confirm it is up to date before this visitor runs.
 */
static void usedef_generation_visitor(AST **nptr, AST *stmt, AST *stmtPrev, AST *chunk, AST *tlc, void *ud) {
	AST *node = *nptr;
	if(node->nodeKind != AST_EXPR_VAR) {
		return;
	}

	ScopeItem *var = node->exprVar.thing;
	ReachingDefs *reaching = &stmt->statement.rd;

	for(size_t i = 0; i < reaching->defCount; i++) {
		AST *def = reaching->defs[i];

		// Only definitions of this very variable are relevant to the use.
		if(def->stmtAssign.what->exprVar.thing != var) {
			continue;
		}

		UseDef *link = calloc(1, sizeof(*link));
		link->def = def;
		link->use = node;
		link->stmt = stmt;

		// Append to the variable's singly linked chain.
		if(var->data.var.usedefFirst) {
			var->data.var.usedefLast->next = link;
		} else {
			var->data.var.usedefFirst = link;
		}
		var->data.var.usedefLast = link;
	}
}
void ast_usedef_reset(AST *chu) { void ast_usedef_reset(AST *chu) {
for(size_t i = 0; i < chu->chunk.varCount; i++) { for(size_t i = 0; i < chu->chunk.varCount; i++) {
ScopeItem *vte = chu->chunk.vars[i]; ScopeItem *vte = chu->chunk.vars[i];
assert(vte->kind == SCOPEITEM_VAR); assert(vte->kind == SCOPEITEM_VAR);
vte->data.var.reachingDefs = NULL;
vte->data.var.usedefFirst = NULL; vte->data.var.usedefFirst = NULL;
vte->data.var.usedefLast = NULL; vte->data.var.usedefLast = NULL;
vte->data.var.liveRangeStart = NULL; vte->data.var.liveRangeStart = NULL;
vte->data.var.liveRangeEnd = NULL; vte->data.var.liveRangeEnd = NULL;
} }
pushdefsall(chu); for(AST *s = chu->chunk.statementFirst; s; s = s->statement.next) {
if(s->nodeKind == AST_STMT_IF || s->nodeKind == AST_STMT_LOOP) {
stahp(0, 0, "UD-chain generation requires a completely linear IR");
}
struct UsedefPassState state = {}; s->statement.dirty = true;
}
ast_usedef_pass(&state, chu, chu, NULL); for(size_t rdsteps = 0;; rdsteps++) {
//fprintf(stderr, "RD step %lu\n", rdsteps);
AST *prev = NULL;
AST *dirty = NULL;
// Find at least one dirty statement
for(AST *s = chu->chunk.statementFirst; s; prev = s, s = s->statement.next) {
if(s->statement.dirty) {
dirty = s;
break;
}
}
if(!dirty) {
// Completed reaching definition computation
break;
}
rd_step(chu, dirty, prev);
}
generic_visitor(&chu, NULL, NULL, chu, chu, NULL, usedef_generation_visitor, NULL);
for(size_t i = 0; i < chu->chunk.varCount; i++) { for(size_t i = 0; i < chu->chunk.varCount; i++) {
ScopeItem *vte = chu->chunk.vars[i]; ScopeItem *vte = chu->chunk.vars[i];
@ -481,8 +342,26 @@ void ast_usedef_reset(AST *chu) {
assert(vte->kind == SCOPEITEM_VAR); assert(vte->kind == SCOPEITEM_VAR);
assert(!!vte->data.var.usedefFirst == !!vte->data.var.usedefLast); assert(!!vte->data.var.usedefFirst == !!vte->data.var.usedefLast);
assert(!!vte->data.var.usedefFirst == !!vte->data.var.liveRangeStart);
//assert(!!vte->data.var.liveRangeStart == !!vte->data.var.liveRangeEnd); vte->data.var.liveRangeStart = vte->data.var.usedefFirst->stmt;
vte->data.var.liveRangeEnd = vte->data.var.usedefLast->stmt;
}
// fix liveRangeStart and/or liveRangeEnd depending on goto targets
for(AST *s = chu->chunk.statementFirst; s; s = s->statement.next) {
if(s->nodeKind == AST_STMT_JUMP) {
AST *target = ast_get_label_by_name(chu, s->stmtJump.label);
for(size_t sii = 0; sii < chu->chunk.varCount; sii++) {
ScopeItem *si = chu->chunk.vars[sii];
if(ast_stmt_is_after(chu, si->data.var.liveRangeEnd, s) == 0 && ast_stmt_is_after(chu, target, si->data.var.liveRangeEnd) == 0 && ast_stmt_is_after(chu, si->data.var.declaration, target) == 0) {
si->data.var.liveRangeEnd = s;
}
}
}
} }
if(ntc_get_int("pdbg")) { if(ntc_get_int("pdbg")) {
@ -575,6 +454,10 @@ char *type_to_string(Type *t) {
} }
static char *ast_dumpe(AST *tlc, AST *e) { static char *ast_dumpe(AST *tlc, AST *e) {
if(!e) {
return malp("(null)");
}
if(e->nodeKind == AST_EXPR_PRIMITIVE) { if(e->nodeKind == AST_EXPR_PRIMITIVE) {
return malp("%i", e->exprPrim.val); return malp("%i", e->exprPrim.val);
} else if(e->nodeKind == AST_EXPR_VAR) { } else if(e->nodeKind == AST_EXPR_VAR) {
@ -600,6 +483,9 @@ static char *ast_dumpe(AST *tlc, AST *e) {
case UNOP_NEGATE: case UNOP_NEGATE:
op = "-"; op = "-";
break; break;
case UNOP_NOT:
op = "!";
break;
default: default:
abort(); abort();
} }
@ -755,6 +641,13 @@ static char *ast_dumps(AST *tlc, AST *s) {
free(a); free(a);
return r; return r;
} }
} else if(s->nodeKind == AST_STMT_JUMP) {
char *a = ast_dumpe(tlc, s->stmtJump.condition);
char *r = malp("jump %s if %s;", s->stmtJump.label, a);
free(a);
return r;
} else if(s->nodeKind == AST_STMT_LABEL) {
return malp("@label %s;", s->stmtLabel.name);
} else if(s->nodeKind == AST_STMT_LOOP) { } else if(s->nodeKind == AST_STMT_LOOP) {
char *inner = ast_dumpc(tlc, s->stmtLoop.body); char *inner = ast_dumpc(tlc, s->stmtLoop.body);
char *c = malp("loop {\n%s}", inner); char *c = malp("loop {\n%s}", inner);
@ -1204,3 +1097,120 @@ static void ast_segmented_dereference_visitor(AST **aptr, AST *stmt, AST *stmtPr
void ast_segmented_dereference(AST *tlc) { void ast_segmented_dereference(AST *tlc) {
generic_visitor(&tlc, NULL, NULL, tlc, tlc, tlc, ast_segmented_dereference_visitor, NULL); generic_visitor(&tlc, NULL, NULL, tlc, tlc, tlc, ast_segmented_dereference_visitor, NULL);
} }
// Capacity of the per-loop arrays in struct LinearizeState.
#define LOOPSTACKSIZE 64
// State shared by ast_linearize_visitor_pre and ast_linearize_visitor_post
// through the visitor's user-data pointer. It is a stack of the loops that
// are currently being linearized, innermost last.
struct LinearizeState {
// Number of stack entries in use; the innermost loop is at index currentDepth - 1.
size_t currentDepth;
// Index of each loop's start label (formatted as "$Lin<index>"); target of `continue`.
size_t loopStackStart[LOOPSTACKSIZE];
// Index of each loop's end label (formatted as "$Lin<index>"); target of `break`.
size_t loopStackEnd[LOOPSTACKSIZE];
// End-label node of each loop; the post visitor pops the stack when it reaches this node.
AST *loopAfters[LOOPSTACKSIZE];
};
static void ast_linearize_visitor_pre(AST **aptr, AST *stmt, AST *stmtPrev, AST *chunk, AST *tlc, void *ud) {
static size_t nextLabelIdx = 0;
struct LinearizeState *state = ud;
AST *a = *aptr;
if(a->nodeKind == AST_STMT_IF) {
ASTExprUnaryOp *notcond = calloc(1, sizeof(*notcond));
notcond->nodeKind = AST_EXPR_UNARY_OP;
notcond->operator = UNOP_NOT;
notcond->operand = a->stmtIf.expression;
notcond->type = a->stmtIf.expression->expression.type;
ASTStmtJump *jump = calloc(1, sizeof(ASTStmtJump));
jump->nodeKind = AST_STMT_JUMP;
jump->condition = (AST*) notcond;
jump->label = malp("$Lin%lu", nextLabelIdx++);
ASTStmtLabel *label = calloc(1, sizeof(ASTStmtLabel));
label->nodeKind = AST_STMT_LABEL;
label->name = strdup(jump->label);
if(stmtPrev) {
stmtPrev->statement.next = (AST*) jump;
} else {
chunk->chunk.statementFirst = (AST*) jump;
}
if(a->stmtIf.then->chunk.statementFirst) {
jump->next = a->stmtIf.then->chunk.statementFirst;
for(AST *z = a->stmtIf.then->chunk.statementFirst; z; z = z->statement.next) {
if(!z->statement.next) {
z->statement.next = (AST*) label;
break;
}
}
} else {
jump->next = (AST*) label;
}
label->next = a->statement.next;
} else if(a->nodeKind == AST_STMT_LOOP) {
size_t startIdx = nextLabelIdx++;
size_t endIdx = nextLabelIdx++;
ASTStmtJump *jump = calloc(1, sizeof(ASTStmtJump));
jump->nodeKind = AST_STMT_JUMP;
jump->condition = NULL;
jump->label = malp("$Lin%lu", startIdx);
ASTStmtLabel *startLabel = calloc(1, sizeof(ASTStmtLabel));
startLabel->nodeKind = AST_STMT_LABEL;
startLabel->name = strdup(jump->label);
ASTStmtLabel *endLabel = calloc(1, sizeof(ASTStmtLabel));
endLabel->nodeKind = AST_STMT_LABEL;
endLabel->name = malp("$Lin%lu", endIdx);
if(stmtPrev) {
stmtPrev->statement.next = (AST*) startLabel;
} else {
chunk->chunk.statementFirst = (AST*) startLabel;
}
if(a->stmtLoop.body->chunk.statementFirst) {
startLabel->next = a->stmtLoop.body->chunk.statementFirst;
for(AST *z = a->stmtLoop.body->chunk.statementFirst; z; z = z->statement.next) {
if(!z->statement.next) {
z->statement.next = (AST*) jump;
break;
}
}
} else {
startLabel->next = (AST*) jump;
}
jump->next = (AST*) endLabel;
endLabel->next = a->statement.next;
state->currentDepth++;
state->loopStackStart[state->currentDepth - 1] = startIdx;
state->loopStackEnd[state->currentDepth - 1] = endIdx;
state->loopAfters[state->currentDepth - 1] = (AST*) endLabel;
} else if(a->nodeKind == AST_STMT_BREAK) {
ASTStmtJump *jump = calloc(1, sizeof(ASTStmtJump));
jump->nodeKind = AST_STMT_JUMP;
jump->condition = NULL;
jump->label = malp("$Lin%lu", state->loopStackEnd[state->currentDepth - 1]);
jump->next = a->statement.next;
*aptr = (AST*) jump;
} else if(a->nodeKind == AST_STMT_CONTINUE) {
ASTStmtJump *jump = calloc(1, sizeof(ASTStmtJump));
jump->nodeKind = AST_STMT_JUMP;
jump->condition = NULL;
jump->label = malp("$Lin%lu", state->loopStackStart[state->currentDepth - 1]);
jump->next = a->statement.next;
*aptr = (AST*) jump;
}
}
/*
 * Post-order visitor paired with ast_linearize_visitor_pre: once the visited
 * node is the end label recorded for the innermost open loop, that loop is
 * finished and its entry is popped from the loop stack.
 *
 * ud must point to the same struct LinearizeState given to the pre visitor.
 */
static void ast_linearize_visitor_post(AST **aptr, AST *stmt, AST *stmtPrev, AST *chunk, AST *tlc, void *ud) {
	struct LinearizeState *state = ud;

	// Nothing to pop while no loop is open.
	if(state->currentDepth == 0) {
		return;
	}

	if(*aptr == state->loopAfters[state->currentDepth - 1]) {
		state->currentDepth -= 1;
	}
}
/*
 * Linearizes the control flow of `tlc` by running the linearize pre/post
 * visitors over it with generic_visitor, starting from an empty loop stack.
 */
void ast_linearize(AST *tlc) {
	// Zero-initialized: no loops are open at the start.
	struct LinearizeState state = {0};

	generic_visitor(&tlc, NULL, NULL, tlc, tlc, &state, ast_linearize_visitor_pre, ast_linearize_visitor_post);
}

View File

@ -42,7 +42,9 @@
K(AST_STMT_RETURN) \ K(AST_STMT_RETURN) \
K(AST_EXPR_EXT_SALLOC) \ K(AST_EXPR_EXT_SALLOC) \
K(AST_EXPR_DOT) \ K(AST_EXPR_DOT) \
K(AST_EXPR_EXT_SIZEOF) K(AST_EXPR_EXT_SIZEOF) \
K(AST_STMT_JUMP) \
K(AST_STMT_LABEL)
typedef enum ENUMPAK { AST_KINDS(GEN_ENUM) } ASTKind; typedef enum ENUMPAK { AST_KINDS(GEN_ENUM) } ASTKind;
extern const char *AST_KIND_STR[]; extern const char *AST_KIND_STR[];
@ -104,6 +106,7 @@ typedef enum ENUMPAK {
UNOP_NEGATE = 1, UNOP_NEGATE = 1,
UNOP_BITWISE_NOT = 2, UNOP_BITWISE_NOT = 2,
UNOP_REF = 3, UNOP_REF = 3,
UNOP_NOT = 4,
} UnaryOp; } UnaryOp;
union AST; union AST;
@ -182,6 +185,10 @@ typedef struct {
typedef struct { typedef struct {
ASTBase; ASTBase;
union AST *next; union AST *next;
// Used for reaching definition analysis
bool dirty;
ReachingDefs rd;
} ASTStmt; } ASTStmt;
typedef struct { typedef struct {
@ -307,6 +314,19 @@ typedef struct {
Type *ofType; Type *ofType;
} ASTExprExtSizeOf; } ASTExprExtSizeOf;
// Jump statement (AST_STMT_JUMP): transfers control to the label statement
// whose name equals `label`.
typedef struct {
// Common statement header (node kind, next statement, reaching-definition data).
ASTStmt;
// Expression guarding the jump; NULL makes the jump unconditional.
union AST *condition;
// Name of the target label; matched against ASTStmtLabel.name with strcmp.
char *label;
} ASTStmtJump;
// Label statement (AST_STMT_LABEL): a named position in a statement list that
// jump statements refer to by name.
typedef struct {
// Common statement header (node kind, next statement, reaching-definition data).
ASTStmt;
// Name of the label; jump statements refer to it through ASTStmtJump.label.
char *name;
} ASTStmtLabel;
typedef union AST { typedef union AST {
ASTBase; ASTBase;
@ -336,6 +356,8 @@ typedef union AST {
ASTStmtExtOrg stmtExtOrg; ASTStmtExtOrg stmtExtOrg;
ASTStmtExtSection stmtExtSection; ASTStmtExtSection stmtExtSection;
ASTExprExtSizeOf exprExtSizeOf; ASTExprExtSizeOf exprExtSizeOf;
ASTStmtJump stmtJump;
ASTStmtLabel stmtLabel;
} AST; } AST;
#pragma pack(pop) #pragma pack(pop)
@ -369,4 +391,6 @@ void ast_sroa(AST *tlc);
// Must be done before ast_sroa. // Must be done before ast_sroa.
void ast_segmented_dereference(AST *tlc); void ast_segmented_dereference(AST *tlc);
// Rewrites if, loop, break and continue statements into jump and label
// statements. UD-chain generation (ast_usedef_reset) refuses chunks that
// still contain if or loop statements.
void ast_linearize(AST *tlc);
#endif #endif

View File

@ -96,9 +96,12 @@ int main(int argc_, char **argv_) {
ast_segmented_dereference(chunk); ast_segmented_dereference(chunk);
ast_sroa(chunk); ast_sroa(chunk);
dumben_pre(chunk);
dumben_go(chunk);
ast_linearize(chunk);
dumben_pre(chunk);
dumben_go(chunk);
while(!cg_go(chunk)) { while(!cg_go(chunk)) {
dumben_go(chunk); dumben_go(chunk);
} }

View File

@ -957,6 +957,8 @@ static AST *parse_declaration(Parser *P) {
decl->next = NULL; decl->next = NULL;
decl->expression = expr; decl->expression = expr;
entry->data.var.declaration = (AST*) decl;
ret = (AST*) decl; ret = (AST*) decl;
} else { } else {

View File

@ -6,7 +6,7 @@
#include<assert.h> #include<assert.h>
#include<stdio.h> #include<stdio.h>
struct ReachingDefs *reachingdefs_push(struct ReachingDefs *this) { /*struct ReachingDefs *reachingdefs_push(struct ReachingDefs *this) {
struct ReachingDefs *ret = calloc(1, sizeof(*ret)); struct ReachingDefs *ret = calloc(1, sizeof(*ret));
ret->parent = this; ret->parent = this;
return ret; return ret;
@ -29,7 +29,7 @@ void reachingdefs_set(struct ReachingDefs *this, union AST *def) {
this->defs = realloc(this->defs, sizeof(*this->defs) * this->defCount); this->defs = realloc(this->defs, sizeof(*this->defs) * this->defCount);
this->defs[0] = def; this->defs[0] = def;
this->excludeParent = 1; this->excludeParent = 1;
} }*/
Scope *scope_new(Scope *parent) { Scope *scope_new(Scope *parent) {
Scope *ret = calloc(1, sizeof(*ret)); Scope *ret = calloc(1, sizeof(*ret));

View File

@ -23,13 +23,7 @@ typedef struct UseDef {
typedef struct ReachingDefs { typedef struct ReachingDefs {
size_t defCount; size_t defCount;
union AST **defs; union AST **defs;
int excludeParent;
struct ReachingDefs *parent;
} ReachingDefs; } ReachingDefs;
struct ReachingDefs *reachingdefs_push(struct ReachingDefs*);
struct ReachingDefs *reachingdefs_coalesce(struct ReachingDefs*);
void reachingdefs_set(struct ReachingDefs*, union AST*);
struct Scope; struct Scope;
typedef struct ScopeItem { typedef struct ScopeItem {
@ -66,9 +60,8 @@ typedef struct ScopeItem {
int registerClass; int registerClass;
// Used by ast_usedef_pass // Used by ast_usedef_pass
//ReachingDefs *reachingDefs;
union AST *declaration; union AST *declaration;
ReachingDefs *reachingDefs;
bool usedInLoop;
UseDef *usedefFirst; UseDef *usedefFirst;
UseDef *usedefLast; UseDef *usedefLast;

View File

@ -10,13 +10,13 @@
static const char *BINOP_SIMPLE_INSTRS[] = {[BINOP_ADD] = "add", [BINOP_SUB] = "sub", [BINOP_BITWISE_AND] = "and", [BINOP_BITWISE_OR] = "or", [BINOP_BITWISE_XOR] = "xor"}; static const char *BINOP_SIMPLE_INSTRS[] = {[BINOP_ADD] = "add", [BINOP_SUB] = "sub", [BINOP_BITWISE_AND] = "and", [BINOP_BITWISE_OR] = "or", [BINOP_BITWISE_XOR] = "xor"};
static size_t nextLocalLabel = 0; /*static size_t nextLocalLabel = 0;*/
typedef struct { typedef struct {
#define LOOPSTACKSIZE 96 /*#define LOOPSTACKSIZE 96
size_t loopStackStart[LOOPSTACKSIZE]; size_t loopStackStart[LOOPSTACKSIZE];
size_t loopStackEnd[LOOPSTACKSIZE]; size_t loopStackEnd[LOOPSTACKSIZE];
size_t loopStackIdx; size_t loopStackIdx;*/
int isFunction; int isFunction;
@ -248,7 +248,21 @@ void cg_chunk(CGState *cg, AST *a) {
// Potentially complex pattern matching // Potentially complex pattern matching
while(s) { while(s) {
if(s->nodeKind == AST_STMT_EXT_SECTION) { if(s->nodeKind == AST_STMT_LABEL) {
printf(".%s:\n", s->stmtLabel.name);
} else if(s->nodeKind == AST_STMT_JUMP) {
if(s->stmtJump.condition) {
assert(s->stmtJump.condition->nodeKind == AST_EXPR_BINARY_OP && binop_is_comparison(s->stmtJump.condition->exprBinOp.operator));
printf("cmp %s, %s\n", xop(cg->tlc, s->stmtJump.condition->exprBinOp.operands[0]), xop(cg->tlc, s->stmtJump.condition->exprBinOp.operands[1]));
printf("j%s .%s\n", xj(s->stmtJump.condition->exprBinOp.operator), s->stmtJump.label);
} else {
printf("jmp .%s\n", s->stmtJump.label);
}
} else if(s->nodeKind == AST_STMT_EXT_SECTION) {
Token t = s->stmtExtSection.name; Token t = s->stmtExtSection.name;
printf("section %.*s\n", (int) t.length, t.content); printf("section %.*s\n", (int) t.length, t.content);
@ -310,6 +324,8 @@ void cg_chunk(CGState *cg, AST *a) {
ast_sroa(s->stmtDecl.expression->exprFunc.chunk); ast_sroa(s->stmtDecl.expression->exprFunc.chunk);
ast_linearize(s->stmtDecl.expression->exprFunc.chunk);
dumben_pre(s->stmtDecl.expression->exprFunc.chunk); dumben_pre(s->stmtDecl.expression->exprFunc.chunk);
dumben_go(s->stmtDecl.expression->exprFunc.chunk); dumben_go(s->stmtDecl.expression->exprFunc.chunk);
@ -455,7 +471,7 @@ void cg_chunk(CGState *cg, AST *a) {
} }
} }
} else if(s->nodeKind == AST_STMT_LOOP) { } /*else if(s->nodeKind == AST_STMT_LOOP) {
size_t lbl0 = nextLocalLabel++; size_t lbl0 = nextLocalLabel++;
size_t lbl1 = nextLocalLabel++; size_t lbl1 = nextLocalLabel++;
@ -495,7 +511,7 @@ void cg_chunk(CGState *cg, AST *a) {
printf(".L%lu:\n", lbl); printf(".L%lu:\n", lbl);
} else if(s->nodeKind == AST_STMT_RETURN) { }*/ else if(s->nodeKind == AST_STMT_RETURN) {
if(s->stmtReturn.val) { if(s->stmtReturn.val) {
assert(s->stmtReturn.val->nodeKind == AST_EXPR_VAR); assert(s->stmtReturn.val->nodeKind == AST_EXPR_VAR);
@ -519,7 +535,7 @@ void cg_chunk(CGState *cg, AST *a) {
} else { } else {
stahp_node(s, "Unknown statement caught by code generator."); stahp_node(s, "Unknown statement %s caught by code generator.", AST_KIND_STR[s->nodeKind]);
} }

View File

@ -24,7 +24,6 @@ static ScopeItem *create_dumbtemp(AST *tlc, Type *itstype) {
vte->data.var.precolored = false; vte->data.var.precolored = false;
vte->data.var.degree = 0; vte->data.var.degree = 0;
vte->data.var.priority = 0; vte->data.var.priority = 0;
vte->data.var.reachingDefs = NULL;
vte->data.var.name = malp("$dumb%lu", vidx++); vte->data.var.name = malp("$dumb%lu", vidx++);
// Add to var array // Add to var array
@ -95,9 +94,9 @@ static void dumben_visitor(AST **nptr, AST *stmt, AST *stmtPrev, AST *chu, AST *
AST *s = *nptr; AST *s = *nptr;
if(s->nodeKind == AST_STMT_IF) { if(s->nodeKind == AST_STMT_JUMP && s->stmtJump.condition) {
AST *e = s->stmtIf.expression; AST *e = s->stmtJump.condition;
if(e->nodeKind == AST_EXPR_BINARY_OP && binop_is_comparison(e->exprBinOp.operator)) { if(e->nodeKind == AST_EXPR_BINARY_OP && binop_is_comparison(e->exprBinOp.operator)) {
@ -120,7 +119,7 @@ static void dumben_visitor(AST **nptr, AST *stmt, AST *stmtPrev, AST *chu, AST *
} else { } else {
s->stmtIf.expression = varify(tlc, chu, stmtPrev, s, e); s->stmtJump.condition = varify(tlc, chu, stmtPrev, s, e);
this->effective = 1; this->effective = 1;
} }
@ -507,6 +506,14 @@ static void denoop_visitor(AST **nptr, AST *stmt, AST *stmtPrev, AST *chunk, AST
*nptr = n->exprBinOp.operands[0]; *nptr = n->exprBinOp.operands[0];
*success = true;
} else if(n->nodeKind == AST_EXPR_UNARY_OP && n->exprUnOp.operator == UNOP_NOT && n->exprUnOp.operand->nodeKind == AST_EXPR_BINARY_OP) {
// Turn `!(a op b)` to `(a !op b)`
n->exprUnOp.operand->exprBinOp.operator = binop_comp_opposite(n->exprUnOp.operand->exprBinOp.operator);
*nptr = n->exprUnOp.operand;
*success = true; *success = true;
} else if(n->nodeKind == AST_EXPR_CAST && n->exprCast.what->expression.type->type == TYPE_TYPE_POINTER && n->exprCast.to->type == TYPE_TYPE_POINTER) { } else if(n->nodeKind == AST_EXPR_CAST && n->exprCast.what->expression.type->type == TYPE_TYPE_POINTER && n->exprCast.to->type == TYPE_TYPE_POINTER) {
// Turn (x as A*) into x, since all pointer types are identical in Nectar's AST // Turn (x as A*) into x, since all pointer types are identical in Nectar's AST