From 14ab1f432d2e4342016b0fb545553da7e8148281 Mon Sep 17 00:00:00 2001 From: Mid <> Date: Mon, 15 Sep 2025 17:10:38 +0300 Subject: [PATCH] Correct type sizes under different ABIs --- src/ast/ast.c | 5 ++ src/ast/dump.c | 12 +++ src/scope.c | 5 ++ src/types.c | 8 ++ src/types.h | 8 ++ src/x86/cg.c | 193 ++++++++++------------------------------- src/x86/dumberdowner.c | 102 ++++++++++++++-------- 7 files changed, 149 insertions(+), 184 deletions(-) diff --git a/src/ast/ast.c b/src/ast/ast.c index c2ddb60..cae4c87 100644 --- a/src/ast/ast.c +++ b/src/ast/ast.c @@ -177,6 +177,11 @@ static void *memdup(void *a, size_t len) { return r; } +/* + * WARNING: Just because you deep copy an AST node, does not mean + * ast_expression_equal will return true! This matters for example with + * function calls (a function call is not equal to itself). + */ AST *ast_deep_copy(AST *src) { if(src->nodeKind == AST_EXPR_VAR) { return memdup(src, sizeof(ASTExprVar)); diff --git a/src/ast/dump.c b/src/ast/dump.c index 4bcd268..796e993 100644 --- a/src/ast/dump.c +++ b/src/ast/dump.c @@ -274,6 +274,18 @@ static char *ast_dumps(AST *tlc, AST *s) { return malp("%s; /* loop guard */", name); } else if(s->nodeKind == AST_STMT_EXPR) { return ast_dumpe(tlc, s->stmtExpr.expr); + } else if(s->nodeKind == AST_STMT_DECL) { + char *a = type_to_string(s->stmtDecl.thing->type); + char *c; + if(s->stmtDecl.expression) { + char *b = ast_dumpe(tlc, s->stmtDecl.expression); + c = malp("%s %s = %s;", a, s->stmtDecl.thing->data.var.name, b); + free(b); + } else { + c = malp("%s %s;", a, s->stmtDecl.thing->data.var.name); + } + free(a); + return c; } else if(s->nodeKind == AST_STMT_RETURN) { if(s->stmtReturn.val) { char *e = ast_dumpe(tlc, s->stmtReturn.val); diff --git a/src/scope.c b/src/scope.c index 7fa8dfe..34a282d 100644 --- a/src/scope.c +++ b/src/scope.c @@ -5,6 +5,7 @@ #include #include #include +#include"x86/arch.h" Scope *scope_new(Scope *parent) { Scope *ret = calloc(1, sizeof(*ret)); @@ -81,6 
+82,10 @@ void vte_precolor(ScopeItem *vte, int class, int color) { assert(vte->kind == SCOPEITEM_VAR && "vte must be var"); assert(!vte->data.var.precolored && "already precolored"); + if(type_size(vte->type) > 0) { + assert(type_size(vte->type) == REG_CLASSES[class].rsS[color] && "Sizes must match in precoloring"); + } + vte->data.var.precolored = true; vte->data.var.preclassed = true; vte->data.var.registerClass = class; diff --git a/src/types.c b/src/types.c index 759c0d2..db06d24 100644 --- a/src/types.c +++ b/src/types.c @@ -364,3 +364,11 @@ Type *type_shallow_copy(Type *t) { } abort(); } + +Type *type_prim_cast(Type *t, size_t bits) { + assert(t->type == TYPE_TYPE_PRIMITIVE); + + Type *t2 = type_shallow_copy(t); + t2->primitive.width = bits; + return t2; +} diff --git a/src/types.h b/src/types.h index 1360ae4..561a437 100644 --- a/src/types.h +++ b/src/types.h @@ -111,6 +111,8 @@ Type *type_shallow_copy(Type *t); bool type_is_generic(Type *t); +Type *type_prim_cast(Type *t, size_t bits); + static inline bool type_is_segmented_pointer(Type *type) { return type->type == TYPE_TYPE_RECORD && !!strstr(type->record.name, " @far*"); } @@ -121,4 +123,10 @@ static inline Type *type_u(size_t bits) { return primitive_parse(buf); } +static inline Type *type_s(size_t bits) { + char buf[32]; + snprintf(buf, sizeof(buf), "s%lu", bits); + return primitive_parse(buf); +} + #endif diff --git a/src/x86/cg.c b/src/x86/cg.c index cdea206..5f24614 100644 --- a/src/x86/cg.c +++ b/src/x86/cg.c @@ -12,6 +12,11 @@ static const char *BINOP_SIMPLE_INSTRS[] = {[BINOP_ADD] = "add", [BINOP_SUB] = " /*static size_t nextLocalLabel = 0;*/ +struct CalleeSavedState { + const char *reg[MAX_REGS_PER_CLASS]; + size_t stackOffset[MAX_REGS_PER_CLASS]; +}; + typedef struct { /*#define LOOPSTACKSIZE 96 size_t loopStackStart[LOOPSTACKSIZE]; @@ -21,6 +26,8 @@ typedef struct { int isFunction; AST *tlc; + + struct CalleeSavedState calleeSaved; } CGState; static const char *direct(int size) { @@ -258,6 
+265,17 @@ void cg_chunk(CGState *cg, AST *a) { } else { printf("sub esp, %lu\n", a->chunk.stackReservation); } + + for(int i = 0; i < MAX_REGS_PER_CLASS && cg->calleeSaved.reg[i]; i++) { + if(x86_ia16()) { + printf("mov [bp + %li], %s\n", cg->calleeSaved.stackOffset[i] - a->chunk.stackReservation, cg->calleeSaved.reg[i]); + } else { + printf("mov [esp + %li], %s\n", cg->calleeSaved.stackOffset[i], cg->calleeSaved.reg[i]); + } + } + } else { + // If there's no stack reservation, there can't be callee-saved regs. + assert(cg->calleeSaved.reg[0] == NULL); } // Potentially complex pattern matching @@ -546,10 +564,17 @@ void cg_chunk(CGState *cg, AST *a) { if(s->stmtReturn.val) { assert(s->stmtReturn.val->nodeKind == AST_EXPR_VAR); assert(s->stmtReturn.val->exprVar.thing->kind == SCOPEITEM_VAR); - //assert(s->stmtReturn.val->exprVar.thing->data.var.color == COLOR_EAX); } if(a->chunk.stackReservation) { + for(int i = 0; i < MAX_REGS_PER_CLASS && cg->calleeSaved.reg[i]; i++) { // Epilogue: reload (not re-store) the registers spilled at function entry, before the frame is released + if(x86_ia16()) { + printf("mov %s, [bp + %li]\n", cg->calleeSaved.reg[i], cg->calleeSaved.stackOffset[i] - a->chunk.stackReservation); + } else { + printf("mov %s, [esp + %li]\n", cg->calleeSaved.reg[i], cg->calleeSaved.stackOffset[i]); + } + } + if(x86_ia16()) { printf("add sp, %lu\n", a->chunk.stackReservation); } else { @@ -603,170 +628,38 @@ static bool var_collision(AST *tlc, ScopeItem *v1, ScopeItem *v2) { return liveRangeIntersection && resourceIntersection; } -struct CalleeSavedState { - AST *targetTLC; - - ScopeItem *calleeUsers[MAX_REGS_PER_CLASS]; - size_t calleeOffsets[MAX_REGS_PER_CLASS]; - - // To make sure we don't process the same return statement to infinity - AST *lastProcessedReturn; -}; -static void callee_saved_visitor(AST **nptr, AST *stmt, AST *stmtPrev, AST *chunk, AST *tlc, void *ud) { - struct CalleeSavedState *this = ud; - - AST *n = *nptr; - - if(tlc != this->targetTLC) { - // Don't do anything.
- return; - } - - if(n == tlc) { - // Function entry - - for(int i = 0; i < MAX_REGS_PER_CLASS; i++) { - ScopeItem *vte = this->calleeUsers[i]; - - if(!vte) { - continue; - } - - ASTExprStackPointer *stk = calloc(1, sizeof(*stk)); - stk->nodeKind = AST_EXPR_STACK_POINTER; - stk->type = primitive_parse("u32"); - - ASTExprPrimitive *offset = calloc(1, sizeof(*offset)); - offset->nodeKind = AST_EXPR_PRIMITIVE; - offset->type = primitive_parse("u32"); - offset->val = this->calleeOffsets[i]; - offset->stackGrowth = true; - - ASTExprBinaryOp *sum = calloc(1, sizeof(*sum)); - sum->nodeKind = AST_EXPR_BINARY_OP; - sum->type = offset->type; - sum->operator = BINOP_ADD; - sum->operands[0] = (AST*) stk; - sum->operands[1] = (AST*) offset; - - ASTExprUnaryOp *deref = calloc(1, sizeof(*deref)); - deref->nodeKind = AST_EXPR_UNARY_OP; - deref->type = offset->type; - deref->operator = UNOP_DEREF; - deref->operand = (AST*) sum; - - ASTExprVar *ev = calloc(1, sizeof(*ev)); - ev->nodeKind = AST_EXPR_VAR; - ev->type = vte->type; - ev->thing = vte; - - ASTStmtAssign *assign = calloc(1, sizeof(*assign)); - assign->nodeKind = AST_STMT_ASSIGN; - assign->what = (AST*) deref; - assign->to = (AST*) ev; - - assign->next = tlc->chunk.statementFirst; - tlc->chunk.statementFirst = (AST*) assign; - - assert(tlc->chunk.statementLast != NULL); - } - - } else if(n->nodeKind == AST_STMT_RETURN && n != this->lastProcessedReturn) { - // Function exit - - this->lastProcessedReturn = n; - - for(int i = 0; i < MAX_REGS_PER_CLASS; i++) { - ScopeItem *vte = this->calleeUsers[i]; - - if(!vte) { - continue; - } - - ASTExprStackPointer *stk = calloc(1, sizeof(*stk)); - stk->nodeKind = AST_EXPR_STACK_POINTER; - stk->type = primitive_parse("u32"); - - ASTExprPrimitive *offset = calloc(1, sizeof(*offset)); - offset->nodeKind = AST_EXPR_PRIMITIVE; - offset->type = primitive_parse("u32"); - offset->val = this->calleeOffsets[i]; - offset->stackGrowth = true; - - ASTExprBinaryOp *sum = calloc(1, sizeof(*sum)); - 
sum->nodeKind = AST_EXPR_BINARY_OP; - sum->type = offset->type; - sum->operator = BINOP_ADD; - sum->operands[0] = (AST*) stk; - sum->operands[1] = (AST*) offset; - - ASTExprUnaryOp *deref = calloc(1, sizeof(*deref)); - deref->nodeKind = AST_EXPR_UNARY_OP; - deref->type = offset->type; - deref->operator = UNOP_DEREF; - deref->operand = (AST*) sum; - - ASTExprVar *ev = calloc(1, sizeof(*ev)); - ev->nodeKind = AST_EXPR_VAR; - ev->type = vte->type; - ev->thing = vte; - - ASTStmtAssign *assign = calloc(1, sizeof(*assign)); - assign->nodeKind = AST_STMT_ASSIGN; - assign->what = (AST*) ev; - assign->to = (AST*) deref; - assign->next = stmt; - - if(stmtPrev) { - stmtPrev->statement.next = (AST*) assign; - } else { - tlc->chunk.statementFirst = (AST*) assign; - } - stmtPrev = (AST*) assign; - } - - } -} - -static void callee_saved(AST *tlc) { - ScopeItem *ebxuser = NULL, *ediuser = NULL, *esiuser = NULL; +static void callee_saved(AST *tlc, struct CalleeSavedState *state) { + bool ebxused = false, ediused = false, esiused = false; for(size_t v = 0; v < tlc->chunk.varCount; v++) { if(is_reg_b(tlc->chunk.vars[v]->data.var.registerClass, tlc->chunk.vars[v]->data.var.color)) { - ebxuser = tlc->chunk.vars[v]; + ebxused = true; } if(is_reg_di(tlc->chunk.vars[v]->data.var.registerClass, tlc->chunk.vars[v]->data.var.color)) { - ediuser = tlc->chunk.vars[v]; + ediused = true; } if(is_reg_si(tlc->chunk.vars[v]->data.var.registerClass, tlc->chunk.vars[v]->data.var.color)) { - esiuser = tlc->chunk.vars[v]; + esiused = true; } } - struct CalleeSavedState state = {}; - state.targetTLC = tlc; - size_t nextUser = 0; - if(ebxuser) { - state.calleeOffsets[nextUser] = nextUser * 4; - state.calleeUsers[nextUser] = ebxuser; + if(ebxused) { + state->stackOffset[nextUser] = nextUser * x86_max_gpr_size(); + state->reg[nextUser] = x86_ia16() ? 
"bx" : "ebx"; nextUser++; } - if(esiuser) { - state.calleeOffsets[nextUser] = nextUser * 4; - state.calleeUsers[nextUser] = esiuser; + if(esiused) { + state->stackOffset[nextUser] = nextUser * x86_max_gpr_size(); + state->reg[nextUser] = x86_ia16() ? "si" : "esi"; nextUser++; } - if(ediuser) { - state.calleeOffsets[nextUser] = nextUser * 4; - state.calleeUsers[nextUser] = ediuser; + if(ediused) { + state->stackOffset[nextUser] = nextUser * x86_max_gpr_size(); + state->reg[nextUser] = x86_ia16() ? "di" : "edi"; nextUser++; } - ast_grow_stack_frame(tlc, nextUser * 4); - - if(nextUser) { - generic_visitor(&tlc, NULL, NULL, tlc, tlc, &state, callee_saved_visitor, NULL); - } + ast_grow_stack_frame(tlc, nextUser * x86_max_gpr_size()); } static void determine_register_classes(AST *tlc) { @@ -949,14 +842,16 @@ cont:; return 0; } + struct CalleeSavedState calleeSaved = {}; if(a->chunk.functionType) { - callee_saved(a); + callee_saved(a, &calleeSaved); } CGState cg; memset(&cg, 0, sizeof(cg)); cg.tlc = a; cg.isFunction = !!a->chunk.functionType; + cg.calleeSaved = calleeSaved; cg_chunk(&cg, a); diff --git a/src/x86/dumberdowner.c b/src/x86/dumberdowner.c index 703daee..00916da 100644 --- a/src/x86/dumberdowner.c +++ b/src/x86/dumberdowner.c @@ -83,6 +83,32 @@ static void mark_ptr(AST *a) { } } +static void mark_a(ScopeItem *si) { + size_t sz = type_size(si->type); + if(sz <= 1) { + vte_precolor(si, REG_CLASS_8, 0); + } else if(sz == 2) { + vte_precolor(si, REG_CLASS_16_32, 0); + } else if(sz == 4 || sz == 0) { + vte_precolor(si, REG_CLASS_16_32, 1); + } else { + abort(); + } +} + +static void mark_d(ScopeItem *si) { + size_t sz = type_size(si->type); + if(sz <= 1) { + vte_precolor(si, REG_CLASS_8, 6); + } else if(sz == 2) { + vte_precolor(si, REG_CLASS_16_32, 6); + } else if(sz == 4 || sz == 0) { + vte_precolor(si, REG_CLASS_16_32, 7); + } else { + abort(); + } +} + struct DumbenState { AST *targetTLC; int effective; @@ -233,7 +259,7 @@ static void dumben_visitor(AST **nptr, 
AST *stmt, AST *stmtPrev, AST *chu, AST * retval = s->stmtReturn.val = varify(tlc, chu, stmtPrev, s, retval); this->effective = 1; - vte_precolor(retval->exprVar.thing, REG_CLASS_16_32, 1); + mark_a(retval->exprVar.thing); } @@ -246,7 +272,7 @@ static void dumben_visitor(AST **nptr, AST *stmt, AST *stmtPrev, AST *chu, AST * s->stmtExpr.expr = varify(tlc, chu, stmtPrev, s, s->stmtExpr.expr); - vte_precolor(s->stmtExpr.expr->exprVar.thing, REG_CLASS_16_32, 1); + mark_a(s->stmtExpr.expr->exprVar.thing); // Purge this statement entirely, otherwise we'd have // a = f(x); @@ -273,7 +299,7 @@ static void dumben_visitor(AST **nptr, AST *stmt, AST *stmtPrev, AST *chu, AST * if(s->stmtAssign.to->nodeKind == AST_EXPR_CALL && (s->stmtAssign.what->nodeKind != AST_EXPR_VAR || s->stmtAssign.what->exprVar.thing->kind != SCOPEITEM_VAR || !s->stmtAssign.what->exprVar.thing->data.var.precolored)) { ScopeItem *tmp = create_dumbtemp(tlc, s->stmtAssign.what->expression.type); - vte_precolor(tmp, REG_CLASS_16_32, 1); + mark_a(tmp); ASTExprVar *ev[2] = {calloc(1, sizeof(**ev)), calloc(1, sizeof(**ev))}; ev[0]->nodeKind = AST_EXPR_VAR; @@ -392,19 +418,7 @@ static void dumben_visitor(AST **nptr, AST *stmt, AST *stmtPrev, AST *chu, AST * AST *hihalf = varify(tlc, chu, stmtPrev->statement.next, s, mulhi); - switch(type_size(s->stmtAssign.what->expression.type)) { - case 1: - vte_precolor(hihalf->exprVar.thing, REG_CLASS_8, 6); - break; - case 2: - vte_precolor(hihalf->exprVar.thing, REG_CLASS_16_32, 6); - break; - case 4: - vte_precolor(hihalf->exprVar.thing, REG_CLASS_16_32, 7); - break; - default: - abort(); - } + mark_d(hihalf->exprVar.thing); } s->stmtAssign.what = ast_deep_copy(s->stmtAssign.to->exprBinOp.operands[0]); @@ -417,19 +431,7 @@ static void dumben_visitor(AST **nptr, AST *stmt, AST *stmtPrev, AST *chu, AST * s->stmtAssign.next = (AST*) redest; - switch(type_size(s->stmtAssign.what->expression.type)) { - case 1: - 
vte_precolor(s->stmtAssign.to->exprBinOp.operands[0]->exprVar.thing, REG_CLASS_8, 0); - break; - case 2: - vte_precolor(s->stmtAssign.to->exprBinOp.operands[0]->exprVar.thing, REG_CLASS_16_32, 0); - break; - case 4: - vte_precolor(s->stmtAssign.to->exprBinOp.operands[0]->exprVar.thing, REG_CLASS_16_32, 1); - break; - default: - abort(); - } + mark_a(s->stmtAssign.to->exprBinOp.operands[0]->exprVar.thing); this->effective = 1; } else assert(because == NOT_AT_ALL_IT || because == GUCCI); @@ -476,11 +478,6 @@ static void dumben_visitor(AST **nptr, AST *stmt, AST *stmtPrev, AST *chu, AST * } } -struct DenoopState { - AST *targetTLC; - bool success; -}; - static void pre_dumb_visitor(AST **nptr, AST *stmt, AST *stmtPrev, AST *chunk, AST *tlc, void *ud) { AST *n = *nptr; @@ -524,7 +521,7 @@ static void pre_dumb_visitor(AST **nptr, AST *stmt, AST *stmtPrev, AST *chunk, A ass->nodeKind = AST_STMT_ASSIGN; ass->next = NULL; ass->what = (AST*) evar; - ass->to = (AST*) deref; + ass->to = ast_cast_expr((AST*) deref, vte->type); // Must cast because of "convention correctness" ass->next = n->chunk.statementFirst; if(n->chunk.statementFirst) { @@ -591,6 +588,11 @@ static bool is_double_field_access(AST *e) { return e->nodeKind == AST_EXPR_BINARY_OP && is_pointer2pointer_cast(e->exprBinOp.operands[0]) && e->exprBinOp.operands[0]->exprCast.what->nodeKind == AST_EXPR_BINARY_OP && e->exprBinOp.operands[0]->exprCast.what->exprBinOp.operator == e->exprBinOp.operator && e->exprBinOp.operator == BINOP_ADD && e->exprBinOp.operands[0]->exprCast.what->exprBinOp.operands[1]->nodeKind == AST_EXPR_PRIMITIVE && e->exprBinOp.operands[1]->nodeKind == AST_EXPR_PRIMITIVE; } +struct DenoopState { + AST *targetTLC; + bool success; +}; + static void denoop_visitor(AST **nptr, AST *stmt, AST *stmtPrev, AST *chunk, AST *tlc, void *ud) { struct DenoopState *state = ud; @@ -700,7 +702,37 @@ void ast_denoop(AST *tlc, AST **node) { } while(state.success); } +/* + * The convention correctness pass converts 
all function calls & definitions to the form that matches + * the architecture most closely. For example, arguments (and return values) in cdecl are always + * passed as 32-bit integers, even if they are defined as 8-bit or 16-bit in the source. + * + * TODO: convert records to proper form also. + */ +static void convention_correctness_visitor(AST **nptr, AST *stmt, AST *stmtPrev, AST *chunk, AST *tlc, void *ud) { + if(tlc != ud) { + return; + } + + AST *n = *nptr; + + if(n->nodeKind == AST_STMT_DECL && n->stmtDecl.thing->kind == SCOPEITEM_SYMBOL && n->stmtDecl.thing->type->type == TYPE_TYPE_FUNCTION) { + Type *type = n->stmtDecl.thing->type; + + assert(n->stmtDecl.expression->exprFunc.chunk->chunk.functionType == type); + assert(type->function.ret->type == TYPE_TYPE_PRIMITIVE || type->function.ret->type == TYPE_TYPE_POINTER); + + if(type->function.ret->type == TYPE_TYPE_PRIMITIVE) { type->function.ret = type_prim_cast(type->function.ret, 8 * x86_max_gpr_size()); } // Only primitives are widened: type_prim_cast asserts on any other kind, so pointers are left untouched + + for(size_t i = 0; i < type->function.argCount; i++) { + assert(type->function.args[i]->type == TYPE_TYPE_PRIMITIVE || type->function.args[i]->type == TYPE_TYPE_POINTER); + if(type->function.args[i]->type == TYPE_TYPE_PRIMITIVE) { type->function.args[i] = type_prim_cast(type->function.args[i], 8 * x86_max_gpr_size()); } + } + } +} + void dumben_pre(AST *tlc) { + generic_visitor(&tlc, NULL, NULL, tlc, tlc, tlc, convention_correctness_visitor, NULL); generic_visitor(&tlc, NULL, NULL, tlc, tlc, tlc, pre_dumb_visitor, NULL); generic_visitor(&tlc, NULL, NULL, tlc, tlc, tlc, decompose_symbol_record_field_access, NULL);