#include"parse.h"

/* Standard headers required by the code below. */
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include"utils.h"
#include"vartable.h"
#include"reporting.h"
#include<assert.h>
#include<limits.h>
#include<stdbool.h>
#include<stdint.h>
#include"x86.h"

#ifndef __GNUC__
/*
 * Portable stand-in for the GCC builtin: number of leading zero bits in x.
 * The width is derived from the actual size of unsigned long.
 * Unlike the builtin, this is well defined for x == 0 (returns the full width).
 */
static inline int __builtin_clzl(unsigned long x) {
	int n = (int) (sizeof(x) * CHAR_BIT);
	while(x != 0) {
		n--;
		x >>= 1;
	}
	return n;
}
#endif

/* State shared by all parsing routines in this file. */
typedef struct {
	/* Token array being parsed, and the index of the current token */
	Token *tokens;
	intmax_t i;
	
	Scope *scope;
	
	// Used to coalesce all scopes into one after parsing, to perform global register allocation
	ASTChunk *topLevel;
	
	// Used by pushstat to add statements
	ASTChunk *currentChunk;
	
	// Used to place guard variable uses after loops to stop reg allocation from fucking up
	Scope *loopScope;
	size_t guardedVarCount;
	ASTExprVar **guardedVars;
	
	// Used for generating statements that load & store arguments
	int isInFunction;
	
	// Skim mode disables parsing function definitions
	// This is needed to automatically forward-declare symbols
	int skimMode;
	
	// If this parser is for importing an external module, all symbols should become "extern"
	int externalify;
} Parser;

/*
 * Allocates a zeroed AST node of `sz` bytes and stamps it with the row and
 * column of the current token. Aborts if the allocation fails.
 */
static void *alloc_node(Parser *P, size_t sz) {
	AST *a = calloc(1, sz);
	if(!a) {
		fputs("Out of memory.\n", stderr);
		abort();
	}
	
	a->row = P->tokens[P->i].row;
	a->col = P->tokens[P->i].column;
	
	return a;
}

/* Returns the current token and advances past it. The EOF token is never passed. */
static Token get(Parser *P) {
	if(P->tokens[P->i].type == TOKEN_EOF) {
		return P->tokens[P->i];
	} else {
		return P->tokens[P->i++];
	}
}

/* Consumes one token and reports an error through stahp if it is not of kind `t`. */
static Token expect(Parser *P, TokenKind t) {
	Token tok = get(P);
	if(tok.type != t) {
		stahp(tok.row, tok.column, "Expected %s, got %s.", TOKEN_NAMES[t], TOKEN_NAMES[tok.type]);
	}
	return tok;
}

/* Returns the token `depth` positions ahead without consuming anything; never looks beyond EOF. */
static Token peek(Parser *P, int depth) {
	int j = 0;
	for(; j < depth; j++) {
		if(P->tokens[P->i + j].type == TOKEN_EOF) {
			break;
		}
	}
	return P->tokens[P->i + j];
}

/* Consumes the current token if it is of kind `t`. Returns 1 if it did, otherwise 0. */
static int maybe(Parser *P, TokenKind t) {
	if(peek(P, 0).type == t) {
		get(P);
		return 1;
	}
	return 0;
}

/*
 * Consumes tokens until the brace nesting level, which starts at `depth`,
 * drops back to zero on a '}'. With depth 0 everything up to and including the
 * first '{' and its matching '}' is consumed.
 * Reports an error instead of spinning forever when EOF is reached, because
 * get() never advances past the EOF token.
 */
static void skip_braces(Parser *P, size_t depth) {
	while(1) {
		Token tok = get(P);
		if(tok.type == TOKEN_EOF) {
			stahp(tok.row, tok.column, "Unexpected end of file inside a block.");
			return;
		} else if(tok.type == TOKEN_SQUIGGLY_L) {
			depth++;
		} else if(tok.type == TOKEN_SQUIGGLY_R) {
			if(depth == 0) {
				stahp(tok.row, tok.column, "Unmatched '}'.");
				return;
			}
			if(--depth == 0) {
				return;
			}
		}
	}
}

/* Appends statement node `a` to the statement list of the current chunk. */
static void pushstat(Parser *P, void *a) {
	if(P->currentChunk->statementFirst) {
		P->currentChunk->statementLast->statement.next = a;
		P->currentChunk->statementLast = a;
	} else {
		P->currentChunk->statementFirst = P->currentChunk->statementLast = a;
	}
}

/*
 * Parses a number token into a primitive expression node.
 * A literal containing 'r' is read as <base>r<digits>, with the base in decimal.
 * Returns NULL (and frees the node) if the text cannot be converted.
 */
static ASTExprPrimitive *parse_prim(Parser *P) {
	ASTExprPrimitive *ret = alloc_node(P, sizeof(*ret));
	ret->nodeKind = AST_EXPR_PRIMITIVE;
	
	Token tok = get(P);
	
	const char *str = tok.content;
	long base = 10;
	if(strchr(str, 'r')) {
		if(!unstupid_strtol(str, (char**) &str, 10, &base)) {
			free(ret);
			return NULL;
		}
		
		str++; /* Go past the r. */
	}
	
	long val;
	if(!unstupid_strtol(str, NULL, base, &val)) {
		free(ret);
		return NULL;
	}
	ret->val = val;
	
	// Smallest integer type to store number
	// The count of leading zeros is undefined for 0, so the literals 0 and 1 both get 1 bit.
	// NOTE(review): exact powers of two get ceil(log2(val)) bits (256 -> "s8"); confirm this is intended.
	const int longBits = (int) (sizeof(unsigned long) * CHAR_BIT);
	unsigned long pred = (unsigned long) (ret->val - 1);
	int bits = (ret->val == 0 || pred == 0) ? 1 : longBits - __builtin_clzl(pred);
	
	char buf[8];
	snprintf(buf, sizeof(buf), "s%i", bits);
	ret->type = (Type*) primitive_parse(buf);
	
	return ret;
}

/*
 * Creates a variable-reference expression for scope item `v`.
 * While a loop is being parsed, a variable whose owning scope is not below the
 * loop scope is also recorded once in P->guardedVars, so that a use of it can
 * be emitted after the loop.
 */
static AST *exprvar(Parser *P, ScopeItem *v) {
	assert(v->kind != SCOPEITEM_TYPE);
	
	AST *a = alloc_node(P, sizeof(ASTExprVar));
	a->nodeKind = AST_EXPR_VAR;
	a->exprVar.type = v->type;
	a->exprVar.thing = v;
	
	if(P->loopScope) {
		// XXX: O(n)!!!!!!!!!
		
		int inloop = 0;
		for(Scope *vt = v->owner; vt; vt = vt->parent) {
			if(vt->parent == P->loopScope) {
				inloop = 1;
				break;
			}
		}
		
		if(!inloop) {
			int alreadyAdded = 0;
			for(size_t i = 0; i < P->guardedVarCount; i++) {
				if(P->guardedVars[i]->thing == v) {
					alreadyAdded = 1;
					break;
				}
			}
			
			if(!alreadyAdded) {
				ASTExprVar *ev = alloc_node(P, sizeof(*ev));
				memcpy(ev, a, sizeof(*ev));
				P->guardedVars = realloc(P->guardedVars, sizeof(*P->guardedVars) * (P->guardedVarCount + 1));
				P->guardedVars[P->guardedVarCount++] = ev;
			}
		}
	}
	
	return a;
}

ASTChunk *nct_parse_chunk(Parser*, int, int, Scope*, Type *ft);
Type *nct_parse_typename(Parser *P);
void parse_genericization(Parser *P);
static bool parse_parametrization(Parser *P);
static char *parametrize_function_name(Type *t, const char *original, Scope *scope);

/*
 * Casts the smaller operand of a binary operation to the type of the larger
 * one, and gives the operation the type of its first operand if it has none yet.
 */
static void binop_implicit_cast(/*Parser *P, */ASTExprBinaryOp *binop) {
	if(type_size(binop->operands[0]->expression.type) < type_size(binop->operands[1]->expression.type)) {
		binop->operands[0] = ast_cast_expr(binop->operands[0], binop->operands[1]->expression.type);
	}
	
	if(type_size(binop->operands[1]->expression.type) < type_size(binop->operands[0]->expression.type)) {
		binop->operands[1] = ast_cast_expr(binop->operands[1], binop->operands[0]->expression.type);
	}
	
	if(!binop->type) {
		binop->type = binop->operands[0]->expression.type;
	}
}

/*
 * Parses an expression by precedence climbing. `lOP` selects the level:
 *   0: anonymous functions and comparisons    1: "as" casts
 *   2: + - & | ^                              3: * / *^
 *   4: calls, indexing, parametrization       5: prefix * & - ~
 *   6: literals, variables, @-extensions, parentheses, field access
 * Returns NULL at level 6 when no primary expression is present; several
 * callers rely on that to backtrack.
 */
AST *nct_parse_expression(Parser *P, int lOP) {
	if(lOP == 0) {
		size_t startTokI = P->i;
		
		// Test if this is an anonymous function
		Type *ft = nct_parse_typename(P);
		if(ft) {
			assert(ft->type == TYPE_TYPE_FUNCTION);
			
			ASTExprFunc *e = alloc_node(P, sizeof(*e));
			e->nodeKind = AST_EXPR_FUNC;
			e->type = ft;
			
			if(type_is_generic(ft)) {
				// Don't parse a generic function because the types are unavailable
				skip_braces(P, 0);
			} else if(P->skimMode) {
				// In skim mode, it won't be parsed normally anyway
			} else {
				expect(P, TOKEN_ARROW);
				expect(P, TOKEN_SQUIGGLY_L);
				
				P->isInFunction++;
				
				e->chunk = (AST*) nct_parse_chunk(P, 1, 0, scope_new(P->scope), ft);
				e->chunk->chunk.functionType = ft;
				
				P->isInFunction--;
				
				expect(P, TOKEN_SQUIGGLY_R);
			}
			
			// Remember the token range so a generic function can be re-parsed on instantiation
			e->scope = P->scope;
			e->rangeTokens = P->tokens;
			e->startTokI = startTokI;
			e->endTokI = P->i;
			
			return (AST*) e;
		}
	}
	
	if(lOP == 6) {
		AST *e = NULL;
		
		if(peek(P, 0).type == TOKEN_NUMBER) {
			e = (AST*) parse_prim(P);
		} else if(peek(P, 0).type == TOKEN_IDENTIFIER) {
			if(!strcmp(peek(P, 0).content, "@stack")) {
				get(P);
				
				ASTExprStackPointer *ret = alloc_node(P, sizeof(*ret));
				ret->nodeKind = AST_EXPR_STACK_POINTER;
				ret->type = primitive_parse("u32");
				
				e = (AST*) ret;
			} else if(!strcmp(peek(P, 0).content, "@salloc")) {
				get(P);
				
				expect(P, TOKEN_PAREN_L);
				
				ASTExprExtSalloc *ret = alloc_node(P, sizeof(*ret));
				ret->nodeKind = AST_EXPR_EXT_SALLOC;
				ret->size = nct_parse_typename(P);
				ret->type = type_pointer_wrap(ret->size);
				
				expect(P, TOKEN_PAREN_R);
				
				e = (AST*) ret;
			} else if(!strcmp(peek(P, 0).content, "@sizeof")) {
				get(P);
				
				ASTExprExtSizeOf *ret = alloc_node(P, sizeof(*ret));
				ret->nodeKind = AST_EXPR_EXT_SIZEOF;
				
				// Either a type name or, failing that, an expression
				ret->ofType = nct_parse_typename(P);
				if(!ret->ofType) {
					ret->ofExpr = nct_parse_expression(P, lOP - 1);
				}
				
				ret->type = primitive_parse("u32");
				
				e = (AST*) ret;
			} else {
				Token varname = get(P);
				
				ScopeItem *vte = scope_find(P->scope, varname.content);
				if(!vte) {
					stahp(varname.row, varname.column, "Unknown variable %s", varname.content);
				}
				
				e = (AST*) exprvar(P, vte);
			}
		} else if(peek(P, 0).type == TOKEN_STRING) {
			ASTExprStringLiteral *ret = alloc_node(P, sizeof(*ret));
			ret->nodeKind = AST_EXPR_STRING_LITERAL;
			
			Token tok = get(P);
			
			ret->type = &TYPE_ERROR;
			ret->data = tok.content;
			ret->length = tok.length;
			
			e = (AST*) ret;
		} else if(maybe(P, TOKEN_PAREN_L)) {
			e = nct_parse_expression(P, 0);
			expect(P, TOKEN_PAREN_R);
		}
		
		while(maybe(P, TOKEN_DOT)) {
			// Without a primary expression there is nothing to access a field of
			if(!e) {
				stahp_token(&P->tokens[P->i], "Expected an expression before '.'.");
				return NULL;
			}
			
			assert(e->expression.type->type == TYPE_TYPE_RECORD);
			
			Token fieldTok = expect(P, TOKEN_IDENTIFIER);
			
			ASTExprDot *d = alloc_node(P, sizeof(*d));
			d->nodeKind = AST_EXPR_DOT;
			d->a = (AST*) e;
			
			bool foundField = false;
			for(size_t f = 0; f < e->expression.type->record.fieldCount; f++) {
				char *fieldName = e->expression.type->record.fieldNames[f];
				if(!strcmp(fieldName, fieldTok.content)) {
					foundField = true;
					d->type = e->expression.type->record.fieldTypes[f];
					d->b = strdup(fieldName);
				}
			}
			
			if(!foundField) {
				stahp(fieldTok.row, fieldTok.column, "Field %s does not exist.", fieldTok.content);
			}
			
			e = (AST*) d;
		}
		
		return e;
	} else if(lOP == 5) {
		if(maybe(P, TOKEN_STAR)) {
			ASTExprUnaryOp *astop = alloc_node(P, sizeof(*astop));
			astop->nodeKind = AST_EXPR_UNARY_OP;
			astop->operator = UNOP_DEREF;
			astop->operand = nct_parse_expression(P, lOP); /* Not +1! */
			astop->type = astop->operand->expression.type->pointer.of;
			return (AST*) astop;
		} else if(maybe(P, TOKEN_AMPERSAND)) {
			ASTExprUnaryOp *astop = alloc_node(P, sizeof(*astop));
			astop->nodeKind = AST_EXPR_UNARY_OP;
			astop->operator = UNOP_REF;
			astop->operand = nct_parse_expression(P, lOP);
			astop->type = type_pointer_wrap(astop->operand->expression.type);
			return (AST*) astop;
		} else if(maybe(P, TOKEN_MINUS)) {
			AST *operand = nct_parse_expression(P, lOP);
			
			if(operand->nodeKind == AST_EXPR_PRIMITIVE) {
				// Fold the sign into the literal
				operand->exprPrim.val *= -1;
				return operand;
			} else {
				ASTExprUnaryOp *astop = alloc_node(P, sizeof(*astop));
				astop->nodeKind = AST_EXPR_UNARY_OP;
				astop->operator = UNOP_NEGATE;
				astop->operand = operand;
				astop->type = operand->expression.type;
				return (AST*) astop;
			}
		} else if(maybe(P, TOKEN_TILDE)) {
			AST *child = nct_parse_expression(P, lOP);
			
			if(child->nodeKind == AST_EXPR_PRIMITIVE) {
				// Fold the complement into the literal
				child->exprPrim.val = ~child->exprPrim.val;
				return child;
			} else {
				ASTExprUnaryOp *astop = alloc_node(P, sizeof(*astop));
				astop->nodeKind = AST_EXPR_UNARY_OP;
				astop->operator = UNOP_BITWISE_NOT;
				astop->operand = child;
				astop->type = child->expression.type;
				return (AST *) astop;
			}
		} else return nct_parse_expression(P, lOP + 1);
	} else if(lOP == 4) {
		AST *ret = nct_parse_expression(P, lOP + 1);
		
		while(peek(P, 0).type == TOKEN_PAREN_L || peek(P, 0).type == TOKEN_SQUAREN_L) {
			if(maybe(P, TOKEN_PAREN_L)) {
				if(ret->expression.type->type != TYPE_TYPE_FUNCTION) {
					stahp(P->tokens[P->i].row, P->tokens[P->i].column, "Only function types may be called.");
				}
				
				ASTExprCall *call = alloc_node(P, sizeof(*call));
				call->nodeKind = AST_EXPR_CALL;
				call->type = ret->expression.type->function.ret;
				call->what = ret;
				call->args = NULL;
				
				int argCount = 0;
				
				if(!maybe(P, TOKEN_PAREN_R)) {
					while(peek(P, 0).type != TOKEN_PAREN_R && peek(P, 0).type != TOKEN_COMMA) {
						// Reject surplus arguments before reading the callee's parameter type out of bounds
						if(argCount >= (int) ret->expression.type->function.argCount) {
							stahp(P->tokens[P->i].row, P->tokens[P->i].column, "Mismatched number of arguments");
						}
						
						call->args = realloc(call->args, (argCount + 1) * sizeof(AST*));
						call->args[argCount] = ast_cast_expr(nct_parse_expression(P, 0), ret->expression.type->function.args[argCount]);
						
						argCount++;
						
						if(maybe(P, TOKEN_PAREN_R)) {
							break;
						} else expect(P, TOKEN_COMMA);
					}
				}
				
				if(argCount != call->what->expression.type->function.argCount) {
					stahp(P->tokens[P->i].row, P->tokens[P->i].column, "Mismatched number of arguments");
				}
				
				ret = (AST*) call;
			} else if(peek(P, 0).type == TOKEN_SQUAREN_L) {
				// Parametrization arguments are stored in a temporary child scope
				P->scope = scope_new(P->scope);
				
				if(parse_parametrization(P)) {
					// Generic type parametrization
					
					// Generic functions are not first-class
					assert(ret->nodeKind == AST_EXPR_VAR);
					assert(ret->exprVar.thing != NULL);
					assert(ret->exprVar.thing->kind == SCOPEITEM_SYMBOL);
					
					char *cname = parametrize_function_name(ret->expression.type, ret->exprVar.thing->data.symbol.name, P->scope);
					
					ScopeItem *cvte = scope_find(P->scope, cname);
					
					if(!cvte) {
						stahp_token(&P->tokens[P->i], "Parametrization %s not found.", cname);
					}
					
					ret = exprvar(P, cvte);
				} else {
					// Array access
					
					// Check the type first: everything below reads its array member
					if(ret->expression.type->type != TYPE_TYPE_ARRAY) {
						stahp_token(&P->tokens[P->i], "Attempt to index a non-array type");
					}
					
					expect(P, TOKEN_SQUAREN_L);
					
					// a[i] is built as *(&a + i * sizeof(element))
					ASTExprUnaryOp *ref = alloc_node(P, sizeof(*ref));
					ref->nodeKind = AST_EXPR_UNARY_OP;
					ref->operator = UNOP_REF;
					ref->operand = ret;
					ref->type = type_pointer_wrap(ret->expression.type->array.of);
					
					ASTExprBinaryOp *child = alloc_node(P, sizeof(*child));
					child->nodeKind = AST_EXPR_BINARY_OP;
					child->operands[0] = (AST*) ref;
					child->operands[1] = nct_parse_expression(P, 0);
					child->operator = BINOP_ADD;
					child->type = ref->type;
					
					int typesize = type_size(ret->expression.type->array.of);
					if(typesize != 1) {
						ASTExprPrimitive *scale = alloc_node(P, sizeof(*scale));
						scale->nodeKind = AST_EXPR_PRIMITIVE;
						scale->type = primitive_parse("u32");
						scale->val = typesize;
						
						ASTExprBinaryOp *mul = alloc_node(P, sizeof(*mul));
						mul->nodeKind = AST_EXPR_BINARY_OP;
						mul->type = child->operands[1]->expression.type;
						mul->operator = BINOP_MUL;
						mul->operands[0] = (AST*) scale;
						mul->operands[1] = child->operands[1];
						
						child->operands[1] = (AST*) mul;
					}
					
					ASTExprUnaryOp *unop = alloc_node(P, sizeof(*unop));
					unop->nodeKind = AST_EXPR_UNARY_OP;
					unop->type = ret->expression.type->array.of;
					unop->operator = UNOP_DEREF;
					unop->operand = (AST*) child;
					
					expect(P, TOKEN_SQUAREN_R);
					
					ret = (AST*) unop;
				}
				
				P->scope = P->scope->parent;
			} else abort();
		}
		
		return ret;
	} else if(lOP == 3) {
		AST *ret = nct_parse_expression(P, lOP + 1);
		
		if(peek(P, 0).type == TOKEN_STAR || peek(P, 0).type == TOKEN_SLASH || peek(P, 0).type == TOKEN_STAR_CARET) {
			while(1) {
				BinaryOp op;
				if(maybe(P, TOKEN_STAR)) op = BINOP_MUL;
				else if(maybe(P, TOKEN_STAR_CARET)) op = BINOP_MULHI;
				else if(maybe(P, TOKEN_SLASH)) op = BINOP_DIV;
				else break;
				
				ASTExprBinaryOp *astop = alloc_node(P, sizeof(*astop));
				astop->nodeKind = AST_EXPR_BINARY_OP;
				astop->type = ret->expression.type;
				astop->operator = op;
				astop->operands[0] = ret;
				
				AST *operand = nct_parse_expression(P, lOP + 1);
				
				if(operand->expression.type->type != TYPE_TYPE_PRIMITIVE) {
					stahp_token(&P->tokens[P->i], "Invalid combination of operator and operand types.");
				}
				
				astop->operands[1] = operand;
				
				// The result takes the larger of the two operand types
				if(!astop->type) {
					astop->type = operand->expression.type;
				} else {
					if(type_size(operand->expression.type) > type_size(astop->type)) {
						astop->type = operand->expression.type;
					}
				}
				
				binop_implicit_cast(astop);
				
				ret = (AST*) astop;
			}
		}
		
		return ret;
	} else if(lOP == 2) {
		AST *ret = nct_parse_expression(P, lOP + 1);
		
		if(
			peek(P, 0).type == TOKEN_PLUS
			|| peek(P, 0).type == TOKEN_MINUS
			|| peek(P, 0).type == TOKEN_AMPERSAND
			|| peek(P, 0).type == TOKEN_VERTICAL_BAR
			|| peek(P, 0).type == TOKEN_CARET
		) {
			while(1) {
				BinaryOp op;
				if(maybe(P, TOKEN_PLUS)) op = BINOP_ADD;
				else if(maybe(P, TOKEN_MINUS)) op = BINOP_SUB;
				else if(maybe(P, TOKEN_AMPERSAND)) op = BINOP_BITWISE_AND;
				else if(maybe(P, TOKEN_VERTICAL_BAR)) op = BINOP_BITWISE_OR;
				else if(maybe(P, TOKEN_CARET)) op = BINOP_BITWISE_XOR;
				else break;
				
				ASTExprBinaryOp *astop = alloc_node(P, sizeof(*astop));
				astop->nodeKind = AST_EXPR_BINARY_OP;
				astop->type = NULL;
				astop->operator = op;
				astop->operands[0] = ret;
				astop->operands[1] = nct_parse_expression(P, lOP + 1);
				
				if(!type_is_number(astop->operands[0]->expression.type) || !type_is_number(astop->operands[1]->expression.type)) {
					stahp_token(&P->tokens[P->i], "Attempt to perform arithmetic on non-number types.");
				}
				
				binop_implicit_cast(astop);
				
				ret = (AST*) astop;
			}
		}
		
		return ret;
	} else if(lOP == 1) {
		AST *ret = nct_parse_expression(P, lOP + 1);
		
		while(maybe(P, TOKEN_AS)) {
			Type *castTo = nct_parse_typename(P);
			ret = ast_cast_expr(ret, castTo);
		}
		
		return ret;
	} else if(lOP == 0) {
		AST *ret = nct_parse_expression(P, lOP + 1);
		
		if(
			peek(P, 0).type == TOKEN_DOUBLE_EQUALS
			|| peek(P, 0).type == TOKEN_EXCLAMATION_EQUALS
			|| peek(P, 0).type == TOKEN_LESS
			|| peek(P, 0).type == TOKEN_GREATER
			|| peek(P, 0).type == TOKEN_LEQUAL
			|| peek(P, 0).type == TOKEN_GEQUAL
		) {
			while(1) {
				BinaryOp op;
				if(maybe(P, TOKEN_DOUBLE_EQUALS)) op = BINOP_EQUAL;
				else if(maybe(P, TOKEN_EXCLAMATION_EQUALS)) op = BINOP_NEQUAL;
				else if(maybe(P, TOKEN_LESS)) op = BINOP_LESS;
				else if(maybe(P, TOKEN_GREATER)) op = BINOP_GREATER;
				else if(maybe(P, TOKEN_LEQUAL)) op = BINOP_LEQUAL;
				else if(maybe(P, TOKEN_GEQUAL)) op = BINOP_GEQUAL;
				else break;
				
				ASTExprBinaryOp *astop = alloc_node(P, sizeof(*astop));
				astop->nodeKind = AST_EXPR_BINARY_OP;
				astop->type = NULL;
				astop->operator = op;
				astop->operands[0] = ret;
				
				ASTExpr *operand = &(astop->operands[1] = nct_parse_expression(P, lOP + 1))->expression;
				
				if(operand->type->type != TYPE_TYPE_PRIMITIVE) {
					stahp_token(&P->tokens[P->i], "Invalid combination of operator and operand types.");
				}
				
				binop_implicit_cast(astop);
				
				ret = (AST*) astop;
			}
		}
		
		//ret = ast_expression_optimize(ret);
		
		return ret;
	}
#ifdef DEBUG
	else abort();
#endif
	
	return NULL;
}

// This function modifies the current scope.
// This function may backtrack.
//
// Parses "[T0, T1; n0, n1]": type arguments, then after ';' integer arguments.
// Each argument is stored in the current scope under "<index>t" or "<index>i".
// Returns false and restores the token position if the input is not a parametrization.
static bool parse_parametrization(Parser *P) {
	size_t oldIdx = P->i;
	
	if(!maybe(P, TOKEN_SQUAREN_L)) {
		goto backtrack;
	}
	
	intmax_t idx = 0;
	
	bool integerMode = false;
	
	while(1) {
		if(maybe(P, TOKEN_SQUAREN_R)) {
			break;
		}
		
		if(integerMode) {
			AST *n = nct_parse_expression(P, 0);
			
			if(!n) {
				goto backtrack;
			}
			
			ScopeItem *vte = calloc(1, sizeof(*vte));
			vte->kind = SCOPEITEM_CEXPR;
			vte->type = n->expression.type;
			vte->data.cexpr.concrete = n;
			vte->data.cexpr.paramIdx = idx;
			
			// idx is an intmax_t, hence the j length modifier
			char buf[64];
			snprintf(buf, sizeof(buf), "%jd%s", idx, "i");
			
			scope_set(P->scope, buf, vte);
		} else {
			Type *t = nct_parse_typename(P);
			
			if(!t) {
				goto backtrack;
			}
			
			ScopeItem *vte = calloc(1, sizeof(*vte));
			vte->kind = SCOPEITEM_TYPE;
			vte->type = t;
			vte->data.type.ptr = t;
			
			char buf[64];
			snprintf(buf, sizeof(buf), "%jd%s", idx, "t");
			
			scope_set(P->scope, buf, vte);
		}
		
		idx++;
		
		if(maybe(P, TOKEN_SQUAREN_R)) {
			break;
		}
		
		if(maybe(P, TOKEN_SEMICOLON)) {
			idx = 0;
			integerMode = true;
		} else if(!maybe(P, TOKEN_COMMA)) {
			goto backtrack;
		}
	}
	
	return true;
	
backtrack:
	P->i = oldIdx;
	return false;
}

/*
 * Parses a type name: an optional leading genericization "[...]", a base
 * identifier, then any number of '*' (pointer), "(...)" (function) and "[...]"
 * (array length or record parametrization) suffixes.
 * Returns NULL and restores the token position and scope on failure.
 *
 * Since this function backtracks, don't use aborting functions like expect.
 */
Type *nct_parse_typename(Parser *P) {
	intmax_t oldIdx = P->i;
	Type *ret = NULL;
	
	bool generics = peek(P, 0).type == TOKEN_SQUAREN_L;
	if(generics) {
		// Generic parameter names live in a scope that is dropped again before returning
		P->scope = scope_new(P->scope);
		parse_genericization(P);
	}
	
	if(peek(P, 0).type != TOKEN_IDENTIFIER) {
		goto backtrack;
	}
	
	Token id = expect(P, TOKEN_IDENTIFIER);
	
	ScopeItem *potentialVTE = scope_find(P->scope, id.content);
	if(potentialVTE && potentialVTE->kind == SCOPEITEM_TYPE) {
		ret = potentialVTE->data.type.ptr;
	} else {
		ret = (Type*) primitive_parse(id.content);
	}
	
	if(!ret) {
		goto backtrack;
	}
	
	while(peek(P, 0).type == TOKEN_PAREN_L || peek(P, 0).type == TOKEN_STAR || peek(P, 0).type == TOKEN_SQUAREN_L) {
		if(maybe(P, TOKEN_STAR)) {
			TypePointer *ptr = calloc(1, sizeof(*ptr));
			ptr->type = TYPE_TYPE_POINTER;
			ptr->of = ret;
			
			ret = (Type*) ptr;
		} else if(maybe(P, TOKEN_PAREN_L)) {
			TypeFunction *fun = calloc(1, sizeof(*fun));
			fun->type = TYPE_TYPE_FUNCTION;
			fun->ret = ret;
			fun->argCount = 0;
			fun->args = malloc(0);
			
			if(!maybe(P, TOKEN_PAREN_R)) {
				while(1) {
					fun->argCount++;
					fun->args = realloc(fun->args, sizeof(*fun->args) * fun->argCount);
					fun->argNames = realloc(fun->argNames, sizeof(*fun->argNames) * fun->argCount);
					
					// Parameters without a name keep a NULL name instead of garbage
					fun->argNames[fun->argCount - 1] = NULL;
					
					if((fun->args[fun->argCount - 1] = nct_parse_typename(P)) == NULL) {
						// No scope was pushed for the parameter list, so none is popped here;
						// the backtrack label undoes the genericization scope, if any.
						free(fun->args);
						free(fun->argNames);
						free(fun);
						goto backtrack;
					}
					
					if(peek(P, 0).type == TOKEN_IDENTIFIER) {
						fun->argNames[fun->argCount - 1] = get(P).content;
					}
					
					if(maybe(P, TOKEN_PAREN_R)) {
						break;
					} else expect(P, TOKEN_COMMA);
				}
			}
			
			ret = (Type*) fun;
		} else if(maybe(P, TOKEN_SQUAREN_L)) {
			// Probe: a type after '[' means a parametrization, anything else is an array length
			size_t probeIdx = P->i;
			Type *t = nct_parse_typename(P);
			P->i = probeIdx;
			
			if(!t) {
				TypeArray *arr = calloc(1, sizeof(*arr));
				arr->type = TYPE_TYPE_ARRAY;
				arr->of = ret;
				
				if(peek(P, 0).type == TOKEN_NUMBER) {
					ASTExprPrimitive *prim = parse_prim(P);
					if(!prim) {
						free(arr);
						goto backtrack;
					}
					arr->length = prim->val;
					free(prim);
				} else if(maybe(P, TOKEN_QUESTION_MARK)) {
					arr->length = -1;
				} else if(peek(P, 0).type == TOKEN_IDENTIFIER) {
					const char *what = expect(P, TOKEN_IDENTIFIER).content;
					
					ScopeItem *vte = scope_find(P->scope, what);
					
					if(!vte) {
						free(arr);
						goto backtrack;
					}
					
					if(vte->kind != SCOPEITEM_CEXPR) {
						stahp_token(&P->tokens[P->i], "Variable '%s' is not constant.", what);
					}
					
					if(vte->data.cexpr.concrete) {
						AST *n = vte->data.cexpr.concrete;
						
						assert(n->nodeKind == AST_EXPR_PRIMITIVE);
						
						arr->length = n->exprPrim.val;
						arr->lengthIsGeneric = false;
					} else {
						arr->length = 0;
						arr->lengthIsGeneric = true;
						arr->lengthGenericParamIdx = 0; // TODO: SHIT
						arr->lengthGenericParamName = strdup(what);
					}
				} else {
					stahp_token(&P->tokens[P->i], "Array length must be either constant, generic or '?' (1).");
				}
				
				ret = (Type*) arr;
				
				expect(P, TOKEN_SQUAREN_R);
			} else {
				assert(ret->type == TYPE_TYPE_RECORD);
				
				// Step back onto the '[' so that parse_parametrization sees it
				P->i--;
				P->scope = scope_new(P->scope);
				
				// The call must not sit inside assert(): it would disappear under NDEBUG
				if(!parse_parametrization(P)) {
					stahp_token(&P->tokens[P->i], "Malformed parametrization.");
				}
				
				ret = type_parametrize(ret, P->scope);
				
				P->scope = P->scope->parent;
			}
		}
	}
	
	if(generics) {
		P->scope = P->scope->parent;
	}
	
	return ret;
	
backtrack:
	if(generics) {
		P->scope = P->scope->parent;
	}
	
	P->i = oldIdx;
	return NULL;
}

/*
 * Parses a variable or symbol declaration and registers it in the current scope:
 *   [local|extern] [type] name = expr;    variable with initializer
 *   type name;                            variable without initializer
 *   [local] [type] name: expr;            symbol definition
 *   extern type name;                     external symbol
 * Potentially backtracking. Returns NULL upon failure.
 */
static AST *parse_declaration(Parser *P) {
	intmax_t oldIdx = P->i;
	
	int isLocal = maybe(P, TOKEN_LOCAL);
	int isExternal = 0;
	if(!isLocal) {
		isExternal = maybe(P, TOKEN_EXTERN);
	}
	
	Type *type = nct_parse_typename(P);
	
	// The type may only be left out in the "name: expr" form
	if(!type && (peek(P, 0).type != TOKEN_IDENTIFIER || peek(P, 1).type != TOKEN_COLON)) goto backtrack;
	
	if(peek(P, 0).type != TOKEN_IDENTIFIER) goto backtrack;
	
	Token name = expect(P, TOKEN_IDENTIFIER);
	
	ScopeItem *entry;
	
	if(peek(P, 0).type == TOKEN_COLON && (entry = scope_get(P->scope, name.content))) {
		/* Forward declared. */
	} else {
		entry = calloc(1, sizeof(*entry));
		entry->type = type;
	}
	
	AST *ret = NULL;
	
	if(maybe(P, TOKEN_EQUALS) || (peek(P, 0).type == TOKEN_SEMICOLON && !isExternal && !isLocal)) {
		if(isExternal || isLocal) {
			stahp(name.row, name.column, "'local' and 'extern' keywords are to be used for symbol declaration only.");
			return NULL;
		}
		
		entry->kind = SCOPEITEM_VAR;
		entry->data.var.priority = 1;
		entry->data.var.color = -1;
		
		ASTStmtAssign *assign = alloc_node(P, sizeof(*assign));
		assign->nodeKind = AST_STMT_ASSIGN;
		assign->next = NULL;
		
		//entry->data.var.reachingDefs = reachingdefs_push(NULL);
		//reachingdefs_set(entry->data.var.reachingDefs, (AST*) assign);
		
		assign->what = exprvar(P, entry);
		assign->to = peek(P, 0).type == TOKEN_SEMICOLON ? NULL : ast_cast_expr(nct_parse_expression(P, 0), assign->what->expression.type);
		
		ret = (AST*) assign;
	} else {
		// NOTE(review): allocated with the size of *ret (an AST), not of *decl; kept as found
		ASTStmtDecl *decl = alloc_node(P, sizeof(*ret));
		decl->nodeKind = AST_STMT_DECL;
		decl->thing = entry;
		decl->next = NULL;
		
		if(maybe(P, TOKEN_COLON)) {
			if(isExternal) {
				fputs("External symbols may not be defined.\n", stderr);
				abort();
				return NULL;
			}
			
			entry->kind = SCOPEITEM_SYMBOL;
			entry->data.symbol.isLocal = P->externalify ? false : isLocal;
			entry->data.symbol.isExternal = P->externalify ? true : isExternal;
			entry->data.symbol.name = name.content;
			
			decl->expression = nct_parse_expression(P, 0);
			
			if(type) {
				if(decl->expression) {
					decl->expression = ast_cast_expr(decl->expression, type);
				}
			} else if(decl->expression) {
				entry->type = decl->expression->expression.type;
			} else {
				// Neither an explicit type nor an expression to infer one from
				stahp(name.row, name.column, "Cannot infer the type of symbol %s.", name.content);
				return NULL;
			}
			
			if(decl->expression && decl->expression->nodeKind == AST_EXPR_FUNC && type_is_generic(decl->expression->expression.type)) {
				// Keep what is needed to re-parse the generic function on instantiation
				entry->data.symbol.genfunc.scope = decl->expression->exprFunc.scope;
				entry->data.symbol.genfunc.rangeTokens = decl->expression->exprFunc.rangeTokens;
				entry->data.symbol.genfunc.startTokI = decl->expression->exprFunc.startTokI;
				entry->data.symbol.genfunc.endTokI = decl->expression->exprFunc.endTokI;
			}
		} else if(isExternal) {
			entry->kind = SCOPEITEM_SYMBOL;
			entry->data.symbol.isLocal = isLocal;
			entry->data.symbol.isExternal = isExternal;
			entry->data.symbol.name = name.content;
		} else {
			abort();
		}
		
		ret = (AST*) decl;
	}
	
	scope_set(P->scope, name.content, entry);
	
	if(P->skimMode) {
		// In skim mode parsing is not done normally
	} else {
		expect(P, TOKEN_SEMICOLON);
	}
	
	return ret;
	
backtrack:
	P->i = oldIdx;
	return NULL;
}

/*
 * Appends `ts` and an underscore to the heap string `s`.
 * `s` is reallocated; the (possibly moved) result is returned.
 */
static char *plus_underscore(char *s, char *ts) {
	size_t baseLen = strlen(s);
	size_t addLen = strlen(ts);
	
	s = realloc(s, baseLen + addLen + 2);
	
	memcpy(s + baseLen, ts, addLen);
	s[baseLen + addLen] = '_';
	s[baseLen + addLen + 1] = '\0';
	
	return s;
}

/*
 * Builds the name of a parametrized function: the original name followed by
 * every type argument ("0t", "1t", ...) and every integer argument
 * ("0i", "1i", ...) found directly in `scope`, separated by underscores.
 * The result is heap-allocated.
 */
static char *parametrize_function_name(Type *t, const char *original, Scope *scope) {
	char *s = calloc(1, strlen(original) + 1 + 1);
	strcpy(s, original);
	s[strlen(original)] = '_';
	
	for(int i = 0;; i++) {
		char vtename[64];
		snprintf(vtename, sizeof(vtename), "%it", i);
		
		// scope_get, NOT SCOPE_FIND!
		ScopeItem *vte = scope_get(scope, vtename);
		
		if(!vte) break;
		
		assert(vte->kind == SCOPEITEM_TYPE);
		
		s = plus_underscore(s, type_to_string(vte->data.type.ptr));
	}
	
	for(int i = 0;; i++) {
		char vtename[64];
		snprintf(vtename, sizeof(vtename), "%ii", i);
		
		// scope_get, NOT SCOPE_FIND!
		ScopeItem *vte = scope_get(scope, vtename);
		
		if(!vte) break;
		
		assert(vte->kind == SCOPEITEM_CEXPR);
		assert(vte->data.cexpr.concrete);
		assert(vte->data.cexpr.concrete->nodeKind == AST_EXPR_PRIMITIVE);
		
		// NOTE(review): "%i" assumes exprPrim.val is an int; confirm against its declaration
		char numstr[32];
		snprintf(numstr, sizeof(numstr), "%i", vte->data.cexpr.concrete->exprPrim.val);
		
		s = plus_underscore(s, numstr);
	}
	
	// Remove last underscore
	s[strlen(s) - 1] = '\0';
	
	return s;
}

/*static void add_parametrizations_to_scope(Parser *P, Parametrizations *parametrizations, Parametrizations *renames) {
	Parametrization *c = parametrizations->typeParams;
	Parametrization *g = renames->typeParams;
	while(c && g) {
		Type *ct = (Type*) c->param;
		Type *gt = (Type*) g->param;
		
		assert(!!ct == !!gt);
		
		ScopeItem *vte = calloc(1, sizeof(*vte));
		vte->kind = SCOPEITEM_TYPE;
		vte->data.type.ptr = ct;
		scope_set(P->scope, gt->generic.paramName, vte);
		
		c = c->next;
		g = g->next;
	}
	
	size_t idx = 0;
	c = parametrizations->intParams;
	g = renames->intParams;
	while(c && g) {
		AST *node = c->param;
		char *name = g->param;
		
		assert(!!node == !!name);
		
		ScopeItem *vte = calloc(1, sizeof(*vte));
		vte->kind = SCOPEITEM_CEXPR;
		vte->data.cexpr.paramIdx = idx;
		vte->data.cexpr.paramName = name;
		vte->data.cexpr.concrete = node;
		scope_set(P->scope, name, vte);
		
		c = c->next;
		g = g->next;
		idx++;
	}
}*/

/*
 * Parses one statement and appends the resulting node(s) to the current chunk.
 * "record" and "use" statements are only skipped over here.
 */
void nct_parse_statement(Parser *P) {
	if(maybe(P, TOKEN_IF)) {
		expect(P, TOKEN_PAREN_L);
		AST *e = nct_parse_expression(P, 0);
		expect(P, TOKEN_PAREN_R);
		
		ASTStmtIf *ret = alloc_node(P, sizeof(*ret));
		ret->nodeKind = AST_STMT_IF;
		ret->next = NULL;
		
		ret->expression = e;
		
		pushstat(P, ret);
		
		expect(P, TOKEN_SQUIGGLY_L);
		ret->then = (AST*) nct_parse_chunk(P, 0, 0, NULL, NULL);
		expect(P, TOKEN_SQUIGGLY_R);
		
		return;
	} else if(maybe(P, TOKEN_LOOP)) {
		ASTStmtLoop *ret = alloc_node(P, sizeof(*ret));
		ret->nodeKind = AST_STMT_LOOP;
		ret->next = NULL;
		
		// Only the outermost loop collects and emits guarded variables
		int isFirstLoop = P->loopScope == NULL;
		
		if(isFirstLoop) {
			P->loopScope = P->scope;
		}
		
		expect(P, TOKEN_SQUIGGLY_L);
		ret->body = (AST*) nct_parse_chunk(P, 0, 1, NULL, NULL);
		expect(P, TOKEN_SQUIGGLY_R);
		
		pushstat(P, ret);
		
		if(isFirstLoop) {
			P->loopScope = NULL;
			
			// Emit one use of every guarded variable right after the loop
			for(size_t i = 0; i < P->guardedVarCount; i++) {
				ASTExprVar *ev = P->guardedVars[i];
				
				AST *es = alloc_node(P, sizeof(ASTStmtExpr));
				es->nodeKind = AST_STMT_EXPR;
				es->stmtExpr.expr = (AST*) ev;
				
				pushstat(P, es);
			}
			
			P->guardedVarCount = 0;
			free(P->guardedVars);
			P->guardedVars = NULL;
		}
		
		return;
	} else if(maybe(P, TOKEN_BREAK)) {
		ASTStmtBreak *ret = alloc_node(P, sizeof(*ret));
		ret->nodeKind = AST_STMT_BREAK;
		ret->next = NULL;
		
		expect(P, TOKEN_SEMICOLON);
		
		pushstat(P, ret);
		return;
	} else if(maybe(P, TOKEN_CONTINUE)) {
		ASTStmtContinue *ret = alloc_node(P, sizeof(*ret));
		ret->nodeKind = AST_STMT_CONTINUE;
		ret->next = NULL;
		
		expect(P, TOKEN_SEMICOLON);
		
		pushstat(P, ret);
		return;
	} else if(maybe(P, TOKEN_RETURN)) {
		ASTStmtReturn *ret = alloc_node(P, sizeof(*ret));
		ret->nodeKind = AST_STMT_RETURN;
		ret->next = NULL;
		
		if(!maybe(P, TOKEN_SEMICOLON)) {
			ret->val = nct_parse_expression(P, 0);
			
			expect(P, TOKEN_SEMICOLON);
		}
		
		pushstat(P, ret);
		return;
	} else if(maybe(P, TOKEN_RECORD)) {
		// Do nothing. This is handled in nct_parse_chunk.
		
		expect(P, TOKEN_IDENTIFIER);
		
		// Skip everything up to and including the record's braced body
		skip_braces(P, 0);
		
		return;
	} else if(maybe(P, TOKEN_USE)) {
		// Skip to the end of the statement; stop at EOF rather than loop forever
		while(1) {
			Token tok = get(P);
			if(tok.type == TOKEN_SEMICOLON) {
				break;
			}
			if(tok.type == TOKEN_EOF) {
				stahp(tok.row, tok.column, "Unexpected end of file in use statement.");
				break;
			}
		}
		
		return;
	} else if(peek(P, 0).type == TOKEN_IDENTIFIER) {
		if(!strcmp(peek(P, 0).content, "@align")) {
			ASTStmtExtAlign *ret = alloc_node(P, sizeof(*ret));
			ret->nodeKind = AST_STMT_EXT_ALIGN;
			ret->next = NULL;
			
			get(P);
			
			expect(P, TOKEN_PAREN_L);
			
			ASTExprPrimitive *val = parse_prim(P);
			ret->val = val->val;
			free(val);
			
			expect(P, TOKEN_PAREN_R);
			expect(P, TOKEN_SEMICOLON);
			
			pushstat(P, ret);
			return;
		} else if(!strcmp(peek(P, 0).content, "@org")) {
			ASTStmtExtOrg *ret = alloc_node(P, sizeof(*ret));
			ret->nodeKind = AST_STMT_EXT_ORG;
			ret->next = NULL;
			
			get(P);
			
			expect(P, TOKEN_PAREN_L);
			
			ASTExprPrimitive *val = parse_prim(P);
			ret->val = val->val;
			free(val);
			
			expect(P, TOKEN_PAREN_R);
			expect(P, TOKEN_SEMICOLON);
			
			pushstat(P, ret);
			return;
		} else if(!strcmp(peek(P, 0).content, "@section")) {
			ASTStmtExtSection *ret = alloc_node(P, sizeof(*ret));
			ret->nodeKind = AST_STMT_EXT_SECTION;
			ret->next = NULL;
			
			get(P);
			
			expect(P, TOKEN_PAREN_L);
			
			ret->name = expect(P, TOKEN_STRING);
			
			expect(P, TOKEN_PAREN_R);
			expect(P, TOKEN_SEMICOLON);
			
			pushstat(P, ret);
			return;
		} else if(!strcmp(peek(P, 0).content, "@instantiate")) {
			get(P);
			
			Token funcname = expect(P, TOKEN_IDENTIFIER);
			
			ScopeItem *func = scope_find(P->scope, funcname.content);
			
			if(!func) {
				stahp_token(&P->tokens[P->i], "Cannot find function %s for parametrization.", funcname.content);
			}
			
			assert(type_is_generic(func->type));
			
			Scope *oldScope = P->scope;
			P->scope = scope_new(oldScope);
			
			// The call must not sit inside assert(): it would disappear under NDEBUG
			if(!parse_parametrization(P)) {
				stahp_token(&P->tokens[P->i], "Malformed parametrization.");
			}
			
			// Resolve names as they were visible where the generic function was defined
			P->scope->parent = func->data.symbol.genfunc.scope;
			
			Type *parametrizedFuncType = type_parametrize(func->type, P->scope);
			
			// Temporarily rewind the parser onto the tokens of the generic function
			size_t oldIdx = P->i;
			Token *oldTokens = P->tokens;
			
			P->tokens = func->data.symbol.genfunc.rangeTokens;
			P->i = func->data.symbol.genfunc.startTokI;
			
			// Bind every generic parameter name to the matching "<index>t"/"<index>i" argument
			expect(P, TOKEN_SQUAREN_L);
			intmax_t paramIdx = 0;
			bool integerMode = false;
			while(1) {
				if(maybe(P, TOKEN_SQUAREN_R)) {
					break;
				}
				
				// paramIdx is an intmax_t, hence the j length modifier
				char vtename[64];
				snprintf(vtename, sizeof(vtename), integerMode ? "%jdi" : "%jdt", paramIdx++);
				
				ScopeItem *src = scope_get(P->scope, vtename);
				
				assert(src != NULL);
				
				scope_set(P->scope, expect(P, TOKEN_IDENTIFIER).content, src);
				
				if(maybe(P, TOKEN_SEMICOLON)) {
					paramIdx = 0;
					integerMode = true;
				} else {
					if(maybe(P, TOKEN_SQUAREN_R)) {
						break;
					}
					expect(P, TOKEN_COMMA);
				}
			}
			
			// Parse without the genericizing [...] because we've just parsed it ourselves and set the necessary VTEs
			AST *concreteFunction = nct_parse_expression(P, 0);
			
			assert(concreteFunction->nodeKind == AST_EXPR_FUNC);
			
			P->i = oldIdx;
			P->tokens = oldTokens;
			
			ScopeItem *vte = calloc(1, sizeof(*vte));
			vte->kind = SCOPEITEM_SYMBOL;
			vte->type = parametrizedFuncType;
			vte->data.symbol.name = parametrize_function_name(func->type, func->data.symbol.name, P->scope);
			vte->data.symbol.isExternal = false;
			vte->data.symbol.isLocal = false;
			
			// Important order (remove parametrizations from scope, then add parametrized function to global)
			P->scope = oldScope;
			scope_set(P->scope, vte->data.symbol.name, vte);
			
			ASTStmtDecl *decl = alloc_node(P, sizeof(*decl));
			decl->nodeKind = AST_STMT_DECL;
			decl->thing = vte;
			decl->expression = concreteFunction;
			
			pushstat(P, (AST*) decl);
			
			expect(P, TOKEN_SEMICOLON);
			
			return;
		}
	}
	
	{
		// A declaration is tried first; on failure it backtracks and an expression is parsed
		AST *decl = parse_declaration(P);
		if(decl) {
			if(decl->nodeKind == AST_STMT_DECL) {
				if(decl->stmtDecl.thing->kind == SCOPEITEM_SYMBOL && decl->stmtDecl.thing->data.symbol.isExternal) {
					P->topLevel->externs = realloc(P->topLevel->externs, sizeof(*P->topLevel->externs) * (++P->topLevel->externCount));
					P->topLevel->externs[P->topLevel->externCount - 1] = decl->stmtDecl.thing;
				}
			}
			
			// Don't pass declaration statements for generic functions, because they're useless
			if(decl->nodeKind != AST_STMT_DECL || !type_is_generic(decl->stmtDecl.thing->type)) {
				pushstat(P, decl);
			}
			
			return;
		}
	}
	
	AST *e = nct_parse_expression(P, 0);
	
	if(maybe(P, TOKEN_EQUALS)) {
		ASTStmtAssign *ret = alloc_node(P, sizeof(*ret));
		ret->nodeKind = AST_STMT_ASSIGN;
		ret->next = NULL;
		ret->what = e;
		ret->to = ast_cast_expr(nct_parse_expression(P, 0), ret->what->expression.type);
		
		//if(ret->what->nodeKind == AST_EXPR_VAR) {
		//	reachingdefs_set(ret->what->exprVar.thing->data.var.reachingDefs, (AST*) ret);
		//}
		
		expect(P, TOKEN_SEMICOLON);
		
		pushstat(P, ret);
		return;
	} else {
		ASTStmtExpr *ret = alloc_node(P, sizeof(*ret));
		ret->nodeKind = AST_STMT_EXPR;
		ret->next = NULL;
		ret->expr = e;
		
		expect(P, TOKEN_SEMICOLON);
		
		pushstat(P, ret);
		return;
	}
}

/*
 * Parses a generic parameter list "[T, U; n, m]": type parameter names first,
 * then after ';' the names of integer parameters.
 *
 * This function inserts VTEs into the *current* scope.
 * Make sure to create a child scope before using.
 * */
void parse_genericization(Parser *P) {
	expect(P, TOKEN_SQUAREN_L);
	
	bool integerMode = false;
	
	size_t nextIdx = 0;
	
	while(peek(P, 0).type == TOKEN_IDENTIFIER) {
		if(!integerMode) {
			Type *tg = calloc(1, sizeof(TypeGeneric));
			tg->type = TYPE_TYPE_GENERIC;
			tg->generic.paramName = strdup(get(P).content);
			tg->generic.paramIdx = nextIdx;
			
			ScopeItem *vte = calloc(1, sizeof(*vte));
			vte->kind = SCOPEITEM_TYPE;
			vte->data.type.ptr = tg;
			
			scope_set(P->scope, strdup(tg->generic.paramName), vte);
		} else {
			ScopeItem *vte = calloc(1, sizeof(*vte));
			vte->type = primitive_parse("u32");
			vte->kind = SCOPEITEM_CEXPR;
			vte->data.cexpr.paramName = strdup(get(P).content);
			vte->data.cexpr.paramIdx = nextIdx;
			
			// Register under the parameter's own name, the member that was just set above
			scope_set(P->scope, strdup(vte->data.cexpr.paramName), vte);
		}
		
		nextIdx++;
		
		if(maybe(P, TOKEN_SQUAREN_R)) {
			break;
		}
		
		if(maybe(P, TOKEN_SEMICOLON)) {
			integerMode = true;
			nextIdx = 0;
		} else {
			expect(P, TOKEN_COMMA);
		}
		
		if(maybe(P, TOKEN_SQUAREN_R)) {
			break;
		}
	}
}

/*
 * Parses "record Name [generic parameters] { [offset:] type field; ... }" and
 * returns the new record type. The caller is responsible for registering it.
 * Fields are laid out back to back unless an explicit numeric offset is given.
 */
Type *nct_parse_record_definition(Parser *P) {
	expect(P, TOKEN_RECORD);
	
	// Generic parameters of the record are only visible while it is being parsed
	P->scope = scope_new(P->scope);
	
	Token name = expect(P, TOKEN_IDENTIFIER);
	
	Type *tr = calloc(1, sizeof(TypeRecord));
	tr->type = TYPE_TYPE_RECORD;
	tr->record.name = strdup(name.content);
	
	if(maybe(P, TOKEN_SQUAREN_L)) {
		// Step back: parse_genericization expects the '[' itself
		P->i--;
		parse_genericization(P);
	}
	
	expect(P, TOKEN_SQUIGGLY_L);
	
	size_t nextOffset = 0;
	
	while(peek(P, 0).type != TOKEN_SQUIGGLY_R) {
		if(peek(P, 0).type == TOKEN_NUMBER) {
			ASTExprPrimitive *explicitOffset = parse_prim(P);
			nextOffset = explicitOffset->val;
			free(explicitOffset);
			
			expect(P, TOKEN_COLON);
		}
		
		size_t fi = tr->record.fieldCount++;
		
		tr->record.fieldOffsets = realloc(tr->record.fieldOffsets, sizeof(*tr->record.fieldOffsets) * tr->record.fieldCount);
		tr->record.fieldTypes = realloc(tr->record.fieldTypes, sizeof(*tr->record.fieldTypes) * tr->record.fieldCount);
		tr->record.fieldNames = realloc(tr->record.fieldNames, sizeof(*tr->record.fieldNames) * tr->record.fieldCount);
		
		Type *fieldType = nct_parse_typename(P);
		if(!fieldType) {
			// nct_parse_typename backtracks silently, so the failure is reported here
			stahp_token(&P->tokens[P->i], "Expected a field type.");
		}
		
		Token fieldName = expect(P, TOKEN_IDENTIFIER);
		
		tr->record.fieldTypes[fi] = fieldType;
		tr->record.fieldNames[fi] = strdup(fieldName.content);
		tr->record.fieldOffsets[fi] = nextOffset;
		
		if(type_is_generic(tr->record.fieldTypes[fi])) {
			// Hope nothing goes wrong.
			// Field offsets must be regenerated later
		} else {
			nextOffset += type_size(tr->record.fieldTypes[fi]);
		}
		
		expect(P, TOKEN_SEMICOLON);
	}
	
	expect(P, TOKEN_SQUIGGLY_R);
	
	P->scope = P->scope->parent;
	
	return tr;
}

/*
 * Pre-pass over the current chunk that registers its symbols, record types and
 * imported modules in the scope without building statements.
 * Nested braces are skipped. The token position is restored before returning.
 */
static void skim_chunk(Parser *P, int isTopLevel) {
	/* Find all symbol names and struct types ahead of time. Searches for colons as those can only mean symbol declarations */
	
	P->skimMode++;
	{
		intmax_t oldIdx = P->i;
		while(1) {
			TokenKind k = get(P).type;
			
			if(k == (isTopLevel ? TOKEN_EOF : TOKEN_SQUIGGLY_R)) {
				break;
			} else if(k == TOKEN_EOF) {
				// get() never moves past EOF, so a nested chunk without '}' would spin forever
				stahp_token(&P->tokens[P->i], "Unexpected end of file inside a block.");
				break;
			} else if(k == TOKEN_SQUIGGLY_L) {
				/* Don't enter deeper scopes. */
				skip_braces(P, 1);
			} else if(k == TOKEN_COLON || k == TOKEN_EXTERN) {
				/* Move back to beginning of declaration. */
				if(k == TOKEN_COLON) {
					int squarenDepth = 0;
					do {
						P->i--;
						
						// Never look at tokens before the start of this chunk
						if(P->i < oldIdx) {
							break;
						}
						
						// Separators inside [...] do not end the declaration
						if(P->tokens[P->i].type == TOKEN_SQUAREN_R) {
							squarenDepth++;
						} else if(P->tokens[P->i].type == TOKEN_SQUAREN_L) {
							squarenDepth--;
						}
					} while(squarenDepth != 0 || (P->tokens[P->i].type != TOKEN_SEMICOLON && P->tokens[P->i].type != TOKEN_SQUIGGLY_R && P->tokens[P->i].type != TOKEN_SQUIGGLY_L));
					P->i++;
				} else {
					P->i--;
				}
				
				AST *d = parse_declaration(P);
				if(!d) abort();
				
				free(d); /* We don't need it. */
			} else if(k == TOKEN_RECORD) {
				P->i--;
				Type *tr = nct_parse_record_definition(P);
				
				ScopeItem *vte = calloc(1, sizeof(*vte));
				vte->kind = SCOPEITEM_TYPE;
				vte->data.type.ptr = tr;
				
				scope_set(P->scope, tr->record.name, vte);
			} else if(k == TOKEN_USE) {
				// "use a.b.c;" names the module a/b/c
				char *path = malp("%s", expect(P, TOKEN_IDENTIFIER).content);
				for(;;) {
					if(maybe(P, TOKEN_SEMICOLON)) {
						break;
					}
					
					expect(P, TOKEN_DOT);
					
					char *path2 = malp("%s/%s", path, expect(P, TOKEN_IDENTIFIER).content);
					free(path);
					path = path2;
				}
				
				// NOTE(review): the loop visits every import path but always opens "tests/<module>.nct";
				// presumably *importPaths was meant to form the prefix. Confirm before changing.
				FILE *f = NULL;
				for(const char **importPaths = ntc_get_import_paths(); *importPaths; importPaths++) {
					char *path2 = malp("tests/%s.nct", path);
					f = fopen(path2, "rb");
					free(path2);
					if(f) {
						// Importee found
						break;
					}
				}
				
				if(!f) {
					stahp_token(&P->tokens[P->i], "Module %s not found", path);
				}
				
				free(path);
				
				Parser subp = {.tokens = nct_lex(f), .scope = scope_new(NULL), .externalify = 1};
				skim_chunk(&subp, 1);
				
				// Copy all extern symbols from the scope into our TLC's externs array
				for(size_t i = 0; i < subp.scope->count; i++) {
					ScopeItem *vte = subp.scope->data[i];
					
					if(vte->kind == SCOPEITEM_SYMBOL && vte->data.symbol.isExternal) {
						P->topLevel->externs = realloc(P->topLevel->externs, sizeof(*P->topLevel->externs) * (++P->topLevel->externCount));
						P->topLevel->externs[P->topLevel->externCount - 1] = vte;
					}
				}
				
				subp.scope->parent = P->scope;
				scope_merge(subp.scope);
				
				// free(subp.tokens); DO THIS CANNOT
				
				fclose(f);
			}
		}
		P->i = oldIdx;
	}
	P->skimMode--;
}

ASTChunk *nct_parse_chunk(Parser *P, int isTopLevel, int varPrioritize, Scope
*toplevelParent, Type *ft) { AST *ret = alloc_node(P, sizeof(ASTChunk)); ret->nodeKind = AST_CHUNK; ret->chunk.statementFirst = ret->chunk.statementLast = NULL; ret->chunk.varCount = 0; ret->chunk.vars = NULL; ret->chunk.stackReservation = 0; AST *oldChunk = (AST*) P->currentChunk; P->currentChunk = &ret->chunk; Scope *oldScope = P->scope; P->scope = isTopLevel ? toplevelParent : scope_new(oldScope); ASTChunk *oldTopLevel = P->topLevel; if(isTopLevel) { P->topLevel = &ret->chunk; } skim_chunk(P, isTopLevel); /* Arguments */ if(ft && isTopLevel) { ScopeItem **vtes = alloca(sizeof(*vtes) * ft->function.argCount); // First arguments in a function TLC is the arguments for(int i = 0; i < ft->function.argCount; i++) { ScopeItem *vte = calloc(1, sizeof(*vte)); vte->kind = SCOPEITEM_VAR; vte->type = ft->function.args[i]; vte->data.var.name = ft->function.argNames[i]; vte->data.var.color = -1; scope_set(toplevelParent, vte->data.var.name, vte); vtes[i] = vte; } } /* Now actual parsing. */ while(peek(P, 0).type != TOKEN_EOF && peek(P, 0).type != TOKEN_SQUIGGLY_R) { nct_parse_statement(P); } // Add all variables used in this chunk into the TLC's variable list size_t varsToAdd = 0; for(size_t i = 0; i < P->scope->count; i++) { if(P->scope->data[i]->kind == SCOPEITEM_VAR) { varsToAdd++; } } P->topLevel->vars = realloc(P->topLevel->vars, sizeof(*P->topLevel->vars) * (P->topLevel->varCount + varsToAdd)); // This makes sure function arguments stay first in the array memmove(P->topLevel->vars + varsToAdd, P->topLevel->vars, sizeof(*P->topLevel->vars) * P->topLevel->varCount); for(size_t i = 0, n = 0; i < P->scope->count; i++) { if(P->scope->data[i]->kind == SCOPEITEM_VAR) { P->topLevel->vars[n++] = P->scope->data[i]; P->topLevel->varCount++; } } P->scope = oldScope; P->currentChunk = oldChunk; if(isTopLevel) { P->topLevel = oldTopLevel; } return &ret->chunk; } AST *nct_parse(Token *tokens) { Parser P; memset(&P, 0, sizeof(P)); P.tokens = tokens; return (AST*) nct_parse_chunk(&P, 1, 
0, scope_new(NULL), NULL); }