commit 64c21ca43ae62a74fd8de035ac8270a9899904e7 Author: Mid <> Date: Sun Aug 31 16:22:38 2025 +0300 Initial commit diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..2b0b92f --- /dev/null +++ b/Makefile @@ -0,0 +1,2 @@ +all: + $(CC) -I./ -O2 -std=c11 -o nua main.c parse.c vm.c lexer.c -lm diff --git a/README.md b/README.md new file mode 100644 index 0000000..1e59f5e --- /dev/null +++ b/README.md @@ -0,0 +1,13 @@ +# Impotent + +This is an attempt to create a Lua virtual machine capable of true multithreading. Once the nctref compiler matures enough, I intend to plug it into Impotent as a JIT. + +Impotent is still work-in-progress: + +1. Integers are 32-bit only +2. No error handling, meaning any mistake will either crash the VM or make it silently fail +3. No standard library other than `print` +4. Tables cannot be resized +5. Most operators are missing +6. Integers and floats are always separate table keys (unlike real Lua where e.g. `5` and `5.0` are considered identical) +7. The GC isn't real and everything leaks diff --git a/dump.h b/dump.h new file mode 100644 index 0000000..eb8d368 --- /dev/null +++ b/dump.h @@ -0,0 +1,60 @@ +#pragma once + +#include +#include +#include +#include + +__attribute__((format(printf, 1, 2))) static inline char *malp(const char *fmt, ...) 
{ + va_list v1, v2; + va_start(v1, fmt); + va_copy(v2, v1); + // First pass only measures the formatted length; vsnprintf reports errors as a negative int. + int len = vsnprintf(NULL, 0, fmt, v1); + va_end(v1); + // v2 was already initialised by va_copy above, so it must not be passed to va_start again. + char *str = len < 0 ? NULL : malloc((size_t) len + 1); + if(str) { + vsnprintf(str, (size_t) len + 1, fmt, v2); + str[len] = 0; + } + va_end(v2); + // Returns a heap string the caller must free(), or NULL if formatting or allocation failed. + return str; +} + +static const char *LINST_NAMES[] = { + [L_GETGLOBAL] = "getglobal", + [L_SETGLOBAL] = "setglobal", + [L_SETINT16] = "setint16", + [L_SETINT32] = "setint32", + [L_SETFLOAT] = "setfloat", + [L_SETSTR] = "setstr", + [L_SETTABLE] = "settable", + [L_SETBOOL] = "setbool", + [L_SETNIL] = "setnil", + [L_SETFUNC] = "setfunc", + [L_ADD] = "add", + [L_SUB] = "sub", + [L_MUL] = "mul", + [L_DIV] = "div", + [L_IDIV] = "idiv", + [L_MOD] = "mod", + [L_RET] = "ret", + [L_JNOTCOND] = "jnotcond", + [L_MOVE] = "move", + [L_CALL] = "call", + [L_JUMP] = "jump", + [L_ADVANCETEST] = "advancetest", + [L_COND_EQ] = "cond_eq", + [L_COND_NEQ] = "cond_neq", + [L_SETFIELD] = "setfield", + [L_GETFIELD] = "getfield", +}; + +// Prints one line per instruction (mnemonic plus the a/b/c operand bytes in hex), stopping after the first L_RET. +static void dump(LInst* i) { + while(1) { + // Opcodes without an entry in LINST_NAMES would otherwise hand printf a NULL or out-of-range string. + const char *name = i->opcode < sizeof(LINST_NAMES) / sizeof(*LINST_NAMES) ? LINST_NAMES[i->opcode] : NULL; + printf("%s %02X %02X %02X\n", name ? name : "?", i->a, i->b, i->c); + if(i->opcode == L_RET) { + break; + } + + i++; + } +} diff --git a/lexer.c b/lexer.c new file mode 100644 index 0000000..c4c60de --- /dev/null +++ b/lexer.c @@ -0,0 +1,150 @@ +#define _GNU_SOURCE +#define _POSIX_C_SOURCE 200809L + +#define i_implement +#include"lexer.h" + +typedef struct { + const char *txt; + TokenType type; +} KW; + +static KW KWS[] = { + {"return", TOK_RETURN}, + {"if", TOK_IF}, + {"break", TOK_BREAK}, + {"goto", TOK_GOTO}, + {"end", TOK_END}, + {"do", TOK_DO}, + {"for", TOK_FOR}, + {"repeat", TOK_REPEAT}, + {"until", TOK_UNTIL}, + {"local", TOK_LOCAL}, + {"then", TOK_THEN}, + {"else", TOK_ELSE}, + {"elseif", TOK_ELSEIF}, + {"false", TOK_FALSE}, + {"true", TOK_TRUE}, + {"nil", TOK_NIL}, + {"function", TOK_FUNCTION}, + {"while", TOK_WHILE}, +}; + +// Returns the keyword token type for the len-byte identifier at str, or TOK_NAME if it is not a keyword. +static TokenType is_kw(const char *str, size_t len) { + for(size_t i = 0; i < sizeof(KWS) / sizeof(*KWS); i++) { + if(len == strlen(KWS[i].txt) && 
!strncmp(KWS[i].txt, str, len)) { + return KWS[i].type; + } + } + return TOK_NAME; +} + +vec_Token ltokenize(const char *buf, size_t len) { + vec_Token tokens = {}; + + size_t row = 1; + + while(len) { + if(isspace(buf[0])) { + if(buf[0] == '\n') { + row++; + } + buf++, len--; + } else if(isalpha(buf[0])) { + size_t idlen = 0; + while(idlen < len && isalnum(buf[idlen])) { + idlen++; + } + + TokenType tt = is_kw(buf, idlen); + vec_Token_push(&tokens, (Token) {.text = tt == TOK_NAME ? strndup(buf, idlen) : NULL, .type = tt}); + + buf += idlen, len -= idlen; + } else if(buf[0] == '+') { + vec_Token_push(&tokens, (Token) {.type = TOK_PLUS}); + buf++, len--; + } else if(buf[0] == '=') { + if(len > 1 && buf[1] == '=') { + vec_Token_push(&tokens, (Token) {.type = TOK_DOUBLE_EQUAL}); + buf++, len--; + buf++, len--; + } else { + vec_Token_push(&tokens, (Token) {.type = TOK_EQUAL}); + buf++, len--; + } + } else if(buf[0] == '(') { + vec_Token_push(&tokens, (Token) {.type = TOK_PAREN_L}); + buf++, len--; + } else if(buf[0] == ')') { + vec_Token_push(&tokens, (Token) {.type = TOK_PAREN_R}); + buf++, len--; + } else if(buf[0] == '[') { + vec_Token_push(&tokens, (Token) {.type = TOK_SQUAREN_L}); + buf++, len--; + } else if(buf[0] == ']') { + vec_Token_push(&tokens, (Token) {.type = TOK_SQUAREN_R}); + buf++, len--; + } else if(buf[0] == '.') { + vec_Token_push(&tokens, (Token) {.type = TOK_DOT}); + buf++, len--; + } else if(buf[0] == ',') { + vec_Token_push(&tokens, (Token) {.type = TOK_COMMA}); + buf++, len--; + } else if(buf[0] == '%') { + vec_Token_push(&tokens, (Token) {.type = TOK_PERCENT}); + buf++, len--; + } else if(buf[0] == '{') { + vec_Token_push(&tokens, (Token) {.type = TOK_SQUIGGLY_L}); + buf++, len--; + } else if(buf[0] == '}') { + vec_Token_push(&tokens, (Token) {.type = TOK_SQUIGGLY_R}); + buf++, len--; + } else if(len > 1 && buf[0] == '~' && buf[1] == '=') { + vec_Token_push(&tokens, (Token) {.type = TOK_NOT_EQUAL}); + buf++, len--; + buf++, len--; + } else 
if(isdigit(buf[0]) || (len > 1 && buf[0] == '-' && isdigit(buf[1]))) { + size_t idlen = 0; + + if(buf[0] == '-') { + idlen++; + } + while(idlen < len && isdigit(buf[idlen])) { + idlen++; + } + + vec_Token_push(&tokens, (Token) {.text = strndup(buf, idlen), .type = TOK_NUMBER}); + + buf += idlen, len -= idlen; + } else if(buf[0] == '\'' || buf[0] == '\"') { + bool single = buf[0] == '\''; + + buf++, len--; + + size_t strlen = 1; + + while(strlen < len) { + if(buf[strlen] == '\\') { + strlen += 2; + continue; + } else if(buf[strlen] == (single ? '\'' : '\"')) { + strlen++; + break; + } + strlen++; + } + + char *str = strndup(buf, strlen - 1); + + // TODO: unescaping + + vec_Token_push(&tokens, (Token) {.text = str, .type = TOK_STRING}); + + buf += strlen, len -= strlen; + } else { + assert(false); + } + } + + return tokens; +} diff --git a/lexer.h b/lexer.h new file mode 100644 index 0000000..c21d2e8 --- /dev/null +++ b/lexer.h @@ -0,0 +1,86 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +typedef enum TokenType { + TOK_NONE, + TOK_INVALID, + TOK_SEMICOLON, + TOK_EQUAL, + TOK_BREAK, + TOK_GOTO, + TOK_DO, + TOK_END, + TOK_WHILE, + TOK_REPEAT, + TOK_UNTIL, + TOK_IF, + TOK_THEN, + TOK_ELSEIF, + TOK_ELSE, + TOK_FOR, + TOK_COMMA, + TOK_IN, + TOK_FUNCTION, + TOK_LOCAL, + TOK_DOUBLE_COLON, + TOK_DOT, + TOK_COLON, + TOK_SQUAREN_L, + TOK_SQUAREN_R, + TOK_NIL, + TOK_TRUE, + TOK_FALSE, + TOK_ELLIPSIS, + TOK_PAREN_L, + TOK_PAREN_R, + TOK_SQUIGGLY_L, + TOK_SQUIGGLY_R, + TOK_PLUS, + TOK_MINUS, + TOK_MUL, + TOK_DIV, + TOK_IDIV, + TOK_CARET, + TOK_PERCENT, + TOK_AMPERSAND, + TOK_TILDE, + TOK_BAR, + TOK_DOUBLE_RIGHT, + TOK_DOUBLE_LEFT, + TOK_DOUBLE_DOT, + TOK_LEFT, + TOK_RIGHT, + TOK_LEFT_EQUAL, + TOK_RIGHT_EQUAL, + TOK_DOUBLE_EQUAL, + TOK_NOT_EQUAL, + TOK_AND, + TOK_OR, + TOK_NOT, + TOK_SHARP, + TOK_NAME, + TOK_RETURN, + TOK_NUMBER, + TOK_STRING, + TOK_EOF, +} TokenType; + +typedef struct Token { + TokenType type; + int row; + char *text; 
+} Token; + +#define i_header +#define T vec_Token, Token +#include"stc/vec.h" +#undef i_header + +vec_Token ltokenize(const char *buf, size_t len); diff --git a/main.c b/main.c new file mode 100644 index 0000000..835f6b4 --- /dev/null +++ b/main.c @@ -0,0 +1,44 @@ +#include"value.h" +#include"table.h" +#include"parse.h" +#include"vm.h" +#include"lexer.h" +#include"str.h" +#include"dump.h" + +// Native implementation of print: writes the first argument (string, 32-bit integer or nil) followed by a newline and returns 0. Other value kinds print nothing. +static size_t native_print(LVM *lvm, void *ud, size_t argn, LValue *values) { + // Without arguments values[0] must not be read; print just the newline. + if(argn == 0) { + printf("\n"); + return 0; + } + + if(lvalue_tag(values[0]) == LTAG_STRING) { + LString *lstr = (void*) (values[0].u & ~LTAG_MASK); + printf("%.*s\n", (int) lstr->length, lstr->data); + } else if(lvalue_tag(values[0]) == LTAG_I32) { + printf("%i\n", lvalue_to_int32(values[0])); + } else if(values[0].u == LTAG_NIL) { + printf("nil\n"); + } + return 0; +} + +int main() { + LTable *env = ltable_new(128); + + LString *key = lstring_newz("print"); + LFunc *func = lvm_func_from_native(native_print, NULL); + + ltable_set(env, lvalue_from_string(key), lvalue_from_func(func)); + + const char *bufs = "for i = 1, 1000000 do print(i) if i % 3 == 0 then print(\"Fizz\") end if i % 5 == 0 then print(\"Buzz\") end end"; + //const char *bufs = "local t = {a = 9} print(t.a)"; + //const char *bufs = "z = 5 print(z)"; + //const char *bufs = "local i = 0 while i ~= 1500000 do print(i) i = i + 1 end"; + vec_Token toks = ltokenize(bufs, strlen(bufs)); + + LUnit *unit = lparse(toks.size, toks.data, env); + + dump(unit->funcs[0].lua_instrs); + + LValue regs[256]; + lvm_reset_regs(regs); + + LVM lvm = {}; + lvm_run(&lvm, &unit->funcs[0], 0, regs); +} diff --git a/parse.c b/parse.c new file mode 100644 index 0000000..1eba0aa --- /dev/null +++ b/parse.c @@ -0,0 +1,1053 @@ +#include"parse.h" + +#include"vm.h" +#include + +#define i_key LInst +#include"stc/vec.h" + +#define i_key int32_t +#include"stc/vec.h" + +#define i_key LFunc +#include"stc/vec.h" + +typedef struct ScopeItem { + Token name; + int vreg; + + struct ScopeItem *next; +} ScopeItem; + +typedef 
struct Scope { + struct Scope *parent; + ScopeItem *items; +} Scope; + +ScopeItem *scope_get(Scope *scope, const char *name) { + for(ScopeItem *si = scope->items; si; si = si->next) { + if(!strcmp(si->name.text, name)) { + return si; + } + } + return NULL; +} + +ScopeItem *scope_find(Scope *scope, const char *name) { + while(scope) { + ScopeItem *si = scope_get(scope, name); + if(si) { + return si; + } + scope = scope->parent; + } + return NULL; +} + +void scope_set_direct(Scope *scope, ScopeItem *new) { + new->next = scope->items; + scope->items = new; +} + +typedef struct { + vec_LInst instrs; + int used_vregs[256]; +} Chunk; + +typedef struct { + size_t i; + size_t sz; + Token *tokens; + + LTable *environment; + + Scope *scope; + + vec_LFunc unit_functions; + + size_t abyss_size; + uint8_t *abyss; + + Chunk current_chunk; +} Parser; + +typedef enum ExprKind { + EX_LOCAL, + EX_GLOBAL, + EX_INT, + EX_FLOAT, + EX_BOOL, + EX_ADD, + EX_SUB, + EX_MUL, + EX_DIV, + EX_IDIV, + EX_MOD, + EX_POW, + EX_BAND, + EX_BOR, + EX_BXOR, + EX_AND, + EX_OR, + EX_EQ, + EX_NEQ, + EX_TBL_LIT, + EX_INDEX, + EX_CALL, + EX_STR, +} ExprKind; +typedef struct Expr { + ExprKind kind; + union { + struct { + struct Expr *A; + union { + struct Expr *B; + Token B_tok; + }; + }; + Token name; + intmax_t i; + double f; + bool b; + struct { + uint8_t sub_count; + struct Expr *subs[]; + }; + struct { + size_t table_first_token; + size_t table_last_token; + }; + }; +} Expr; + +// Appends len bytes to the parser's abyss buffer and returns the offset of the new region. When data is NULL the region is reserved but left for the caller to fill. +size_t abyss_insert(Parser *P, uint8_t *data, size_t len) { + size_t idx = P->abyss_size; + + // Nothing to append; this also avoids realloc(ptr, 0), which may legitimately return NULL. + if(len == 0) { + return idx; + } + + // Grow through a temporary so a failed realloc is caught instead of being written through. + uint8_t *grown = realloc(P->abyss, P->abyss_size + len); + if(!grown) { + abort(); + } + P->abyss = grown; + P->abyss_size += len; + + if(data) { + memcpy(P->abyss + idx, data, len); + } + + return idx; +} + +// Returns the token offset positions away from the cursor without consuming it; positions at or past the end yield a TOK_EOF token. +Token peek(Parser *P, int offset) { + if(P->i + offset >= P->sz) { + // Copy the last real token's other fields when there is one; an empty token array has nothing to copy. + Token t = P->sz ? P->tokens[P->sz - 1] : (Token) {0}; + t.type = TOK_EOF; + return t; + } + return P->tokens[P->i + offset]; +} + +Token get(Parser *P) { + Token t = peek(P, 0); + P->i++; + return t; +} + +Token expect(Parser *P, TokenType type) { + Token t = 
get(P); + assert(t.type == type); + return t; +} + +bool maybe(Parser *P, TokenType type) { + if(peek(P, 0).type == type) { + P->i++; + return true; + } else { + return false; + } +} + +// Returns the highest-numbered register that is currently unused in the chunk being compiled. +int find_vreg(Parser *P) { + for(int i = 255; i >= 0; i--) { + if(!P->current_chunk.used_vregs[i]) { + return i; + } + } + assert(false && "Out of virtual registers"); + // Only reached when assertions are compiled out; several callers check for -1. + return -1; +} + +void alloc_vreg(Parser *P, int vreg) { + P->current_chunk.used_vregs[vreg]++; +} + +void free_vreg(Parser *P, int vreg) { + P->current_chunk.used_vregs[vreg]--; + assert(P->current_chunk.used_vregs[vreg] >= 0 && "Cannot free unused vreg"); +} + +void parse_chunk(Parser *P); + +// Parses [local] function [name](params) ... end, appends the compiled body to unit_functions and emits L_SETFUNC (plus L_SETGLOBAL for a non-local name). Returns the register holding the function, or -1 with the token position restored if no definition starts here. +int parse_functiondef(Parser *P, bool can_be_local) { + size_t old_idx = P->i; + + if(!(peek(P, 0).type == TOK_FUNCTION || (can_be_local && peek(P, 0).type == TOK_LOCAL && peek(P, 1).type == TOK_FUNCTION))) { + goto err; + } + + bool is_local = maybe(P, TOK_LOCAL); + + expect(P, TOK_FUNCTION); + + Token name = {}; + if(peek(P, 0).type == TOK_NAME) { + name = get(P); + } + + if(!maybe(P, TOK_PAREN_L)) { + goto err; + } + + Chunk old_chunk = P->current_chunk; + P->current_chunk = (Chunk) {}; + + Scope *new_scope = calloc(1, sizeof(*new_scope)); + new_scope->parent = P->scope; + P->scope = new_scope; + + size_t arg_count = 0; + if(!maybe(P, TOK_PAREN_R)) { + while(1) { + // The chunk and scope have been swapped by now, so backtracking through err would leave the parser corrupted; malformed parameter lists fail hard via expect() instead. + ScopeItem *si = calloc(1, sizeof(*si)); + si->name = expect(P, TOK_NAME); + si->vreg = arg_count++; + // Reserve the parameter's register so find_vreg() never hands it out as a temporary. + alloc_vreg(P, si->vreg); + scope_set_direct(P->scope, si); + + if(maybe(P, TOK_PAREN_R)) { + break; + } + + // The separator must be consumed here, otherwise the next iteration would see the comma instead of a name. + expect(P, TOK_COMMA); + } + } + + parse_chunk(P); + // An empty body emits no instructions, so only inspect the last one when it exists. + if(P->current_chunk.instrs.size == 0 || P->current_chunk.instrs.data[P->current_chunk.instrs.size - 1].opcode != L_RET) { + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_RET, .argb = {0}}); + } + + expect(P, TOK_END); + + assert(P->unit_functions.size > 0); + + LFunc lf = {}; + lf.unit = P->unit_functions.data[0].unit; + lf.is_native = false; + lf.lua_instrs = P->current_chunk.instrs.data; + lf.env = 
P->environment; + vec_LFunc_push(&P->unit_functions, lf); + + size_t function_idx = P->unit_functions.size - 1; + + P->current_chunk = old_chunk; + P->scope = P->scope->parent; + + int vreg = find_vreg(P); + assert(vreg != -1); + + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_SETFUNC, .a = vreg, .bc = function_idx}); + + if(name.text) { + if(is_local) { + alloc_vreg(P, vreg); + + ScopeItem *si = calloc(1, sizeof(*si)); + si->name = name; + si->vreg = vreg; + + scope_set_direct(P->scope, si); + } else { + size_t abyss_idx = abyss_insert(P, NULL, sizeof(uint16_t) + strlen(name.text)); + *(uint16_t*) &P->abyss[abyss_idx] = strlen(name.text); + memcpy(&P->abyss[abyss_idx + sizeof(uint16_t)], name.text, strlen(name.text)); + + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_SETGLOBAL, .a = vreg, .bc = abyss_idx}); + } + } + + return vreg; +err: + P->i = old_idx; + return -1; +} + +Expr *desc_exp(Parser *P); +Expr *desc_subexp(Parser *P, int priority); +void emit_expr(Parser *P, int assigned_vreg, Expr *expr); + +// Parses an expression and emits it if it is a call; otherwise restores the token position and returns false. +bool parse_functioncall(Parser *P) { + size_t old_idx = P->i; + + Expr *ex = desc_exp(P); + + // desc_exp yields NULL when no expression starts here; treat that like any other non-call. + if(!ex || ex->kind != EX_CALL) { + goto err; + } + + emit_expr(P, find_vreg(P), ex); + + return true; +err: + P->i = old_idx; + return false; +} + +vec_Token parse_namelist(Parser *P) { + vec_Token v = {}; + + vec_Token_push(&v, expect(P, TOK_NAME)); + + while(maybe(P, TOK_COMMA)) { + vec_Token_push(&v, expect(P, TOK_NAME)); + } + + return v; +} + +Expr *desc_subexp(Parser *P, int priority) { + if(priority == 0) { + Expr *a = desc_subexp(P, priority + 1); + + while(maybe(P, TOK_DOUBLE_EQUAL) || maybe(P, TOK_NOT_EQUAL)) { + Token op = peek(P, -1); + + Expr *b = desc_subexp(P, priority + 1); + + Expr *opex = calloc(1, sizeof(*opex)); + opex->A = a; + opex->B = b; + + if(op.type == TOK_DOUBLE_EQUAL) { + opex->kind = EX_EQ; + } else if(op.type == TOK_NOT_EQUAL) { + opex->kind = EX_NEQ; + } + + a = opex; + } + + return a; + } else if(priority == 1) { 
+ Expr *a = desc_subexp(P, priority + 1); + + while(peek(P, 0).type == TOK_PLUS || peek(P, 0).type == TOK_MINUS) { + Token op = get(P); + + Expr *b = desc_subexp(P, priority + 1); + + Expr *opex = calloc(1, sizeof(*opex)); + opex->A = a; + opex->B = b; + + if(op.type == TOK_PLUS) { + opex->kind = EX_ADD; + } else { + opex->kind = EX_SUB; + } + + a = opex; + } + + return a; + } else if(priority == 2) { + Expr *a = desc_subexp(P, priority + 1); + + while(peek(P, 0).type == TOK_MUL || peek(P, 0).type == TOK_DIV || peek(P, 0).type == TOK_IDIV || peek(P, 0).type == TOK_PERCENT) { + Token op = get(P); + + Expr *b = desc_subexp(P, priority + 1); + + Expr *opex = calloc(1, sizeof(*opex)); + opex->A = a; + opex->B = b; + + if(op.type == TOK_MUL) { + opex->kind = EX_MUL; + } else if(op.type == TOK_DIV) { + opex->kind = EX_DIV; + } else if(op.type == TOK_IDIV) { + opex->kind = EX_IDIV; + } else if(op.type == TOK_PERCENT) { + opex->kind = EX_MOD; + } + + a = opex; + } + + return a; + } else if(priority == 3) { + Expr *e = NULL; + + if(maybe(P, TOK_TRUE)) { + e = calloc(1, sizeof(*e)); + e->kind = EX_BOOL; + e->b = true; + } else if(maybe(P, TOK_FALSE)) { + e = calloc(1, sizeof(*e)); + e->kind = EX_BOOL; + e->b = false; + } else if(maybe(P, TOK_NUMBER)) { + P->i--; + + Token num = expect(P, TOK_NUMBER); + long i = strtol(num.text, NULL, 10); + + e = calloc(1, sizeof(*e)); + e->kind = EX_INT; + e->i = i; + } else if(maybe(P, TOK_NAME)) { + P->i--; + + Token name = expect(P, TOK_NAME); + + ScopeItem *si = scope_find(P->scope, name.text); + + e = calloc(1, sizeof(*e)); + e->kind = si ? 
EX_LOCAL : EX_GLOBAL; + e->name = name; + } else if(maybe(P, TOK_STRING)) { + P->i--; + + Token str = expect(P, TOK_STRING); + + e = calloc(1, sizeof(*e)); + e->kind = EX_STR; + e->name = str; + } else if(maybe(P, TOK_SQUIGGLY_L)) { + e = calloc(1, sizeof(*e)); + e->kind = EX_TBL_LIT; + e->table_first_token = P->i - 1; + + size_t depth = 1; + while(1) { + Token t = get(P); + if(t.type == TOK_SQUIGGLY_L) { + depth++; + } else if(t.type == TOK_SQUIGGLY_R) { + depth--; + if(depth == 0) { + break; + } + } + } + + e->table_last_token = P->i - 1; + } + + if(e) { + while(maybe(P, TOK_PAREN_L) || maybe(P, TOK_DOT)) { + if(peek(P, -1).type == TOK_PAREN_L) { + Expr *call = calloc(1, sizeof(*call) + sizeof(Expr*)); + call->kind = EX_CALL; + call->sub_count = 1; + call->subs[0] = e; + + if(!maybe(P, TOK_PAREN_R)) { + while(1) { + call = realloc(call, sizeof(*call) + sizeof(Expr*) * (++call->sub_count)); + call->subs[call->sub_count - 1] = desc_exp(P); + + if(maybe(P, TOK_PAREN_R)) { + break; + } else { + expect(P, TOK_COMMA); + } + } + } + + e = call; + } else if(peek(P, -1).type == TOK_DOT) { + Expr *dot = calloc(1, sizeof(*dot)); + dot->kind = EX_INDEX; + dot->A = e; + dot->B_tok = expect(P, TOK_NAME); + + e = dot; + } + } + + return e; + } + } + return NULL; +} + +Expr *desc_exp(Parser *P) { + return desc_subexp(P, 0); +} + +/*void parse_exp_priority(Parser *P, int assigned_vreg, int priority) { + if(priority == 0) { + parse_exp_priority(P, assigned_vreg, priority + 1); + + while(peek(P, 0).type == TOK_DOUBLE_EQUAL) { + int vreg = find_vreg(P); + + Token op = get(P); + + alloc_vreg(P, vreg); + parse_exp_priority(P, vreg, priority + 1); + free_vreg(P, vreg); + + if(op.type == TOK_DOUBLE_EQUAL) { + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_COND_EQ, .a = assigned_vreg, .b = assigned_vreg, .c = vreg}); + } + } + } else if(priority == 1) { + parse_exp_priority(P, assigned_vreg, priority + 1); + + while(peek(P, 0).type == TOK_MUL || peek(P, 0).type == TOK_DIV 
|| peek(P, 0).type == TOK_PERCENT || peek(P, 0).type == TOK_IDIV) { + int vreg = find_vreg(P); + + Token op = get(P); + + alloc_vreg(P, vreg); + parse_exp_priority(P, vreg, priority + 1); + free_vreg(P, vreg); + + if(op.type == TOK_MUL) { + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_MUL, .a = assigned_vreg, .b = assigned_vreg, .c = vreg}); + } else if(op.type == TOK_DIV) { + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_DIV, .a = assigned_vreg, .b = assigned_vreg, .c = vreg}); + } else if(op.type == TOK_PERCENT) { + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_MOD, .a = assigned_vreg, .b = assigned_vreg, .c = vreg}); + } else { + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_IDIV, .a = assigned_vreg, .b = assigned_vreg, .c = vreg}); + } + } + } else if(priority == 2) { + parse_exp_priority(P, assigned_vreg, priority + 1); + + while(peek(P, 0).type == TOK_PLUS || peek(P, 0).type == TOK_MINUS) { + int vreg = find_vreg(P); + + Token op = get(P); + + alloc_vreg(P, vreg); + parse_exp_priority(P, vreg, priority + 1); + free_vreg(P, vreg); + + if(op.type == TOK_PLUS) { + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_ADD, .a = assigned_vreg, .b = assigned_vreg, .c = vreg}); + } else { + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_SUB, .a = assigned_vreg, .b = assigned_vreg, .c = vreg}); + } + } + } else if(priority == 3) { + if(maybe(P, TOK_TRUE)) { + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_SETBOOL, .a = assigned_vreg, .b = true}); + } else if(maybe(P, TOK_FALSE)) { + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_SETBOOL, .a = assigned_vreg, .b = false}); + } else if(maybe(P, TOK_NIL)) { + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_SETNIL, .a = assigned_vreg}); + } else if(maybe(P, TOK_SQUIGGLY_L)) { + expect(P, TOK_SQUIGGLY_R); + + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_SETTABLE, .a = 
assigned_vreg, .bc = 8}); + } else if(peek(P, 0).type == TOK_NAME) { + Token name = expect(P, TOK_NAME); + + ScopeItem *si = scope_find(P->scope, name.text); + + if(!si) { + size_t abyss_idx = abyss_insert(P, NULL, sizeof(uint16_t) + strlen(name.text)); + *(uint16_t*) &P->abyss[abyss_idx] = strlen(name.text); + memcpy(&P->abyss[abyss_idx + sizeof(uint16_t)], name.text, strlen(name.text)); + + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_GETGLOBAL, .a = assigned_vreg, .b = abyss_idx}); + } else { + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_MOVE, .a = assigned_vreg, .b = si->vreg}); + } + } else if(peek(P, 0).type == TOK_NUMBER) { + Token num = expect(P, TOK_NUMBER); + + long i = strtol(num.text, NULL, 10); + + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_SETINT16, .a = assigned_vreg, .bc = i}); + } else if(peek(P, 0).type == TOK_STRING) { + Token str = expect(P, TOK_STRING); + + size_t abyss_idx = abyss_insert(P, NULL, sizeof(uint16_t) + strlen(str.text)); + *(uint16_t*) &P->abyss[abyss_idx] = strlen(str.text); + memcpy(&P->abyss[abyss_idx + sizeof(uint16_t)], str.text, strlen(str.text)); + + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_SETSTR, .a = assigned_vreg, .bc = abyss_idx}); + } else { + assert(false); + } + } +}*/ + +void emit_expr(Parser *P, int assigned_vreg, Expr *expr) { + if(expr->kind == EX_LOCAL) { + ScopeItem *si = scope_find(P->scope, expr->name.text); + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_MOVE, .a = assigned_vreg, .b = si->vreg}); + } else if(expr->kind == EX_GLOBAL) { + size_t abyss_idx = abyss_insert(P, NULL, sizeof(uint16_t) + strlen(expr->name.text)); + *(uint16_t*) &P->abyss[abyss_idx] = strlen(expr->name.text); + memcpy(&P->abyss[abyss_idx + sizeof(uint16_t)], expr->name.text, strlen(expr->name.text)); + + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_GETGLOBAL, .a = assigned_vreg, .b = abyss_idx}); + } else if(expr->kind == 
EX_INT) { + if(expr->i <= 32767 && expr->i >= -32768) { + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_SETINT16, .a = assigned_vreg, .bc = expr->i}); + } else { + // Store the value as a real 32-bit integer; copying the first four bytes of the intmax_t would pick the wrong half on big-endian hosts. + int32_t narrowed = (int32_t) expr->i; + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_SETINT32, .a = assigned_vreg, .bc = abyss_insert(P, (uint8_t*) &narrowed, sizeof(narrowed))}); + } + } else if(expr->kind == EX_FLOAT) { + assert(false); + } else if(expr->kind == EX_BOOL) { + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_SETBOOL, .a = assigned_vreg, .bc = expr->b}); + } else if(expr->kind == EX_STR) { + size_t abyss_idx = abyss_insert(P, NULL, sizeof(uint16_t) + strlen(expr->name.text)); + *(uint16_t*) &P->abyss[abyss_idx] = strlen(expr->name.text); + memcpy(&P->abyss[abyss_idx + sizeof(uint16_t)], expr->name.text, strlen(expr->name.text)); + + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_SETSTR, .a = assigned_vreg, .bc = abyss_idx}); + } else if(expr->kind == EX_CALL) { + int vreg = find_vreg(P); + assert(vreg != -1); + + // subs[0] is the callee; it stays reserved while the arguments are evaluated. + emit_expr(P, vreg, expr->subs[0]); + alloc_vreg(P, vreg); + + // Call descriptor stored in the abyss: return register, argument count, then one register per argument. + uint8_t buf[258] = {}; + + uint8_t *ret_vreg = buf + 0; + uint8_t *arg_count = buf + 1; + uint8_t *args = buf + 2; + + // The loop header is the only place i advances, so every argument in subs[1..] is emitted. + for(size_t i = 1; i < expr->sub_count; i++) { + int av = find_vreg(P); + emit_expr(P, av, expr->subs[i]); + args[(*arg_count)++] = av; + alloc_vreg(P, av); + } + + // NOTE(review): the return register is a fresh one rather than assigned_vreg - confirm how the result is expected to reach the caller's register. + *ret_vreg = find_vreg(P); + + free_vreg(P, vreg); + for(int i = 0; i < *arg_count; i++) { + free_vreg(P, args[i]); + } + + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_CALL, .a = vreg, .bc = abyss_insert(P, buf, 2 + *arg_count)}); + } else if(expr->kind == EX_TBL_LIT) { + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_SETTABLE, .a = assigned_vreg, .bc = 16}); + + // Table literals are recorded as a token range at parse time and re-parsed here, so rewind the cursor temporarily. + size_t old_idx = P->i; + P->i = expr->table_first_token; + + expect(P, TOK_SQUIGGLY_L); + + if(!maybe(P, TOK_SQUIGGLY_R)) { + while(1) { + int keyv = find_vreg(P); + assert(keyv != -1); + if(peek(P, 0).type == TOK_NAME) { + Token name = get(P); 
+ + size_t abyss_idx = abyss_insert(P, NULL, sizeof(uint16_t) + strlen(name.text)); + *(uint16_t*) &P->abyss[abyss_idx] = strlen(name.text); + memcpy(&P->abyss[abyss_idx + sizeof(uint16_t)], name.text, strlen(name.text)); + + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_SETSTR, .a = keyv, .bc = abyss_idx}); + } else { + expect(P, TOK_SQUAREN_L); + emit_expr(P, keyv, desc_exp(P)); + expect(P, TOK_SQUAREN_R); + } + + expect(P, TOK_EQUAL); + + alloc_vreg(P, keyv); + int valv = find_vreg(P); + emit_expr(P, valv, desc_exp(P)); + free_vreg(P, keyv); + + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_SETFIELD, .a = assigned_vreg, .b = keyv, .c = valv}); + + if(maybe(P, TOK_SQUIGGLY_R)) { + break; + } else { + expect(P, TOK_COMMA); + } + } + } + + P->i = old_idx; + } else if(expr->kind == EX_INDEX) { + Token field = expr->B_tok; + + emit_expr(P, assigned_vreg, expr->A); + alloc_vreg(P, assigned_vreg); + + size_t abyss_idx = abyss_insert(P, NULL, sizeof(uint16_t) + strlen(field.text)); + *(uint16_t*) &P->abyss[abyss_idx] = strlen(field.text); + memcpy(&P->abyss[abyss_idx + sizeof(uint16_t)], field.text, strlen(field.text)); + + int keyv = find_vreg(P); + assert(keyv != -1); + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_SETSTR, .a = keyv, .bc = abyss_idx}); + + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_GETFIELD, .a = assigned_vreg, .b = assigned_vreg, .c = keyv}); + + free_vreg(P, assigned_vreg); + } else { + assert(expr->kind == EX_ADD || expr->kind == EX_SUB || expr->kind == EX_MUL || expr->kind == EX_DIV || expr->kind == EX_IDIV || expr->kind == EX_MOD || expr->kind == EX_POW || expr->kind == EX_BAND || expr->kind == EX_BOR || expr->kind == EX_BXOR || expr->kind == EX_AND || expr->kind == EX_OR || expr->kind == EX_EQ || expr->kind == EX_NEQ); + + assert(expr->A); + assert(expr->B); + + emit_expr(P, assigned_vreg, expr->A); + + alloc_vreg(P, assigned_vreg); + int b_vreg = find_vreg(P); + emit_expr(P, 
b_vreg, expr->B); + free_vreg(P, assigned_vreg); + + uint8_t opcode; + switch(expr->kind) { + case EX_ADD: opcode = L_ADD; break; + case EX_SUB: opcode = L_SUB; break; + case EX_MUL: opcode = L_MUL; break; + case EX_DIV: opcode = L_DIV; break; + case EX_IDIV: opcode = L_IDIV; break; + case EX_MOD: opcode = L_MOD; break; + case EX_POW: opcode = L_POW; break; + case EX_EQ: opcode = L_COND_EQ; break; + case EX_NEQ: opcode = L_COND_NEQ; break; + default: assert(false); + } + + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = opcode, .a = assigned_vreg, .b = assigned_vreg, .c = b_vreg}); + } +} + +int parse_exp(Parser *P, int assigned_vreg) { + bool owned = false; + if(assigned_vreg == -1) { + owned = true; + + assigned_vreg = find_vreg(P); + alloc_vreg(P, assigned_vreg); + } + + emit_expr(P, assigned_vreg, desc_exp(P)); + + if(owned) { + free_vreg(P, assigned_vreg); + } + + return assigned_vreg; +} + +bool parse_assignment(Parser *P) { + size_t old_idx = P->i; + + const size_t max_exprs = 8; + + Expr *lhs[max_exprs]; + size_t lhsi = 0; + + Expr *rhs[max_exprs]; + int rhsv[max_exprs]; + size_t rhsi = 0; + + while(1) { + lhs[lhsi++] = desc_exp(P); + + if(!lhs[lhsi - 1]) { + goto err; + } + + if(lhs[lhsi - 1]->kind != EX_LOCAL && lhs[lhsi - 1]->kind != EX_GLOBAL && lhs[lhsi - 1]->kind != EX_INDEX) { + goto err; + } + + if(maybe(P, TOK_EQUAL)) { + break; + } else { + expect(P, TOK_COMMA); + } + } + + rhs[rhsi++] = desc_exp(P); + + while(1) { + if(!maybe(P, TOK_COMMA)) { + break; + } + + rhs[rhsi++] = desc_exp(P); + } + + for(size_t i = 0; i < rhsi; i++) { + rhsv[i] = find_vreg(P); + emit_expr(P, rhsv[i], rhs[i]); + alloc_vreg(P, rhsv[i]); + } + + assert(rhsi == lhsi); + + for(size_t i = 0; i < lhsi; i++) { + if(lhs[i]->kind == EX_LOCAL) { + ScopeItem *si = scope_find(P->scope, lhs[i]->name.text); + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_MOVE, .a = si->vreg, .b = rhsv[i]}); + } else if(lhs[i]->kind == EX_GLOBAL) { + size_t abyss_idx = 
abyss_insert(P, NULL, sizeof(uint16_t) + strlen(lhs[i]->name.text)); + *(uint16_t*) &P->abyss[abyss_idx] = strlen(lhs[i]->name.text); + memcpy(&P->abyss[abyss_idx + sizeof(uint16_t)], lhs[i]->name.text, strlen(lhs[i]->name.text)); + + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_SETGLOBAL, .a = rhsv[i], .bc = abyss_idx}); + } else if(lhs[i]->kind == EX_INDEX) { + int lhsv = find_vreg(P); + assert(lhsv != -1); + emit_expr(P, lhsv, lhs[i]->A); + alloc_vreg(P, lhsv); + + Token field = lhs[i]->B_tok; + + size_t abyss_idx = abyss_insert(P, NULL, sizeof(uint16_t) + strlen(field.text)); + *(uint16_t*) &P->abyss[abyss_idx] = strlen(field.text); + memcpy(&P->abyss[abyss_idx + sizeof(uint16_t)], field.text, strlen(field.text)); + + int keyv = find_vreg(P); + assert(keyv != -1); + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_SETSTR, .a = keyv, .bc = abyss_idx}); + + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_SETFIELD, .a = lhsv, .b = keyv, .c = rhsv[i]}); + + free_vreg(P, lhsv); + } else { + assert(false); + } + } + + for(size_t i = 0; i < rhsi; i++) { + free_vreg(P, rhsv[i]); + } + + return true; +err: + P->i = old_idx; + return false; +} + +void parse_chunk(Parser *P); + +bool parse_stat(Parser *P) { + if(parse_functiondef(P, true) != -1) { + } else if(maybe(P, TOK_LOCAL)) { + vec_Token names = parse_namelist(P); + + int i = 0; + + if(maybe(P, TOK_EQUAL)) { + do { + int vreg = parse_exp(P, -1); + + if(i < names.size) { + alloc_vreg(P, vreg); + + ScopeItem *si = calloc(1, sizeof(*si)); + *si = (ScopeItem) {.name = names.data[i], .vreg = vreg}; + + scope_set_direct(P->scope, si); + } + + i++; + } while(maybe(P, TOK_COMMA)); + } + + for(; i < names.size; i++) { + int vreg = find_vreg(P); + alloc_vreg(P, vreg); + + ScopeItem *si = calloc(1, sizeof(*si)); + *si = (ScopeItem) {.name = names.data[i], .vreg = vreg}; + + scope_set_direct(P->scope, si); + } + + vec_Token_drop(&names); + } else if(maybe(P, TOK_IF)) { + int vreg = 
parse_exp(P, -1); + + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_JNOTCOND, .a = vreg, .bc = 0}); + size_t jump2else = P->current_chunk.instrs.size - 1; + + expect(P, TOK_THEN); + + parse_chunk(P); + + bool isElse = maybe(P, TOK_ELSE); + + // Only meaningful when an else branch exists; initialised so it is never read indeterminate. + size_t jump2end = 0; + if(isElse) { + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_JUMP, .a = 0, .bc = 0}); + jump2end = P->current_chunk.instrs.size - 1; + } + + // Patch the conditional jump now that the length of the then-branch is known. + P->current_chunk.instrs.data[jump2else].bc = P->current_chunk.instrs.size - 1 - jump2else; + + if(isElse) { + parse_chunk(P); + P->current_chunk.instrs.data[jump2end].bc = P->current_chunk.instrs.size - 1 - jump2end; + } + + expect(P, TOK_END); + } else if(maybe(P, TOK_WHILE)) { + Expr *condition = desc_exp(P); + + int vreg = find_vreg(P); + assert(vreg != -1); + + // The condition is re-evaluated on every iteration, so the back-jump targets its first instruction. + size_t start_idx = P->current_chunk.instrs.size; + + emit_expr(P, vreg, condition); + + size_t jump2end = P->current_chunk.instrs.size; + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_JNOTCOND, .a = vreg, .bc = 0}); + + Scope *new_scope = calloc(1, sizeof(*new_scope)); + new_scope->parent = P->scope; + // Make the loop body's scope current; the pop after the loop would otherwise discard the enclosing scope instead. + P->scope = new_scope; + + expect(P, TOK_DO); + parse_chunk(P); + expect(P, TOK_END); + + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_JUMP, .a = 0, .bc = start_idx - P->current_chunk.instrs.size - 1}); + + P->current_chunk.instrs.data[jump2end].bc = P->current_chunk.instrs.size - 1 - jump2end; + + P->scope = P->scope->parent; + } else if(maybe(P, TOK_FOR)) { + if(peek(P, 0).type == TOK_NAME && peek(P, 1).type == TOK_EQUAL) { + // Range loop + + Token name = expect(P, TOK_NAME); + + expect(P, TOK_EQUAL); + + int fromVreg = parse_exp(P, -1); + alloc_vreg(P, fromVreg); + + expect(P, TOK_COMMA); + + int toVreg = parse_exp(P, -1); + alloc_vreg(P, toVreg); + + int stepVreg; + if(maybe(P, TOK_COMMA)) { + stepVreg = parse_exp(P, -1); + } else { + stepVreg = find_vreg(P); + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_SETINT16, .a = stepVreg, .bc = 1}); + } + 
alloc_vreg(P, stepVreg); + + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_SUB, .a = fromVreg, .b = fromVreg, .c = stepVreg}); + + expect(P, TOK_DO); + + Scope *new_scope = calloc(1, sizeof(*new_scope)); + new_scope->parent = P->scope; + + ScopeItem *si = calloc(1, sizeof(*si)); + si->name = name; + si->vreg = fromVreg; + scope_set_direct(new_scope, si); + + P->scope = new_scope; + + size_t start_idx = P->current_chunk.instrs.size; + + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_ADD, .a = fromVreg, .b = fromVreg, .c = stepVreg}); + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_ADVANCETEST, .a = fromVreg, .b = toVreg, .c = stepVreg}); + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_JUMP, .a = 0, .bc = 0}); + size_t jump2end = P->current_chunk.instrs.size - 1; + + parse_chunk(P); + + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_JUMP, .a = 0, .bc = start_idx - P->current_chunk.instrs.size - 1}); + + P->current_chunk.instrs.data[jump2end].bc = P->current_chunk.instrs.size - 1 - jump2end; + + P->scope = P->scope->parent; + + expect(P, TOK_END); + + free_vreg(P, fromVreg); + free_vreg(P, toVreg); + free_vreg(P, stepVreg); + } else { + assert(0 && "Iterator for loop not yet supported"); + } + } else if(maybe(P, TOK_RETURN)) { + int vreg = parse_exp(P, -1); + + vec_LInst_push(&P->current_chunk.instrs, (LInst) {.opcode = L_RET, .argb = {vreg}}); + + return false; + } else if(peek(P, 0).type == TOK_END || peek(P, 0).type == TOK_ELSE || peek(P, 0).type == TOK_ELSEIF || peek(P, 0).type == TOK_EOF) { + return false; + } else if(parse_functioncall(P)) { + } else if(parse_assignment(P)) { + } else { + assert(false); + } + + return true; +} + +void parse_chunk(Parser *P) { + while(parse_stat(P)); +} + +LUnit *lparse(size_t sz, Token *tokens, LTable *environment) { + LUnit *unit = calloc(1, sizeof(*unit)); + + Parser P = { + .i = 0, + .sz = sz, + .tokens = tokens, + + .environment = environment, + + 
.unit_functions = {}, + + .scope = calloc(1, sizeof(Scope)), + }; + + LFunc func = {}; + func.unit = unit; + func.is_native = false; + func.upvalue_count = 0; + func.env = environment; + vec_LFunc_push(&P.unit_functions, func); + + parse_chunk(&P); + /* Always terminate the chunk with a return. The previous check read instrs.data[size - 1], which is out of bounds for an empty chunk, and it skipped the terminator whenever the last emitted instruction was a return nested inside an if/while/for body, whose exit jump is patched against the instruction count at that point and so presumably lands one slot past the end. When the chunk already ends in a top-level return the extra instruction is simply never reached. */ + vec_LInst_push(&P.current_chunk.instrs, (LInst) {.opcode = L_RET, .argb = {0}}); + + P.unit_functions.data[0].lua_instrs = P.current_chunk.instrs.data; + + unit->abyss = P.abyss; + unit->func_count = 1; + unit->funcs = P.unit_functions.data; + + return unit; +} diff --git a/parse.h b/parse.h new file mode 100644 index 0000000..458178b --- /dev/null +++ b/parse.h @@ -0,0 +1,15 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include"lexer.h" + +struct LUnit; +struct LTable; + +struct LUnit *lparse(size_t sz, Token *tokens, struct LTable *environment); \ No newline at end of file diff --git a/stc/algorithm.h b/stc/algorithm.h new file mode 100644 index 0000000..a517a15 --- /dev/null +++ b/stc/algorithm.h @@ -0,0 +1,11 @@ +#ifndef STC_ALGORITHM_H_INCLUDED +#define STC_ALGORITHM_H_INCLUDED + +// IWYU pragma: begin_exports +#include "sys/crange.h" +#include "sys/filter.h" +#include "sys/utility.h" +#include "sys/sumtype.h" +// IWYU pragma: end_exports + +#endif // STC_ALGORITHM_H_INCLUDED diff --git a/stc/arc.h b/stc/arc.h new file mode 100644 index 0000000..f703bb6 --- /dev/null +++ b/stc/arc.h @@ -0,0 +1,254 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to 
the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +/* arc: atomic reference counted shared_ptr (new implementation) + * + * The difference between arc and arc2 is that arc only takes up one pointer, + * whereas arc2 uses two. arc cannot be constructed from an already allocated pointer, + * which arc2 may. To use arc2, specify the `(c_arc2)` option after the key type, e.g.: + * #define T MyArc, MyType, (c_arc2 | c_no_atomic) + */ +/* +#include + +typedef struct { cstr name, last; } Person; + +Person Person_make(const char* name, const char* last) { + return (Person){.name = cstr_from(name), .last = cstr_from(last)}; +} +Person Person_clone(Person p) { + p.name = cstr_clone(p.name); + p.last = cstr_clone(p.last); + return p; +} +void Person_drop(Person* p) { + printf("drop: %s %s\n", cstr_str(&p->name), cstr_str(&p->last)); + cstr_drop(&p->name); + cstr_drop(&p->last); +} + +#define T ArcPers, Person, (c_keyclass) // clone, drop, cmp, hash +#include + +int main(void) { + ArcPers p = ArcPers_from(Person_make("John", "Smiths")); + ArcPers q = ArcPers_clone(p); // share the pointer + + printf("%s %s. 
uses: %ld\n", cstr_str(&q.get->name), cstr_str(&q.get->last), ArcPers_use_count(q)); + c_drop(ArcPers, &p, &q); +} +*/ +#include "priv/linkage.h" +#include "types.h" + +#ifndef STC_ARC_H_INCLUDED +#define STC_ARC_H_INCLUDED +#include "common.h" +#include + +#if defined __GNUC__ || defined __clang__ || defined _MSC_VER || defined i_no_atomic + typedef long catomic_long; +#else // try with C11 + typedef _Atomic(long) catomic_long; +#endif +#if defined _MSC_VER + #include + #define c_atomic_inc(v) (void)_InterlockedIncrement(v) + #define c_atomic_dec_and_test(v) !_InterlockedDecrement(v) +#elif defined __GNUC__ || defined __clang__ + #define c_atomic_inc(v) (void)__atomic_add_fetch(v, 1, __ATOMIC_SEQ_CST) + #define c_atomic_dec_and_test(v) !__atomic_sub_fetch(v, 1, __ATOMIC_SEQ_CST) +#else // try with C11 + #include + #define c_atomic_inc(v) (void)atomic_fetch_add(v, 1) + #define c_atomic_dec_and_test(v) (atomic_fetch_sub(v, 1) == 1) +#endif +#endif // STC_ARC_H_INCLUDED + +#ifndef _i_prefix + #define _i_prefix arc_ +#endif +#define _i_is_arc +#include "priv/template.h" +typedef i_keyraw _m_raw; + +#if c_OPTION(c_no_atomic) + #define i_no_atomic +#endif +#if !defined i_no_atomic + #define _i_atomic_inc(v) c_atomic_inc(v) + #define _i_atomic_dec_and_test(v) c_atomic_dec_and_test(v) +#else + #define _i_atomic_inc(v) (void)(++*(v)) + #define _i_atomic_dec_and_test(v) !(--*(v)) +#endif + +#if c_OPTION(c_arc2) + #define i_arc2 +#endif +#if !(defined i_arc2 || defined STC_USE_ARC2) +// ------------ Arc1 size of one pointer (union) ------------- + +#ifndef i_declared +_c_DEFTYPES(declare_arc, Self, i_key); +#endif +struct _c_MEMB(_ctrl) { + _m_value value; + catomic_long counter; +}; + +// c++: std::make_shared<_m_value>(val) +STC_INLINE Self _c_MEMB(_make)(_m_value val) { + Self arc = {.ctrl=_i_new_n(_c_MEMB(_ctrl), 1)}; + arc.ctrl->value = val; + arc.ctrl->counter = 1; + return arc; +} + +STC_INLINE Self _c_MEMB(_toarc)(_m_value* arc_raw) + { Self arc = 
{.ctrl=(_c_MEMB(_ctrl) *)arc_raw}; return arc; } + +// destructor +STC_INLINE void _c_MEMB(_drop)(const Self* self) { + if (self->ctrl && _i_atomic_dec_and_test(&self->ctrl->counter)) { + i_keydrop(self->get); + i_free(self->ctrl, c_sizeof *self->ctrl); + } +} + +#else // ------------ Arc2 size of two pointers ------------- + +#ifndef i_declared +_c_DEFTYPES(declare_arc2, Self, i_key); +#endif +struct _c_MEMB(_ctrl) { + catomic_long counter; // nb! counter <-> value order is swapped. + _m_value value; +}; +#define ctrl ctrl2 + +// c++: std::make_shared<_m_value>(val) +STC_INLINE Self _c_MEMB(_make)(_m_value val) { + Self out = {.ctrl2=_i_new_n(_c_MEMB(_ctrl), 1)}; + out.ctrl2->counter = 1; + out.get = &out.ctrl2->value; + *out.get = val; + return out; +} + +STC_INLINE Self _c_MEMB(_from_ptr)(_m_value* ptr) { + Self out = {.get=ptr}; + if (ptr) { + enum {OFFSET = offsetof(_c_MEMB(_ctrl), value)}; + // Adds 2 dummy bytes to ensure that the second if-test in _drop() is safe. + catomic_long* _rc = (catomic_long*)i_malloc(OFFSET + 2); + out.ctrl2 = (_c_MEMB(_ctrl)*) _rc; + out.ctrl2->counter = 1; + } + return out; +} + +// destructor +STC_INLINE void _c_MEMB(_drop)(const Self* self) { + if (self->ctrl2 && _i_atomic_dec_and_test(&self->ctrl2->counter)) { + enum {OFFSET = offsetof(_c_MEMB(_ctrl), value)}; + i_keydrop(self->get); + + if ((char*)self->ctrl2 + OFFSET == (char*)self->get) { + i_free((void*)self->ctrl2, c_sizeof *self->ctrl2); // _make() + } else { + i_free((void*)self->ctrl2, OFFSET + 2); // _from_ptr() + i_free(self->get, c_sizeof *self->get); + } + } +} + +// take ownership of pointer p +STC_INLINE void _c_MEMB(_reset_to)(Self* self, _m_value* ptr) { + _c_MEMB(_drop)(self); + *self = _c_MEMB(_from_ptr)(ptr); +} + +#endif // ---------- end Arc2 with two pointers ------------ + +STC_INLINE long _c_MEMB(_use_count)(Self arc) + { return arc.ctrl ? 
arc.ctrl->counter : 0; } + +STC_INLINE Self _c_MEMB(_init)(void) + { return c_literal(Self){0}; } + +STC_INLINE Self _c_MEMB(_from)(_m_raw raw) + { return _c_MEMB(_make)(i_keyfrom(raw)); } + +STC_INLINE _m_raw _c_MEMB(_toraw)(const Self* self) + { return i_keytoraw(self->get); } + +// move ownership to receiving arc +STC_INLINE Self _c_MEMB(_move)(Self* self) { + Self arc = *self; + *self = (Self){0}; + return arc; // now unowned +} + +// take ownership of unowned arc +STC_INLINE void _c_MEMB(_take)(Self* self, Self unowned) { + _c_MEMB(_drop)(self); + *self = unowned; // now owned +} + +// make shared ownership with owned arc +STC_INLINE void _c_MEMB(_assign)(Self* self, const Self* owned) { + if (owned->ctrl) + _i_atomic_inc(&owned->ctrl->counter); + _c_MEMB(_drop)(self); + *self = *owned; +} + +// clone by sharing. Does not use i_keyclone, so OK to always define. +STC_INLINE Self _c_MEMB(_clone)(Self owned) { + if (owned.ctrl) + _i_atomic_inc(&owned.ctrl->counter); + return owned; +} + +#if defined _i_has_cmp + STC_INLINE int _c_MEMB(_raw_cmp)(const _m_raw* rx, const _m_raw* ry) + { return i_cmp(rx, ry); } +#endif + +#if defined _i_has_eq + STC_INLINE bool _c_MEMB(_raw_eq)(const _m_raw* rx, const _m_raw* ry) + { return i_eq(rx, ry); } +#endif + +#if !defined i_no_hash && defined _i_has_eq + STC_INLINE size_t _c_MEMB(_raw_hash)(const _m_raw* rx) + { return i_hash(rx); } +#endif // i_no_hash + +#undef ctrl +#undef i_no_atomic +#undef i_arc2 +#undef _i_atomic_inc +#undef _i_atomic_dec_and_test +#undef _i_is_arc +#include "sys/finalize.h" diff --git a/stc/box.h b/stc/box.h new file mode 100644 index 0000000..d007aca --- /dev/null +++ b/stc/box.h @@ -0,0 +1,168 @@ + +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * 
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* cbox: heap allocated boxed type +#include + +typedef struct { cstr name, email; } Person; + +Person Person_from(const char* name, const char* email) { + return (Person){.name = cstr_from(name), .email = cstr_from(email)}; +} +Person Person_clone(Person p) { + p.name = cstr_clone(p.name); + p.email = cstr_clone(p.email); + return p; +} +void Person_drop(Person* p) { + printf("drop: %s %s\n", cstr_str(&p->name), cstr_str(&p->email)); + c_drop(cstr, &p->name, &p->email); +} + +#define T PBox, Person, (c_keyclass) // bind Person clone+drop fn's +#include + +int main(void) { + PBox p = PBox_from(Person_from("John Smiths", "josmiths@gmail.com")); + PBox q = PBox_clone(p); + cstr_assign(&q.get->name, "Joe Smiths"); + + printf("%s %s.\n", cstr_str(&p.get->name), cstr_str(&p.get->email)); + printf("%s %s.\n", cstr_str(&q.get->name), cstr_str(&q.get->email)); + + c_drop(PBox, &p, &q); +} +*/ +#include "priv/linkage.h" +#include "types.h" + +#ifndef STC_BOX_H_INCLUDED +#define STC_BOX_H_INCLUDED +#include "common.h" +#include + +#define cbox_null {0} +#endif // STC_BOX_H_INCLUDED + +#ifndef _i_prefix + #define _i_prefix box_ +#endif 
+#define _i_is_box +#include "priv/template.h" +typedef i_keyraw _m_raw; + +#ifndef i_declared +_c_DEFTYPES(declare_box, Self, i_key); +#endif + +// constructors (take ownership) +STC_INLINE Self _c_MEMB(_init)(void) + { return c_literal(Self){0}; } + +STC_INLINE long _c_MEMB(_use_count)(const Self* self) + { return (long)(self->get != NULL); } + + +// c++: std::make_unique(val) +STC_INLINE Self _c_MEMB(_make)(_m_value val) { + Self box = {_i_new_n(_m_value, 1)}; + *box.get = val; + return box; +} + +STC_INLINE Self _c_MEMB(_from_ptr)(_m_value* p) + { return c_literal(Self){p}; } + +STC_INLINE Self _c_MEMB(_from)(_m_raw raw) + { return _c_MEMB(_make)(i_keyfrom(raw)); } + +STC_INLINE _m_raw _c_MEMB(_toraw)(const Self* self) + { return i_keytoraw(self->get); } + +// destructor +STC_INLINE void _c_MEMB(_drop)(const Self* self) { + if (self->get) { + i_keydrop(self->get); + i_free(self->get, c_sizeof *self->get); + } +} + +// move ownership to receiving box +STC_INLINE Self _c_MEMB(_move)(Self* self) { + Self box = *self; + self->get = NULL; + return box; +} + +// release owned pointer, must be manually freed by receiver +STC_INLINE _m_value* _c_MEMB(_release)(Self* self) + { return _c_MEMB(_move)(self).get; } + +// take ownership of pointer p +STC_INLINE void _c_MEMB(_reset_to)(Self* self, _m_value* p) { + _c_MEMB(_drop)(self); + self->get = p; +} + +// take ownership of unowned box +STC_INLINE void _c_MEMB(_take)(Self* self, Self unowned) { + _c_MEMB(_drop)(self); + *self = unowned; +} + +// transfer ownership from other; set other to NULL +STC_INLINE void _c_MEMB(_assign)(Self* self, Self* owned) { + if (owned->get == self->get) + return; + _c_MEMB(_drop)(self); + *self = *owned; + owned->get = NULL; +} + +#if !defined i_no_clone + STC_INLINE Self _c_MEMB(_clone)(Self other) { + if (other.get == NULL) return other; + Self out = {_i_new_n(_m_value, 1)}; + *out.get = i_keyclone((*other.get)); + return out; + } +#endif // !i_no_clone + + +#if defined _i_has_cmp + 
STC_INLINE int _c_MEMB(_raw_cmp)(const _m_raw* rx, const _m_raw* ry) + { return i_cmp(rx, ry); } +#endif + +#if defined _i_has_eq + STC_INLINE bool _c_MEMB(_raw_eq)(const _m_raw* rx, const _m_raw* ry) + { return i_eq(rx, ry); } +#endif + +#if !defined i_no_hash && defined _i_has_eq + STC_INLINE size_t _c_MEMB(_raw_hash)(const _m_raw* rx) + { return i_hash(rx); } +#endif // i_no_hash +#undef _i_is_box +#include "sys/finalize.h" diff --git a/stc/cbits.h b/stc/cbits.h new file mode 100644 index 0000000..ac424ba --- /dev/null +++ b/stc/cbits.h @@ -0,0 +1,336 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +/* +Similar to boost::dynamic_bitset / std::bitset + +#include +#include "cbits.h" + +int main(void) { + cbits bset = cbits_with_size(23, true); + cbits_reset(&bset, 9); + cbits_resize(&bset, 43, false); + + printf("%4d: ", (int)cbits_size(&bset)); + for (c_range(i, cbits_size(&bset))) + printf("%d", cbits_at(&bset, i)); + puts(""); + cbits_set(&bset, 28); + cbits_resize(&bset, 77, true); + cbits_resize(&bset, 93, false); + cbits_resize(&bset, 102, true); + cbits_set_value(&bset, 99, false); + + printf("%4d: ", (int)cbits_size(&bset)); + for (c_range(i, cbits_size(&bset))) + printf("%d", cbits_at(&bset, i)); + puts(""); + + cbits_drop(&bset); +} +*/ +#include "priv/linkage.h" +#ifndef STC_CBITS_H_INCLUDED +#define STC_CBITS_H_INCLUDED +#include "common.h" +#include + +#if INTPTR_MAX == INT64_MAX +#define _gnu_popc(x) __builtin_popcountll(x) +#define _msc_popc(x) (int)__popcnt64(x) +#else +#define _gnu_popc(x) __builtin_popcount(x) +#define _msc_popc(x) (int)__popcnt(x) +#endif +#define _cbits_WS c_sizeof(uintptr_t) +#define _cbits_WB (8*_cbits_WS) +#define _cbits_bit(i) ((uintptr_t)1 << ((i) & (_cbits_WB - 1))) +#define _cbits_words(n) (isize)(((n) + (_cbits_WB - 1))/_cbits_WB) +#define _cbits_bytes(n) (_cbits_words(n)*_cbits_WS) + +#if defined _MSC_VER + #include + STC_INLINE int c_popcount(uintptr_t x) { return _msc_popc(x); } +#elif defined __GNUC__ || defined __clang__ + STC_INLINE int c_popcount(uintptr_t x) { return _gnu_popc(x); } +#else + STC_INLINE int c_popcount(uintptr_t x) { /* http://en.wikipedia.org/wiki/Hamming_weight */ + x -= (x >> 1) & (uintptr_t)0x5555555555555555; + x = (x & (uintptr_t)0x3333333333333333) + ((x >> 2) & (uintptr_t)0x3333333333333333); + x = (x + (x >> 4)) & (uintptr_t)0x0f0f0f0f0f0f0f0f; + return (int)((x*(uintptr_t)0x0101010101010101) >> (_cbits_WB - 8)); + } +#endif +#if defined __GNUC__ && !defined __clang__ && !defined __cplusplus +#pragma GCC diagnostic ignored "-Walloc-size-larger-than=" // gcc 11.4 +#pragma GCC 
diagnostic ignored "-Wmaybe-uninitialized" // gcc 11.4 +#endif + +#define cbits_print(...) c_MACRO_OVERLOAD(cbits_print, __VA_ARGS__) +#define cbits_print_1(self) cbits_print_4(self, stdout, 0, -1) +#define cbits_print_2(self, stream) cbits_print_4(self, stream, 0, -1) +#define cbits_print_4(self, stream, start, end) cbits_print_5(cbits, self, stream, start, end) +#define cbits_print_3(SetType, self, stream) cbits_print_5(SetType, self, stream, 0, -1) +#define cbits_print_5(SetType, self, stream, start, end) do { \ + const SetType* _cb_set = self; \ + isize _cb_start = start, _cb_end = end; \ + if (_cb_end == -1) _cb_end = SetType##_size(_cb_set); \ + for (c_range_3(_cb_i, _cb_start, _cb_end)) \ + fputc(SetType##_test(_cb_set, _cb_i) ? '1' : '0', stream); \ +} while (0) + +STC_INLINE isize _cbits_count(const uintptr_t* set, const isize sz) { + const isize n = sz/_cbits_WB; + isize count = 0; + for (isize i = 0; i < n; ++i) + count += c_popcount(set[i]); + if (sz & (_cbits_WB - 1)) + count += c_popcount(set[n] & (_cbits_bit(sz) - 1)); + return count; +} + +STC_INLINE char* _cbits_to_str(const uintptr_t* set, const isize sz, + char* out, isize start, isize stop) { + if (stop > sz) stop = sz; + c_assert(start <= stop); + + c_memset(out, '0', stop - start); + for (isize i = start; i < stop; ++i) + if ((set[i/_cbits_WB] & _cbits_bit(i)) != 0) + out[i - start] = '1'; + out[stop - start] = '\0'; + return out; +} + +#define _cbits_OPR(OPR, VAL) \ + const isize n = sz/_cbits_WB; \ + for (isize i = 0; i < n; ++i) \ + if ((set[i] OPR other[i]) != VAL) \ + return false; \ + if ((sz & (_cbits_WB - 1)) == 0) \ + return true; \ + const uintptr_t i = (uintptr_t)n, m = _cbits_bit(sz) - 1; \ + return ((set[i] OPR other[i]) & m) == (VAL & m) + +STC_INLINE bool _cbits_subset_of(const uintptr_t* set, const uintptr_t* other, const isize sz) + { _cbits_OPR(|, set[i]); } + +STC_INLINE bool _cbits_disjoint(const uintptr_t* set, const uintptr_t* other, const isize sz) + { _cbits_OPR(&, 0); 
} + +#endif // STC_CBITS_H_INCLUDED + +#if defined T && !defined i_type + #define i_type T +#endif +#if defined i_type + #define Self c_GETARG(1, i_type) + #define _i_length c_GETARG(2, i_type) +#else + #define Self cbits +#endif +#ifndef i_allocator + #define i_allocator c +#endif +#define _i_MEMB(name) c_JOIN(Self, name) + + +#if !defined _i_length // DYNAMIC SIZE BITARRAY + +typedef struct { uintptr_t *buffer; isize _size; } Self; +#define _i_assert(x) c_assert(x) + +STC_INLINE void cbits_drop(cbits* self) { i_free(self->buffer, _cbits_bytes(self->_size)); } +STC_INLINE isize cbits_size(const cbits* self) { return self->_size; } + +STC_INLINE cbits* cbits_take(cbits* self, cbits other) { + if (self->buffer != other.buffer) { + cbits_drop(self); + *self = other; + } + return self; +} + +STC_INLINE cbits cbits_clone(cbits other) { + cbits set = other; + const isize bytes = _cbits_bytes(other._size); + set.buffer = (uintptr_t *)c_safe_memcpy(i_malloc(bytes), other.buffer, bytes); + return set; +} + +STC_INLINE cbits* cbits_copy(cbits* self, const cbits* other) { + if (self->buffer == other->buffer) + return self; + if (self->_size != other->_size) + return cbits_take(self, cbits_clone(*other)); + c_memcpy(self->buffer, other->buffer, _cbits_bytes(other->_size)); + return self; +} + +STC_INLINE bool cbits_resize(cbits* self, const isize size, const bool value) { + const isize new_w = _cbits_words(size), osize = self->_size, old_w = _cbits_words(osize); + uintptr_t* b = (uintptr_t *)i_realloc(self->buffer, old_w*_cbits_WS, new_w*_cbits_WS); + if (b == NULL) return false; + self->buffer = b; self->_size = size; + if (size > osize) { + c_memset(self->buffer + old_w, -(int)value, (new_w - old_w)*_cbits_WS); + if (osize & (_cbits_WB - 1)) { + uintptr_t mask = _cbits_bit(osize) - 1; + if (value) self->buffer[old_w - 1] |= ~mask; + else self->buffer[old_w - 1] &= mask; + } + } + return true; +} + +STC_INLINE void cbits_set_all(cbits *self, const bool value); +STC_INLINE 
void cbits_set_pattern(cbits *self, const uintptr_t pattern); + +STC_INLINE cbits cbits_move(cbits* self) { + cbits tmp = *self; + self->buffer = NULL, self->_size = 0; + return tmp; +} + +STC_INLINE cbits cbits_with_size(const isize size, const bool value) { + cbits set = {(uintptr_t *)i_malloc(_cbits_bytes(size)), size}; + cbits_set_all(&set, value); + return set; +} + +STC_INLINE cbits cbits_with_pattern(const isize size, const uintptr_t pattern) { + cbits set = {(uintptr_t *)i_malloc(_cbits_bytes(size)), size}; + cbits_set_pattern(&set, pattern); + return set; +} + +#else // _i_length: FIXED SIZE BITARRAY + +#define _i_assert(x) (void)0 + +typedef struct { uintptr_t buffer[(_i_length - 1)/_cbits_WB + 1]; } Self; + +STC_INLINE void _i_MEMB(_drop)(Self* self) { (void)self; } +STC_INLINE isize _i_MEMB(_size)(const Self* self) { (void)self; return _i_length; } +STC_INLINE Self _i_MEMB(_move)(Self* self) { return *self; } +STC_INLINE Self* _i_MEMB(_take)(Self* self, Self other) { *self = other; return self; } +STC_INLINE Self _i_MEMB(_clone)(Self other) { return other; } +STC_INLINE void _i_MEMB(_copy)(Self* self, const Self* other) { *self = *other; } +STC_INLINE void _i_MEMB(_set_all)(Self *self, const bool value); +STC_INLINE void _i_MEMB(_set_pattern)(Self *self, const uintptr_t pattern); + +STC_INLINE Self _i_MEMB(_with_size)(const isize size, const bool value) { + c_assert(size <= _i_length); + Self set; _i_MEMB(_set_all)(&set, value); + return set; +} + +STC_INLINE Self _i_MEMB(_with_pattern)(const isize size, const uintptr_t pattern) { + c_assert(size <= _i_length); + Self set; _i_MEMB(_set_pattern)(&set, pattern); + return set; +} +#endif // _i_length + +// COMMON: + +STC_INLINE void _i_MEMB(_set_all)(Self *self, const bool value) + { c_memset(self->buffer, -(int)value, _cbits_bytes(_i_MEMB(_size)(self))); } + +STC_INLINE void _i_MEMB(_set_pattern)(Self *self, const uintptr_t pattern) { + isize n = _cbits_words(_i_MEMB(_size)(self)); + while (n--) 
self->buffer[n] = pattern; +} + +STC_INLINE bool _i_MEMB(_test)(const Self* self, const isize i) + { return (self->buffer[i/_cbits_WB] & _cbits_bit(i)) != 0; } + +STC_INLINE bool _i_MEMB(_at)(const Self* self, const isize i) + { c_assert(c_uless(i, _i_MEMB(_size)(self))); return _i_MEMB(_test)(self, i); } + +STC_INLINE void _i_MEMB(_set)(Self *self, const isize i) + { self->buffer[i/_cbits_WB] |= _cbits_bit(i); } + +STC_INLINE void _i_MEMB(_reset)(Self *self, const isize i) + { self->buffer[i/_cbits_WB] &= ~_cbits_bit(i); } + +STC_INLINE void _i_MEMB(_set_value)(Self *self, const isize i, const bool b) { + self->buffer[i/_cbits_WB] ^= ((uintptr_t)-(int)b ^ self->buffer[i/_cbits_WB]) & _cbits_bit(i); +} + +STC_INLINE void _i_MEMB(_flip)(Self *self, const isize i) + { self->buffer[i/_cbits_WB] ^= _cbits_bit(i); } + +STC_INLINE void _i_MEMB(_flip_all)(Self *self) { + isize n = _cbits_words(_i_MEMB(_size)(self)); + while (n--) self->buffer[n] ^= ~(uintptr_t)0; +} + +STC_INLINE Self _i_MEMB(_from)(const char* str) { + isize n = c_strlen(str); + Self set = _i_MEMB(_with_size)(n, false); + while (n--) if (str[n] == '1') _i_MEMB(_set)(&set, n); + return set; +} + +/* Intersection */ +STC_INLINE void _i_MEMB(_intersect)(Self *self, const Self* other) { + _i_assert(self->_size == other->_size); + isize n = _cbits_words(_i_MEMB(_size)(self)); + while (n--) self->buffer[n] &= other->buffer[n]; +} +/* Union */ +STC_INLINE void _i_MEMB(_union)(Self *self, const Self* other) { + _i_assert(self->_size == other->_size); + isize n = _cbits_words(_i_MEMB(_size)(self)); + while (n--) self->buffer[n] |= other->buffer[n]; +} +/* Exclusive disjunction */ +STC_INLINE void _i_MEMB(_xor)(Self *self, const Self* other) { + _i_assert(self->_size == other->_size); + isize n = _cbits_words(_i_MEMB(_size)(self)); + while (n--) self->buffer[n] ^= other->buffer[n]; +} + +STC_INLINE isize _i_MEMB(_count)(const Self* self) + { return _cbits_count(self->buffer, _i_MEMB(_size)(self)); } + +STC_INLINE 
char* _i_MEMB(_to_str)(const Self* self, char* out, isize start, isize stop) + { return _cbits_to_str(self->buffer, _i_MEMB(_size)(self), out, start, stop); } + +STC_INLINE bool _i_MEMB(_subset_of)(const Self* self, const Self* other) { + _i_assert(self->_size == other->_size); + return _cbits_subset_of(self->buffer, other->buffer, _i_MEMB(_size)(self)); +} + +STC_INLINE bool _i_MEMB(_disjoint)(const Self* self, const Self* other) { + _i_assert(self->_size == other->_size); + return _cbits_disjoint(self->buffer, other->buffer, _i_MEMB(_size)(self)); +} + +#include "priv/linkage2.h" +#undef i_type +#undef _i_length +#undef _i_MEMB +#undef _i_assert +#undef Self diff --git a/stc/common.h b/stc/common.h new file mode 100644 index 0000000..cb63688 --- /dev/null +++ b/stc/common.h @@ -0,0 +1,355 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef STC_COMMON_H_INCLUDED +#define STC_COMMON_H_INCLUDED + +#ifdef _MSC_VER + #pragma warning(disable: 4116 4996) // unnamed type definition in parentheses +#endif +#include +#include +#include +#include +#include + +typedef ptrdiff_t isize; +#ifndef STC_NO_INT_DEFS + typedef int8_t int8; + typedef uint8_t uint8; + typedef int16_t int16; + typedef uint16_t uint16; + typedef int32_t int32; + typedef uint32_t uint32; + typedef int64_t int64; + typedef uint64_t uint64; +#endif +#if !defined STC_HAS_TYPEOF && (_MSC_FULL_VER >= 193933428 || \ + defined __GNUC__ || defined __clang__ || defined __TINYC__) + #define STC_HAS_TYPEOF 1 +#endif +#if defined __GNUC__ + #define c_GNUATTR(...) __attribute__((__VA_ARGS__)) +#else + #define c_GNUATTR(...) +#endif +#define STC_INLINE static inline c_GNUATTR(unused) +#define c_ZI PRIiPTR +#define c_ZU PRIuPTR +#define c_NPOS INTPTR_MAX + +// Macro overloading feature support +#define c_MACRO_OVERLOAD(name, ...) \ + c_JOIN(name ## _,c_NUMARGS(__VA_ARGS__))(__VA_ARGS__) +#define c_JOIN0(a, b) a ## b +#define c_JOIN(a, b) c_JOIN0(a, b) +#define c_NUMARGS(...) _c_APPLY_ARG_N((__VA_ARGS__, _c_RSEQ_N)) +#define _c_APPLY_ARG_N(args) _c_ARG_N args +#define _c_RSEQ_N 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, +#define _c_ARG_N(_1,_2,_3,_4,_5,_6,_7,_8,_9,_10,_11,_12,_13,_14,_15,_16,_17,_18,_19,_20,N,...) N + +// Saturated overloading +// #define foo(...) foo_I(__VA_ARGS__, c_COMMA_N(foo_3), c_COMMA_N(foo_2), c_COMMA_N(foo_1),)(__VA_ARGS__) +// #define foo_I(a,b,c, n, ...) c_TUPLE_AT_1(n, foo_n,) +#define c_TUPLE_AT_1(x,y,...) y +#define c_COMMA_N(x) ,x +#define c_EXPAND(...) __VA_ARGS__ + +// Select arg, e.g. for #define i_type A,B then c_GETARG(2, i_type) is B +#define c_GETARG(N, ...) c_ARG_##N(__VA_ARGS__,) +#define c_ARG_1(a, ...) a +#define c_ARG_2(a, b, ...) b +#define c_ARG_3(a, b, c, ...) c +#define c_ARG_4(a, b, c, d, ...) 
d + +#define _i_new_n(T, n) ((T*)i_malloc((n)*c_sizeof(T))) +#define _i_new_zeros(T, n) ((T*)i_calloc(n, c_sizeof(T))) +#define _i_realloc_n(ptr, old_n, n) i_realloc(ptr, (old_n)*c_sizeof *(ptr), (n)*c_sizeof *(ptr)) +#define _i_free_n(ptr, n) i_free(ptr, (n)*c_sizeof *(ptr)) + +#ifndef __cplusplus + #define c_new(T, ...) ((T*)c_safe_memcpy(c_malloc(c_sizeof(T)), ((T[]){__VA_ARGS__}), c_sizeof(T))) + #define c_literal(T) (T) + #define c_make_array(T, ...) ((T[])__VA_ARGS__) + #define c_make_array2d(T, N, ...) ((T[][N])__VA_ARGS__) +#else + #include + #define c_new(T, ...) new (c_malloc(c_sizeof(T))) T(__VA_ARGS__) + #define c_literal(T) T + template struct _c_Array { T data[M][N]; }; + #define c_make_array(T, ...) (_c_Array{{__VA_ARGS__}}.data[0]) + #define c_make_array2d(T, N, ...) (_c_Array{__VA_ARGS__}.data) +#endif + +#ifdef STC_ALLOCATOR + #define c_malloc c_JOIN(STC_ALLOCATOR, _malloc) + #define c_calloc c_JOIN(STC_ALLOCATOR, _calloc) + #define c_realloc c_JOIN(STC_ALLOCATOR, _realloc) + #define c_free c_JOIN(STC_ALLOCATOR, _free) +#else + #define c_malloc(sz) malloc(c_i2u_size(sz)) + #define c_calloc(n, sz) calloc(c_i2u_size(n), c_i2u_size(sz)) + #define c_realloc(ptr, old_sz, sz) realloc(ptr, c_i2u_size(1 ? (sz) : (old_sz))) + #define c_free(ptr, sz) ((void)(sz), free(ptr)) +#endif + +#define c_new_n(T, n) ((T*)c_malloc((n)*c_sizeof(T))) +#define c_free_n(ptr, n) c_free(ptr, (n)*c_sizeof *(ptr)) +#define c_realloc_n(ptr, old_n, n) c_realloc(ptr, (old_n)*c_sizeof *(ptr), (n)*c_sizeof *(ptr)) +#define c_delete_n(T, ptr, n) do { \ + T* _tp = ptr; isize _n = n, _i = _n; \ + while (_i--) T##_drop((_tp + _i)); \ + c_free(_tp, _n*c_sizeof(T)); \ +} while (0) + +#define c_static_assert(expr) (void)sizeof(int[(expr) ? 1 : -1]) +#if defined STC_NDEBUG || defined NDEBUG + #define c_assert(expr) (void)sizeof(expr) +#else + #define c_assert(expr) assert(expr) +#endif +#define c_container_of(p, C, m) ((C*)((char*)(1 ? 
(p) : &((C*)0)->m) - offsetof(C, m))) +#define c_const_cast(Tp, p) ((Tp)(1 ? (p) : (Tp)0)) +#define c_litstrlen(literal) (c_sizeof("" literal) - 1) +#define c_countof(a) (isize)(sizeof(a)/sizeof 0[a]) +#define c_arraylen(a) c_countof(a) // [deprecated]? + +// expect signed ints to/from these (use with gcc -Wconversion) +#define c_sizeof (isize)sizeof +#define c_strlen(s) (isize)strlen(s) +#define c_strncmp(a, b, ilen) strncmp(a, b, c_i2u_size(ilen)) +#define c_memcpy(d, s, ilen) memcpy(d, s, c_i2u_size(ilen)) +#define c_memmove(d, s, ilen) memmove(d, s, c_i2u_size(ilen)) +#define c_memset(d, val, ilen) memset(d, val, c_i2u_size(ilen)) +#define c_memcmp(a, b, ilen) memcmp(a, b, c_i2u_size(ilen)) +// library internal, but may be useful in user code: +#define c_u2i_size(u) (isize)(1 ? (u) : (size_t)1) // warns if u is signed +#define c_i2u_size(i) (size_t)(1 ? (i) : -1) // warns if i is unsigned +#define c_uless(a, b) ((size_t)(a) < (size_t)(b)) +#define c_safe_cast(T, From, x) ((T)(1 ? (x) : (From){0})) + +// x, y are i_keyraw* type, which defaults to i_key*. vp is i_key* type. +#define c_memcmp_eq(x, y) (memcmp(x, y, sizeof *(x)) == 0) +#define c_default_eq(x, y) (*(x) == *(y)) +#define c_default_less(x, y) (*(x) < *(y)) +#define c_default_cmp(x, y) (c_default_less(y, x) - c_default_less(x, y)) +#define c_default_hash(vp) c_hash_n(vp, sizeof *(vp)) +#define c_default_clone(v) (v) +#define c_default_toraw(vp) (*(vp)) +#define c_default_drop(vp) ((void) (vp)) + +// non-owning char pointer +typedef const char* cstr_raw; +#define cstr_raw_cmp(x, y) strcmp(*(x), *(y)) +#define cstr_raw_eq(x, y) (cstr_raw_cmp(x, y) == 0) +#define cstr_raw_hash(vp) c_hash_str(*(vp)) +#define cstr_raw_clone(v) (v) +#define cstr_raw_drop(vp) ((void)vp) + +// Control block macros + +// [deprecated]: +#define c_init(...) c_make(__VA_ARGS__) +/* c_forlist: deprecated spelling of c_foritems; forwards every argument to c_items unchanged */ +#define c_forlist(...) for (c_items(__VA_ARGS__)) +#define c_foritems(...) for (c_items(__VA_ARGS__)) +#define c_foreach(...) 
for (c_each(__VA_ARGS__)) +#define c_foreach_n(...) for (c_each_n(__VA_ARGS__)) +#define c_foreach_kv(...) for (c_each_kv(__VA_ARGS__)) +#define c_foreach_reverse(...) for (c_each_reverse(__VA_ARGS__)) +#define c_forrange(...) for (c_range(__VA_ARGS__)) +#define c_forrange32(...) for (c_range32(__VA_ARGS__)) + +// New: +#define c_each(...) c_MACRO_OVERLOAD(c_each, __VA_ARGS__) +#define c_each_3(it, C, cnt) \ + C##_iter it = C##_begin(&cnt); it.ref; C##_next(&it) +#define c_each_4(it, C, start, end) \ + _c_each(it, C, start, (end).ref, _) + +#define c_each_n(...) c_MACRO_OVERLOAD(c_each_n, __VA_ARGS__) +#define c_each_n_3(it, C, cnt) c_each_n_4(it, C, cnt, INTPTR_MAX) +#define c_each_n_4(it, C, cnt, n) \ + struct {C##_iter iter; C##_value* ref; isize size, index;} \ + it = {.iter=C##_begin(&cnt), .size=n}; (it.ref = it.iter.ref) && it.index < it.size; C##_next(&it.iter), ++it.index + +#define c_each_reverse(...) c_MACRO_OVERLOAD(c_each_reverse, __VA_ARGS__) +#define c_each_reverse_3(it, C, cnt) /* works for stack, vec, queue, deque */ \ + C##_iter it = C##_rbegin(&cnt); it.ref; C##_rnext(&it) +#define c_each_reverse_4(it, C, start, end) \ + _c_each(it, C, start, (end).ref, _r) + +#define _c_each(it, C, start, endref, rev) /* private */ \ + C##_iter it = (start), *_endref_##it = c_safe_cast(C##_iter*, C##_value*, endref) \ + ; it.ref != (C##_value*)_endref_##it; C##rev##next(&it) + +#define c_each_kv(...) 
c_MACRO_OVERLOAD(c_each_kv, __VA_ARGS__) +#define c_each_kv_4(key, val, C, cnt) /* structured binding for maps */ \ + _c_each_kv(key, val, C, C##_begin(&cnt), NULL) +#define c_each_kv_5(key, val, C, start, end) \ + _c_each_kv(key, val, C, start, (end).ref) + +#define _c_each_kv(key, val, C, start, endref) /* private */ \ + const C##_key *key = (const C##_key*)&key; key; ) \ + for (C##_mapped *val; key; key = NULL) \ + for (C##_iter _it_##key = start, *_endref_##key = c_safe_cast(C##_iter*, C##_value*, endref); \ + _it_##key.ref != (C##_value*)_endref_##key && (key = &_it_##key.ref->first, val = &_it_##key.ref->second); \ + C##_next(&_it_##key) + +#define c_items(it, T, ...) \ + struct {T* ref; int size, index;} \ + it = {.ref=c_make_array(T, __VA_ARGS__), .size=(int)(sizeof((T[])__VA_ARGS__)/sizeof(T))} \ + ; it.index < it.size ; ++it.ref, ++it.index + +// c_range, c_range32: python-like int range iteration +#define c_range_t(...) c_MACRO_OVERLOAD(c_range_t, __VA_ARGS__) +#define c_range_t_3(T, i, stop) c_range_t_4(T, i, 0, stop) +#define c_range_t_4(T, i, start, stop) \ + T i=start, _c_end_##i=stop; i < _c_end_##i; ++i +#define c_range_t_5(T, i, start, stop, step) \ + T i=start, _c_inc_##i=step, _c_end_##i=(stop) - (_c_inc_##i > 0) \ + ; (_c_inc_##i > 0) == (i <= _c_end_##i) ; i += _c_inc_##i + +#define c_range(...) c_MACRO_OVERLOAD(c_range, __VA_ARGS__) +#define c_range_1(stop) c_range_t_4(isize, _c_i1, 0, stop) +#define c_range_2(i, stop) c_range_t_4(isize, i, 0, stop) +#define c_range_3(i, start, stop) c_range_t_4(isize, i, start, stop) +#define c_range_4(i, start, stop, step) c_range_t_5(isize, i, start, stop, step) + +#define c_range32(...) 
c_MACRO_OVERLOAD(c_range32, __VA_ARGS__) +#define c_range32_2(i, stop) c_range_t_4(int32_t, i, 0, stop) +#define c_range32_3(i, start, stop) c_range_t_4(int32_t, i, start, stop) +#define c_range32_4(i, start, stop, step) c_range_t_5(int32_t, i, start, stop, step) + +// make container from a literal list +#define c_make(C, ...) \ + C##_from_n(c_make_array(C##_raw, __VA_ARGS__), c_sizeof((C##_raw[])__VA_ARGS__)/c_sizeof(C##_raw)) + +// put multiple raw-type elements from a literal list into a container +#define c_put_items(C, cnt, ...) \ + C##_put_n(cnt, c_make_array(C##_raw, __VA_ARGS__), c_sizeof((C##_raw[])__VA_ARGS__)/c_sizeof(C##_raw)) + +// drop multiple containers of same type +#define c_drop(C, ...) \ + do { for (c_items(_c_i2, C*, {__VA_ARGS__})) C##_drop(*_c_i2.ref); } while(0) + +// RAII scopes +#define c_defer(...) \ + for (int _c_i3 = 0; _c_i3++ == 0; __VA_ARGS__) + +#define c_with(...) c_MACRO_OVERLOAD(c_with, __VA_ARGS__) +#define c_with_2(init, deinit) \ + for (int _c_i4 = 0; _c_i4 == 0; ) for (init; _c_i4++ == 0; deinit) +#define c_with_3(init, condition, deinit) \ + for (int _c_i5 = 0; _c_i5 == 0; ) for (init; _c_i5++ == 0 && (condition); deinit) + +// General functions + +STC_INLINE void* c_safe_memcpy(void* dst, const void* src, isize size) + { return dst ? 
memcpy(dst, src, (size_t)size) : NULL; } + +#if INTPTR_MAX == INT64_MAX + #define FNV_BASIS 0xcbf29ce484222325 + #define FNV_PRIME 0x00000100000001b3 +#else + #define FNV_BASIS 0x811c9dc5 + #define FNV_PRIME 0x01000193 +#endif + +STC_INLINE size_t c_basehash_n(const void* key, isize len) { + const uint8_t* msg = (const uint8_t*)key; + size_t h = FNV_BASIS, block = 0; + + while (len >= c_sizeof h) { + memcpy(&block, msg, sizeof h); + h ^= block; + h *= FNV_PRIME; + msg += c_sizeof h; + len -= c_sizeof h; + } + while (len--) { + h ^= *(msg++); + h *= FNV_PRIME; + } + return h; +} + +STC_INLINE size_t c_hash_n(const void* key, isize len) { + uint64_t b8; uint32_t b4; + switch (len) { + case 8: memcpy(&b8, key, 8); return (size_t)(b8 * 0xc6a4a7935bd1e99d); + case 4: memcpy(&b4, key, 4); return b4 * FNV_BASIS; + default: return c_basehash_n(key, len); + } +} + +STC_INLINE size_t c_hash_str(const char *str) { + const uint8_t* msg = (const uint8_t*)str; + size_t h = FNV_BASIS; + while (*msg) { + h ^= *(msg++); + h *= FNV_PRIME; + } + return h; +} + +#define c_hash_mix(...) 
/* non-commutative hash combine */ \ + c_hash_mix_n(c_make_array(size_t, {__VA_ARGS__}), c_sizeof((size_t[]){__VA_ARGS__})/c_sizeof(size_t)) + +STC_INLINE size_t c_hash_mix_n(size_t h[], isize n) { + for (isize i = 1; i < n; ++i) h[0] += h[0] ^ h[i]; + return h[0]; +} + +// generic typesafe swap +#define c_swap(xp, yp) do { \ + (void)sizeof((xp) == (yp)); \ + char _tv[sizeof *(xp)]; \ + void *_xp = xp, *_yp = yp; \ + memcpy(_tv, _xp, sizeof _tv); \ + memcpy(_xp, _yp, sizeof _tv); \ + memcpy(_yp, _tv, sizeof _tv); \ +} while (0) + +// get next power of two +STC_INLINE isize c_next_pow2(isize n) { + n--; + n |= n >> 1, n |= n >> 2; + n |= n >> 4, n |= n >> 8; + n |= n >> 16; + #if INTPTR_MAX == INT64_MAX + n |= n >> 32; + #endif + return n + 1; +} + +STC_INLINE char* c_strnstrn(const char *str, isize slen, const char *needle, isize nlen) { + if (nlen == 0) return (char *)str; + if (nlen > slen) return NULL; + slen -= nlen; + do { + if (*str == *needle && !c_memcmp(str, needle, nlen)) + return (char *)str; + ++str; + } while (slen--); + return NULL; +} +#endif // STC_COMMON_H_INCLUDED diff --git a/stc/coption.h b/stc/coption.h new file mode 100644 index 0000000..6765064 --- /dev/null +++ b/stc/coption.h @@ -0,0 +1,180 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +/* +Inspired by https://attractivechaos.wordpress.com/2018/08/31/a-survey-of-argument-parsing-libraries-in-c-c +Fixed major bugs with optional arguments (both long and short). +Added arg->optstr output field, more consistent API. + +coption_get() is similar to GNU's getopt_long(). Each call parses one option and +returns the option name. opt->arg points to the option argument if present. +The function returns -1 when all command-line arguments are parsed. In this case, +opt->ind is the index of the first non-option argument. + +#include +#include + +int main(int argc, char *argv[]) +{ + coption_long longopts[] = { + {"foo", coption_no_argument, 'f'}, + {"bar", coption_required_argument, 'b'}, + {"opt", coption_optional_argument, 'o'}, + {0} + }; + const char* optstr = "xy:z::123"; + printf("program -x -y ARG -z [ARG] -1 -2 -3 --foo --bar ARG --opt [ARG] [ARGUMENTS]\n"); + int c; + coption opt = coption_init(); + while ((c = coption_get(&opt, argc, argv, optstr, longopts)) != -1) { + switch (c) { + case '?': printf("error: unknown option: %s\n", opt.optstr); return 1; + case ':': printf("error: missing argument for %s (%c)\n", opt.optstr, opt.opt); return 2; + default: printf("option: %c [%s]\n", opt.opt, opt.arg ? 
opt.arg : ""); break; + } + } + printf("\nNon-option arguments:"); + for (int i = opt.ind; i < argc; ++i) + printf(" %s", argv[i]); + putchar('\n'); + return 0; +} +*/ +#ifndef STC_COPTION_H_INCLUDED +#define STC_COPTION_H_INCLUDED + +#include +#include + +typedef enum { + coption_no_argument, + coption_required_argument, + coption_optional_argument +} coption_type; + +typedef struct { + const char *name; + coption_type type; + int val; +} coption_long; + +typedef struct { + int ind; /* equivalent to optind */ + int opt; /* equivalent to optopt */ + const char *optstr; /* points to the option string */ + const char *arg; /* equivalent to optarg */ + int _i, _pos, _nargs; + char _optstr[4]; +} coption; + +static inline coption coption_init(void) { + coption opt = {1, 0, NULL, NULL, 1, 0, 0, {'-', '?', '\0'}}; + return opt; +} + +/* move argv[j] over n elements to the left */ +static void coption_permute_(char *argv[], int j, int n) { + int k; + char *p = argv[j]; + for (k = 0; k < n; ++k) + argv[j - k] = argv[j - k - 1]; + argv[j - k] = p; +} + +/* @param opt output; must be initialized to coption_init() on first call + * @return ASCII val for a short option; longopt.val for a long option; + * -1 if argv[] is fully processed; '?' 
for an unknown option or + * an ambiguous long option; ':' if an option argument is missing + */ +static int coption_get(coption *opt, int argc, char *argv[], + const char *shortopts, const coption_long *longopts) { + int optc = -1, i0, j, posixly_correct = (shortopts && shortopts[0] == '+'); + if (!posixly_correct) { + while (opt->_i < argc && (argv[opt->_i][0] != '-' || argv[opt->_i][1] == '\0')) + ++opt->_i, ++opt->_nargs; + } + opt->opt = 0, opt->optstr = NULL, opt->arg = NULL, i0 = opt->_i; + if (opt->_i >= argc || argv[opt->_i][0] != '-' || argv[opt->_i][1] == '\0') { + opt->ind = opt->_i - opt->_nargs; + return -1; + } + if (argv[opt->_i][0] == '-' && argv[opt->_i][1] == '-') { /* "--" or a long option */ + if (argv[opt->_i][2] == '\0') { /* a bare "--" */ + coption_permute_(argv, opt->_i, opt->_nargs); + ++opt->_i, opt->ind = opt->_i - opt->_nargs; + return -1; + } + optc = '?', opt->_pos = -1; + if (longopts) { /* parse long options */ + int k, n_exact = 0, n_partial = 0; + const coption_long *o = 0, *o_exact = 0, *o_partial = 0; + for (j = 2; argv[opt->_i][j] != '\0' && argv[opt->_i][j] != '='; ++j) {} /* find the end of the option name */ + for (k = 0; longopts[k].name != 0; ++k) + if (strncmp(&argv[opt->_i][2], longopts[k].name, (size_t)(j - 2)) == 0) { + if (longopts[k].name[j - 2] == 0) ++n_exact, o_exact = &longopts[k]; + else ++n_partial, o_partial = &longopts[k]; + } + opt->optstr = argv[opt->_i]; + if (n_exact > 1 || (n_exact == 0 && n_partial > 1)) return '?'; + o = n_exact == 1? o_exact : n_partial == 1? 
o_partial : 0; + if (o) { + opt->opt = optc = o->val; + if (o->type != coption_no_argument) { + if (argv[opt->_i][j] == '=') + opt->arg = &argv[opt->_i][j + 1]; + else if (argv[opt->_i][j] == '\0' && opt->_i < argc - 1 && (o->type == coption_required_argument || + argv[opt->_i + 1][0] != '-')) + opt->arg = argv[++opt->_i]; + else if (o->type == coption_required_argument) + optc = ':'; /* missing option argument */ + } + } + } + } else if (shortopts) { /* a short option */ + const char *p; + if (opt->_pos == 0) opt->_pos = 1; + optc = opt->opt = argv[opt->_i][opt->_pos++]; + opt->_optstr[1] = optc, opt->optstr = opt->_optstr; + p = strchr(shortopts, optc); + if (p == 0) { + optc = '?'; /* unknown option */ + } else if (p[1] == ':') { + if (argv[opt->_i][opt->_pos] != '\0') + opt->arg = &argv[opt->_i][opt->_pos]; + else if (opt->_i < argc - 1 && (p[2] != ':' || argv[opt->_i + 1][0] != '-')) + opt->arg = argv[++opt->_i]; + else if (p[2] != ':') + optc = ':'; + opt->_pos = -1; + } + } + if (opt->_pos < 0 || argv[opt->_i][opt->_pos] == 0) { + ++opt->_i, opt->_pos = 0; + if (opt->_nargs > 0) /* permute */ + for (j = i0; j < opt->_i; ++j) + coption_permute_(argv, j, opt->_nargs); + } + opt->ind = opt->_i - opt->_nargs; + return optc; +} + +#endif // STC_COPTION_H_INCLUDED diff --git a/stc/coroutine.h b/stc/coroutine.h new file mode 100644 index 0000000..bfecc46 --- /dev/null +++ b/stc/coroutine.h @@ -0,0 +1,563 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this 
permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef STC_COROUTINE_H_INCLUDED +#define STC_COROUTINE_H_INCLUDED +/* +#include +#include + +struct iterpair { + cco_base base; // required member + int max_x, max_y; + int x, y; +}; + +int iterpair(struct iterpair* I) { + cco_async (I) { + for (I->x = 0; I->x < I->max_x; I->x++) + for (I->y = 0; I->y < I->max_y; I->y++) + cco_yield; // suspend + } + + puts("done"); + return 0; // CCO_DONE +} + +int main(void) { + struct iterpair it = {.max_x=3, .max_y=3}; + int n = 0; + while (iterpair(&it)) + { + printf("%d %d\n", it.x, it.y); + // example of early stop: + if (++n == 7) cco_stop(&it); // signal to stop/finalize in next + } + return 0; +} +*/ +#include +#include "common.h" + +enum { + CCO_STATE_INIT = 0, + CCO_STATE_DONE = -1, + CCO_STATE_DROP = -2, +}; +enum cco_status { + CCO_DONE = 0, + CCO_YIELD = 1<<12, + CCO_SUSPEND = 1<<13, + CCO_AWAIT = 1<<14, +}; +#define CCO_CANCEL (1U<<30) + +typedef struct { + int launch_count; + int await_count; +} cco_group; // waitgroup + +#define cco_state_struct(Prefix) \ + struct Prefix##_state { \ + int32_t pos:24; \ + bool drop; \ + struct Prefix##_fiber* fb; \ + cco_group* wg; \ + } + +#define cco_is_initial(co) ((co)->base.state.pos == CCO_STATE_INIT) +#define cco_is_done(co) ((co)->base.state.pos == CCO_STATE_DONE) +#define cco_is_active(co) ((co)->base.state.pos != CCO_STATE_DONE) + +#if defined STC_HAS_TYPEOF && STC_HAS_TYPEOF + #define 
_cco_state(co) __typeof__((co)->base.state) + #define _cco_validate_task_struct(co) \ + c_static_assert(/* error: co->base not first member in task struct */ \ + sizeof((co)->base) == sizeof(cco_base) || \ + offsetof(__typeof__(*(co)), base) == 0) +#else + #define _cco_state(co) cco_state + #define _cco_validate_task_struct(co) (void)0 +#endif + +#define cco_async(co) \ + if (0) goto _resume; \ + else for (_cco_state(co)* _state = (_cco_validate_task_struct(co), (_cco_state(co)*) &(co)->base.state) \ + ; _state->pos != CCO_STATE_DONE \ + ; _state->pos = CCO_STATE_DONE, \ + (void)(sizeof((co)->base) > sizeof(cco_base) && _state->wg ? --_state->wg->launch_count : 0)) \ + _resume: switch (_state->pos) case CCO_STATE_INIT: // thanks, @liigo! + +#define cco_drop /* label */ \ + _state->drop = true; /* FALLTHRU */ \ + case CCO_STATE_DROP +#define cco_cleanup [fix: use cco_drop:] +#define cco_routine [fix: use cco_async] + +#define cco_stop(co) \ + do { \ + cco_state* _s = (cco_state*)&(co)->base.state; \ + if (!_s->drop) { _s->pos = CCO_STATE_DROP; _s->drop = true; } \ + } while (0) + +#define cco_reset_state(co) \ + do { \ + cco_state* _s = (cco_state*)&(co)->base.state; \ + _s->pos = CCO_STATE_INIT, _s->drop = false; \ + } while (0) + +#define cco_return \ + do { \ + _state->pos = (_state->drop ? CCO_STATE_DONE : CCO_STATE_DROP); \ + _state->drop = true; \ + goto _resume; \ + } while (0) + +#define cco_exit() \ + do { \ + _state->pos = CCO_STATE_DONE; \ + goto _resume; \ + } while (0) + +#define cco_yield_v(status) \ + do { \ + _state->pos = __LINE__; return status; \ + case __LINE__:; \ + } while (0) + +#define cco_yield \ + cco_yield_v(CCO_YIELD) + +#define cco_suspend \ + cco_yield_v(CCO_SUSPEND) + +#define cco_await(until) \ + do { \ + _state->pos = __LINE__; /* FALLTHRU */ \ + case __LINE__: if (!(until)) return CCO_AWAIT; \ + } while (0) + +/* cco_await_coroutine(): assumes coroutine returns a status value (int) */ +#define cco_await_coroutine(...) 
c_MACRO_OVERLOAD(cco_await_coroutine, __VA_ARGS__) +#define cco_await_coroutine_1(corocall) cco_await_coroutine_2(corocall, CCO_DONE) +#define cco_await_coroutine_2(corocall, awaitbits) \ + do { \ + _state->pos = __LINE__; /* FALLTHRU */ \ + case __LINE__: { \ + int _res = corocall; \ + if (_res & ~(awaitbits)) return _res; \ + } \ + } while (0) + +/* cco_run_coroutine(): assumes coroutine returns a status value (int) */ +#define cco_run_coroutine(corocall) \ + while ((1 ? (corocall) : -1) != CCO_DONE) + + +/* + * Tasks and Fibers + */ +struct cco_error { + int32_t code, line; + const char* file; +}; + +#define cco_fiber_struct(Prefix, Env) \ + typedef Env Prefix##_env; \ + struct Prefix##_fiber { \ + struct cco_task* task; \ + Prefix##_env* env; \ + struct cco_task* parent_task; \ + struct cco_task_fiber* next; \ + struct cco_task_state recover_state; \ + struct cco_error err; \ + int awaitbits, status; \ + cco_base base; /* is a coroutine object itself */ \ + } + +/* Define a Task struct */ +#define cco_task_struct(...) 
c_MACRO_OVERLOAD(cco_task_struct, __VA_ARGS__) +#define cco_task_struct_1(Task) \ + cco_task_struct_2(Task, struct _cco_environment) + +#define cco_task_struct_2(Task, Env) \ + cco_fiber_struct(Task, Env); \ + cco_state_struct(Task); \ + _cco_task_struct(Task) + +#define _cco_task_struct(Task) \ + struct Task; \ + typedef struct { \ + int (*func)(struct Task*); \ + int awaitbits; \ + struct Task##_state state; \ + struct cco_task* parent_task; \ + } Task##_base; \ + struct Task + +/* Base cco_task type */ +typedef cco_state_struct(cco_task) cco_state; +typedef struct { cco_state state; } cco_base; +cco_fiber_struct(cco_task, void); +_cco_task_struct(cco_task) { cco_task_base base; }; +typedef struct cco_task_fiber cco_fiber; +typedef struct cco_task cco_task; + +#define cco_err() (&_state->fb->err) +#define cco_status() (_state->fb->status + 0) +#define cco_fb(task) ((cco_fiber*)(task)->base.state.fb + 0) +#define cco_env(task) (task)->base.state.fb->env +#define cco_set_env(task, the_env) ((task)->base.state.fb->env = the_env) + +#define cco_cast_task(...) \ + ((void)sizeof((__VA_ARGS__)->base.func(__VA_ARGS__)), (cco_task *)(__VA_ARGS__)) + +/* Return with error and unwind await stack; must be recovered in cco_drop section */ +#define cco_throw(error_code) \ + do { \ + cco_fiber* _fb = (cco_fiber*)_state->fb; \ + _fb->err.code = error_code; \ + _fb->err.line = __LINE__; \ + _fb->err.file = __FILE__; \ + cco_return; \ + } while (0) + +#define cco_cancel_fiber(a_fiber) \ + do { \ + cco_fiber* _fb1 = a_fiber; \ + _fb1->err.code = CCO_CANCEL; \ + _fb1->err.line = __LINE__; \ + _fb1->err.file = __FILE__; \ + cco_stop(_fb1->task); \ + } while (0) + +/* Cancel job/task and unwind await stack; MAY be stopped (recovered) in cco_drop section */ +/* Equals cco_throw(CCO_CANCEL) if a_task is in current fiber. 
*/ +#define cco_cancel(a_task) \ + do { \ + cco_fiber* _fb2 = cco_cast_task(a_task)->base.state.fb; \ + cco_cancel_fiber(_fb2); \ + if (_fb2 == (cco_fiber*)_state->fb) goto _resume; \ + } while (0) + +#define cco_cancel_group(waitgroup) \ + _cco_cancel_group((cco_fiber*)_state->fb, waitgroup) + +#define cco_cancel_all() \ + for (cco_fiber *_fbi = _state->fb->next; _fbi != (cco_fiber*)_state->fb; _fbi = _fbi->next) \ + cco_cancel_fiber(_fbi) \ + +/* Recover the thrown error; to be used in cco_drop section upon handling cco_err()->code */ +#define cco_recover \ + do { \ + cco_fiber* _fb = (cco_fiber*)_state->fb; \ + c_assert(_fb->err.code); \ + _fb->task->base.state = _fb->recover_state; \ + _fb->err.code = 0; \ + goto _resume; \ + } while (0) + +/* Asymmetric coroutine await/call */ +#define cco_await_task(...) c_MACRO_OVERLOAD(cco_await_task, __VA_ARGS__) +#define cco_await_task_1(a_task) cco_await_task_2(a_task, CCO_DONE) +#define cco_await_task_2(a_task, _awaitbits) do { \ + { cco_task* _await_task = cco_cast_task(a_task); \ + (void)sizeof(cco_env(a_task) == _state->fb->env); \ + cco_fiber* _fb = (cco_fiber*)_state->fb; \ + _await_task->base.awaitbits = (_awaitbits); \ + _await_task->base.parent_task = _fb->task; \ + _fb->task = _await_task; \ + _await_task->base.state.fb = _fb; \ + } \ + cco_suspend; \ +} while (0) + +/* Symmetric coroutine flow of control transfer */ +#define cco_yield_to(a_task) do { \ + { cco_task* _to_task = cco_cast_task(a_task); \ + (void)sizeof(cco_env(a_task) == _state->fb->env); \ + cco_fiber* _fb = (cco_fiber*)_state->fb; \ + _to_task->base.awaitbits = _fb->task->base.awaitbits; \ + _to_task->base.parent_task = NULL; \ + _fb->task = _to_task; \ + _to_task->base.state.fb = _fb; \ + } \ + cco_suspend; \ +} while (0) + +#define cco_resume(a_task) \ + _cco_resume_task(cco_cast_task(a_task)) + +static inline int _cco_resume_task(cco_task* task) + { return task->base.func(task); } + +/* + * cco_run_fiber()/cco_run_task(): Run fibers/tasks in 
parallel + */ +#define cco_new_fiber(...) c_MACRO_OVERLOAD(cco_new_fiber, __VA_ARGS__) +#define cco_new_fiber_1(task) \ + _cco_new_fiber(cco_cast_task(task), NULL, NULL) +#define cco_new_fiber_2(task, env) \ + _cco_new_fiber(cco_cast_task(task), ((void)sizeof((env) == cco_env(task)), env), NULL) + +#define cco_spawn(...) c_MACRO_OVERLOAD(cco_spawn, __VA_ARGS__) +#define cco_spawn_1(task) _cco_spawn(cco_cast_task(task), NULL, (cco_fiber*)_state->fb, NULL) +/* cco_spawn_2: spawn task next to the current fiber with an explicit env; the final _cco_spawn argument (waitgroup) is NULL, matching cco_spawn_1 and cco_spawn_3 */ +#define cco_spawn_2(task, env) \ + _cco_spawn(cco_cast_task(task), ((void)sizeof((env) == cco_env(task)), env), (cco_fiber*)_state->fb, NULL) +#define cco_spawn_3(task, env, fiber) \ + _cco_spawn(cco_cast_task(task), ((void)sizeof((env) == cco_env(task)), env), (cco_fiber*)((void)sizeof((fiber)->parent_task), fiber), NULL) + +#define cco_reset_group(waitgroup) ((waitgroup)->launch_count = 0) +#define cco_launch(...) c_MACRO_OVERLOAD(cco_launch, __VA_ARGS__) +#define cco_launch_2(task, waitgroup) cco_launch_3(task, waitgroup, NULL) +#define cco_launch_3(task, waitgroup, env) do { \ + cco_group* _wg = waitgroup; _wg->launch_count += 1; \ + _cco_spawn(cco_cast_task(task), ((void)sizeof((env) == cco_env(task)), env), (cco_fiber*)_state->fb, _wg); \ +} while (0) + +#define cco_await_all(waitgroup) \ + cco_await((waitgroup)->launch_count == 0); \ + +#define cco_await_n(waitgroup, n) do { \ + const int n_ = n; \ + (waitgroup)->await_count = n_ < 0 ? -n_ : (waitgroup)->launch_count - n_; \ + cco_await((waitgroup)->launch_count == (waitgroup)->await_count); \ +} while (0) + +#define cco_await_any(waitgroup) \ + cco_await_n(waitgroup, 1) + +#define cco_await_cancel(waitgroup) do { \ + /* Note: current fiber must not be in the waitgroup */ \ + cco_cancel_group(waitgroup); \ + cco_await_all(waitgroup); \ +} while (0) + +#define cco_run_fiber(...) 
c_MACRO_OVERLOAD(cco_run_fiber, __VA_ARGS__) +#define cco_run_fiber_1(fiber_ref) \ + for (cco_fiber** _it_ref = (cco_fiber**)((void)sizeof((fiber_ref)[0]->env), fiber_ref) \ + ; (*_it_ref = cco_execute_next(*_it_ref)) != NULL; ) +#define cco_run_fiber_2(it, fiber) \ + for (cco_fiber* it = (cco_fiber*)((void)sizeof((fiber)->env), fiber) \ + ; (it = cco_execute_next(it)) != NULL; ) + +#define cco_run_task(...) c_MACRO_OVERLOAD(cco_run_task, __VA_ARGS__) +#define cco_run_task_1(task) cco_run_fiber_2(_it_fb, cco_new_fiber_1(task)) +#define cco_run_task_2(task, env) cco_run_fiber_2(_it_fb, cco_new_fiber_2(task, env)) +#define cco_run_task_3(it, task, env) cco_run_fiber_2(it, cco_new_fiber_2(task, env)) + +#define cco_joined() \ + ((cco_fiber*)_state->fb == _state->fb->next) + +extern cco_fiber* _cco_new_fiber(cco_task* task, void* env, cco_group* wg); +extern cco_fiber* _cco_spawn(cco_task* task, void* env, cco_fiber* fb, cco_group* wg); +extern int cco_execute(cco_fiber* fb); // is a coroutine itself +extern cco_fiber* cco_execute_next(cco_fiber* fb); // resume and return the next fiber +extern void _cco_cancel_group(cco_fiber* fb, cco_group* waitgroup); + +/* -------------------------- IMPLEMENTATION ------------------------- */ +#if defined i_implement || defined STC_IMPLEMENT +#include + +int cco_execute(cco_fiber* fb) { + cco_async (fb) { + while (1) { + fb->parent_task = fb->task->base.parent_task; + fb->awaitbits = fb->task->base.awaitbits; + fb->status = fb->task->base.func(fb->task); // resume + // Note: if fb->status == CCO_DONE, fb->task may already be destructed. 
+ if (fb->err.code && (fb->status == CCO_DONE || !fb->task->base.state.drop)) { + fb->task = fb->parent_task; + if (fb->task == NULL) + break; + fb->recover_state = fb->task->base.state; + cco_stop(fb->task); + continue; + } + if (!((fb->status & ~fb->awaitbits) || (fb->task = fb->parent_task) != NULL)) + break; + cco_suspend; + } + } + + if ((uint32_t)fb->err.code & ~CCO_CANCEL) { // Allow CCO_CANCEL not to trigger error. + fprintf(stderr, __FILE__ ": error: unhandled coroutine error '%d'\n" + "%s:%d: cco_throw(%d);\n", + fb->err.code, fb->err.file, fb->err.line, fb->err.code); + exit(fb->err.code); + } + return CCO_DONE; +} + +cco_fiber* cco_execute_next(cco_fiber* fb) { + cco_fiber *_next = fb->next, *unlinked; + int ret = cco_execute(_next); + + if (ret == CCO_DONE) { + unlinked = _next; + _next = (_next == fb ? NULL : _next->next); + fb->next = _next; + c_free_n(unlinked, 1); + } + return _next; +} + +void _cco_cancel_group(cco_fiber* fb, cco_group* waitgroup) { + for (cco_fiber *fbi = fb->next; fbi != fb; fbi = fbi->next) { + cco_task* top = fbi->task; + while (top->base.parent_task) + top = top->base.parent_task; + if (top->base.state.wg == waitgroup) + cco_cancel_fiber(fbi); + } +} + +cco_fiber* _cco_new_fiber(cco_task* _task, void* env, cco_group* wg) { + cco_fiber* new_fb = c_new(cco_fiber, {.task=_task, .env=env}); + _task->base.state.fb = new_fb; + _task->base.state.wg = wg; + return (new_fb->next = new_fb); +} + +cco_fiber* _cco_spawn(cco_task* _task, void* env, cco_fiber* fb, cco_group* wg) { + cco_fiber* new_fb; + new_fb = fb->next = (fb->next == NULL ? fb : c_new(cco_fiber, {.next=fb->next})); + new_fb->task = _task; + new_fb->env = (env == NULL ? 
fb->env : env); + _task->base.state.fb = new_fb; + _task->base.state.wg = wg; + return new_fb; +} + +#undef i_implement +#endif + +/* + * Iterate containers with already defined iterator (prefer to use in coroutines only): + */ +#define cco_each(existing_it, C, cnt) \ + existing_it = C##_begin(&cnt); (existing_it).ref; C##_next(&existing_it) + +#define cco_each_reverse(existing_it, C, cnt) \ + existing_it = C##_rbegin(&cnt); (existing_it).ref; C##_rnext(&existing_it) + +/* + * Using c_filter with coroutine iterators: + */ +#define cco_flt_take(n) \ + (c_flt_take(n), fltbase.done ? (_it.base.state.pos = CCO_STATE_DROP, _it.base.state.drop = 1) : 1) + +#define cco_flt_takewhile(pred) \ + (c_flt_takewhile(pred), fltbase.done ? (_it.base.state.pos = CCO_STATE_DROP, _it.base.state.drop = 1) : 1) + + +/* + * Semaphore + */ + +typedef struct { ptrdiff_t acq_count; } cco_semaphore; + +#define cco_make_semaphore(value) (c_literal(cco_semaphore){value}) +#define cco_set_semaphore(sem, value) ((sem)->acq_count = value) +#define cco_acquire_semaphore(sem) (--(sem)->acq_count) +#define cco_release_semaphore(sem) (++(sem)->acq_count) + +#define cco_await_semaphore(sem) \ + do { \ + cco_await((sem)->acq_count > 0); \ + cco_acquire_semaphore(sem); \ + } while (0) + + +/* + * Timer + */ + +#ifdef _WIN32 + #ifdef __cplusplus + #define _c_LINKC extern "C" __declspec(dllimport) + #else + #define _c_LINKC __declspec(dllimport) + #endif + #ifndef _WINDOWS_ // windows.h + typedef long long LARGE_INTEGER; + _c_LINKC int __stdcall QueryPerformanceCounter(LARGE_INTEGER*); + //_c_LINKC int __stdcall QueryPerformanceFrequency(LARGE_INTEGER*); + #endif + #define cco_timer_freq() 10000000LL /* 1/10th microseconds */ + //static inline long long cco_timer_freq(void) { + // long long quad; + // QueryPerformanceFrequency((LARGE_INTEGER*)&quad); + // return quad; + //} + + static inline long long cco_timer_ticks(void) { + long long quad; + QueryPerformanceCounter((LARGE_INTEGER*)&quad); + return 
quad; + } +#else + #include + #define cco_timer_freq() 1000000LL + + static inline long long cco_timer_ticks(void) { /* microseconds */ + struct timeval tv; + gettimeofday(&tv, NULL); + return tv.tv_sec*cco_timer_freq() + tv.tv_usec; + } +#endif + +typedef struct { double duration; long long start_time; } cco_timer; + +static inline cco_timer cco_make_timer(double sec) { + cco_timer tm = {.duration=sec, .start_time=cco_timer_ticks()}; + return tm; +} + +static inline void cco_start_timer(cco_timer* tm, double sec) { + tm->duration = sec; + tm->start_time = cco_timer_ticks(); +} + +static inline void cco_restart_timer(cco_timer* tm) { + tm->start_time = cco_timer_ticks(); +} + +static inline double cco_timer_elapsed(cco_timer* tm) { + return (double)(cco_timer_ticks() - tm->start_time)*(1.0/cco_timer_freq()); +} + +static inline bool cco_timer_expired(cco_timer* tm) { + return cco_timer_elapsed(tm) >= tm->duration; +} + +static inline double cco_timer_remaining(cco_timer* tm) { + return tm->duration - cco_timer_elapsed(tm); +} + +#define cco_await_timer(tm, sec) \ + do { \ + cco_start_timer(tm, sec); \ + cco_await(cco_timer_expired(tm)); \ + } while (0) + +#endif // STC_COROUTINE_H_INCLUDED diff --git a/stc/cregex.h b/stc/cregex.h new file mode 100644 index 0000000..0a1ab03 --- /dev/null +++ b/stc/cregex.h @@ -0,0 +1,168 @@ +/* +This is a Unix port of the Plan 9 regular expression library, by Rob Pike. + +Copyright © 2021 Plan 9 Foundation +Copyright © 2022 Tyge Løvset, for additions made in 2022. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#ifndef STC_CREGEX_H_INCLUDED +#define STC_CREGEX_H_INCLUDED +/* + * cregex.h + * + * This is an extended version of regexp9, supporting UTF8 input, common + * shorthand character classes, ++. + */ +#include "common.h" +#include "types.h" // csview, cstr types + +enum { + CREG_DEFAULT = 0, + + /* compile-flags */ + CREG_DOTALL = 1<<0, /* dot matches newline too */ + CREG_ICASE = 1<<1, /* ignore case */ + + /* match-flags */ + CREG_FULLMATCH = 1<<2, /* as if start- and end-of-line anchors were in the pattern: "^ ... 
$" */ + CREG_NEXT = 1<<3, /* use end of previous match[0] as start of input */ + + /* replace-flags */ + CREG_STRIP = 1<<5, /* only keep the matched strings, strip rest */ + + /* limits */ + CREG_MAX_CLASSES = 16, + CREG_MAX_CAPTURES = 32, +}; + +typedef enum { + CREG_OK = 0, + CREG_NOMATCH = -1, + CREG_MATCHERROR = -2, + CREG_OUTOFMEMORY = -3, + CREG_UNMATCHEDLEFTPARENTHESIS = -4, + CREG_UNMATCHEDRIGHTPARENTHESIS = -5, + CREG_TOOMANYSUBEXPRESSIONS = -6, + CREG_TOOMANYCHARACTERCLASSES = -7, + CREG_MALFORMEDCHARACTERCLASS = -8, + CREG_MISSINGOPERAND = -9, + CREG_UNKNOWNOPERATOR = -10, + CREG_OPERANDSTACKOVERFLOW = -11, + CREG_OPERATORSTACKOVERFLOW = -12, + CREG_OPERATORSTACKUNDERFLOW = -13, +} cregex_result; + +typedef struct { + struct _Reprog* prog; + int error; +} cregex; + +typedef struct { + const cregex* regex; + csview input; + csview match[CREG_MAX_CAPTURES]; +} cregex_iter; + +#define c_match(it, re, str) \ + cregex_iter it = {.regex=re, .input={str}, .match={{0}}}; \ + cregex_match(it.regex, it.input.buf, it.match, CREG_NEXT) == CREG_OK && it.match[0].size; + +#define c_match_sv(it, re, strview) \ + cregex_iter it = {.regex=re, .input=strview, .match={{0}}}; \ + cregex_match_sv(it.regex, it.input, it.match, CREG_NEXT) == CREG_OK && it.match[0].size; + +/* compile a regex from a pattern. return CREG_OK, or negative error code on failure. */ +extern int cregex_compile_pro(cregex *re, const char* pattern, int cflags); + +#define cregex_compile(...) \ + c_ARG_4(__VA_ARGS__, cregex_compile_pro(__VA_ARGS__), cregex_compile_pro(__VA_ARGS__, CREG_DEFAULT), _too_few_args_) + +/* construct and return a regex from a pattern. the status of cregex_compile_pro is not returned; inspect the returned regex (e.g. its error field) for failure. 
*/ +STC_INLINE cregex cregex_make(const char* pattern, int cflags) { + cregex re = {0}; + cregex_compile_pro(&re, pattern, cflags); + return re; +} +STC_INLINE cregex cregex_from(const char* pattern) + { return cregex_make(pattern, CREG_DEFAULT); } + +/* destroy regex */ +extern void cregex_drop(cregex* re); + +/* number of capture groups in a regex pattern, excluding the full match capture (0) */ +extern int cregex_captures(const cregex* re); + +/* ----- Private ----- */ + +struct cregex_match_opt { csview* match; int flags; int _dummy; }; +struct cregex_replace_opt { int count; bool(*xform)(int group, csview match, cstr* out); int flags; int _dummy; }; + +extern int cregex_match_opt(const cregex* re, const char* input, const char* input_end, struct cregex_match_opt opt); +extern int cregex_match_aio_opt(const char* pattern, const char* input, const char* input_end, struct cregex_match_opt opt); +extern cstr cregex_replace_opt(const cregex* re, const char* input, const char* input_end, const char* replace, struct cregex_replace_opt opt); +extern cstr cregex_replace_aio_opt(const char* pattern, const char* input, const char* input_end, const char* replace, struct cregex_replace_opt opt); + +static inline int cregex_match_sv_opt(const cregex* re, csview sv, struct cregex_match_opt opt) + { return cregex_match_opt(re, sv.buf, sv.buf+sv.size, opt); } +static inline int cregex_match_aio_sv_opt(const char* pattern, csview sv, struct cregex_match_opt opt) + { return cregex_match_aio_opt(pattern, sv.buf, sv.buf+sv.size, opt); } +static inline cstr cregex_replace_sv_opt(const cregex* re, csview sv, const char* replace, struct cregex_replace_opt opt) + { return cregex_replace_opt(re, sv.buf, sv.buf+sv.size, replace, opt); } +static inline cstr cregex_replace_aio_sv_opt(const char* pattern, csview sv, const char* replace, struct cregex_replace_opt opt) + { return cregex_replace_aio_opt(pattern, sv.buf, sv.buf+sv.size, replace, opt); } + +/* match: return CREG_OK, 
CREG_NOMATCH or CREG_MATCHERROR. */ +#define _cregex_match(re, str, ...) cregex_match_opt(re, str, NULL, (struct cregex_match_opt){__VA_ARGS__}) +#define _cregex_match_sv(re, sv, ...) cregex_match_sv_opt(re, sv, (struct cregex_match_opt){__VA_ARGS__}) +/* all-in-one: compile RE pattern + match + free */ +#define _cregex_match_aio(pattern, str, ...) cregex_match_aio_opt(pattern, str, NULL, (struct cregex_match_opt){__VA_ARGS__}) +#define _cregex_match_aio_sv(pattern, sv, ...) cregex_match_aio_sv_opt(pattern, sv, (struct cregex_match_opt){__VA_ARGS__}) + +/* replace input with a string using regular expression */ +#define _cregex_replace(re, str, replace, ...) cregex_replace_opt(re, str, NULL, replace, (struct cregex_replace_opt){__VA_ARGS__}) +#define _cregex_replace_sv(re, sv, replace, ...) cregex_replace_sv_opt(re, sv, replace, (struct cregex_replace_opt){__VA_ARGS__}) +/* all-in-one: compile RE string pattern + match + replace + free */ +#define _cregex_replace_aio(pattern, str, replace, ...) cregex_replace_aio_opt(pattern, str, NULL, replace, (struct cregex_replace_opt){__VA_ARGS__}) +#define _cregex_replace_aio_sv(pattern, sv, replace, ...) cregex_replace_aio_sv_opt(pattern, sv, replace, (struct cregex_replace_opt){__VA_ARGS__}) + +/* ----- API functions ---- */ + +#define cregex_match(...) _cregex_match(__VA_ARGS__, ._dummy=0) +#define cregex_match_sv(...) _cregex_match_sv(__VA_ARGS__, ._dummy=0) +#define cregex_match_aio(...) _cregex_match_aio(__VA_ARGS__, ._dummy=0) +#define cregex_match_aio_sv(...) _cregex_match_aio_sv(__VA_ARGS__, ._dummy=0) +#define cregex_is_match(re, str) (_cregex_match(re, str, 0) == CREG_OK) + +#define cregex_replace(...) _cregex_replace(__VA_ARGS__, ._dummy=0) +#define cregex_replace_sv(...) _cregex_replace_sv(__VA_ARGS__, ._dummy=0) +#define cregex_replace_aio(...) _cregex_replace_aio(__VA_ARGS__, ._dummy=0) +#define cregex_replace_aio_sv(...) 
_cregex_replace_aio_sv(__VA_ARGS__, ._dummy=0) + +#endif // STC_CREGEX_H_INCLUDED + +#if defined STC_IMPLEMENT || defined i_implement || defined i_import + #include "priv/linkage.h" + #include "priv/cregex_prv.c" + #if defined i_import + #include "priv/utf8_prv.c" + #include "priv/cstr_prv.c" + #endif + #include "priv/linkage2.h" +#endif diff --git a/stc/cspan.h b/stc/cspan.h new file mode 100644 index 0000000..6538518 --- /dev/null +++ b/stc/cspan.h @@ -0,0 +1,479 @@ +/* + MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +/* +#include +#include +#include +use_cspan(Span2f, float, 2); +use_cspan(Intspan, int); + +int demo1() { + float raw[4*5]; + Span2f ms = cspan_md(raw, 4, 5); + + for (int i=0; ishape[0]==0 ? 
NULL : self->data, ._s=self}; \ + } \ + STC_INLINE Self##_iter Self##_end(const Self* self) { \ + (void)self; \ + return c_literal(Self##_iter){0}; \ + } \ + STC_INLINE void Self##_next(Self##_iter* it) { \ + isize off = it->_s->stride.d[RANK - 1]; \ + bool done = _cspan_next##RANK(it->pos, it->_s->shape, it->_s->stride.d, RANK, &off); \ + if (done) it->ref = NULL; else it->ref += off; \ + } \ + STC_INLINE isize Self##_size(const Self* self) \ + { return cspan_size(self); } \ + STC_INLINE Self Self##_transposed(Self sp) \ + { _cspan_transpose(sp.shape, sp.stride.d, cspan_rank(&sp)); return sp; } \ + STC_INLINE Self Self##_swapped_axes(Self sp, int ax1, int ax2) \ + { _cspan_swap_axes(sp.shape, sp.stride.d, cspan_rank(&sp), ax1, ax2); return sp; } \ + struct stc_nostruct + +#define use_cspan_with_eq_4(Self, T, i_eq, RANK) \ + use_cspan_3(Self, T, RANK); \ + STC_INLINE bool Self##_eq(const Self* x, const Self* y) { \ + if (memcmp(x->shape, y->shape, sizeof x->shape) != 0) \ + return false; \ + for (Self##_iter _i = Self##_begin(x), _j = Self##_begin(y); \ + _i.ref != NULL; Self##_next(&_i), Self##_next(&_j)) \ + { if (!(i_eq(_i.ref, _j.ref))) return false; } \ + return true; \ + } \ + STC_INLINE bool Self##_equals(Self sp1, Self sp2) \ + { return Self##_eq(&sp1, &sp2); } \ + struct stc_nostruct + +#define use_cspan2(Self, T) use_cspan_2(Self, T); use_cspan_3(Self##2, T, 2) +#define use_cspan3(Self, T) use_cspan2(Self, T); use_cspan_3(Self##3, T, 3) +#define use_cspan2_with_eq(Self, T, eq) use_cspan_with_eq_3(Self, T, eq); \ + use_cspan_with_eq_4(Self##2, T, eq, 2) +#define use_cspan3_with_eq(Self, T, eq) use_cspan2_with_eq(Self, T, eq); \ + use_cspan_with_eq_4(Self##3, T, eq, 3) +#define use_cspan_tuple(N) typedef struct { _istride d[N]; } cspan_tuple##N +use_cspan_tuple(1); use_cspan_tuple(2); +use_cspan_tuple(3); use_cspan_tuple(4); +use_cspan_tuple(5); use_cspan_tuple(6); +use_cspan_tuple(7); use_cspan_tuple(8); + + +// Construct a cspan from a pointer+size 
+#define cspan_from_n(dataptr, n) \ + {.data=dataptr, \ + .shape={(_istride)(n)}, \ + .stride=c_literal(cspan_tuple1){.d={1}}} + +// Create a 1d-span in the local lexical scope. N must be a compile-time constant. +#define cspan_by_copy(dataptr, N) \ + cspan_from_n(memcpy((char[(N)*sizeof *(dataptr)]){0}, dataptr, (N)*sizeof *(dataptr)), N) + +// Create a zeroed out 1d-span in the local lexical scope. N must be a compile-time constant. +#define cspan_zeros(Span, N) \ + ((Span)cspan_from_n((Span##_value[N]){0}, N)) + +// Create a global scope 1d-span from constant initializer list, otherwise like c_make(Span, ...). +#define cspan_make(Span, ...) \ + ((Span)cspan_from_n(c_make_array(Span##_value, __VA_ARGS__), \ + sizeof((Span##_value[])__VA_ARGS__)/sizeof(Span##_value))) + +// Make 1d-span from a c-array. +#define cspan_from_array(array) \ + cspan_from_n(array, c_arraylen(array)) + +// Make 1d-span from a vec or stack container. +#define cspan_from_vec(container) \ + cspan_from_n((container)->data, (container)->size) + +// Make a 1d-sub-span from a 1d-span +#define cspan_subspan(self, offset, count) \ + {.data=cspan_at(self, offset), \ + .shape={(_istride)(count)}, \ + .stride=(self)->stride} + +// Accessors +// +#define cspan_size(self) _cspan_size((self)->shape, cspan_rank(self)) +#define cspan_rank(self) c_arraylen((self)->shape) // constexpr +#define cspan_at(self, ...) ((self)->data + cspan_index(self, __VA_ARGS__)) +#define cspan_front(self) ((self)->data) +#define cspan_back(self) ((self)->data + cspan_size(self) - 1) + +#define cspan_index(...) cspan_index_fn(__VA_ARGS__, c_COMMA_N(cspan_index_3d), c_COMMA_N(cspan_index_2d), \ + c_COMMA_N(cspan_index_1d),)(__VA_ARGS__) +#define cspan_index_fn(self, i,j,k,n, ...) 
c_TUPLE_AT_1(n, cspan_index_nd,) +#define cspan_index_1d(self, i) (c_static_assert(cspan_rank(self) == 1), \ + c_assert((i) < (self)->shape[0]), \ + (i)*(self)->stride.d[0]) +#define cspan_index_2d(self, i,j) (c_static_assert(cspan_rank(self) == 2), \ + c_assert((i) < (self)->shape[0] && (j) < (self)->shape[1]), \ + (i)*(self)->stride.d[0] + (j)*(self)->stride.d[1]) +#define cspan_index_3d(self, i,j,k) (c_static_assert(cspan_rank(self) == 3), \ + c_assert((i) < (self)->shape[0] && (j) < (self)->shape[1] && (k) < (self)->shape[2]), \ + (i)*(self)->stride.d[0] + (j)*(self)->stride.d[1] + (k)*(self)->stride.d[2]) +#define cspan_index_nd(self, ...) _cspan_index((self)->shape, (self)->stride.d, c_make_array(isize, {__VA_ARGS__}), \ + (c_static_assert(cspan_rank(self) == c_NUMARGS(__VA_ARGS__)), cspan_rank(self))) + + +// Multi-dimensional span constructors +// +typedef enum {c_ROWMAJOR, c_COLMAJOR, c_STRIDED} cspan_layout; + +#define cspan_is_colmajor(self) \ + _cspan_is_layout(c_COLMAJOR, (self)->shape, (self)->stride.d, cspan_rank(self)) +#define cspan_is_rowmajor(self) \ + _cspan_is_layout(c_ROWMAJOR, (self)->shape, (self)->stride.d, cspan_rank(self)) +#define cspan_get_layout(self) \ + (cspan_is_rowmajor(self) ? c_ROWMAJOR : cspan_is_colmajor(self) ? c_COLMAJOR : c_STRIDED) + +#define cspan_md(dataptr, ...) \ + cspan_md_layout(c_ROWMAJOR, dataptr, __VA_ARGS__) + +// Span2 sp1 = cspan_md(data, 30, 50); +// Span2 sp2 = {data, cspan_shape(15, 25), cspan_strides(50*2, 2)}; // every second in each dim +#define cspan_shape(...) {__VA_ARGS__} +#define cspan_strides(...) {.d={__VA_ARGS__}} + +#define cspan_md_layout(layout, dataptr, ...) 
\ + {.data=dataptr, \ + .shape={__VA_ARGS__}, \ + .stride=*(c_JOIN(cspan_tuple,c_NUMARGS(__VA_ARGS__))*) \ + _cspan_shape2stride(layout, c_make_array(_istride, {__VA_ARGS__}), c_NUMARGS(__VA_ARGS__))} + +// Transpose matrix +#define cspan_transpose(self) \ + _cspan_transpose((self)->shape, (self)->stride.d, cspan_rank(self)) + +// Swap two matrix axes +#define cspan_swap_axes(self, ax1, ax2) \ + _cspan_swap_axes((self)->shape, (self)->stride.d, cspan_rank(self), ax1, ax2) + +// Set all span elements to value. +#define cspan_set_all(Span, self, value) do { \ + Span##_value _v = value; \ + for (c_each_3(_it, Span, *(self))) *_it.ref = _v; \ +} while (0) + +// General slicing function. +// +#define c_END (_istride)(((size_t)1 << (sizeof(_istride)*8 - 1)) - 1) +#define c_ALL 0,c_END + +#define cspan_slice(self, Outspan, ...) \ + Outspan##_slice_((self)->data, (self)->shape, (self)->stride.d, \ + c_make_array2d(const isize, 3, {__VA_ARGS__}), \ + (c_static_assert(cspan_rank(self) == sizeof((isize[][3]){__VA_ARGS__})/sizeof(isize[3])), cspan_rank(self))) + +// submd#(): Reduces rank, fully typesafe + range checked by default +// int ms3[N1][N2][N3]; +// int (*ms2)[N3] = ms3[1]; // traditional, lose range test/info. VLA. +// Span3 ms3 = cspan_md(data, N1,N2,N3); // Uses cspan_md instead. +// *cspan_at(&ms3, 1,1,1) = 42; +// Span2 ms2 = cspan_slice(&ms3, Span2, {1}, {c_ALL}, {c_ALL}); +// Span2 ms2 = cspan_submd3(&ms3, 1); // Same as line above, optimized. +#define cspan_submd2(self, x) \ + {.data=cspan_at(self, x, 0), \ + .shape={(self)->shape[1]}, \ + .stride=c_literal(cspan_tuple1){.d={(self)->stride.d[1]}}} + +#define cspan_submd3(...) 
c_MACRO_OVERLOAD(cspan_submd3, __VA_ARGS__) +#define cspan_submd3_2(self, x) \ + {.data=cspan_at(self, x, 0, 0), \ + .shape={(self)->shape[1], (self)->shape[2]}, \ + .stride=c_literal(cspan_tuple2){.d={(self)->stride.d[1], (self)->stride.d[2]}}} +#define cspan_submd3_3(self, x, y) \ + {.data=cspan_at(self, x, y, 0), \ + .shape={(self)->shape[2]}, \ + .stride=c_literal(cspan_tuple1){.d={(self)->stride.d[2]}}} + +#define cspan_submd4(...) c_MACRO_OVERLOAD(cspan_submd4, __VA_ARGS__) +#define cspan_submd4_2(self, x) \ + {.data=cspan_at(self, x, 0, 0, 0), \ + .shape={(self)->shape[1], (self)->shape[2], (self)->shape[3]}, \ + .stride=c_literal(cspan_tuple3){.d={(self)->stride.d[1], (self)->stride.d[2], (self)->stride.d[3]}}} +#define cspan_submd4_3(self, x, y) \ + {.data=cspan_at(self, x, y, 0, 0), \ + .shape={(self)->shape[2], (self)->shape[3]}, \ + .stride=c_literal(cspan_tuple2){.d={(self)->stride.d[2], (self)->stride.d[3]}}} +#define cspan_submd4_4(self, x, y, z) \ + {.data=cspan_at(self, x, y, z, 0), \ + .shape={(self)->shape[3]}, \ + .stride=c_literal(cspan_tuple1){.d={(self)->stride.d[3]}}} + +#define cspan_print(...) c_MACRO_OVERLOAD(cspan_print, __VA_ARGS__) +#define cspan_print_3(Span, fmt, span) \ + cspan_print_4(Span, fmt, span, stdout) +#define cspan_print_4(Span, fmt, span, fp) \ + cspan_print_5(Span, fmt, span, fp, "[]") +#define cspan_print_5(Span, fmt, span, fp, brackets) \ + cspan_print_6(Span, fmt, span, fp, brackets, c_EXPAND) +#define cspan_print_complex(Span, prec, span, fp) \ + cspan_print_6(Span, "%." #prec "f%+." 
#prec "fi", span, fp, "[]", cspan_CMPLX_FLD) +#define cspan_CMPLX_FLD(x) creal(x), cimag(x) + +#define cspan_print_6(Span, fmt, span, fp, brackets, field) do { \ + const Span _s = span; \ + const char *_f = fmt, *_b = brackets; \ + FILE* _fp = fp; \ + int _w, _max = 0; \ + char _res[2][20], _fld[64]; \ + for (c_each_3(_it, Span, _s)) { \ + _w = snprintf(NULL, 0ULL, _f, field(_it.ref[0])); \ + if (_w > _max) _max = _w; \ + } \ + for (c_each_3(_it, Span, _s)) { \ + _cspan_print_assist(_it.pos, _s.shape, cspan_rank(&_s), _b, _res); \ + _w = _max + (_it.pos[cspan_rank(&_s) - 1] > 0); \ + snprintf(_fld, sizeof _fld, _f, field(_it.ref[0])); \ + fprintf(_fp, "%s%*s%s", _res[0], _w, _fld, _res[1]); \ + } \ +} while (0) + +/* ----- PRIVATE ----- */ + +STC_INLINE isize _cspan_size(const _istride shape[], int rank) { + isize size = shape[0]; + while (--rank) size *= shape[rank]; + return size; +} + +STC_INLINE void _cspan_swap_axes(_istride shape[], _istride stride[], + int rank, int ax1, int ax2) { + (void)rank; + c_assert(c_uless(ax1, rank) & c_uless(ax2, rank)); + c_swap(shape + ax1, shape + ax2); + c_swap(stride + ax1, stride + ax2); +} + +STC_INLINE void _cspan_transpose(_istride shape[], _istride stride[], int rank) { + for (int i = 0; i < --rank; ++i) { + c_swap(shape + i, shape + rank); + c_swap(stride + i, stride + rank); + } +} + +STC_INLINE isize _cspan_index(const _istride shape[], const _istride stride[], + const isize args[], int rank) { + isize off = 0; + (void)shape; + while (rank-- != 0) { + c_assert(args[rank] < shape[rank]); + off += args[rank]*stride[rank]; + } + return off; +} + +STC_API void _cspan_print_assist(_istride pos[], const _istride shape[], const int rank, + const char* brackets, char result[2][20]); + +STC_API bool _cspan_nextN(_istride pos[], const _istride shape[], const _istride stride[], + int rank, isize* off); +#define _cspan_next1(pos, shape, stride, rank, off) (++pos[0] == shape[0]) +#define _cspan_next2(pos, shape, stride, rank, off) 
(++pos[1] == shape[1] && \ + (pos[1] = 0, *off += stride[0] - (isize)shape[1]*stride[1], ++pos[0] == shape[0])) +#define _cspan_next3(pos, shape, stride, rank, off) (++pos[2] == shape[2] && \ + (pos[2] = 0, *off += stride[1] - (isize)shape[2]*stride[2], ++pos[1] == shape[1]) && \ + (pos[1] = 0, *off += stride[0] - (isize)shape[1]*stride[1], ++pos[0] == shape[0])) +#define _cspan_next4 _cspan_nextN +#define _cspan_next5 _cspan_nextN +#define _cspan_next6 _cspan_nextN +#define _cspan_next7 _cspan_nextN +#define _cspan_next8 _cspan_nextN + +STC_API isize _cspan_slice(_istride oshape[], _istride ostride[], int* orank, + const _istride shape[], const _istride stride[], + const isize args[][3], int rank); +STC_API _istride* _cspan_shape2stride(cspan_layout layout, _istride shape[], int rank); +STC_API bool _cspan_is_layout(cspan_layout layout, const _istride shape[], const _istride strides[], int rank); + +#endif // STC_CSPAN_H_INCLUDED + +/* --------------------- IMPLEMENTATION --------------------- */ +#if defined i_implement + +STC_DEF bool _cspan_is_layout(cspan_layout layout, const _istride shape[], const _istride strides[], int rank) { + _istride tmpshape[16]; // 16 = "max" rank + size_t sz = (size_t)rank*sizeof(_istride); + memcpy(tmpshape, shape, sz); + return memcmp(strides, _cspan_shape2stride(layout, tmpshape, rank), sz) == 0; +} + +STC_DEF void _cspan_print_assist(_istride pos[], const _istride shape[], const int rank, + const char* brackets, char result[2][20]) { + int n = 0, j = 0, r = rank - 1; + memset(result, 0, 32); + + // left braces: + while (n <= r && pos[r - n] == 0) + ++n; + if (n) for (; j < rank; ++j) + result[0][j] = j < rank - n ? 
' ' : brackets[0]; + + // right braces: + for (j = 0; r >= 0 && pos[r] + 1 == shape[r]; --r, ++j) + result[1][j] = brackets[1]; + + // comma and newlines: + n = (j > 0) + ((j > 1) & (j < rank)); + if (brackets[2] && j < rank) + result[1][j++] = brackets[2]; // comma + while (n--) + result[1][j++] = '\n'; +} + +STC_DEF bool _cspan_nextN(_istride pos[], const _istride shape[], const _istride stride[], + int rank, isize* off) { + ++pos[--rank]; + for (; rank && pos[rank] == shape[rank]; --rank) { + pos[rank] = 0; ++pos[rank - 1]; + *off += stride[rank - 1] - (isize)shape[rank]*stride[rank]; + } + return pos[rank] == shape[rank]; +} + +STC_DEF _istride* _cspan_shape2stride(cspan_layout layout, _istride shpstri[], int rank) { + int i, inc; + if (layout == c_COLMAJOR) i = 0, inc = 1; + else i = rank - 1, inc = -1; + _istride k = 1, s1 = shpstri[i], s2; + + shpstri[i] = 1; + while (--rank) { + i += inc; + s2 = shpstri[i]; + shpstri[i] = (k *= s1); + s1 = s2; + } + return shpstri; +} + +STC_DEF isize _cspan_slice(_istride oshape[], _istride ostride[], int* orank, + const _istride shape[], const _istride stride[], + const isize args[][3], int rank) { + isize end, off = 0; + int i = 0, oi = 0; + + for (; i < rank; ++i) { + off += args[i][0]*stride[i]; + switch (args[i][1]) { + case 0: c_assert(c_uless(args[i][0], shape[i])); continue; + case c_END: end = shape[i]; break; + default: end = args[i][1]; + } + oshape[oi] = (_istride)(end - args[i][0]); + ostride[oi] = stride[i]; + c_assert((oshape[oi] > 0) & !c_uless(shape[i], end)); + if (args[i][2] > 0) { + ostride[oi] *= (_istride)args[i][2]; + oshape[oi] = (oshape[oi] - 1)/(_istride)args[i][2] + 1; + } + ++oi; + } + *orank = oi; + return off; +} +#endif // IMPLEMENT +#include "priv/linkage2.h" diff --git a/stc/cstr.h b/stc/cstr.h new file mode 100644 index 0000000..d7c3556 --- /dev/null +++ b/stc/cstr.h @@ -0,0 +1,51 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of 
charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* A string type with short string optimization in C99. + * Stores up to a 22 bytes long string inside a 24 bytes string representation (x64). + */ +#define i_header // external linkage by default. override with i_static. 
+#include "priv/linkage.h" + +#ifndef STC_CSTR_H_INCLUDED +#define STC_CSTR_H_INCLUDED + +#include "common.h" +#include "types.h" +#include "priv/utf8_prv.h" +#include "priv/cstr_prv.h" + +#endif // STC_CSTR_H_INCLUDED + +#if defined i_implement || \ + defined STC_CSTR_CORE || \ + defined STC_CSTR_IO || \ + defined STC_CSTR_UTF8 + #include "priv/cstr_prv.c" +#endif // i_implement + +#if defined i_import || defined STC_CSTR_UTF8 + #include "priv/utf8_prv.c" +#endif + +#include "priv/linkage2.h" diff --git a/stc/csview.h b/stc/csview.h new file mode 100644 index 0000000..7b88cb0 --- /dev/null +++ b/stc/csview.h @@ -0,0 +1,246 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +// csview is a non-zero-terminated string view. 
+ +#ifndef STC_CSVIEW_H_INCLUDED +#define STC_CSVIEW_H_INCLUDED + +#include "common.h" +#include "types.h" +#include "priv/utf8_prv.h" + +#define csview_init() c_sv_1("") +#define csview_drop(p) c_default_drop(p) +#define csview_clone(sv) c_default_clone(sv) + +csview_iter csview_advance(csview_iter it, isize u8pos); +csview csview_subview_pro(csview sv, isize pos, isize n); +csview csview_token(csview sv, const char* sep, isize* pos); +csview csview_u8_subview(csview sv, isize u8pos, isize u8len); +csview csview_u8_tail(csview sv, isize u8len); +csview_iter csview_u8_at(csview sv, isize u8pos); + +STC_INLINE csview csview_from(const char* str) + { return c_literal(csview){str, c_strlen(str)}; } +STC_INLINE csview csview_from_n(const char* str, isize n) + { return c_literal(csview){str, n}; } + +STC_INLINE void csview_clear(csview* self) { *self = csview_init(); } +STC_INLINE isize csview_size(csview sv) { return sv.size; } +STC_INLINE bool csview_is_empty(csview sv) { return sv.size == 0; } + +STC_INLINE bool csview_equals_sv(csview sv1, csview sv2) + { return sv1.size == sv2.size && !c_memcmp(sv1.buf, sv2.buf, sv1.size); } + +STC_INLINE bool csview_equals(csview sv, const char* str) + { return csview_equals_sv(sv, c_sv_2(str, c_strlen(str))); } + +STC_INLINE size_t csview_hash(const csview *self) + { return c_basehash_n(self->buf, self->size); } + +STC_INLINE isize csview_find_sv(csview sv, csview search) { + char* res = c_strnstrn(sv.buf, sv.size, search.buf, search.size); + return res ? 
(res - sv.buf) : c_NPOS; +} + +STC_INLINE isize csview_find(csview sv, const char* str) + { return csview_find_sv(sv, c_sv_2(str, c_strlen(str))); } + +STC_INLINE bool csview_contains(csview sv, const char* str) + { return csview_find(sv, str) != c_NPOS; } + +STC_INLINE bool csview_starts_with(csview sv, const char* str) { + isize n = c_strlen(str); + return n <= sv.size && !c_memcmp(sv.buf, str, n); +} + +STC_INLINE bool csview_ends_with(csview sv, const char* str) { + isize n = c_strlen(str); + return n <= sv.size && !c_memcmp(sv.buf + sv.size - n, str, n); +} + +STC_INLINE csview csview_subview(csview sv, isize pos, isize len) { + c_assert(((size_t)pos <= (size_t)sv.size) & (len >= 0)); + if (pos + len > sv.size) len = sv.size - pos; + sv.buf += pos, sv.size = len; + return sv; +} + +STC_INLINE csview csview_slice(csview sv, isize p1, isize p2) { + c_assert(((size_t)p1 <= (size_t)p2) & ((size_t)p1 <= (size_t)sv.size)); + if (p2 > sv.size) p2 = sv.size; + sv.buf += p1, sv.size = p2 - p1; + return sv; +} + +STC_INLINE csview csview_trim_start(csview sv) /* strips leading bytes <= ' '; compared as unsigned so UTF-8 bytes >= 0x80 are kept */ + { while (sv.size && (unsigned char)*sv.buf <= ' ') ++sv.buf, --sv.size; return sv; } + +STC_INLINE csview csview_trim_end(csview sv) /* strips trailing bytes <= ' '; compared as unsigned so UTF-8 bytes >= 0x80 are kept */ + { while (sv.size && (unsigned char)sv.buf[sv.size - 1] <= ' ') --sv.size; return sv; } + +STC_INLINE csview csview_trim(csview sv) + { return csview_trim_end(csview_trim_start(sv)); } + +STC_INLINE csview csview_tail(csview sv, isize len) + { return csview_subview(sv, sv.size - len, len); } + +/* utf8 iterator */ +STC_INLINE csview_iter csview_begin(const csview* self) { + csview_iter it = {.u8 = {{self->buf, utf8_chr_size(self->buf)}, + self->buf + self->size}}; + return it; +} +STC_INLINE csview_iter csview_end(const csview* self) { + (void)self; csview_iter it = {0}; return it; +} +STC_INLINE void csview_next(csview_iter* it) { + it->ref += it->chr.size; + if (it->ref == it->u8.end) it->ref = NULL; /* end reached: do not inspect the byte past the view */ + else it->chr.size = utf8_chr_size(it->ref); +} + +/* utf8 */ +STC_INLINE csview csview_u8_from(const char* str, 
isize u8pos, isize u8len) + { return utf8_subview(str, u8pos, u8len); } + +STC_INLINE isize csview_u8_size(csview sv) + { return utf8_count_n(sv.buf, sv.size); } + +STC_INLINE bool csview_u8_valid(csview sv) // requires linking with utf8 symbols + { return utf8_valid_n(sv.buf, sv.size); } + +#define c_fortoken(...) for (c_token(__VA_ARGS__)) // [deprecated] + +#define c_token_sv(it, separator, sv) \ + struct { csview input, token; const char* sep; isize pos; } \ + it = {.input=sv, .sep=separator} ; \ + it.pos <= it.input.size && (it.token = csview_token(it.input, it.sep, &it.pos)).buf ; + +#define c_token(it, separator, str) \ + c_token_sv(it, separator, csview_from(str)) + +/* ---- Container helper functions ---- */ + +STC_INLINE int csview_cmp(const csview* x, const csview* y) { + isize n = x->size < y->size ? x->size : y->size; + int c = c_memcmp(x->buf, y->buf, n); + return c ? c : c_default_cmp(&x->size, &y->size); +} + +STC_INLINE bool csview_eq(const csview* x, const csview* y) + { return x->size == y->size && !c_memcmp(x->buf, y->buf, x->size); } + +/* ---- case insensitive ---- */ + +STC_INLINE bool csview_iequals_sv(csview sv1, csview sv2) + { return sv1.size == sv2.size && !utf8_icompare(sv1, sv2); } + +STC_INLINE bool csview_iequals(csview sv, const char* str) + { return csview_iequals_sv(sv, c_sv(str, c_strlen(str))); } + +STC_INLINE bool csview_ieq(const csview* x, const csview* y) + { return csview_iequals_sv(*x, *y); } + +STC_INLINE int csview_icmp(const csview* x, const csview* y) + { return utf8_icompare(*x, *y); } + +STC_INLINE bool csview_istarts_with(csview sv, const char* str) { + isize n = c_strlen(str); /* compare only the first n bytes of sv, not the whole view */ + return n <= sv.size && !utf8_icompare(csview_from_n(sv.buf, n), c_sv(str, n)); +} + +STC_INLINE bool csview_iends_with(csview sv, const char* str) { + isize n = c_strlen(str); /* bounded compare: sv is not zero-terminated, so pass explicit lengths */ + return n <= sv.size && !utf8_icompare(csview_from_n(sv.buf + sv.size - n, n), c_sv(str, n)); +} + +#endif // STC_CSVIEW_H_INCLUDED + +/* -------------------------- IMPLEMENTATION ------------------------- */ +#if 
defined STC_IMPLEMENT || defined i_implement +#ifndef STC_CSVIEW_C_INCLUDED +#define STC_CSVIEW_C_INCLUDED + +csview_iter csview_advance(csview_iter it, isize u8pos) { + int inc = 1; + if (u8pos < 0) u8pos = -u8pos, inc = -1; + while (u8pos && it.ref != it.u8.end) + u8pos -= (*(it.ref += inc) & 0xC0) != 0x80; + if (it.ref == it.u8.end) it.ref = NULL; + else it.chr.size = utf8_chr_size(it.ref); + return it; +} + +csview csview_subview_pro(csview sv, isize pos, isize len) { + if (pos < 0) { + pos += sv.size; + if (pos < 0) pos = 0; + } + if (pos > sv.size) pos = sv.size; + if (pos + len > sv.size) len = sv.size - pos; + sv.buf += pos, sv.size = len; + return sv; +} + +csview csview_token(csview sv, const char* sep, isize* pos) { + isize sep_size = c_strlen(sep); + csview slice = {sv.buf + *pos, sv.size - *pos}; + const char* res = c_strnstrn(slice.buf, slice.size, sep, sep_size); + csview tok = {slice.buf, res ? (res - slice.buf) : slice.size}; + *pos += tok.size + sep_size; + return tok; +} + +csview csview_u8_subview(csview sv, isize u8pos, isize u8len) { + const char* s, *end = &sv.buf[sv.size]; + while ((u8pos > 0) & (sv.buf != end)) + u8pos -= (*++sv.buf & 0xC0) != 0x80; + s = sv.buf; + while ((u8len > 0) & (s != end)) + u8len -= (*++s & 0xC0) != 0x80; + sv.size = s - sv.buf; return sv; +} + +csview csview_u8_tail(csview sv, isize u8len) { + const char* p = &sv.buf[sv.size]; + while (u8len && p != sv.buf) + u8len -= (*--p & 0xC0) != 0x80; + sv.size -= p - sv.buf, sv.buf = p; + return sv; +} + +csview_iter csview_u8_at(csview sv, isize u8pos) { + const char *end = &sv.buf[sv.size]; + while ((u8pos > 0) & (sv.buf != end)) + u8pos -= (*++sv.buf & 0xC0) != 0x80; + sv.size = utf8_chr_size(sv.buf); + c_assert(sv.buf != end); + return c_literal(csview_iter){.u8 = {sv, end}}; +} +#endif // STC_CSVIEW_C_INCLUDED +#endif // i_implement + +#if defined i_import + #include "priv/utf8_prv.c" +#endif diff --git a/stc/deque.h b/stc/deque.h new file mode 100644 index 
0000000..4573403 --- /dev/null +++ b/stc/deque.h @@ -0,0 +1,205 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +// Deque - double ended queue. Implemented as a ring buffer, extension of queue. 
+ +#include "priv/linkage.h" +#include "types.h" + +#ifndef STC_DEQUE_H_INCLUDED +#define STC_DEQUE_H_INCLUDED +#include "common.h" +#include <stdlib.h> +#endif // STC_DEQUE_H_INCLUDED + +#ifndef _i_prefix + #define _i_prefix deque_ +#endif +#define _pop _pop_front +#define _pull _pull_front +#include "priv/template.h" +#include "priv/queue_prv.h" +#undef _pop +#undef _pull + +STC_API _m_value* _c_MEMB(_push_front)(Self* self, _m_value value); +STC_API _m_iter _c_MEMB(_insert_n)(Self* self, isize idx, const _m_value* arr, isize n); +STC_API _m_iter _c_MEMB(_insert_uninit)(Self* self, isize idx, isize n); +STC_API void _c_MEMB(_erase_n)(Self* self, isize idx, isize n); + +STC_INLINE const _m_value* +_c_MEMB(_at)(const Self* self, isize idx) { + c_assert(c_uless(idx, _c_MEMB(_size)(self))); + return self->cbuf + _cbuf_topos(self, idx); +} + +STC_INLINE _m_value* +_c_MEMB(_at_mut)(Self* self, isize idx) + { return (_m_value*)_c_MEMB(_at)(self, idx); } + +STC_INLINE _m_value* +_c_MEMB(_push_back)(Self* self, _m_value val) + { return _c_MEMB(_push)(self, val); } + +STC_INLINE void +_c_MEMB(_pop_back)(Self* self) { + c_assert(!_c_MEMB(_is_empty)(self)); + self->end = (self->end - 1) & self->capmask; + i_keydrop((self->cbuf + self->end)); +} + +STC_INLINE _m_value _c_MEMB(_pull_back)(Self* self) { // move back out of deque + c_assert(!_c_MEMB(_is_empty)(self)); + self->end = (self->end - 1) & self->capmask; + return self->cbuf[self->end]; +} + +STC_INLINE _m_iter +_c_MEMB(_insert_at)(Self* self, _m_iter it, const _m_value val) { + isize idx = _cbuf_toidx(self, it.pos); + return _c_MEMB(_insert_n)(self, idx, &val, 1); +} + +STC_INLINE _m_iter +_c_MEMB(_erase_at)(Self* self, _m_iter it) { + _c_MEMB(_erase_n)(self, _cbuf_toidx(self, it.pos), 1); + if (it.pos == self->end) it.ref = NULL; + return it; +} + +STC_INLINE _m_iter +_c_MEMB(_erase_range)(Self* self, _m_iter it1, _m_iter it2) { + isize idx1 = _cbuf_toidx(self, it1.pos); + isize idx2 = _cbuf_toidx(self, it2.pos); + 
_c_MEMB(_erase_n)(self, idx1, idx2 - idx1); + if (it1.pos == self->end) it1.ref = NULL; + return it1; +} + +#if !defined i_no_emplace +STC_API _m_iter +_c_MEMB(_emplace_n)(Self* self, isize idx, const _m_raw* raw, isize n); + +STC_INLINE _m_value* +_c_MEMB(_emplace_front)(Self* self, const _m_raw raw) + { return _c_MEMB(_push_front)(self, i_keyfrom(raw)); } + +STC_INLINE _m_value* +_c_MEMB(_emplace_back)(Self* self, const _m_raw raw) + { return _c_MEMB(_push)(self, i_keyfrom(raw)); } + +STC_INLINE _m_iter +_c_MEMB(_emplace_at)(Self* self, _m_iter it, const _m_raw raw) + { return _c_MEMB(_insert_at)(self, it, i_keyfrom(raw)); } +#endif + +#if defined _i_has_eq +STC_API _m_iter _c_MEMB(_find_in)(const Self* self, _m_iter p1, _m_iter p2, _m_raw raw); + +STC_INLINE _m_iter +_c_MEMB(_find)(const Self* self, _m_raw raw) { + return _c_MEMB(_find_in)(self, _c_MEMB(_begin)(self), _c_MEMB(_end)(self), raw); +} +#endif // _i_has_eq + +#if defined _i_has_cmp +#include "priv/sort_prv.h" +#endif // _i_has_cmp + +/* -------------------------- IMPLEMENTATION ------------------------- */ +#if defined i_implement + +STC_DEF _m_value* +_c_MEMB(_push_front)(Self* self, _m_value value) { + isize start = (self->start - 1) & self->capmask; + if (start == self->end) { // full + if (!_c_MEMB(_reserve)(self, self->capmask + 3)) // => 2x expand + return NULL; + start = (self->start - 1) & self->capmask; + } + _m_value *v = self->cbuf + start; + self->start = start; + *v = value; + return v; +} + +STC_DEF void +_c_MEMB(_erase_n)(Self* self, const isize idx, const isize n) { + const isize len = _c_MEMB(_size)(self); + c_assert(idx + n <= len); + for (isize i = idx + n - 1; i >= idx; --i) + i_keydrop(_c_MEMB(_at_mut)(self, i)); + for (isize i = idx, j = i + n; j < len; ++i, ++j) + *_c_MEMB(_at_mut)(self, i) = *_c_MEMB(_at)(self, j); + self->end = (self->end - n) & self->capmask; +} + +STC_DEF _m_iter +_c_MEMB(_insert_uninit)(Self* self, const isize idx, const isize n) { + const isize len = 
_c_MEMB(_size)(self); + _m_iter it = {._s=self}; + if (len + n >= self->capmask) + if (!_c_MEMB(_reserve)(self, len + n)) // minimum 2x expand + return it; + it.pos = _cbuf_topos(self, idx); + it.ref = self->cbuf + it.pos; + self->end = (self->end + n) & self->capmask; + + if (it.pos < self->end) // common case because of reserve policy + c_memmove(it.ref + n, it.ref, (len - idx)*c_sizeof *it.ref); + else for (isize i = len - 1, j = i + n; i >= idx; --i, --j) + *_c_MEMB(_at_mut)(self, j) = *_c_MEMB(_at)(self, i); + return it; +} + +STC_DEF _m_iter +_c_MEMB(_insert_n)(Self* self, const isize idx, const _m_value* arr, const isize n) { + _m_iter it = _c_MEMB(_insert_uninit)(self, idx, n); + for (isize i = idx, j = 0; j < n; ++i, ++j) + *_c_MEMB(_at_mut)(self, i) = arr[j]; + return it; +} + +#if !defined i_no_emplace +STC_DEF _m_iter +_c_MEMB(_emplace_n)(Self* self, const isize idx, const _m_raw* raw, const isize n) { + _m_iter it = _c_MEMB(_insert_uninit)(self, idx, n); + for (isize i = idx, j = 0; j < n; ++i, ++j) + *_c_MEMB(_at_mut)(self, i) = i_keyfrom(raw[j]); + return it; +} +#endif + +#if defined _i_has_eq +STC_DEF _m_iter +_c_MEMB(_find_in)(const Self* self, _m_iter i1, _m_iter i2, _m_raw raw) { + (void)self; + for (; i1.ref != i2.ref; _c_MEMB(_next)(&i1)) { + const _m_raw r = i_keytoraw(i1.ref); + if (i_eq((&raw), (&r))) + break; + } + return i1; +} +#endif +#endif // IMPLEMENTATION +#include "sys/finalize.h" diff --git a/stc/hashmap.h b/stc/hashmap.h new file mode 100644 index 0000000..5eaaf9d --- /dev/null +++ b/stc/hashmap.h @@ -0,0 +1,43 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit 
persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +// Unordered map - implemented with the robin-hood hashing scheme. +/* +#define T IMap, int, int +#include +#include + +int main(void) { + IMap map = c_make(IMap, {{12, 32}, {42, 54}}); + IMap_insert(&map, 5, 15); + IMap_insert(&map, 8, 18); + + for (c_each_kv(k, v, IMap, map)) + printf(" %d -> %d\n", *k, *v); + + IMap_drop(&map); +} +*/ + +#define _i_prefix hmap_ +#include "hmap.h" diff --git a/stc/hashset.h b/stc/hashset.h new file mode 100644 index 0000000..76a858d --- /dev/null +++ b/stc/hashset.h @@ -0,0 +1,44 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +// Unordered set - implemented with the robin-hood hashing scheme. +/* +#define T ISet, int +#include +#include + +int main(void) { + ISet set = {0}; + ISet_insert(&set, 5); + ISet_insert(&set, 8); + + for (c_each(i, ISet, set)) + printf(" %d\n", *i.ref); + + ISet_drop(&set); +} +*/ + +#define _i_prefix hset_ +#define _i_is_set +#include "hmap.h" diff --git a/stc/hmap.h b/stc/hmap.h new file mode 100644 index 0000000..89418db --- /dev/null +++ b/stc/hmap.h @@ -0,0 +1,513 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +// Unordered set/map - implemented with the robin-hood hashing scheme. +/* +#include <stdio.h> + +#define T icmap, int, char +#include <stc/hashmap.h> + +int main(void) { + icmap m = {0}; + icmap_emplace(&m, 5, 'a'); + icmap_emplace(&m, 8, 'b'); + icmap_emplace(&m, 12, 'c'); + + icmap_value* v = icmap_get(&m, 10); // NULL + char val = *icmap_at(&m, 5); // 'a' + icmap_emplace_or_assign(&m, 5, 'd'); // update + icmap_erase(&m, 8); + + for (c_each(i, icmap, m)) + printf("map %d: %c\n", i.ref->first, i.ref->second); + + icmap_drop(&m); +} +*/ +#include "priv/linkage.h" +#include "types.h" + +#ifndef STC_HMAP_H_INCLUDED +#define STC_HMAP_H_INCLUDED +#include "common.h" +#include <stdlib.h> +#define _hashmask 0x3fU +#define _distmask 0x3ffU +struct hmap_meta { uint16_t hashx:6, dist:10; }; // dist: 0=empty, 1=PSL 0, 2=PSL 1, ... 
+#endif // STC_HMAP_H_INCLUDED + +#ifndef _i_prefix + #define _i_prefix hmap_ +#endif +#ifndef _i_is_set + #define _i_is_map + #define _i_MAP_ONLY c_true + #define _i_SET_ONLY c_false + #define _i_keyref(vp) (&(vp)->first) +#else + #define _i_MAP_ONLY c_false + #define _i_SET_ONLY c_true + #define _i_keyref(vp) (vp) +#endif +#define _i_is_hash +#include "priv/template.h" +#ifndef i_declared + _c_DEFTYPES(_declare_htable, Self, i_key, i_val, _i_MAP_ONLY, _i_SET_ONLY, _i_aux_def); +#endif + +_i_MAP_ONLY( struct _m_value { + _m_key first; + _m_mapped second; +}; ) + +typedef i_keyraw _m_keyraw; +typedef i_valraw _m_rmapped; +typedef _i_SET_ONLY( i_keyraw ) + _i_MAP_ONLY( struct { _m_keyraw first; + _m_rmapped second; } ) +_m_raw; + +#if !defined i_no_clone +STC_API Self _c_MEMB(_clone)(Self map); +#endif +STC_API void _c_MEMB(_drop)(const Self* cself); +STC_API void _c_MEMB(_clear)(Self* self); +STC_API bool _c_MEMB(_reserve)(Self* self, isize capacity); +STC_API void _c_MEMB(_erase_entry)(Self* self, _m_value* val); +STC_API float _c_MEMB(_max_load_factor)(const Self* self); +STC_API isize _c_MEMB(_capacity)(const Self* map); +STC_API _m_result _c_MEMB(_bucket_lookup_)(const Self* self, const _m_keyraw* rkeyptr); +STC_API _m_result _c_MEMB(_bucket_insert_)(const Self* self, const _m_keyraw* rkeyptr); + +STC_INLINE bool _c_MEMB(_is_empty)(const Self* map) { return !map->size; } +STC_INLINE isize _c_MEMB(_size)(const Self* map) { return (isize)map->size; } +STC_INLINE isize _c_MEMB(_bucket_count)(Self* map) { return map->bucket_count; } +STC_INLINE bool _c_MEMB(_contains)(const Self* self, _m_keyraw rkey) + { return self->size && _c_MEMB(_bucket_lookup_)(self, &rkey).ref; } +STC_INLINE void _c_MEMB(_shrink_to_fit)(Self* self) + { _c_MEMB(_reserve)(self, (isize)self->size); } + +#ifndef i_max_load_factor + #define i_max_load_factor 0.80f +#endif + +STC_INLINE _m_result +_c_MEMB(_insert_entry_)(Self* self, _m_keyraw rkey) { + if (self->size >= 
(isize)((float)self->bucket_count * (i_max_load_factor))) + if (!_c_MEMB(_reserve)(self, (isize)(self->size*3/2 + 2))) + return c_literal(_m_result){0}; + + _m_result res = _c_MEMB(_bucket_insert_)(self, &rkey); + self->size += res.inserted; + return res; +} + +#ifdef _i_is_map + STC_API _m_result _c_MEMB(_insert_or_assign)(Self* self, _m_key key, _m_mapped mapped); + #if !defined i_no_emplace + STC_API _m_result _c_MEMB(_emplace_or_assign)(Self* self, _m_keyraw rkey, _m_rmapped rmapped); + #endif + + STC_INLINE const _m_mapped* _c_MEMB(_at)(const Self* self, _m_keyraw rkey) { + _m_result res = _c_MEMB(_bucket_lookup_)(self, &rkey); + c_assert(res.ref); + return &res.ref->second; + } + + STC_INLINE _m_mapped* _c_MEMB(_at_mut)(Self* self, _m_keyraw rkey) + { return (_m_mapped*)_c_MEMB(_at)(self, rkey); } +#endif // _i_is_map + +#if !defined i_no_clone + STC_INLINE void _c_MEMB(_copy)(Self *self, const Self* other) { + if (self == other) + return; + _c_MEMB(_drop)(self); + *self = _c_MEMB(_clone)(*other); + } + + STC_INLINE _m_value _c_MEMB(_value_clone)(const Self* self, _m_value _val) { + (void)self; + *_i_keyref(&_val) = i_keyclone((*_i_keyref(&_val))); + _i_MAP_ONLY( _val.second = i_valclone(_val.second); ) + return _val; + } +#endif // !i_no_clone + +#if !defined i_no_emplace + STC_INLINE _m_result + _c_MEMB(_emplace)(Self* self, _m_keyraw rkey _i_MAP_ONLY(, _m_rmapped rmapped)) { + _m_result _res = _c_MEMB(_insert_entry_)(self, rkey); + if (_res.inserted) { + *_i_keyref(_res.ref) = i_keyfrom(rkey); + _i_MAP_ONLY( _res.ref->second = i_valfrom(rmapped); ) + } + return _res; + } +#endif // !i_no_emplace + +STC_INLINE _m_raw _c_MEMB(_value_toraw)(const _m_value* val) { + return _i_SET_ONLY( i_keytoraw(val) ) + _i_MAP_ONLY( c_literal(_m_raw){i_keytoraw((&val->first)), i_valtoraw((&val->second))} ); +} + +STC_INLINE void _c_MEMB(_value_drop)(const Self* self, _m_value* _val) { + (void)self; + i_keydrop(_i_keyref(_val)); + _i_MAP_ONLY( i_valdrop((&_val->second)); ) +} 
+ +STC_INLINE Self _c_MEMB(_move)(Self *self) { + Self m = *self; + self->bucket_count = self->size = 0; + self->meta = NULL; self->table = NULL; + return m; +} + +STC_INLINE void _c_MEMB(_take)(Self *self, Self unowned) { + _c_MEMB(_drop)(self); + *self = unowned; +} + +STC_INLINE _m_result +_c_MEMB(_insert)(Self* self, _m_key _key _i_MAP_ONLY(, _m_mapped _mapped)) { + _m_result _res = _c_MEMB(_insert_entry_)(self, i_keytoraw((&_key))); + if (_res.inserted) + { *_i_keyref(_res.ref) = _key; _i_MAP_ONLY( _res.ref->second = _mapped; )} + else + { i_keydrop((&_key)); _i_MAP_ONLY( i_valdrop((&_mapped)); )} + return _res; +} + +STC_INLINE _m_value* _c_MEMB(_push)(Self* self, _m_value _val) { + _m_result _res = _c_MEMB(_insert_entry_)(self, i_keytoraw(_i_keyref(&_val))); + if (_res.inserted) + *_res.ref = _val; + else + _c_MEMB(_value_drop)(self, &_val); + return _res.ref; +} + +#ifdef _i_is_map +STC_INLINE _m_result _c_MEMB(_put)(Self* self, _m_keyraw rkey, _m_rmapped rmapped) { + #ifdef i_no_emplace + return _c_MEMB(_insert_or_assign)(self, rkey, rmapped); + #else + return _c_MEMB(_emplace_or_assign)(self, rkey, rmapped); + #endif +} +#endif + +STC_INLINE void _c_MEMB(_put_n)(Self* self, const _m_raw* raw, isize n) { + while (n--) + #if defined _i_is_set && defined i_no_emplace + _c_MEMB(_insert)(self, *raw++); + #elif defined _i_is_set + _c_MEMB(_emplace)(self, *raw++); + #else + _c_MEMB(_put)(self, raw->first, raw->second), ++raw; + #endif +} + +#ifndef _i_aux_alloc +STC_INLINE Self _c_MEMB(_init)(void) + { Self cx = {0}; return cx; } + +STC_INLINE Self _c_MEMB(_from_n)(const _m_raw* raw, isize n) + { Self cx = {0}; _c_MEMB(_put_n)(&cx, raw, n); return cx; } + +STC_INLINE Self _c_MEMB(_with_capacity)(const isize cap) + { Self cx = {0}; _c_MEMB(_reserve)(&cx, cap); return cx; } +#endif + +STC_API _m_iter _c_MEMB(_begin)(const Self* self); + +STC_INLINE _m_iter _c_MEMB(_end)(const Self* self) + { (void)self; return c_literal(_m_iter){0}; } + +STC_INLINE void 
_c_MEMB(_next)(_m_iter* it) { + while ((++it->ref, (++it->_mref)->dist == 0)) ; + if (it->ref == it->_end) it->ref = NULL; +} + +STC_INLINE _m_iter _c_MEMB(_advance)(_m_iter it, size_t n) { + while (n-- && it.ref) _c_MEMB(_next)(&it); + return it; +} + +STC_INLINE _m_iter +_c_MEMB(_find)(const Self* self, _m_keyraw rkey) { + _m_value* ref; + if (self->size != 0 && (ref = _c_MEMB(_bucket_lookup_)(self, &rkey).ref) != NULL) + return c_literal(_m_iter){ref, + &self->table[self->bucket_count], + &self->meta[ref - self->table]}; + return _c_MEMB(_end)(self); +} + +STC_INLINE const _m_value* +_c_MEMB(_get)(const Self* self, _m_keyraw rkey) { + return self->size ? _c_MEMB(_bucket_lookup_)(self, &rkey).ref : NULL; +} + +STC_INLINE _m_value* +_c_MEMB(_get_mut)(Self* self, _m_keyraw rkey) + { return (_m_value*)_c_MEMB(_get)(self, rkey); } + +STC_INLINE int +_c_MEMB(_erase)(Self* self, _m_keyraw rkey) { + _m_value* ref; + if (self->size != 0 && (ref = _c_MEMB(_bucket_lookup_)(self, &rkey).ref) != NULL) + { _c_MEMB(_erase_entry)(self, ref); return 1; } + return 0; +} + +STC_INLINE _m_iter +_c_MEMB(_erase_at)(Self* self, _m_iter it) { + _c_MEMB(_erase_entry)(self, it.ref); + if (it._mref->dist == 0) + _c_MEMB(_next)(&it); + return it; +} + +STC_INLINE bool +_c_MEMB(_eq)(const Self* self, const Self* other) { + if (_c_MEMB(_size)(self) != _c_MEMB(_size)(other)) return false; + for (_m_iter i = _c_MEMB(_begin)(self); i.ref; _c_MEMB(_next)(&i)) { + const _m_keyraw _raw = i_keytoraw(_i_keyref(i.ref)); + if (!_c_MEMB(_contains)(other, _raw)) return false; + } + return true; +} + +/* -------------------------- IMPLEMENTATION ------------------------- */ +#if defined i_implement + +STC_DEF _m_iter _c_MEMB(_begin)(const Self* self) { + _m_iter it = {self->table, self->table, self->meta}; + if (it.ref == NULL) return it; + it._end += self->bucket_count; + while (it._mref->dist == 0) + ++it.ref, ++it._mref; + if (it.ref == it._end) it.ref = NULL; + return it; +} + +STC_DEF float 
_c_MEMB(_max_load_factor)(const Self* self) { + (void)self; return (float)(i_max_load_factor); +} + +STC_DEF isize _c_MEMB(_capacity)(const Self* map) { + return (isize)((float)map->bucket_count * (i_max_load_factor)); +} + +static void _c_MEMB(_wipe_)(Self* self) { + if (self->size == 0) + return; + _m_value* d = self->table, *_end = &d[self->bucket_count]; + struct hmap_meta* m = self->meta; + for (; d != _end; ++d) + if ((m++)->dist) + _c_MEMB(_value_drop)(self, d); +} + +STC_DEF void _c_MEMB(_drop)(const Self* cself) { + Self* self = (Self*)cself; + if (self->bucket_count > 0) { + _c_MEMB(_wipe_)(self); + _i_free_n(self->meta, self->bucket_count + 1); + _i_free_n(self->table, self->bucket_count); + } +} + +STC_DEF void _c_MEMB(_clear)(Self* self) { + _c_MEMB(_wipe_)(self); + self->size = 0; + c_memset(self->meta, 0, c_sizeof(struct hmap_meta)*self->bucket_count); +} + +#ifdef _i_is_map + STC_DEF _m_result + _c_MEMB(_insert_or_assign)(Self* self, _m_key _key, _m_mapped _mapped) { + _m_result _res = _c_MEMB(_insert_entry_)(self, i_keytoraw((&_key))); + _m_mapped* _mp = _res.ref ? 
&_res.ref->second : &_mapped; + if (_res.inserted) + _res.ref->first = _key; + else + { i_keydrop((&_key)); i_valdrop(_mp); } + *_mp = _mapped; + return _res; + } + + #if !defined i_no_emplace + STC_DEF _m_result + _c_MEMB(_emplace_or_assign)(Self* self, _m_keyraw rkey, _m_rmapped rmapped) { + _m_result _res = _c_MEMB(_insert_entry_)(self, rkey); + if (_res.inserted) + _res.ref->first = i_keyfrom(rkey); + else { + if (_res.ref == NULL) return _res; + i_valdrop((&_res.ref->second)); + } + _res.ref->second = i_valfrom(rmapped); + return _res; + } + #endif // !i_no_emplace +#endif // _i_is_map + +STC_DEF _m_result +_c_MEMB(_bucket_lookup_)(const Self* self, const _m_keyraw* rkeyptr) { + const size_t _hash = i_hash(rkeyptr); + const size_t _idxmask = (size_t)self->bucket_count - 1; + _m_result _res = {.idx=_hash & _idxmask, .hashx=(uint8_t)((_hash >> 24) & _hashmask), .dist=1}; + + while (_res.dist <= self->meta[_res.idx].dist) { + if (self->meta[_res.idx].hashx == _res.hashx) { + const _m_keyraw _raw = i_keytoraw(_i_keyref(&self->table[_res.idx])); + if (i_eq((&_raw), rkeyptr)) { + _res.ref = &self->table[_res.idx]; + break; + } + } + _res.idx = (_res.idx + 1) & _idxmask; + ++_res.dist; + } + return _res; +} + +STC_DEF _m_result +_c_MEMB(_bucket_insert_)(const Self* self, const _m_keyraw* rkeyptr) { + _m_result res = _c_MEMB(_bucket_lookup_)(self, rkeyptr); + if (res.ref) // bucket exists + return res; + res.ref = &self->table[res.idx]; + res.inserted = true; + struct hmap_meta mnew = {.hashx=(uint16_t)(res.hashx & _hashmask), + .dist=(uint16_t)(res.dist & _distmask)}; + struct hmap_meta mcur = self->meta[res.idx]; + self->meta[res.idx] = mnew; + + if (mcur.dist != 0) { // collision, reorder buckets + size_t mask = (size_t)self->bucket_count - 1; + _m_value dcur = *res.ref; + for (;;) { + res.idx = (res.idx + 1) & mask; + ++mcur.dist; + if (self->meta[res.idx].dist == 0) + break; + if (self->meta[res.idx].dist < mcur.dist) { + c_swap(&mcur, &self->meta[res.idx]); + 
c_swap(&dcur, &self->table[res.idx]); + } + } + self->meta[res.idx] = mcur; + self->table[res.idx] = dcur; + } + return res; +} + + +#if !defined i_no_clone + STC_DEF Self + _c_MEMB(_clone)(Self map) { + if (map.bucket_count == 0) + return c_literal(Self){0}; + Self out = map, *self = &out; // _i_new_n may refer self via i_aux + const isize _mbytes = (map.bucket_count + 1)*c_sizeof *map.meta; + out.table = (_m_value *)i_malloc(map.bucket_count*c_sizeof *out.table); + out.meta = (struct hmap_meta *)i_malloc(_mbytes); + + if (out.table && out.meta) { + c_memcpy(out.meta, map.meta, _mbytes); + for (isize i = 0; i < map.bucket_count; ++i) + if (map.meta[i].dist) + out.table[i] = _c_MEMB(_value_clone)(self, map.table[i]); + return out; + } else { + if (out.meta) i_free(out.meta, _mbytes); + if (out.table) _i_free_n(out.table, map.bucket_count); + return c_literal(Self){0}; + } + } +#endif + +STC_DEF bool +_c_MEMB(_reserve)(Self* _self, const isize _newcap) { + isize _newbucks = (isize)((float)_newcap / (i_max_load_factor)) + 4; + _newbucks = c_next_pow2(_newbucks); + + if (_newcap < _self->size || _newbucks == _self->bucket_count) + return true; + Self map = *_self, *self = &map; (void)self; + map.table = _i_new_n(_m_value, _newbucks); + map.meta = _i_new_zeros(struct hmap_meta, _newbucks + 1); + map.bucket_count = _newbucks; + + bool ok = map.table && map.meta; + if (ok) { // Rehash: + map.meta[_newbucks].dist = _distmask; // end-mark for iter + const _m_value* d = _self->table; + const struct hmap_meta* m = _self->meta; + + for (isize i = 0; i < _self->bucket_count; ++i, ++d) if (m[i].dist != 0) { + _m_keyraw r = i_keytoraw(_i_keyref(d)); + *_c_MEMB(_bucket_insert_)(&map, &r).ref = *d; // move element + } + c_swap(_self, &map); + } + _i_free_n(map.meta, map.bucket_count + (int)(map.meta != NULL)); + _i_free_n(map.table, map.bucket_count); + return ok; +} + +STC_DEF void +_c_MEMB(_erase_entry)(Self* self, _m_value* _val) { + _m_value* d = self->table; + struct hmap_meta *m 
= self->meta; + size_t i = (size_t)(_val - d), j = i; + size_t mask = (size_t)self->bucket_count - 1; + + _c_MEMB(_value_drop)(self, _val); + for (;;) { + j = (j + 1) & mask; + if (m[j].dist < 2) // 0 => empty, 1 => PSL 0 + break; + d[i] = d[j]; + m[i] = m[j]; + --m[i].dist; + i = j; + } + m[i].dist = 0; + --self->size; +} + +#endif // i_implement +#undef i_max_load_factor +#undef _i_is_set +#undef _i_is_map +#undef _i_is_hash +#undef _i_keyref +#undef _i_MAP_ONLY +#undef _i_SET_ONLY +#include "sys/finalize.h" diff --git a/stc/hset.h b/stc/hset.h new file mode 100644 index 0000000..d8a7d99 --- /dev/null +++ b/stc/hset.h @@ -0,0 +1,43 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +// Unordered set - implemented with the robin-hood hashing scheme. 
+/* +#define T iset, int +#include +#include + +int main(void) { + iset set = {0}; + iset_insert(&set, 5); + iset_insert(&set, 8); + + for (c_each(i, iset, set)) + printf("set %d\n", *i.ref); + iset_drop(&set); +} +*/ + +#define _i_prefix hset_ +#define _i_is_set +#include "hmap.h" diff --git a/stc/list.h b/stc/list.h new file mode 100644 index 0000000..e5a1b4e --- /dev/null +++ b/stc/list.h @@ -0,0 +1,425 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* Circular Singly-linked Lists. + This implements a std::forward_list-like class in C. Because it is circular, + it also support both push_back() and push_front(), unlike std::forward_list: + + #include + #include + + #define T List, long, (c_use_cmp) // enable sorting, uses default *x < *y. 
+ #include <stc/list.h> + + int main(void) + { + List list = {0}; + + for (int i = 0; i < 5000000; ++i) // five million + List_push_back(&list, crand64_uint() & ((1<<24) - 1)); + + int n = 0; + for (c_each(i, List, list)) + if (++n % 100000 == 0) printf("%8d: %10zu\n", n, *i.ref); + + // Sort them... + List_sort(&list); // sort.h quicksort + + n = 0; + puts("sorted"); + for (c_each(i, List, list)) + if (++n % 100000 == 0) printf("%8d: %10zu\n", n, *i.ref); + + List_drop(&list); + } +*/ +#include "priv/linkage.h" +#include "types.h" + +#ifndef STC_LIST_H_INCLUDED +#define STC_LIST_H_INCLUDED +#include "common.h" +#include <stdlib.h> + +#define _c_list_complete_types(SELF, dummy) \ + struct SELF##_node { \ + SELF##_value value; /* must be first! */ \ + struct SELF##_node *next; \ + } + +#define _clist_tonode(vp) c_safe_cast(_m_node*, _m_value*, vp) + +#define _c_list_insert_entry_after(ref, val) \ + _m_node *entry = _i_new_n(_m_node, 1); entry->value = val; \ + _c_list_insert_after_node(ref, entry) + +#define _c_list_insert_after_node(ref, entry) \ + if (ref) entry->next = ref->next, ref->next = entry; \ + else entry->next = entry + // +: set self->last based on node + +#endif // STC_LIST_H_INCLUDED + +#ifndef _i_prefix + #define _i_prefix list_ +#endif +#include "priv/template.h" + +#define _i_is_list +#ifndef i_declared + _c_DEFTYPES(_declare_list, Self, i_key, _i_aux_def); +#endif +_c_DEFTYPES(_c_list_complete_types, Self, dummy); +typedef i_keyraw _m_raw; + +STC_API void _c_MEMB(_drop)(const Self* cself); +STC_API _m_value* _c_MEMB(_push_back)(Self* self, _m_value value); +STC_API _m_value* _c_MEMB(_push_front)(Self* self, _m_value value); +STC_API _m_iter _c_MEMB(_insert_at)(Self* self, _m_iter it, _m_value value); +STC_API _m_iter _c_MEMB(_erase_at)(Self* self, _m_iter it); +STC_API _m_iter _c_MEMB(_erase_range)(Self* self, _m_iter it1, _m_iter it2); +#if defined _i_has_eq +STC_API _m_iter _c_MEMB(_find_in)(const Self* self, _m_iter it1, _m_iter it2, _m_raw val); +STC_API isize 
_c_MEMB(_remove)(Self* self, _m_raw val); +#endif +#if defined _i_has_cmp +STC_API bool _c_MEMB(_sort)(Self* self); +#endif +STC_API void _c_MEMB(_reverse)(Self* self); +STC_API _m_iter _c_MEMB(_splice)(Self* self, _m_iter it, Self* other); +STC_API Self _c_MEMB(_split_off)(Self* self, _m_iter it1, _m_iter it2); +STC_API _m_value* _c_MEMB(_push_back_node)(Self* self, _m_node* node); +STC_API _m_value* _c_MEMB(_insert_after_node)(Self* self, _m_node* ref, _m_node* node); +STC_API _m_node* _c_MEMB(_unlink_after_node)(Self* self, _m_node* ref); +STC_API void _c_MEMB(_erase_after_node)(Self* self, _m_node* ref); +STC_INLINE _m_node* _c_MEMB(_get_node)(_m_value* pval) { return _clist_tonode(pval); } +STC_INLINE _m_node* _c_MEMB(_unlink_front_node)(Self* self) + { return _c_MEMB(_unlink_after_node)(self, self->last); } +#if !defined i_no_clone +STC_API Self _c_MEMB(_clone)(Self cx); +STC_INLINE _m_value _c_MEMB(_value_clone)(const Self* self, _m_value val) + { (void)self; return i_keyclone(val); } + +STC_INLINE void +_c_MEMB(_copy)(Self *self, const Self* other) { + if (self->last == other->last) return; + _c_MEMB(_drop)(self); + *self = _c_MEMB(_clone)(*other); +} +#endif // !i_no_clone + +#if !defined i_no_emplace +STC_INLINE _m_value* _c_MEMB(_emplace_back)(Self* self, _m_raw raw) + { return _c_MEMB(_push_back)(self, i_keyfrom(raw)); } +STC_INLINE _m_value* _c_MEMB(_emplace_front)(Self* self, _m_raw raw) + { return _c_MEMB(_push_front)(self, i_keyfrom(raw)); } +STC_INLINE _m_iter _c_MEMB(_emplace_at)(Self* self, _m_iter it, _m_raw raw) + { return _c_MEMB(_insert_at)(self, it, i_keyfrom(raw)); } +STC_INLINE _m_value* _c_MEMB(_emplace)(Self* self, _m_raw raw) + { return _c_MEMB(_push_back)(self, i_keyfrom(raw)); } +#endif // !i_no_emplace + +STC_INLINE void _c_MEMB(_put_n)(Self* self, const _m_raw* raw, isize n) + { while (n--) _c_MEMB(_push_back)(self, i_keyfrom(*raw++)); } +#ifndef _i_aux_alloc +STC_INLINE Self _c_MEMB(_init)(void) { return c_literal(Self){0}; } 
+STC_INLINE Self _c_MEMB(_from_n)(const _m_raw* raw, isize n) + { Self cx = {0}; _c_MEMB(_put_n)(&cx, raw, n); return cx; } +#endif +STC_INLINE bool _c_MEMB(_reserve)(Self* self, isize n) { (void)(self + n); return true; } +STC_INLINE bool _c_MEMB(_is_empty)(const Self* self) { return self->last == NULL; } +STC_INLINE void _c_MEMB(_clear)(Self* self) { _c_MEMB(_drop)(self); } +STC_INLINE _m_value* _c_MEMB(_push)(Self* self, _m_value value) + { return _c_MEMB(_push_back)(self, value); } +STC_INLINE void _c_MEMB(_pop_front)(Self* self) + { c_assert(!_c_MEMB(_is_empty)(self)); _c_MEMB(_erase_after_node)(self, self->last); } +STC_INLINE const _m_value* _c_MEMB(_front)(const Self* self) { return &self->last->next->value; } +STC_INLINE _m_value* _c_MEMB(_front_mut)(Self* self) { return &self->last->next->value; } +STC_INLINE const _m_value* _c_MEMB(_back)(const Self* self) { return &self->last->value; } +STC_INLINE _m_value* _c_MEMB(_back_mut)(Self* self) { return &self->last->value; } +STC_INLINE _m_raw _c_MEMB(_value_toraw)(const _m_value* pval) { return i_keytoraw(pval); } +STC_INLINE void _c_MEMB(_value_drop)(const Self* self, _m_value* pval) { (void)self; i_keydrop(pval); } + +STC_INLINE Self _c_MEMB(_move)(Self *self) { + Self m = *self; + self->last = NULL; + return m; +} + +STC_INLINE void _c_MEMB(_take)(Self *self, Self unowned) { + _c_MEMB(_drop)(self); + *self = unowned; +} + +STC_INLINE isize +_c_MEMB(_count)(const Self* self) { + isize n = 1; const _m_node *node = self->last; + if (node == NULL) return 0; + while ((node = node->next) != self->last) ++n; + return n; +} + +STC_INLINE _m_iter +_c_MEMB(_begin)(const Self* self) { + _m_value* head = self->last ? 
&self->last->next->value : NULL; + return c_literal(_m_iter){head, &self->last, self->last}; +} + +STC_INLINE _m_iter +_c_MEMB(_end)(const Self* self) + { (void)self; return c_literal(_m_iter){0}; } + +STC_INLINE void +_c_MEMB(_next)(_m_iter* it) { + _m_node* node = it->prev = _clist_tonode(it->ref); + it->ref = (node == *it->_last ? NULL : &node->next->value); +} + +STC_INLINE _m_iter +_c_MEMB(_advance)(_m_iter it, size_t n) { + while (n-- && it.ref) _c_MEMB(_next)(&it); + return it; +} + +STC_INLINE _m_iter +_c_MEMB(_splice_range)(Self* self, _m_iter it, + Self* other, _m_iter it1, _m_iter it2) { + Self tmp = _c_MEMB(_split_off)(other, it1, it2); + return _c_MEMB(_splice)(self, it, &tmp); +} + +#if defined _i_has_eq +STC_INLINE _m_iter +_c_MEMB(_find)(const Self* self, _m_raw val) { + return _c_MEMB(_find_in)(self, _c_MEMB(_begin)(self), _c_MEMB(_end)(self), val); +} + +STC_INLINE bool _c_MEMB(_eq)(const Self* self, const Self* other) { + _m_iter i = _c_MEMB(_begin)(self), j = _c_MEMB(_begin)(other); + for (; i.ref && j.ref; _c_MEMB(_next)(&i), _c_MEMB(_next)(&j)) { + const _m_raw _rx = i_keytoraw(i.ref), _ry = i_keytoraw(j.ref); + if (!(i_eq((&_rx), (&_ry)))) return false; + } + return !(i.ref || j.ref); +} +#endif + +// -------------------------- IMPLEMENTATION ------------------------- +#if defined i_implement + +#if !defined i_no_clone +STC_DEF Self +_c_MEMB(_clone)(Self lst) { + Self out = lst, *self = &out; (void)self; // may be used by i_keyclone via i_aux + out.last = NULL; + for (c_each(it, Self, lst)) + _c_MEMB(_push_back)(&out, i_keyclone((*it.ref))); + return out; +} +#endif + +STC_DEF void +_c_MEMB(_drop)(const Self* cself) { + Self* self = (Self*)cself; + while (self->last) + _c_MEMB(_erase_after_node)(self, self->last); +} + +STC_DEF _m_value* +_c_MEMB(_push_back)(Self* self, _m_value value) { + _c_list_insert_entry_after(self->last, value); + self->last = entry; + return &entry->value; +} + +STC_DEF _m_value* +_c_MEMB(_push_front)(Self* self, 
_m_value value) { + _c_list_insert_entry_after(self->last, value); + if (self->last == NULL) + self->last = entry; + return &entry->value; +} + +STC_DEF _m_value* +_c_MEMB(_push_back_node)(Self* self, _m_node* node) { + _c_list_insert_after_node(self->last, node); + self->last = node; + return &node->value; +} + +STC_DEF _m_value* +_c_MEMB(_insert_after_node)(Self* self, _m_node* ref, _m_node* node) { + _c_list_insert_after_node(ref, node); + if (self->last == NULL) + self->last = node; + return &node->value; +} + +STC_DEF _m_iter +_c_MEMB(_insert_at)(Self* self, _m_iter it, _m_value value) { + _m_node* node = it.ref ? it.prev : self->last; + _c_list_insert_entry_after(node, value); + if (self->last == NULL || it.ref == NULL) { + it.prev = self->last ? self->last : entry; + self->last = entry; + } + it.ref = &entry->value; + return it; +} + +STC_DEF _m_iter +_c_MEMB(_erase_at)(Self* self, _m_iter it) { + _m_node *node = _clist_tonode(it.ref); + it.ref = (node == self->last) ? NULL : &node->next->value; + _c_MEMB(_erase_after_node)(self, it.prev); + return it; +} + +STC_DEF _m_iter +_c_MEMB(_erase_range)(Self* self, _m_iter it1, _m_iter it2) { + _m_node *end = it2.ref ? 
_clist_tonode(it2.ref) : self->last->next; + if (it1.ref != it2.ref) do { + _c_MEMB(_erase_after_node)(self, it1.prev); + if (self->last == NULL) break; + } while (it1.prev->next != end); + return it2; +} + +STC_DEF void +_c_MEMB(_erase_after_node)(Self* self, _m_node* ref) { + _m_node* node = _c_MEMB(_unlink_after_node)(self, ref); + i_keydrop((&node->value)); + _i_free_n(node, 1); +} + +STC_DEF _m_node* +_c_MEMB(_unlink_after_node)(Self* self, _m_node* ref) { + _m_node* node = ref->next, *next = node->next; + ref->next = next; + if (node == next) + self->last = NULL; + else if (node == self->last) + self->last = ref; + return node; +} + +STC_DEF void +_c_MEMB(_reverse)(Self* self) { + Self rev = *self; + rev.last = NULL; + while (self->last) { + _m_node* node = _c_MEMB(_unlink_after_node)(self, self->last); + _c_MEMB(_insert_after_node)(&rev, rev.last, node); + } + *self = rev; +} + +STC_DEF _m_iter +_c_MEMB(_splice)(Self* self, _m_iter it, Self* other) { + if (self->last == NULL) + self->last = other->last; + else if (other->last) { + _m_node *p = it.ref ? it.prev : self->last, *next = p->next; + it.prev = other->last; + p->next = it.prev->next; + it.prev->next = next; + if (it.ref == NULL) self->last = it.prev; + } + other->last = NULL; + return it; +} + +STC_DEF Self +_c_MEMB(_split_off)(Self* self, _m_iter it1, _m_iter it2) { + Self lst = *self; + lst.last = NULL; + if (it1.ref == it2.ref) + return lst; + _m_node *p1 = it1.prev, + *p2 = it2.ref ? it2.prev : self->last; + p1->next = p2->next; + p2->next = _clist_tonode(it1.ref); + if (self->last == p2) + self->last = (p1 == p2) ? 
NULL : p1; + lst.last = p2; + return lst; +} + +#if defined _i_has_eq +STC_DEF _m_iter +_c_MEMB(_find_in)(const Self* self, _m_iter it1, _m_iter it2, _m_raw val) { + (void)self; + for (c_each(it, Self, it1, it2)) { + _m_raw r = i_keytoraw(it.ref); + if (i_eq((&r), (&val))) + return it; + } + it2.ref = NULL; return it2; +} + +STC_DEF isize +_c_MEMB(_remove)(Self* self, _m_raw val) { + isize n = 0; + _m_node *prev = self->last, *node; + if (prev) do { + node = prev->next; + _m_raw r = i_keytoraw((&node->value)); + if (i_eq((&r), (&val))) { + _c_MEMB(_erase_after_node)(self, prev), ++n; + if (self->last == NULL) break; + } else + prev = node; + } while (node != self->last); + return n; +} +#endif + +#if defined _i_has_cmp +#include "priv/sort_prv.h" + +STC_DEF bool _c_MEMB(_sort)(Self* self) { + isize len = 0, cap = 0; + _m_value *arr = NULL, *p = NULL; + _m_node* keep; + for (c_each(i, Self, *self)) { + if (len == cap) { + isize cap_n = cap + cap/2 + 8; + if ((p = (_m_value *)_i_realloc_n(arr, cap, cap_n)) == NULL) + goto done; + arr = p, cap = cap_n; + } + arr[len++] = *i.ref; + } + keep = self->last; + self->last = (_m_node *)arr; + _c_MEMB(_sort_lowhigh)(self, 0, len - 1); + self->last = keep; + for (c_each(i, Self, *self)) + *i.ref = *p++; + done: _i_free_n(arr, cap); + return p != NULL; +} +#endif // _i_has_cmp +#endif // i_implement +#undef _i_is_list +#include "sys/finalize.h" diff --git a/stc/pqueue.h b/stc/pqueue.h new file mode 100644 index 0000000..d77972d --- /dev/null +++ b/stc/pqueue.h @@ -0,0 +1,181 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * 
furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "priv/linkage.h" +#include "types.h" + +#ifndef STC_PQUEUE_H_INCLUDED +#define STC_PQUEUE_H_INCLUDED +#include "common.h" +#include +#endif // STC_PQUEUE_H_INCLUDED + +#ifndef _i_prefix + #define _i_prefix pqueue_ +#endif +#define _i_sorted +#include "priv/template.h" +#ifndef i_declared + _c_DEFTYPES(_declare_stack, Self, i_key, _i_aux_def); +#endif +typedef i_keyraw _m_raw; + +STC_API void _c_MEMB(_make_heap)(Self* self); +STC_API void _c_MEMB(_erase_at)(Self* self, isize idx); +STC_API _m_value* _c_MEMB(_push)(Self* self, _m_value value); + +STC_INLINE void _c_MEMB(_put_n)(Self* self, const _m_raw* raw, isize n) + { while (n--) _c_MEMB(_push)(self, i_keyfrom(*raw++)); } + +STC_INLINE bool _c_MEMB(_reserve)(Self* self, const isize cap) { + if (cap != self->size && cap <= self->capacity) return true; + _m_value *d = (_m_value *)_i_realloc_n(self->data, self->capacity, cap); + return d ? 
(self->data = d, self->capacity = cap, true) : false; +} + +STC_INLINE void _c_MEMB(_shrink_to_fit)(Self* self) + { _c_MEMB(_reserve)(self, self->size); } + +#ifndef _i_aux_alloc +STC_INLINE Self _c_MEMB(_init)(void) + { return c_literal(Self){0}; } + +STC_INLINE Self _c_MEMB(_from_n)(const _m_raw* raw, isize n) + { Self cx = {0}; _c_MEMB(_put_n)(&cx, raw, n); return cx; } + +STC_INLINE Self _c_MEMB(_with_capacity)(const isize cap) + { Self cx = {0}; _c_MEMB(_reserve)(&cx, cap); return cx; } +#endif + +STC_INLINE void _c_MEMB(_clear)(Self* self) { + isize i = self->size; self->size = 0; + while (i--) { i_keydrop((self->data + i)); } +} + +STC_INLINE void _c_MEMB(_drop)(const Self* cself) { + Self* self = (Self*)cself; + _c_MEMB(_clear)(self); + _i_free_n(self->data, self->capacity); +} + +STC_INLINE Self _c_MEMB(_move)(Self *self) { + Self m = *self; + self->size = self->capacity = 0; + self->data = NULL; + return m; +} + +STC_INLINE void _c_MEMB(_take)(Self *self, Self unowned) { + _c_MEMB(_drop)(self); + *self = unowned; +} + +STC_INLINE isize _c_MEMB(_size)(const Self* q) + { return q->size; } + +STC_INLINE bool _c_MEMB(_is_empty)(const Self* q) + { return !q->size; } + +STC_INLINE isize _c_MEMB(_capacity)(const Self* q) + { return q->capacity; } + +STC_INLINE const _m_value* _c_MEMB(_top)(const Self* self) + { return &self->data[0]; } + +STC_INLINE void _c_MEMB(_pop)(Self* self) + { c_assert(!_c_MEMB(_is_empty)(self)); _c_MEMB(_erase_at)(self, 0); } + +STC_INLINE _m_value _c_MEMB(_pull)(Self* self) + { _m_value v = self->data[0]; _c_MEMB(_erase_at)(self, 0); return v; } + +#if !defined i_no_clone +STC_API Self _c_MEMB(_clone)(Self q); + +STC_INLINE void _c_MEMB(_copy)(Self *self, const Self* other) { + if (self == other) return; + _c_MEMB(_drop)(self); + *self = _c_MEMB(_clone)(*other); +} +STC_INLINE _m_value _c_MEMB(_value_clone)(const Self* self, _m_value val) + { (void)self; return i_keyclone(val); } +#endif // !i_no_clone + +#if !defined i_no_emplace 
+STC_INLINE void _c_MEMB(_emplace)(Self* self, _m_raw raw) + { _c_MEMB(_push)(self, i_keyfrom(raw)); } +#endif // !i_no_emplace + +/* -------------------------- IMPLEMENTATION ------------------------- */ +#if defined i_implement + +STC_DEF void +_c_MEMB(_sift_down_)(Self* self, const isize idx, const isize n) { + _m_value t, *arr = self->data - 1; + for (isize r = idx, c = idx*2; c <= n; c *= 2) { + c += i_less((&arr[c]), (&arr[c + (c < n)])); + if (!(i_less((&arr[r]), (&arr[c])))) return; + t = arr[r], arr[r] = arr[c], arr[r = c] = t; + } +} + +STC_DEF void +_c_MEMB(_make_heap)(Self* self) { + isize n = self->size; + for (isize k = n/2; k != 0; --k) + _c_MEMB(_sift_down_)(self, k, n); +} + +#if !defined i_no_clone +STC_DEF Self _c_MEMB(_clone)(Self q) { + Self out = q, *self = &out; (void)self; + out.capacity = out.size = 0; out.data = NULL; + if (!_c_MEMB(_reserve)(&out, q.size)) return out; /* allocation failed: return an empty queue instead of writing through NULL */ + out.size = q.size; + for (c_range(i, q.size)) + out.data[i] = i_keyclone(q.data[i]); + return out; +} +#endif + +STC_DEF void +_c_MEMB(_erase_at)(Self* self, const isize idx) { + i_keydrop((self->data + idx)); + const isize n = --self->size; + self->data[idx] = self->data[n]; + _c_MEMB(_sift_down_)(self, idx + 1, n); +} +/* Insert value, sifting it up past parents that compare i_less; returns its slot, or NULL if growing the array fails. */ +STC_DEF _m_value* +_c_MEMB(_push)(Self* self, _m_value value) { + if (self->size == self->capacity && !_c_MEMB(_reserve)(self, self->size*3/2 + 4)) + return NULL; /* growth failed: heap left unchanged, value not stored (caller still owns it) */ + _m_value *arr = self->data - 1; /* base 1 */ + isize c = ++self->size; + for (; c > 1 && (i_less((&arr[c/2]), (&value))); c /= 2) + arr[c] = arr[c/2]; + arr[c] = value; + return arr + c; +} +#endif + +#undef _i_sorted +#include "sys/finalize.h" diff --git a/stc/priv/cregex_prv.c b/stc/priv/cregex_prv.c new file mode 100644 index 0000000..5d4e2b2 --- /dev/null +++ b/stc/priv/cregex_prv.c @@ -0,0 +1,1340 @@ +/* +This is a Unix port of the Plan 9 regular expression library, by Rob Pike. +Please send comments about the packaging to Russ Cox . + +Copyright © 2021 Plan 9 Foundation +Copyright © 2023 Tyge Løvset, for additions. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ +#ifndef STC_CREGEX_PRV_C_INCLUDED +#define STC_CREGEX_PRV_C_INCLUDED + +#include <setjmp.h> +#include "utf8_prv.h" +#include "cstr_prv.h" +#include "ucd_prv.c" + +typedef uint32_t _Rune; /* Utf8 code point */ +typedef int32_t _Token; +/* max character classes per program */ +#define _NCLASS CREG_MAX_CLASSES +/* max subexpressions */ +#define _NSUBEXP CREG_MAX_CAPTURES +/* max rune ranges per character class */ +#define _NCCRUNE (_NSUBEXP * 2) + +/* + * character class, each pair of runes defines a range + */ +typedef struct +{ + _Rune *end; + _Rune spans[_NCCRUNE]; +} _Reclass; + +/* + * Machine instructions + */ +typedef struct _Reinst +{ + _Token type; + union { + _Reclass *classp; /* class pointer */ + _Rune rune; /* character */ + int subid; /* sub-expression id for TOK_RBRA and TOK_LBRA */ + struct _Reinst *right; /* right child of TOK_OR */ + } r; + union { /* regexp relies on these two being in the same union */ + struct _Reinst *left; /* left child of TOK_OR */ + struct _Reinst *next; /* next instruction for TOK_CAT & TOK_LBRA */ + } l; +} _Reinst; + +typedef struct { + bool icase; + bool dotall; +} _Reflags; + +/* + * Reprogram definition + */ +typedef struct _Reprog +{ + _Reinst *startinst; /* start pc */ + _Reflags flags; + int nsubids; + isize allocsize; + _Reclass cclass[_NCLASS]; /* .data */ + _Reinst firstinst[]; /* .text : originally 5 elements? */ +} _Reprog; + +/* + * Sub expression matches + */ +typedef csview _Resub; + +/* + * substitution list + */ +typedef struct _Resublist +{ + _Resub m[_NSUBEXP]; +} _Resublist; + +/* + * Actions and Tokens (_Reinst types) + * + * 0x8000000-0x80FFFFF: operators, value => precedence + * 0x8100000-0x81FFFFF: TOK_RUNE and char classes. + * 0x8200000-0x82FFFFF: tokens, i.e. 
operands for operators + */ +enum { + TOK_MASK = 0xFF00000, + TOK_OPERATOR = 0x8000000, /* Bitmask of all operators */ + TOK_START = 0x8000001, /* Start, used for marker on stack */ + TOK_RBRA , /* Right bracket, ) */ + TOK_LBRA , /* Left bracket, ( */ + TOK_OR , /* Alternation, | */ + TOK_CAT , /* Concatenation, implicit operator */ + TOK_STAR , /* Closure, * */ + TOK_PLUS , /* a+ == aa* */ + TOK_QUEST , /* a? == a|nothing, i.e. 0 or 1 a's */ + TOK_RUNE = 0x8100000, + TOK_IRUNE , + ASC_an , ASC_AN, /* alphanum */ + ASC_al , ASC_AL, /* alpha */ + ASC_as , ASC_AS, /* ascii */ + ASC_bl , ASC_BL, /* blank */ + ASC_ct , ASC_CT, /* ctrl */ + ASC_d , ASC_D, /* digit */ + ASC_s , ASC_S, /* space */ + ASC_w , ASC_W, /* word */ + ASC_gr , ASC_GR, /* graphic */ + ASC_pr , ASC_PR, /* print */ + ASC_pu , ASC_PU, /* punct */ + ASC_lo , ASC_LO, /* lower */ + ASC_up , ASC_UP, /* upper */ + ASC_xd , ASC_XD, /* hex */ + UTF_al , UTF_AL, /* utf8 alpha */ + UTF_an , UTF_AN, /* utf8 alphanumeric */ + UTF_bl , UTF_BL, /* utf8 blank */ + UTF_lc , UTF_LC, /* utf8 letter cased */ + UTF_ll , UTF_LL, /* utf8 letter lowercase */ + UTF_lu , UTF_LU, /* utf8 letter uppercase */ + UTF_sp , UTF_SP, /* utf8 space */ + UTF_wr , UTF_WR, /* utf8 word */ + UTF_GRP = 0x8150000, + UTF_cc = UTF_GRP+2*U8G_Cc, UTF_CC, /* utf8 control char */ + UTF_lt = UTF_GRP+2*U8G_Lt, UTF_LT, /* utf8 letter titlecase */ + UTF_nd = UTF_GRP+2*U8G_Nd, UTF_ND, /* utf8 number decimal */ + UTF_nl = UTF_GRP+2*U8G_Nl, UTF_NL, /* utf8 number letter */ + UTF_pc = UTF_GRP+2*U8G_Pc, UTF_PC, /* utf8 punct connector */ + UTF_pd = UTF_GRP+2*U8G_Pd, UTF_PD, /* utf8 punct dash */ + UTF_pf = UTF_GRP+2*U8G_Pf, UTF_PF, /* utf8 punct final */ + UTF_pi = UTF_GRP+2*U8G_Pi, UTF_PI, /* utf8 punct initial */ + UTF_sc = UTF_GRP+2*U8G_Sc, UTF_SC, /* utf8 symbol currency */ + UTF_zl = UTF_GRP+2*U8G_Zl, UTF_ZL, /* utf8 separator line */ + UTF_zp = UTF_GRP+2*U8G_Zp, UTF_ZP, /* utf8 separator paragraph */ + UTF_zs = UTF_GRP+2*U8G_Zs, UTF_ZS, /* utf8 
separator space */ + UTF_arabic = UTF_GRP+2*U8G_Arabic, UTF_ARABIC, + UTF_bengali = UTF_GRP+2*U8G_Bengali, UTF_BENGALI, + UTF_cyrillic = UTF_GRP+2*U8G_Cyrillic, UTF_CYRILLIC, + UTF_devanagari = UTF_GRP+2*U8G_Devanagari, UTF_DEVANAGARI, + UTF_georgian = UTF_GRP+2*U8G_Georgian, UTF_GEORGIAN, + UTF_greek = UTF_GRP+2*U8G_Greek, UTF_GREEK, + UTF_han = UTF_GRP+2*U8G_Han, UTF_HAN, + UTF_hiragana = UTF_GRP+2*U8G_Hiragana, UTF_HIRAGANA, + UTF_katakana = UTF_GRP+2*U8G_Katakana, UTF_KATAKANA, + UTF_latin = UTF_GRP+2*U8G_Latin, UTF_LATIN, + UTF_thai = UTF_GRP+2*U8G_Thai, UTF_THAI, + TOK_ANY = 0x8200000, /* Any character except newline, . */ + TOK_ANYNL , /* Any character including newline, . */ + TOK_NOP , /* No operation, internal use only */ + TOK_BOL , TOK_BOS, /* Beginning of line / string, ^ */ + TOK_EOL , TOK_EOS, /* End of line / string, $ */ + TOK_EOZ , /* End of line with optional NL */ + TOK_CCLASS , /* Character class, [] */ + TOK_NCCLASS , /* Negated character class, [] */ + TOK_WBOUND , /* Word boundary (\b), not consuming meta char */ + TOK_NWBOUND , /* Non-word boundary (\B), not consuming meta char */ + TOK_CASED , /* (?-i) */ + TOK_ICASE , /* (?i) */ + TOK_END = 0x82FFFFF, /* Terminate: match found */ +}; + +/* + * _regexec execution lists + */ +#define _LISTSIZE 10 +#define _BIGLISTSIZE (10*_LISTSIZE) + +typedef struct _Relist +{ + _Reinst* inst; /* Reinstruction of the thread */ + _Resublist se; /* matched subexpressions in this thread */ +} _Relist; + +typedef struct _Reljunk +{ + _Relist* relist[2]; + _Relist* reliste[2]; + int starttype; + _Rune startchar; + const char* starts; + const char* eol; +} _Reljunk; + +/* + * utf8 and _Rune code + */ + +static inline int +chartorune(_Rune *rune, const char *s) +{ + utf8_decode_t d = {.state=0}; + int n = utf8_decode_codepoint(&d, s, NULL); + *rune = d.codep; + return n; +} + +static const char* +utfrune(const char *s, _Rune c) // search +{ + if (c < 0x80) /* ascii */ + return strchr((char *)s, (int)c); + + utf8_decode_t d 
= {.state=0}; + while (*s != 0) { + int n = utf8_decode_codepoint(&d, s, NULL); + if (d.codep == c) return s; + s += n; + } + return NULL; +} + +static const char* +utfruneicase(const char *s, _Rune c) { + if (c < 0x80) { + for (int low = tolower((int)c); *s != 0; ++s) + if (tolower(*s) == low) + return s; + } else { + utf8_decode_t d = {.state=0}; + c = utf8_casefold(c); + while (*s != 0) { + int n = utf8_decode_codepoint(&d, s, NULL); + if (utf8_casefold(d.codep) == c) + return s; + s += n; + } + } + return NULL; +} + +/************ + * regaux.c * + ************/ + +/* + * save a new match in mp + */ +static void +_renewmatch(_Resub *mp, int ms, _Resublist *sp, int nsubids) +{ + if (mp==NULL || ms==0) + return; + if (mp[0].buf == NULL || sp->m[0].buf < mp[0].buf || + (sp->m[0].buf == mp[0].buf && sp->m[0].size > mp[0].size)) { + for (int i=0; im[i]; + } +} + +/* + * Note optimization in _renewthread: + * *lp must be pending when _renewthread called; if *l has been looked + * at already, the optimization is a bug. + */ +static _Relist* +_renewthread(_Relist *lp, /* _relist to add to */ + _Reinst *ip, /* instruction to add */ + int ms, + _Resublist *sep) /* pointers to subexpressions */ +{ + _Relist *p; + + for (p=lp; p->inst; p++) { + if (p->inst == ip) { + if (sep->m[0].buf < p->se.m[0].buf) { + if (ms > 1) + p->se = *sep; + else + p->se.m[0] = sep->m[0]; + } + return 0; + } + } + p->inst = ip; + if (ms > 1) + p->se = *sep; + else + p->se.m[0] = sep->m[0]; + (++p)->inst = NULL; + return p; +} + +/* + * same as renewthread, but called with + * initial empty start pointer. 
+ */ +static _Relist* +_renewemptythread(_Relist *lp, /* _relist to add to */ + _Reinst *ip, /* instruction to add */ + int ms, + const char *sp) /* pointers to subexpressions */ +{ + _Relist *p; + + for (p=lp; p->inst; p++) { + if (p->inst == ip) { + if (sp < p->se.m[0].buf) { + if (ms > 1) + memset(&p->se, 0, sizeof(p->se)); + p->se.m[0].buf = sp; + } + return 0; + } + } + p->inst = ip; + if (ms > 1) + memset(&p->se, 0, sizeof(p->se)); + p->se.m[0].buf = sp; + (++p)->inst = NULL; + return p; +} + +/* + * _Parser Information + */ +typedef struct _Node +{ + _Reinst* first; + _Reinst* last; +} _Node; + +#define _NSTACK 20 +typedef struct _Parser +{ + const char* exprp; /* pointer to next character in source expression */ + _Node andstack[_NSTACK]; + _Node* andp; + _Token atorstack[_NSTACK]; + _Token* atorp; + short subidstack[_NSTACK]; /* parallel to atorstack */ + short* subidp; + short cursubid; /* id of current subexpression */ + int error; + _Reflags flags; + int dot_type; + int rune_type; + bool litmode; + bool lastwasand; /* Last token was _operand */ + short nbra; + short nclass; + isize instcap; + _Rune yyrune; /* last lex'd rune */ + _Reclass *yyclassp; /* last lex'd class */ + _Reclass* classp; + _Reinst* freep; + jmp_buf regkaboom; +} _Parser; + +/* predeclared crap */ +static void _operator(_Parser *par, _Token type); +static void _pushand(_Parser *par, _Reinst *first, _Reinst *last); +static void _pushator(_Parser *par, _Token type); +static void _evaluntil(_Parser *par, _Token type); +static int _bldcclass(_Parser *par); + +static void +_rcerror(_Parser *par, cregex_result err) +{ + par->error = err; + longjmp(par->regkaboom, 1); +} + +static _Reinst* +_newinst(_Parser *par, _Token t) +{ + par->freep->type = t; + par->freep->l.left = 0; + par->freep->r.right = 0; + return par->freep++; +} + +static void +_operand(_Parser *par, _Token t) +{ + _Reinst *i; + + if (par->lastwasand) + _operator(par, TOK_CAT); /* catenate is implicit */ + i = _newinst(par, 
t); + switch (t) { + case TOK_CCLASS: case TOK_NCCLASS: + i->r.classp = par->yyclassp; break; + case TOK_RUNE: + i->r.rune = par->yyrune; break; + case TOK_IRUNE: + i->r.rune = utf8_casefold(par->yyrune); + } + _pushand(par, i, i); + par->lastwasand = true; +} + +static void +_operator(_Parser *par, _Token t) +{ + if (t==TOK_RBRA && --par->nbra<0) + _rcerror(par, CREG_UNMATCHEDRIGHTPARENTHESIS); + if (t==TOK_LBRA) { + if (++par->cursubid >= _NSUBEXP) + _rcerror(par, CREG_TOOMANYSUBEXPRESSIONS); + par->nbra++; + if (par->lastwasand) + _operator(par, TOK_CAT); + } else + _evaluntil(par, t); + if (t != TOK_RBRA) + _pushator(par, t); + par->lastwasand = 0; + if (t==TOK_STAR || t==TOK_QUEST || t==TOK_PLUS || t==TOK_RBRA) + par->lastwasand = true; /* these look like operands */ +} + +static void +_pushand(_Parser *par, _Reinst *f, _Reinst *l) +{ + if (par->andp >= &par->andstack[_NSTACK]) + _rcerror(par, CREG_OPERANDSTACKOVERFLOW); + par->andp->first = f; + par->andp->last = l; + par->andp++; +} + +static void +_pushator(_Parser *par, _Token t) +{ + if (par->atorp >= &par->atorstack[_NSTACK]) + _rcerror(par, CREG_OPERATORSTACKOVERFLOW); + *par->atorp++ = t; + *par->subidp++ = par->cursubid; +} + +static _Node* +_popand(_Parser *par, _Token op) +{ + (void)op; + _Reinst *inst; + + if (par->andp <= &par->andstack[0]) { + _rcerror(par, CREG_MISSINGOPERAND); + inst = _newinst(par, TOK_NOP); + _pushand(par, inst, inst); + } + return --par->andp; +} + +static _Token +_popator(_Parser *par) +{ + if (par->atorp <= &par->atorstack[0]) + _rcerror(par, CREG_OPERATORSTACKUNDERFLOW); + --par->subidp; + return *--par->atorp; +} + + +static void +_evaluntil(_Parser *par, _Token pri) +{ + _Node *op1, *op2; + _Reinst *inst1, *inst2; + + while (pri==TOK_RBRA || par->atorp[-1]>=pri) { + switch (_popator(par)) { + default: + _rcerror(par, CREG_UNKNOWNOPERATOR); + break; + case TOK_LBRA: /* must have been TOK_RBRA */ + op1 = _popand(par, '('); + inst2 = _newinst(par, TOK_RBRA); + 
inst2->r.subid = *par->subidp; + op1->last->l.next = inst2; + inst1 = _newinst(par, TOK_LBRA); + inst1->r.subid = *par->subidp; + inst1->l.next = op1->first; + _pushand(par, inst1, inst2); + return; + case TOK_OR: + op2 = _popand(par, '|'); + op1 = _popand(par, '|'); + inst2 = _newinst(par, TOK_NOP); + op2->last->l.next = inst2; + op1->last->l.next = inst2; + inst1 = _newinst(par, TOK_OR); + inst1->r.right = op1->first; + inst1->l.left = op2->first; + _pushand(par, inst1, inst2); + break; + case TOK_CAT: + op2 = _popand(par, 0); + op1 = _popand(par, 0); + op1->last->l.next = op2->first; + _pushand(par, op1->first, op2->last); + break; + case TOK_STAR: + op2 = _popand(par, '*'); + inst1 = _newinst(par, TOK_OR); + op2->last->l.next = inst1; + inst1->r.right = op2->first; + _pushand(par, inst1, inst1); + break; + case TOK_PLUS: + op2 = _popand(par, '+'); + inst1 = _newinst(par, TOK_OR); + op2->last->l.next = inst1; + inst1->r.right = op2->first; + _pushand(par, op2->first, inst1); + break; + case TOK_QUEST: + op2 = _popand(par, '?'); + inst1 = _newinst(par, TOK_OR); + inst2 = _newinst(par, TOK_NOP); + inst1->l.left = inst2; + inst1->r.right = op2->first; + op2->last->l.next = inst2; + _pushand(par, inst1, inst2); + break; + } + } +} + + +static _Reprog* +_optimize(_Parser *par, _Reprog *pp) +{ + _Reinst *inst, *target; + _Reclass *cl; + + /* + * get rid of NOOP chains + */ + for (inst = pp->firstinst; inst->type != TOK_END; inst++) { + target = inst->l.next; + while (target->type == TOK_NOP) + target = target->l.next; + inst->l.next = target; + } + + /* + * The original allocation is for an area larger than + * necessary. Reallocate to the actual space used + * and then relocate the code. + */ + if ((par->freep - pp->firstinst)*2 > par->instcap) + return pp; + + intptr_t ipp = (intptr_t)pp; // convert pointer to integer! 
+ isize new_allocsize = c_sizeof(_Reprog) + (par->freep - pp->firstinst)*c_sizeof(_Reinst); + _Reprog *npp = (_Reprog *)c_realloc(pp, pp->allocsize, new_allocsize); + isize diff = (intptr_t)npp - ipp; + + if ((npp == NULL) | (diff == 0)) + return (_Reprog *)ipp; + npp->allocsize = new_allocsize; + par->freep = (_Reinst *)((char *)par->freep + diff); + + for (inst = npp->firstinst; inst < par->freep; inst++) { + switch (inst->type) { + case TOK_OR: + case TOK_STAR: + case TOK_PLUS: + case TOK_QUEST: + inst->r.right = (_Reinst *)((char*)inst->r.right + diff); + break; + case TOK_CCLASS: + case TOK_NCCLASS: + inst->r.right = (_Reinst *)((char*)inst->r.right + diff); + cl = inst->r.classp; + cl->end = (_Rune *)((char*)cl->end + diff); + break; + } + if (inst->l.left) + inst->l.left = (_Reinst *)((char*)inst->l.left + diff); + } + npp->startinst = (_Reinst *)((char*)npp->startinst + diff); + return npp; +} + + +static _Reclass* +_newclass(_Parser *par) +{ + if (par->nclass >= _NCLASS) + _rcerror(par, CREG_TOOMANYCHARACTERCLASSES); + return &(par->classp[par->nclass++]); +} + + +static int /* quoted */ +_nextc(_Parser *par, _Rune *rp) +{ + int ret; + for (;;) { + ret = par->litmode; + par->exprp += chartorune(rp, par->exprp); + + if (*rp == '\\') { + if (par->litmode) { + if (*par->exprp != 'E') + break; + par->exprp += 1; + par->litmode = false; + continue; + } + par->exprp += chartorune(rp, par->exprp); + if (*rp == 'Q') { + par->litmode = true; + continue; + } + if (*rp == 'x' && *par->exprp == '{') { + *rp = (_Rune)strtol(par->exprp + 1, (char **)&par->exprp, 16); + if (*par->exprp != '}') + _rcerror(par, CREG_UNMATCHEDRIGHTPARENTHESIS); + par->exprp++; + } + ret = 1; + } + break; + } + return ret; +} + + +static void +_lexasciiclass(_Parser *par, _Rune *rp) /* assume *rp == '[' and *par->exprp == ':' */ +{ + static struct { const char* c; int n, r; } cls[] = { + {"alnum:]", 7, ASC_an}, {"alpha:]", 7, ASC_al}, {"ascii:]", 7, ASC_as}, + {"blank:]", 7, ASC_bl}, 
{"cntrl:]", 7, ASC_ct}, {"digit:]", 7, ASC_d}, + {"graph:]", 7, ASC_gr}, {"lower:]", 7, ASC_lo}, {"print:]", 7, ASC_pr}, + {"punct:]", 7, ASC_pu}, {"space:]", 7, ASC_s}, {"upper:]", 7, ASC_up}, + {"xdigit:]", 8, ASC_xd}, {"word:]", 6, ASC_w}, + }; + int inv = par->exprp[1] == '^', off = 1 + inv; + for (unsigned i = 0; i < (sizeof cls/sizeof *cls); ++i) + if (strncmp(par->exprp + off, cls[i].c, (size_t)cls[i].n) == 0) { + *rp = (_Rune)cls[i].r; + par->exprp += off + cls[i].n; + break; + } + if (par->rune_type == TOK_IRUNE && (*rp == ASC_lo || *rp == ASC_up)) + *rp = (_Rune)ASC_al; + if (inv && *rp != '[') + *rp += 1; +} + + +static void +_lexutfclass(_Parser *par, _Rune *rp) +{ + static struct { const char* c; uint32_t n, r; } cls[] = { + {"{Alpha}", 7, UTF_al}, {"{L&}", 4, UTF_lc}, + {"{Digit}", 7, UTF_nd}, {"{Nd}", 4, UTF_nd}, + {"{Lower}", 7, UTF_ll}, {"{Ll}", 4, UTF_ll}, + {"{Upper}", 7, UTF_lu}, {"{Lu}", 4, UTF_lu}, + {"{Cntrl}", 7, UTF_cc}, {"{Cc}", 4, UTF_cc}, + {"{Alnum}", 7, UTF_an}, {"{Blank}", 7, UTF_bl}, + {"{Space}", 7, UTF_sp}, {"{Word}", 6, UTF_wr}, + {"{XDigit}", 8, ASC_xd}, + {"{Lt}", 4, UTF_lt}, {"{Nl}", 4, UTF_nl}, + {"{Pc}", 4, UTF_pc}, {"{Pd}", 4, UTF_pd}, + {"{Pf}", 4, UTF_pf}, {"{Pi}", 4, UTF_pi}, + {"{Zl}", 4, UTF_zl}, {"{Zp}", 4, UTF_zp}, + {"{Zs}", 4, UTF_zs}, {"{Sc}", 4, UTF_sc}, + {"{Arabic}", 8, UTF_arabic}, + {"{Bengali}", 9, UTF_bengali}, + {"{Cyrillic}", 10, UTF_cyrillic}, + {"{Devanagari}", 12, UTF_devanagari}, + {"{Georgian}", 10, UTF_georgian}, + {"{Greek}", 7, UTF_greek}, + {"{Han}", 5, UTF_han}, + {"{Hiragana}", 10, UTF_hiragana}, + {"{Katakana}", 10, UTF_katakana}, + {"{Latin}", 7, UTF_latin}, + {"{Thai}", 6, UTF_thai}, + }; + unsigned inv = (*rp == 'P'); + for (unsigned i = 0; i < (sizeof cls/sizeof *cls); ++i) { + if (strncmp(par->exprp, cls[i].c, (size_t)cls[i].n) == 0) { + if (par->rune_type == TOK_IRUNE && (cls[i].r == UTF_ll || cls[i].r == UTF_lu)) + *rp = (_Rune)(UTF_lc + inv); + else + *rp = (_Rune)(cls[i].r + inv); + 
par->exprp += cls[i].n; + break; + } + } +} + +#define CASE_RUNE_MAPPINGS(rune) \ + case 't': rune = '\t'; break; \ + case 'n': rune = '\n'; break; \ + case 'r': rune = '\r'; break; \ + case 'v': rune = '\v'; break; \ + case 'f': rune = '\f'; break; \ + case 'a': rune = '\a'; break; \ + case 'd': rune = UTF_nd; break; \ + case 'D': rune = UTF_ND; break; \ + case 's': rune = UTF_sp; break; \ + case 'S': rune = UTF_SP; break; \ + case 'w': rune = UTF_wr; break; \ + case 'W': rune = UTF_WR; break + + +static _Token +_lex(_Parser *par) +{ + bool quoted = _nextc(par, &par->yyrune); + + if (quoted) { + if (par->litmode) + return par->rune_type; + + switch (par->yyrune) { + CASE_RUNE_MAPPINGS(par->yyrune); + case 'b': return TOK_WBOUND; + case 'B': return TOK_NWBOUND; + case 'A': return TOK_BOS; + case 'z': return TOK_EOS; + case 'Z': return TOK_EOZ; + case 'p': case 'P': + _lexutfclass(par, &par->yyrune); + break; + } + return par->rune_type; + } + + switch (par->yyrune) { + case 0 : return TOK_END; + case '*': return TOK_STAR; + case '?': return TOK_QUEST; + case '+': return TOK_PLUS; + case '|': return TOK_OR; + case '^': return TOK_BOL; + case '$': return TOK_EOL; + case '.': return par->dot_type; + case '[': return _bldcclass(par); + case '(': + if (par->exprp[0] == '?') { /* override global flags */ + for (int k = 1, enable = 1; ; ++k) switch (par->exprp[k]) { + case 0 : par->exprp += k; return TOK_END; + case ')': par->exprp += k + 1; + return TOK_CASED + (par->rune_type == TOK_IRUNE); + case '-': enable = 0; break; + case 's': par->dot_type = TOK_ANY + enable; break; + case 'i': par->rune_type = TOK_RUNE + enable; break; + default: _rcerror(par, CREG_UNKNOWNOPERATOR); return 0; + } + } + return TOK_LBRA; + case ')': return TOK_RBRA; + } + return par->rune_type; +} + + +static _Token +_bldcclass(_Parser *par) +{ + _Token type; + _Rune r[_NCCRUNE]; + _Rune *p, *ep, *np; + _Rune rune; + int quoted; + + /* we have already seen the '[' */ + type = TOK_CCLASS; + 
par->yyclassp = _newclass(par); + + /* look ahead for negation */ + /* SPECIAL CASE!!! negated classes don't match \n */ + ep = r; + quoted = _nextc(par, &rune); + if (!quoted && rune == '^') { + type = TOK_NCCLASS; + quoted = _nextc(par, &rune); + ep[0] = ep[1] = '\n'; + ep += 2; + } + + /* parse class into a set of spans */ + for (; ep < &r[_NCCRUNE]; quoted = _nextc(par, &rune)) { + if (rune == 0) { + _rcerror(par, CREG_MALFORMEDCHARACTERCLASS); + return 0; + } + if (!quoted) { + if (rune == ']') + break; + if (rune == '-') { + if (ep != r && *par->exprp != ']') { + quoted = _nextc(par, &rune); + if (rune == 0) { + _rcerror(par, CREG_MALFORMEDCHARACTERCLASS); + return 0; + } + ep[-1] = par->rune_type == TOK_IRUNE ? utf8_casefold(rune) : rune; + continue; + } + } + if (rune == '[' && *par->exprp == ':') + _lexasciiclass(par, &rune); + } else switch (rune) { + CASE_RUNE_MAPPINGS(rune); + case 'p': case 'P': + _lexutfclass(par, &rune); + break; + } + ep[0] = ep[1] = par->rune_type == TOK_IRUNE ? utf8_casefold(rune) : rune; + ep += 2; + } + + /* sort on span start */ + for (p = r; p < ep; p += 2) + for (np = p; np < ep; np += 2) + if (*np < *p) { + rune = np[0]; np[0] = p[0]; p[0] = rune; + rune = np[1]; np[1] = p[1]; p[1] = rune; + } + + /* merge spans */ + np = par->yyclassp->spans; + p = r; + if (r == ep) + par->yyclassp->end = np; + else { + np[0] = *p++; + np[1] = *p++; + for (; p < ep; p += 2) + if (p[0] <= np[1]) { + if (p[1] > np[1]) + np[1] = p[1]; + } else { + np += 2; + np[0] = p[0]; + np[1] = p[1]; + } + par->yyclassp->end = np+2; + } + + return type; +} + + +static _Reprog* +_regcomp1(_Reprog *pp, _Parser *par, const char *s, int cflags) +{ + _Token token; + + /* get memory for the program. estimated max usage */ + isize instcap = 5 + 6*c_strlen(s); + isize new_allocsize = c_sizeof(_Reprog) + instcap*c_sizeof(_Reinst); + pp = (_Reprog *)c_realloc(pp, pp ? 
pp->allocsize : 0, new_allocsize); + if (pp == NULL) { + par->error = CREG_OUTOFMEMORY; + return NULL; + } + pp->allocsize = new_allocsize; + pp->flags.icase = (cflags & CREG_ICASE) != 0; + pp->flags.dotall = (cflags & CREG_DOTALL) != 0; + par->instcap = instcap; + par->freep = pp->firstinst; + par->classp = pp->cclass; + par->error = 0; + + if (setjmp(par->regkaboom)) + goto out; + + /* go compile the sucker */ + par->flags = pp->flags; + par->rune_type = pp->flags.icase ? TOK_IRUNE : TOK_RUNE; + par->dot_type = pp->flags.dotall ? TOK_ANYNL : TOK_ANY; + par->litmode = false; + par->exprp = s; + par->nclass = 0; + par->nbra = 0; + par->atorp = par->atorstack; + par->andp = par->andstack; + par->subidp = par->subidstack; + par->lastwasand = false; + par->cursubid = 0; + + /* Start with a low priority operator to prime parser */ + _pushator(par, TOK_START-1); + while ((token = _lex(par)) != TOK_END) { + if ((token & TOK_MASK) == TOK_OPERATOR) + _operator(par, token); + else + _operand(par, token); + } + + /* Close with a low priority operator */ + _evaluntil(par, TOK_START); + + /* Force TOK_END */ + _operand(par, TOK_END); + _evaluntil(par, TOK_START); + + if (par->nbra) + _rcerror(par, CREG_UNMATCHEDLEFTPARENTHESIS); + --par->andp; /* points to first and only _operand */ + pp->startinst = par->andp->first; + + pp = _optimize(par, pp); + pp->nsubids = par->cursubid; +out: + if (par->error) { + c_free(pp, pp->allocsize); + pp = NULL; + } + return pp; +} + +#if defined __clang__ + #pragma clang diagnostic ignored "-Wimplicit-fallthrough" +#elif defined __GNUC__ + #pragma GCC diagnostic ignored "-Wimplicit-fallthrough" +#endif + +static int +_runematch(_Rune s, _Rune r) +{ + int inv = 0, n; + switch (s) { + case ASC_D: inv = 1; case ASC_d: return inv ^ (isdigit((int)r) != 0); + case ASC_S: inv = 1; case ASC_s: return inv ^ (isspace((int)r) != 0); + case ASC_W: inv = 1; case ASC_w: return inv ^ ((isalnum((int)r) != 0) | (r == '_')); + case ASC_AL: inv = 1; case ASC_al: 
return inv ^ (isalpha((int)r) != 0); + case ASC_AN: inv = 1; case ASC_an: return inv ^ (isalnum((int)r) != 0); + case ASC_AS: return (r >= 128); case ASC_as: return (r < 128); + case ASC_BL: inv = 1; case ASC_bl: return inv ^ ((r == ' ') | (r == '\t')); + case ASC_CT: inv = 1; case ASC_ct: return inv ^ (iscntrl((int)r) != 0); + case ASC_GR: inv = 1; case ASC_gr: return inv ^ (isgraph((int)r) != 0); + case ASC_PR: inv = 1; case ASC_pr: return inv ^ (isprint((int)r) != 0); + case ASC_PU: inv = 1; case ASC_pu: return inv ^ (ispunct((int)r) != 0); + case ASC_LO: inv = 1; case ASC_lo: return inv ^ (islower((int)r) != 0); + case ASC_UP: inv = 1; case ASC_up: return inv ^ (isupper((int)r) != 0); + case ASC_XD: inv = 1; case ASC_xd: return inv ^ (isxdigit((int)r) != 0); + case UTF_AN: inv = 1; case UTF_an: return inv ^ (int)utf8_isalnum(r); + case UTF_BL: inv = 1; case UTF_bl: return inv ^ (int)utf8_isblank(r); + case UTF_SP: inv = 1; case UTF_sp: return inv ^ (int)utf8_isspace(r); + case UTF_LL: inv = 1; case UTF_ll: return inv ^ (int)utf8_islower(r); + case UTF_LU: inv = 1; case UTF_lu: return inv ^ (int)utf8_isupper(r); + case UTF_LC: inv = 1; case UTF_lc: return inv ^ (int)utf8_iscased(r); + case UTF_AL: inv = 1; case UTF_al: return inv ^ (int)utf8_isalpha(r); + case UTF_WR: inv = 1; case UTF_wr: return inv ^ (int)utf8_isword(r); + case UTF_cc: case UTF_CC: + case UTF_lt: case UTF_LT: + case UTF_nd: case UTF_ND: + case UTF_nl: case UTF_NL: + case UTF_pc: case UTF_PC: + case UTF_pd: case UTF_PD: + case UTF_pf: case UTF_PF: + case UTF_pi: case UTF_PI: + case UTF_sc: case UTF_SC: + case UTF_zl: case UTF_ZL: + case UTF_zp: case UTF_ZP: + case UTF_zs: case UTF_ZS: + case UTF_arabic: case UTF_ARABIC: + case UTF_bengali: case UTF_BENGALI: + case UTF_cyrillic: case UTF_CYRILLIC: + case UTF_devanagari: case UTF_DEVANAGARI: + case UTF_georgian: case UTF_GEORGIAN: + case UTF_greek: case UTF_GREEK: + case UTF_han: case UTF_HAN: + case UTF_hiragana: case UTF_HIRAGANA: + case 
UTF_katakana: case UTF_KATAKANA: + case UTF_latin: case UTF_LATIN: + case UTF_thai: case UTF_THAI: + n = (int)s - UTF_GRP; + inv = n & 1; + return inv ^ (int)utf8_isgroup(n / 2, r); + } + return s == r; +} + +/* + * return 0 if no match + * >0 if a match + * <0 if we ran out of _relist space + */ +static int +_regexec1(const _Reprog *progp, /* program to run */ + const char *bol, /* string to run machine on */ + _Resub *mp, /* subexpression elements */ + int ms, /* number of elements at mp */ + _Reljunk *j, + int mflags +) +{ + int flag=0; + _Reinst *inst; + _Relist *tlp; + _Relist *tl, *nl; /* This list, next list */ + _Relist *tle, *nle; /* Ends of this and next list */ + const char *s, *p; + _Rune r, *rp, *ep; + int n, checkstart, match = 0; + int i; + + bool icase = progp->flags.icase; + checkstart = j->starttype; + if (mp) + for (i=0; irelist[0][0].inst = NULL; + j->relist[1][0].inst = NULL; + + /* Execute machine once for each character, including terminal NUL */ + s = j->starts; + do { + /* fast check for first char */ + if (checkstart) { + switch (j->starttype) { + case TOK_IRUNE: + p = utfruneicase(s, j->startchar); + goto next1; + case TOK_RUNE: + p = utfrune(s, j->startchar); + next1: + if (p == NULL || s == j->eol) + return match; + s = p; + break; + case TOK_BOL: + if (s == bol) + break; + p = utfrune(s, '\n'); + if (p == NULL || s == j->eol) + return match; + s = p+1; + break; + } + } + r = *(uint8_t*)s; + n = r < 0x80 ? 
1 : chartorune(&r, s); + + /* switch run lists */ + tl = j->relist[flag]; + tle = j->reliste[flag]; + nl = j->relist[flag^=1]; + nle = j->reliste[flag]; + nl->inst = NULL; + + /* Add first instruction to current list */ + if (match == 0) + _renewemptythread(tl, progp->startinst, ms, s); + + /* Execute machine until current list is empty */ + for (tlp=tl; tlp->inst; tlp++) { /* assignment = */ + for (inst = tlp->inst; ; inst = inst->l.next) { + int ok = false; + + switch (inst->type) { + case TOK_IRUNE: + r = utf8_casefold(r); /* FALLTHRU */ + case TOK_RUNE: + ok = _runematch(inst->r.rune, r); + break; + case TOK_CASED: case TOK_ICASE: + icase = inst->type == TOK_ICASE; + continue; + case TOK_LBRA: + tlp->se.m[inst->r.subid].buf = s; + continue; + case TOK_RBRA: + tlp->se.m[inst->r.subid].size = (s - tlp->se.m[inst->r.subid].buf); + continue; + case TOK_ANY: + ok = (r != '\n'); + break; + case TOK_ANYNL: + ok = true; + break; + case TOK_BOL: + if (s == bol || s[-1] == '\n') continue; + break; + case TOK_BOS: + if (s == bol) continue; + break; + case TOK_EOL: + if (r == '\n') continue; /* FALLTHRU */ + case TOK_EOS: + if (s == j->eol || r == 0) continue; + break; + case TOK_EOZ: + if (s == j->eol || r == 0 || (r == '\n' && s[1] == 0)) continue; + break; + case TOK_NWBOUND: + ok = true; /* FALLTHRU */ + case TOK_WBOUND: + if (ok ^ (r == 0 || s == bol || s == j->eol || + (utf8_isword(utf8_peek_at(s, -1)) ^ + utf8_isword(utf8_peek(s))))) + continue; + break; + case TOK_NCCLASS: + ok = true; /* FALLTHRU */ + case TOK_CCLASS: + ep = inst->r.classp->end; + if (icase) r = utf8_casefold(r); + for (rp = inst->r.classp->spans; rp < ep; rp += 2) { + if ((r >= rp[0] && r <= rp[1]) || (rp[0] == rp[1] && _runematch(rp[0], r))) + break; + } + ok ^= (rp < ep); + break; + case TOK_OR: + /* evaluate right choice later */ + if (_renewthread(tlp, inst->r.right, ms, &tlp->se) == tle) + return -1; + /* efficiency: advance and re-evaluate */ + continue; + case TOK_END: /* Match! 
*/ + match = !(mflags & CREG_FULLMATCH) || + ((s == j->eol || r == 0 || r == '\n') && + (tlp->se.m[0].buf == bol || tlp->se.m[0].buf[-1] == '\n')); + tlp->se.m[0].size = (s - tlp->se.m[0].buf); + if (mp != NULL) + _renewmatch(mp, ms, &tlp->se, progp->nsubids); + break; + } + + if (ok && _renewthread(nl, inst->l.next, ms, &tlp->se) == nle) + return -1; + break; + } + } + if (s == j->eol) + break; + checkstart = j->starttype && nl->inst==NULL; + s += n; + } while (r); + return match; +} + + +static int +_regexec2(const _Reprog *progp, /* program to run */ + const char *bol, /* string to run machine on */ + _Resub *mp, /* subexpression elements */ + int ms, /* number of elements at mp */ + _Reljunk *j, + int mflags +) +{ + int rv; + _Relist *relists; + + /* mark space */ + isize sz = 2 * _BIGLISTSIZE*c_sizeof(_Relist); + relists = (_Relist *)c_malloc(sz); + if (relists == NULL) + return -1; + + j->relist[0] = relists; + j->relist[1] = relists + _BIGLISTSIZE; + j->reliste[0] = relists + _BIGLISTSIZE - 2; + j->reliste[1] = relists + 2*_BIGLISTSIZE - 2; + + rv = _regexec1(progp, bol, mp, ms, j, mflags); + c_free(relists, sz); + return rv; +} + +static int +_regexec(const _Reprog *progp, /* program to run */ + const char *bol, /* string to run machine on */ + const char *bol_end,/* end of string (or NULL for null-termination) */ + int ms, /* number of elements at mp */ + _Resub mp[], /* subexpression elements */ + int mflags) +{ + _Reljunk j; + _Relist relist0[_LISTSIZE], relist1[_LISTSIZE]; + int rv; + + /* + * use user-specified starting/ending location if specified + */ + j.starts = bol; + j.eol = bol_end; + + if ((mflags & CREG_NEXT) && mp[0].buf) + j.starts = mp[0].buf + mp[0].size; + if (j.eol && j.starts > j.eol) + return 0; // no match + + j.starttype = 0; + j.startchar = 0; + int rune_type = progp->flags.icase ? 
TOK_IRUNE : TOK_RUNE; + if (progp->startinst->type == rune_type && progp->startinst->r.rune < 128) { + j.starttype = rune_type; + j.startchar = progp->startinst->r.rune; + } + if (progp->startinst->type == TOK_BOL) + j.starttype = TOK_BOL; + + /* mark space */ + j.relist[0] = relist0; + j.relist[1] = relist1; + j.reliste[0] = relist0 + _LISTSIZE - 2; + j.reliste[1] = relist1 + _LISTSIZE - 2; + + rv = _regexec1(progp, bol, mp, ms, &j, mflags); + if (rv >= 0) + return rv; + rv = _regexec2(progp, bol, mp, ms, &j, mflags); + return rv; +} + + +static void +_build_substitution(const char* replace, int nmatch, const csview match[], + bool(*transform)(int, csview, cstr*), cstr* subst) { + cstr_buf mbuf = cstr_getbuf(subst); + isize len = 0, cap = mbuf.cap; + char* dst = mbuf.data; + cstr tr_str = {0}; + + while (*replace != '\0') { + if (*replace == '$') { + int arg = replace[1]; + if (arg >= '0' && arg <= '9') { + arg -= '0'; + if (replace[2] >= '0' && replace[2] <= '9' && replace[3] == ';') + { arg = arg*10 + (replace[2] - '0'); replace += 2; } + replace += 2; + if (arg < nmatch) { + csview tr_sv = transform && transform(arg, match[arg], &tr_str) + ? cstr_sv(&tr_str) : match[arg]; + if (len + tr_sv.size > cap) + dst = cstr_reserve(subst, cap += cap/2 + tr_sv.size); + for (int i = 0; i < tr_sv.size; ++i) + dst[len++] = tr_sv.buf[i]; + } + continue; + } + if (arg == '$') // allow e.g. "$$3" => "$3" + ++replace; + } + if (len == cap) + dst = cstr_reserve(subst, cap += cap/2 + 4); + dst[len++] = *replace++; + } + cstr_drop(&tr_str); + _cstr_set_size(subst, len); +} + + +/* --------------------------------------------------------------- + * API functions + */ + +int cregex_compile_pro(cregex *self, const char* pattern, int cflags) { + _Parser par; + self->prog = _regcomp1(self->prog, &par, pattern, cflags); + return self->error = par.error; +} + +int cregex_captures(const cregex* self) { + return self->prog ? 
self->prog->nsubids : 0; +} + +void cregex_drop(cregex* self) { + c_free(self->prog, self->prog->allocsize); +} + +int cregex_match_opt(const cregex* re, const char* input, const char* input_end, struct cregex_match_opt opt) { + int res = _regexec(re->prog, input, input_end, cregex_captures(re) + 1, opt.match, opt.flags); + switch (res) { + case 1: return CREG_OK; + case 0: return CREG_NOMATCH; + default: return CREG_MATCHERROR; + } +} + +int cregex_match_aio_opt(const char* pattern, const char* input, const char* input_end, struct cregex_match_opt opt) { + cregex re = cregex_make(pattern, opt.flags); + if (re.error != CREG_OK) return re.error; + int res = cregex_match_opt(&re, input, input_end, opt); + cregex_drop(&re); + return res; +} + +cstr cregex_replace_opt(const cregex* re, const char* input, const char* input_end, const char* replace, struct cregex_replace_opt opt) { + cstr out = {0}; + cstr subst = {0}; + csview match[CREG_MAX_CAPTURES]; + int nmatch = cregex_captures(re) + 1; + bool copy = !(opt.flags & CREG_STRIP); + struct cregex_match_opt mopt = {match}; + opt.count += (opt.count != 0); + + while (--opt.count && cregex_match_opt(re, input, input_end, mopt) == CREG_OK) { + _build_substitution(replace, nmatch, match, opt.xform, &subst); + const isize mpos = (match[0].buf - input); + if (copy & (mpos > 0)) + cstr_append_n(&out, input, mpos); + cstr_append_s(&out, subst); + input = match[0].buf + match[0].size; + } + if (copy) { + isize len = input_end ? 
input_end - input : c_strlen(input); + cstr_append_sv(&out, c_sv(input, len)); + } + cstr_drop(&subst); + return out; +} + +cstr cregex_replace_aio_opt(const char* pattern, const char* input, const char* input_end, const char* replace, struct cregex_replace_opt opt) { + cregex re = {0}; + if (cregex_compile_pro(&re, pattern, opt.flags) != CREG_OK) + assert(0); + cstr out = cregex_replace_opt(&re, input, input_end, replace, opt); + cregex_drop(&re); + return out; +} + +#endif // STC_CREGEX_PRV_C_INCLUDED diff --git a/stc/priv/cstr_prv.c b/stc/priv/cstr_prv.c new file mode 100644 index 0000000..c01218e --- /dev/null +++ b/stc/priv/cstr_prv.c @@ -0,0 +1,291 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +// ------------------- STC_CSTR_CORE -------------------- +#if !defined STC_CSTR_CORE_C_INCLUDED && \ + (defined i_implement || defined STC_CSTR_CORE) +#define STC_CSTR_CORE_C_INCLUDED + +void cstr_drop(const cstr* self) { + if (cstr_is_long(self)) + cstr_l_drop(self); +} + +cstr* cstr_take(cstr* self, const cstr s) { + if (cstr_is_long(self) && self->lon.data != s.lon.data) + cstr_l_drop(self); + *self = s; + return self; +} + +size_t cstr_hash(const cstr *self) { + csview sv = cstr_sv(self); + return c_hash_str(sv.buf); +} + +isize cstr_find_sv(const cstr* self, csview search) { + csview sv = cstr_sv(self); + char* res = c_strnstrn(sv.buf, sv.size, search.buf, search.size); + return res ? (res - sv.buf) : c_NPOS; +} + +char* _cstr_internal_move(cstr* self, const isize pos1, const isize pos2) { + cstr_buf b = cstr_getbuf(self); + if (pos1 != pos2) { + const isize newlen = (b.size + pos2 - pos1); + if (newlen > b.cap) + b.data = cstr_reserve(self, b.size*3/2 + pos2 - pos1); + c_memmove(&b.data[pos2], &b.data[pos1], b.size - pos1); + _cstr_set_size(self, newlen); + } + return b.data; +} + +char* _cstr_init(cstr* self, const isize len, const isize cap) { + if (cap > cstr_s_cap) { + self->lon.data = (char *)c_malloc(cap + 1); + cstr_l_set_size(self, len); + cstr_l_set_cap(self, cap); + return self->lon.data; + } + cstr_s_set_size(self, len); + return self->sml.data; +} + +char* cstr_reserve(cstr* self, const isize cap) { + if (cstr_is_long(self)) { + if (cap > cstr_l_cap(self)) { + self->lon.data = (char *)c_realloc(self->lon.data, cstr_l_cap(self) + 1, cap + 1); + cstr_l_set_cap(self, cap); + } + return self->lon.data; + } + /* from short to long: */ + if (cap > cstr_s_cap) { + char* data = (char *)c_malloc(cap + 1); + const isize len = cstr_s_size(self); + /* copy full short buffer to emulate realloc() */ + c_memcpy(data, self->sml.data, c_sizeof self->sml); + self->lon.data = data; + self->lon.size = (size_t)len; + cstr_l_set_cap(self, cap); + return data; + 
} + return self->sml.data; +} + +char* cstr_resize(cstr* self, const isize size, const char value) { + cstr_buf b = cstr_getbuf(self); + if (size > b.size) { + if (size > b.cap && (b.data = cstr_reserve(self, size)) == NULL) + return NULL; + c_memset(b.data + b.size, value, size - b.size); + } + _cstr_set_size(self, size); + return b.data; +} + +isize cstr_find_at(const cstr* self, const isize pos, const char* search) { + csview sv = cstr_sv(self); + if (pos > sv.size) return c_NPOS; + const char* res = strstr((char*)sv.buf + pos, search); + return res ? (res - sv.buf) : c_NPOS; +} + +char* cstr_assign_n(cstr* self, const char* str, const isize len) { + char* d = cstr_reserve(self, len); + if (d) { _cstr_set_size(self, len); c_memmove(d, str, len); } + return d; +} + +char* cstr_append_n(cstr* self, const char* str, const isize len) { + cstr_buf b = cstr_getbuf(self); + if (b.size + len > b.cap) { + const size_t off = (size_t)(str - b.data); + b.data = cstr_reserve(self, b.size*3/2 + len); + if (b.data == NULL) return NULL; + if (off <= (size_t)b.size) str = b.data + off; /* handle self append */ + } + c_memcpy(b.data + b.size, str, len); + _cstr_set_size(self, b.size + len); + return b.data; +} + +cstr cstr_from_replace(csview in, csview search, csview repl, int32_t count) { + cstr out = cstr_init(); + isize from = 0; char* res; + if (count == 0) count = INT32_MAX; + if (search.size) + while (count-- && (res = c_strnstrn(in.buf + from, in.size - from, search.buf, search.size))) { + const isize pos = (res - in.buf); + cstr_append_n(&out, in.buf + from, pos - from); + cstr_append_n(&out, repl.buf, repl.size); + from = pos + search.size; + } + cstr_append_n(&out, in.buf + from, in.size - from); + return out; +} + +void cstr_erase(cstr* self, const isize pos, isize len) { + cstr_buf b = cstr_getbuf(self); + if (len > b.size - pos) len = b.size - pos; + c_memmove(&b.data[pos], &b.data[pos + len], b.size - (pos + len)); + _cstr_set_size(self, b.size - len); +} + +void 
cstr_shrink_to_fit(cstr* self) { + cstr_buf b = cstr_getbuf(self); + if (b.size == b.cap) + return; + if (b.size > cstr_s_cap) { + self->lon.data = (char *)c_realloc(self->lon.data, cstr_l_cap(self) + 1, b.size + 1); + cstr_l_set_cap(self, b.size); + } else if (b.cap > cstr_s_cap) { + c_memcpy(self->sml.data, b.data, b.size + 1); + cstr_s_set_size(self, b.size); + c_free(b.data, b.cap + 1); + } +} +#endif // STC_CSTR_CORE_C_INCLUDED + +// ------------------- STC_CSTR_IO -------------------- +#if !defined STC_CSTR_IO_C_INCLUDED && \ + (defined i_import || defined STC_CSTR_IO) +#define STC_CSTR_IO_C_INCLUDED + +char* cstr_append_uninit(cstr *self, isize len) { + cstr_buf b = cstr_getbuf(self); + if (b.size + len > b.cap && (b.data = cstr_reserve(self, b.size*3/2 + len)) == NULL) + return NULL; + _cstr_set_size(self, b.size + len); + return b.data + b.size; +} + +bool cstr_getdelim(cstr *self, const int delim, FILE *fp) { + int c = fgetc(fp); + if (c == EOF) + return false; + isize pos = 0; + cstr_buf b = cstr_getbuf(self); + for (;;) { + if (c == delim || c == EOF) { + _cstr_set_size(self, pos); + return true; + } + if (pos == b.cap) { + _cstr_set_size(self, pos); + char* data = cstr_reserve(self, (b.cap = b.cap*3/2 + 16)); + b.data = data; + } + b.data[pos++] = (char) c; + c = fgetc(fp); + } +} + +isize cstr_vfmt(cstr* self, isize start, const char* fmt, va_list args) { + va_list args2; + va_copy(args2, args); + const int n = vsnprintf(NULL, 0ULL, fmt, args); + vsnprintf(cstr_reserve(self, start + n) + start, (size_t)n+1, fmt, args2); + va_end(args2); + _cstr_set_size(self, start + n); + return n; +} + +cstr cstr_from_fmt(const char* fmt, ...) { + cstr s = cstr_init(); + va_list args; + va_start(args, fmt); + cstr_vfmt(&s, 0, fmt, args); + va_end(args); + return s; +} + +isize cstr_append_fmt(cstr* self, const char* fmt, ...) 
{ + va_list args; + va_start(args, fmt); + const isize n = cstr_vfmt(self, cstr_size(self), fmt, args); + va_end(args); + return n; +} + +/* NB! self-data in args is UB */ +isize cstr_printf(cstr* self, const char* fmt, ...) { + va_list args; + va_start(args, fmt); + const isize n = cstr_vfmt(self, 0, fmt, args); + va_end(args); + return n; +} +#endif // STC_CSTR_IO_C_INCLUDED + +// ------------------- STC_CSTR_UTF8 -------------------- +#if !defined STC_CSTR_UTF8_C_INCLUDED && \ + (defined i_import || defined STC_CSTR_UTF8 || defined STC_UTF8_PRV_C_INCLUDED) +#define STC_CSTR_UTF8_C_INCLUDED + +#include + +void cstr_u8_erase(cstr* self, const isize u8pos, const isize u8len) { + csview b = cstr_sv(self); + csview span = utf8_subview(b.buf, u8pos, u8len); + c_memmove((void *)&span.buf[0], &span.buf[span.size], b.size - span.size - (span.buf - b.buf)); + _cstr_set_size(self, b.size - span.size); +} + +bool cstr_u8_valid(const cstr* self) + { return utf8_valid(cstr_str(self)); } + +static int toLower(int c) + { return c >= 'A' && c <= 'Z' ? c + 32 : c; } +static int toUpper(int c) + { return c >= 'a' && c <= 'z' ? 
c - 32 : c; } +static struct { + int (*conv_asc)(int); + uint32_t (*conv_utf)(uint32_t); +} +fn_tocase[] = {{toLower, utf8_casefold}, + {toLower, utf8_tolower}, + {toUpper, utf8_toupper}}; + +cstr cstr_tocase_sv(csview sv, int k) { + cstr out = {0}; + char *buf = cstr_reserve(&out, sv.size*3/2); + isize sz = 0; + utf8_decode_t d = {.state=0}; + const char* end = sv.buf + sv.size; + + while (sv.buf < end) { + sv.buf += utf8_decode_codepoint(&d, sv.buf, end); + + if (d.codep < 0x80) + buf[sz++] = (char)fn_tocase[k].conv_asc((int)d.codep); + else { + uint32_t cp = fn_tocase[k].conv_utf(d.codep); + sz += utf8_encode(buf + sz, cp); + } + } + _cstr_set_size(&out, sz); + cstr_shrink_to_fit(&out); + return out; +} +#endif // i_import STC_CSTR_UTF8_C_INCLUDED diff --git a/stc/priv/cstr_prv.h b/stc/priv/cstr_prv.h new file mode 100644 index 0000000..40f58c9 --- /dev/null +++ b/stc/priv/cstr_prv.h @@ -0,0 +1,420 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +// IWYU pragma: private, include "stc/cstr.h" +#ifndef STC_CSTR_PRV_H_INCLUDED +#define STC_CSTR_PRV_H_INCLUDED + +#include /* FILE*, vsnprintf */ +#include /* malloc */ +#include /* size_t */ +#include /* cstr_vfmt() */ +/**************************** PRIVATE API **********************************/ + +#if defined __GNUC__ && !defined __clang__ + // linkage.h already does diagnostic push + // Warns wrongfully on -O3 on cstr_assign(&str, "literal longer than 23 ..."); + #pragma GCC diagnostic ignored "-Warray-bounds" +#endif + +enum { cstr_s_cap = sizeof(cstr_buf) - 2 }; +#define cstr_s_size(s) ((isize)(s)->sml.size) +#define cstr_s_set_size(s, len) ((s)->sml.data[(s)->sml.size = (uint8_t)(len)] = 0) +#define cstr_s_data(s) (s)->sml.data + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + #define byte_rotl_(x, b) ((x) << (b)*8 | (x) >> (sizeof(x) - (b))*8) + #define cstr_l_cap(s) (isize)(~byte_rotl_((s)->lon.ncap, sizeof((s)->lon.ncap) - 1)) + #define cstr_l_set_cap(s, cap) ((s)->lon.ncap = ~byte_rotl_((uintptr_t)(cap), 1)) +#else + #define cstr_l_cap(s) (isize)(~(s)->lon.ncap) + #define cstr_l_set_cap(s, cap) ((s)->lon.ncap = ~(uintptr_t)(cap)) +#endif +#define cstr_l_size(s) (isize)((s)->lon.size) +#define cstr_l_set_size(s, len) ((s)->lon.data[(s)->lon.size = (uintptr_t)(len)] = 0) +#define cstr_l_data(s) (s)->lon.data +#define cstr_l_drop(s) c_free((s)->lon.data, cstr_l_cap(s) + 1) + +#define cstr_is_long(s) ((s)->sml.size >= 128) +extern char* _cstr_init(cstr* self, isize len, isize cap); +extern char* _cstr_internal_move(cstr* self, isize pos1, isize pos2); + +/**************************** PUBLIC API **********************************/ + +#define cstr_init() 
(c_literal(cstr){0}) +#define cstr_lit(literal) cstr_from_n(literal, c_litstrlen(literal)) + +extern cstr cstr_from_replace(csview sv, csview search, csview repl, int32_t count); +extern cstr cstr_from_fmt(const char* fmt, ...) c_GNUATTR(format(printf, 1, 2)); + +extern void cstr_drop(const cstr* self); +extern cstr* cstr_take(cstr* self, const cstr s); +extern char* cstr_reserve(cstr* self, isize cap); +extern void cstr_shrink_to_fit(cstr* self); +extern char* cstr_resize(cstr* self, isize size, char value); +extern isize cstr_find_at(const cstr* self, isize pos, const char* search); +extern isize cstr_find_sv(const cstr* self, csview search); +extern char* cstr_assign_n(cstr* self, const char* str, isize len); +extern char* cstr_append_n(cstr* self, const char* str, isize len); +extern isize cstr_append_fmt(cstr* self, const char* fmt, ...) c_GNUATTR(format(printf, 2, 3)); +extern char* cstr_append_uninit(cstr *self, isize len); + +extern bool cstr_getdelim(cstr *self, int delim, FILE *fp); +extern void cstr_erase(cstr* self, isize pos, isize len); +extern isize cstr_printf(cstr* self, const char* fmt, ...) c_GNUATTR(format(printf, 2, 3)); +extern isize cstr_vfmt(cstr* self, isize start, const char* fmt, va_list args); +extern size_t cstr_hash(const cstr *self); +extern bool cstr_u8_valid(const cstr* self); +extern void cstr_u8_erase(cstr* self, isize u8pos, isize u8len); + +STC_INLINE cstr_buf cstr_getbuf(cstr* s) { + return cstr_is_long(s) ? c_literal(cstr_buf){s->lon.data, cstr_l_size(s), cstr_l_cap(s)} + : c_literal(cstr_buf){s->sml.data, cstr_s_size(s), cstr_s_cap}; +} +STC_INLINE zsview cstr_zv(const cstr* s) { + return cstr_is_long(s) ? c_literal(zsview){s->lon.data, cstr_l_size(s)} + : c_literal(zsview){s->sml.data, cstr_s_size(s)}; +} +STC_INLINE csview cstr_sv(const cstr* s) { + return cstr_is_long(s) ? 
c_literal(csview){s->lon.data, cstr_l_size(s)} + : c_literal(csview){s->sml.data, cstr_s_size(s)}; +} + +STC_INLINE cstr cstr_from_n(const char* str, const isize len) { + cstr s; + c_memcpy(_cstr_init(&s, len, len), str, len); + return s; +} + +STC_INLINE cstr cstr_from(const char* str) + { return cstr_from_n(str, c_strlen(str)); } + +STC_INLINE cstr cstr_from_sv(csview sv) + { return cstr_from_n(sv.buf, sv.size); } + +STC_INLINE cstr cstr_from_zv(zsview zv) + { return cstr_from_n(zv.str, zv.size); } + +STC_INLINE cstr cstr_with_size(const isize size, const char value) { + cstr s; + c_memset(_cstr_init(&s, size, size), value, size); + return s; +} + +STC_INLINE cstr cstr_with_capacity(const isize cap) { + cstr s; + _cstr_init(&s, 0, cap); + return s; +} + +STC_INLINE cstr cstr_move(cstr* self) { + cstr tmp = *self; + *self = cstr_init(); + return tmp; +} + +STC_INLINE cstr cstr_clone(cstr s) { + csview sv = cstr_sv(&s); + return cstr_from_n(sv.buf, sv.size); +} + +#define SSO_CALL(s, call) (cstr_is_long(s) ? cstr_l_##call : cstr_s_##call) + +STC_INLINE void _cstr_set_size(cstr* self, isize len) + { SSO_CALL(self, set_size(self, len)); } + +STC_INLINE void cstr_clear(cstr* self) + { _cstr_set_size(self, 0); } + +STC_INLINE char* cstr_data(cstr* self) + { return SSO_CALL(self, data(self)); } + +STC_INLINE const char* cstr_str(const cstr* self) + { return SSO_CALL(self, data(self)); } + +STC_INLINE const char* cstr_toraw(const cstr* self) + { return SSO_CALL(self, data(self)); } + +STC_INLINE isize cstr_size(const cstr* self) + { return SSO_CALL(self, size(self)); } + +STC_INLINE bool cstr_is_empty(const cstr* self) + { return cstr_size(self) == 0; } + +STC_INLINE isize cstr_capacity(const cstr* self) + { return cstr_is_long(self) ? 
cstr_l_cap(self) : cstr_s_cap; } + +STC_INLINE isize cstr_to_index(const cstr* self, cstr_iter it) + { return it.ref - cstr_str(self); } + +STC_INLINE cstr cstr_from_s(cstr s, isize pos, isize len) + { return cstr_from_n(cstr_str(&s) + pos, len); } + +STC_INLINE csview cstr_subview(const cstr* self, isize pos, isize len) { + csview sv = cstr_sv(self); + c_assert(((size_t)pos <= (size_t)sv.size) & (len >= 0)); + if (pos + len > sv.size) len = sv.size - pos; + return c_literal(csview){sv.buf + pos, len}; +} + +STC_INLINE zsview cstr_tail(const cstr* self, isize len) { + c_assert(len >= 0); + csview sv = cstr_sv(self); + if (len > sv.size) len = sv.size; + return c_literal(zsview){&sv.buf[sv.size - len], len}; +} + +// BEGIN utf8 functions ===== + +STC_INLINE cstr cstr_u8_from(const char* str, isize u8pos, isize u8len) + { str = utf8_at(str, u8pos); return cstr_from_n(str, utf8_to_index(str, u8len)); } + +STC_INLINE isize cstr_u8_size(const cstr* self) + { return utf8_count(cstr_str(self)); } + +STC_INLINE isize cstr_u8_to_index(const cstr* self, isize u8pos) + { return utf8_to_index(cstr_str(self), u8pos); } + +STC_INLINE zsview cstr_u8_tail(const cstr* self, isize u8len) { + csview sv = cstr_sv(self); + const char* p = &sv.buf[sv.size]; + while (u8len && p != sv.buf) + u8len -= (*--p & 0xC0) != 0x80; + return c_literal(zsview){p, sv.size - (p - sv.buf)}; +} + +STC_INLINE csview cstr_u8_subview(const cstr* self, isize u8pos, isize u8len) + { return utf8_subview(cstr_str(self), u8pos, u8len); } + +STC_INLINE cstr_iter cstr_u8_at(const cstr* self, isize u8pos) { + csview sv; + sv.buf = utf8_at(cstr_str(self), u8pos); + sv.size = utf8_chr_size(sv.buf); + c_assert(sv.size); + return c_literal(cstr_iter){.chr = sv}; +} + +// utf8 iterator + +STC_INLINE cstr_iter cstr_begin(const cstr* self) { + csview sv = cstr_sv(self); + cstr_iter it = {.chr = {sv.buf, utf8_chr_size(sv.buf)}}; + return it; +} +STC_INLINE cstr_iter cstr_end(const cstr* self) { + (void)self; cstr_iter it 
= {0}; return it; +} +STC_INLINE void cstr_next(cstr_iter* it) { + it->ref += it->chr.size; + it->chr.size = utf8_chr_size(it->ref); + if (*it->ref == '\0') it->ref = NULL; +} + +STC_INLINE cstr_iter cstr_advance(cstr_iter it, isize u8pos) { + it.ref = utf8_offset(it.ref, u8pos); + it.chr.size = utf8_chr_size(it.ref); + if (*it.ref == '\0') it.ref = NULL; + return it; +} + +// utf8 case conversion: requires `#define i_import` before including cstr.h in one TU. +extern cstr cstr_tocase_sv(csview sv, int k); + +STC_INLINE cstr cstr_casefold_sv(csview sv) + { return cstr_tocase_sv(sv, 0); } + +STC_INLINE cstr cstr_tolower_sv(csview sv) + { return cstr_tocase_sv(sv, 1); } + +STC_INLINE cstr cstr_toupper_sv(csview sv) + { return cstr_tocase_sv(sv, 2); } + +STC_INLINE cstr cstr_tolower(const char* str) + { return cstr_tolower_sv(c_sv(str, c_strlen(str))); } + +STC_INLINE cstr cstr_toupper(const char* str) + { return cstr_toupper_sv(c_sv(str, c_strlen(str))); } + +STC_INLINE void cstr_lowercase(cstr* self) + { cstr_take(self, cstr_tolower_sv(cstr_sv(self))); } + +STC_INLINE void cstr_uppercase(cstr* self) + { cstr_take(self, cstr_toupper_sv(cstr_sv(self))); } + +STC_INLINE bool cstr_istarts_with(const cstr* self, const char* sub) { + csview sv = cstr_sv(self); + isize len = c_strlen(sub); + return len <= sv.size && !utf8_icompare((sv.size = len, sv), c_sv(sub, len)); +} + +STC_INLINE bool cstr_iends_with(const cstr* self, const char* sub) { + csview sv = cstr_sv(self); + isize len = c_strlen(sub); + return len <= sv.size && !utf8_icmp(sv.buf + sv.size - len, sub); +} + +STC_INLINE int cstr_icmp(const cstr* s1, const cstr* s2) + { return utf8_icmp(cstr_str(s1), cstr_str(s2)); } + +STC_INLINE bool cstr_ieq(const cstr* s1, const cstr* s2) { + csview x = cstr_sv(s1), y = cstr_sv(s2); + return x.size == y.size && !utf8_icompare(x, y); +} + +STC_INLINE bool cstr_iequals(const cstr* self, const char* str) + { return !utf8_icmp(cstr_str(self), str); } + +// END utf8 ===== + 
+STC_INLINE int cstr_cmp(const cstr* s1, const cstr* s2) + { return strcmp(cstr_str(s1), cstr_str(s2)); } + +STC_INLINE bool cstr_eq(const cstr* s1, const cstr* s2) { + csview x = cstr_sv(s1), y = cstr_sv(s2); + return x.size == y.size && !c_memcmp(x.buf, y.buf, x.size); +} + +STC_INLINE bool cstr_equals(const cstr* self, const char* str) + { return !strcmp(cstr_str(self), str); } + +STC_INLINE bool cstr_equals_sv(const cstr* self, csview sv) + { return sv.size == cstr_size(self) && !c_memcmp(cstr_str(self), sv.buf, sv.size); } + +STC_INLINE isize cstr_find(const cstr* self, const char* search) { + const char *str = cstr_str(self), *res = strstr((char*)str, search); + return res ? (res - str) : c_NPOS; +} + +STC_INLINE bool cstr_contains(const cstr* self, const char* search) + { return strstr((char*)cstr_str(self), search) != NULL; } + +STC_INLINE bool cstr_contains_sv(const cstr* self, csview search) + { return cstr_find_sv(self, search) != c_NPOS; } + + +STC_INLINE bool cstr_starts_with_sv(const cstr* self, csview sub) { + if (sub.size > cstr_size(self)) return false; + return !c_memcmp(cstr_str(self), sub.buf, sub.size); +} + +STC_INLINE bool cstr_starts_with(const cstr* self, const char* sub) { + const char* str = cstr_str(self); + while (*sub && *str == *sub) ++str, ++sub; + return !*sub; +} + +STC_INLINE bool cstr_ends_with_sv(const cstr* self, csview sub) { + csview sv = cstr_sv(self); + if (sub.size > sv.size) return false; + return !c_memcmp(sv.buf + sv.size - sub.size, sub.buf, sub.size); +} + +STC_INLINE bool cstr_ends_with(const cstr* self, const char* sub) + { return cstr_ends_with_sv(self, c_sv(sub, c_strlen(sub))); } + +STC_INLINE char* cstr_assign(cstr* self, const char* str) + { return cstr_assign_n(self, str, c_strlen(str)); } + +STC_INLINE char* cstr_assign_sv(cstr* self, csview sv) + { return cstr_assign_n(self, sv.buf, sv.size); } + +STC_INLINE char* cstr_copy(cstr* self, cstr s) { + csview sv = cstr_sv(&s); + return cstr_assign_n(self, sv.buf, 
sv.size); +} + + +STC_INLINE char* cstr_push(cstr* self, const char* chr) + { return cstr_append_n(self, chr, utf8_chr_size(chr)); } +/* Remove the last UTF-8 codepoint (its trailing continuation bytes plus the byte before them); no-op on an empty string. */ +STC_INLINE void cstr_pop(cstr* self) { + csview sv = cstr_sv(self); + const char* s = sv.buf + sv.size; + while (s != sv.buf && (*--s & 0xC0) == 0x80) ; /* never step in front of the buffer */ + _cstr_set_size(self, (s - sv.buf)); +} + +STC_INLINE char* cstr_append(cstr* self, const char* str) + { return cstr_append_n(self, str, c_strlen(str)); } + +STC_INLINE char* cstr_append_sv(cstr* self, csview sv) + { return cstr_append_n(self, sv.buf, sv.size); } + +STC_INLINE char* cstr_append_s(cstr* self, cstr s) + { return cstr_append_sv(self, cstr_sv(&s)); } +/* Join the cstr elements of vec (a container exposing `cstr* data` and `size`) onto self via cstr_join_sn. Expands to one statement; the caller supplies the ';'. */ +#define cstr_join(self, sep, vec) do { \ + struct _vec_s { cstr* data; ptrdiff_t size; } \ + *_vec = (struct _vec_s*)&(vec); \ + (void)sizeof((vec).data == _vec->data && &(vec).size == &_vec->size); \ + cstr_join_sn(self, sep, _vec->data, _vec->size); \ +} while (0) + +#define cstr_join_items(self, sep, ...) \ + cstr_join_n(self, sep, c_make_array(const char*, __VA_ARGS__), c_sizeof((const char*[])__VA_ARGS__)/c_sizeof(char*)) + +STC_INLINE void cstr_join_n(cstr* self, const char* sep, const char* arr[], isize n) { + const char* _sep = cstr_is_empty(self) ? 
"" : sep; + while (n--) { cstr_append(self, _sep); cstr_append_s(self, *arr++); _sep = sep; } +} + + +STC_INLINE void cstr_replace_sv(cstr* self, csview search, csview repl, int32_t count) + { cstr_take(self, cstr_from_replace(cstr_sv(self), search, repl, count)); } + +STC_INLINE void cstr_replace_nfirst(cstr* self, const char* search, const char* repl, int32_t count) + { cstr_replace_sv(self, c_sv(search, c_strlen(search)), c_sv(repl, c_strlen(repl)), count); } + +STC_INLINE void cstr_replace(cstr* self, const char* search, const char* repl) + { cstr_replace_nfirst(self, search, repl, INT32_MAX); } + + +STC_INLINE void cstr_replace_at_sv(cstr* self, isize pos, isize len, const csview repl) { + char* d = _cstr_internal_move(self, pos + len, pos + repl.size); + c_memcpy(d + pos, repl.buf, repl.size); +} +STC_INLINE void cstr_replace_at(cstr* self, isize pos, isize len, const char* repl) + { cstr_replace_at_sv(self, pos, len, c_sv(repl, c_strlen(repl))); } + +STC_INLINE void cstr_u8_replace(cstr* self, isize u8pos, isize u8len, const char* repl) { + const char* s = cstr_str(self); csview span = utf8_subview(s, u8pos, u8len); + cstr_replace_at(self, span.buf - s, span.size, repl); +} + + +STC_INLINE void cstr_insert_sv(cstr* self, isize pos, csview sv) + { cstr_replace_at_sv(self, pos, 0, sv); } + +STC_INLINE void cstr_insert(cstr* self, isize pos, const char* str) + { cstr_replace_at_sv(self, pos, 0, c_sv(str, c_strlen(str))); } + +STC_INLINE void cstr_u8_insert(cstr* self, isize u8pos, const char* str) + { cstr_insert(self, utf8_to_index(cstr_str(self), u8pos), str); } + +STC_INLINE bool cstr_getline(cstr *self, FILE *fp) + { return cstr_getdelim(self, '\n', fp); } + +#endif // STC_CSTR_PRV_H_INCLUDED diff --git a/stc/priv/linkage.h b/stc/priv/linkage.h new file mode 100644 index 0000000..ed18c72 --- /dev/null +++ b/stc/priv/linkage.h @@ -0,0 +1,77 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any 
person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#undef STC_API +#undef STC_DEF + +#if !defined i_static && !defined STC_STATIC && (defined i_header || defined STC_HEADER || \ + defined i_implement || defined STC_IMPLEMENT) + #define STC_API extern + #define STC_DEF +#else + #define i_implement + #if defined __GNUC__ || defined __clang__ || defined __INTEL_LLVM_COMPILER + #define STC_API static __attribute__((unused)) + #else + #define STC_API static inline + #endif + #define STC_DEF static +#endif +#if defined STC_IMPLEMENT || defined i_import + #define i_implement +#endif + +#if defined i_aux && defined i_allocator + #define _i_aux_alloc +#endif +#ifndef i_allocator + #define i_allocator c +#endif +#ifndef i_free + #define i_malloc c_JOIN(i_allocator, _malloc) + #define i_calloc c_JOIN(i_allocator, _calloc) + #define i_realloc c_JOIN(i_allocator, _realloc) + #define i_free c_JOIN(i_allocator, _free) +#endif + +#if defined __clang__ && !defined __cplusplus + #pragma clang diagnostic push + #pragma clang diagnostic warning "-Wall" + #pragma clang diagnostic warning "-Wextra" + #pragma clang diagnostic warning "-Wpedantic" + #pragma clang diagnostic warning "-Wconversion" + #pragma clang diagnostic warning "-Wwrite-strings" + // ignored + #pragma clang diagnostic ignored "-Wmissing-field-initializers" +#elif defined __GNUC__ && !defined __cplusplus + #pragma GCC diagnostic push + #pragma GCC diagnostic warning "-Wall" + #pragma GCC diagnostic warning "-Wextra" + #pragma GCC diagnostic warning "-Wpedantic" + #pragma GCC diagnostic warning "-Wconversion" + #pragma GCC diagnostic warning "-Wwrite-strings" + // ignored + #pragma GCC diagnostic ignored "-Wclobbered" + #pragma GCC diagnostic ignored "-Wimplicit-fallthrough=3" + #pragma GCC diagnostic ignored "-Wstringop-overflow=" + #pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#endif diff --git a/stc/priv/linkage2.h b/stc/priv/linkage2.h new file mode 100644 index 0000000..d99dd23 --- /dev/null +++ b/stc/priv/linkage2.h @@ -0,0 +1,42 @@ +/* MIT 
License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#undef i_aux +#undef _i_aux_alloc + +#undef i_allocator +#undef i_malloc +#undef i_calloc +#undef i_realloc +#undef i_free + +#undef i_static +#undef i_header +#undef i_implement +#undef i_import + +#if defined __clang__ && !defined __cplusplus + #pragma clang diagnostic pop +#elif defined __GNUC__ && !defined __cplusplus + #pragma GCC diagnostic pop +#endif diff --git a/stc/priv/queue_prv.h b/stc/priv/queue_prv.h new file mode 100644 index 0000000..3645fc0 --- /dev/null +++ b/stc/priv/queue_prv.h @@ -0,0 +1,285 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +// IWYU pragma: private +#ifndef i_declared +_c_DEFTYPES(_declare_queue, Self, i_key, _i_aux_def); +#endif +typedef i_keyraw _m_raw; + +STC_API bool _c_MEMB(_reserve)(Self* self, const isize cap); +STC_API void _c_MEMB(_clear)(Self* self); +STC_API void _c_MEMB(_drop)(const Self* cself); +STC_API _m_value* _c_MEMB(_push)(Self* self, _m_value value); // push_back +STC_API void _c_MEMB(_shrink_to_fit)(Self *self); +STC_API _m_iter _c_MEMB(_advance)(_m_iter it, isize n); + +#define _cbuf_toidx(self, pos) (((pos) - (self)->start) & (self)->capmask) +#define _cbuf_topos(self, idx) (((self)->start + (idx)) & (self)->capmask) + +STC_INLINE void _c_MEMB(_put_n)(Self* self, const _m_raw* raw, isize n) + { while (n--) _c_MEMB(_push)(self, i_keyfrom((*raw))), ++raw; } + +STC_INLINE void _c_MEMB(_value_drop)(const Self* self, _m_value* val) + { (void)self; i_keydrop(val); } + +#ifndef _i_aux_alloc +STC_INLINE Self _c_MEMB(_init)(void) + { Self out = {0}; return out; } + +STC_INLINE Self _c_MEMB(_with_capacity)(isize cap) { + cap = c_next_pow2(cap + 1); + Self out = {_i_new_n(_m_value, cap), 0, 0, cap - 1}; + return out; +} +STC_INLINE Self _c_MEMB(_with_size_uninit)(isize size) + { Self out = _c_MEMB(_with_capacity)(size); out.end = size; return out; } + +STC_INLINE Self _c_MEMB(_with_size)(isize size, _m_raw default_raw) { + Self out = _c_MEMB(_with_capacity)(size); + while (out.end < size) out.cbuf[out.end++] = i_keyfrom(default_raw); + return out; +} +STC_INLINE Self _c_MEMB(_from_n)(const _m_raw* raw, isize n) { + Self out = _c_MEMB(_with_capacity)(n); + _c_MEMB(_put_n)(&out, raw, n); return out; +} +#endif + +#if !defined i_no_emplace +STC_INLINE _m_value* _c_MEMB(_emplace)(Self* self, _m_raw raw) + { return _c_MEMB(_push)(self, i_keyfrom(raw)); } +#endif + +#if defined _i_has_eq +STC_API bool _c_MEMB(_eq)(const Self* self, const Self* other); +#endif + +#if !defined i_no_clone +STC_API Self _c_MEMB(_clone)(Self q); + +STC_INLINE _m_value 
_c_MEMB(_value_clone)(const Self* self, _m_value val) + { (void)self; return i_keyclone(val); } + +STC_INLINE void _c_MEMB(_copy)(Self* self, const Self* other) { + if (self == other) return; + _c_MEMB(_drop)(self); + *self = _c_MEMB(_clone)(*other); +} +#endif // !i_no_clone + +STC_INLINE isize _c_MEMB(_size)(const Self* self) + { return _cbuf_toidx(self, self->end); } + +STC_INLINE isize _c_MEMB(_capacity)(const Self* self) + { return self->capmask; } + +STC_INLINE bool _c_MEMB(_is_empty)(const Self* self) + { return self->start == self->end; } + +STC_INLINE _m_raw _c_MEMB(_value_toraw)(const _m_value* pval) + { return i_keytoraw(pval); } + +STC_INLINE const _m_value* _c_MEMB(_front)(const Self* self) + { return self->cbuf + self->start; } + +STC_INLINE _m_value* _c_MEMB(_front_mut)(Self* self) + { return self->cbuf + self->start; } + +STC_INLINE const _m_value* _c_MEMB(_back)(const Self* self) + { return self->cbuf + ((self->end - 1) & self->capmask); } + +STC_INLINE _m_value* _c_MEMB(_back_mut)(Self* self) + { return (_m_value*)_c_MEMB(_back)(self); } + +STC_INLINE Self _c_MEMB(_move)(Self *self) { + Self m = *self; + self->capmask = self->start = self->end = 0; + self->cbuf = NULL; + return m; +} + +STC_INLINE void _c_MEMB(_take)(Self *self, Self unowned) + { _c_MEMB(_drop)(self); *self = unowned; } + +STC_INLINE void _c_MEMB(_pop)(Self* self) { // pop_front + c_assert(!_c_MEMB(_is_empty)(self)); + i_keydrop((self->cbuf + self->start)); + self->start = (self->start + 1) & self->capmask; +} + +STC_INLINE _m_value _c_MEMB(_pull)(Self* self) { // move front out of queue + c_assert(!_c_MEMB(_is_empty)(self)); + isize s = self->start; + self->start = (s + 1) & self->capmask; + return self->cbuf[s]; +} + +STC_INLINE _m_iter _c_MEMB(_begin)(const Self* self) { + return c_literal(_m_iter){ + .ref=_c_MEMB(_is_empty)(self) ? 
NULL : self->cbuf + self->start, + .pos=self->start, ._s=self}; +} + +STC_INLINE _m_iter _c_MEMB(_rbegin)(const Self* self) { + isize pos = (self->end - 1) & self->capmask; + return c_literal(_m_iter){ + .ref=_c_MEMB(_is_empty)(self) ? NULL : self->cbuf + pos, + .pos=pos, ._s=self}; +} + +STC_INLINE _m_iter _c_MEMB(_end)(const Self* self) + { (void)self; return c_literal(_m_iter){0}; } + +STC_INLINE _m_iter _c_MEMB(_rend)(const Self* self) + { (void)self; return c_literal(_m_iter){0}; } + +STC_INLINE void _c_MEMB(_next)(_m_iter* it) { + if (it->pos != it->_s->capmask) { ++it->ref; ++it->pos; } + else { it->ref -= it->pos; it->pos = 0; } + if (it->pos == it->_s->end) it->ref = NULL; +} + +STC_INLINE void _c_MEMB(_rnext)(_m_iter* it) { + if (it->pos == it->_s->start) it->ref = NULL; + else if (it->pos != 0) { --it->ref; --it->pos; } + else it->ref += (it->pos = it->_s->capmask); +} + +STC_INLINE isize _c_MEMB(_index)(const Self* self, _m_iter it) + { return _cbuf_toidx(self, it.pos); } + +STC_INLINE void _c_MEMB(_adjust_end_)(Self* self, isize n) + { self->end = (self->end + n) & self->capmask; } + +/* -------------------------- IMPLEMENTATION ------------------------- */ +#if defined i_implement + +STC_DEF _m_iter _c_MEMB(_advance)(_m_iter it, isize n) { + isize len = _c_MEMB(_size)(it._s); + isize pos = it.pos, idx = _cbuf_toidx(it._s, pos); + it.pos = (pos + n) & it._s->capmask; + it.ref += it.pos - pos; + if (!c_uless(idx + n, len)) it.ref = NULL; + return it; +} + +STC_DEF void +_c_MEMB(_clear)(Self* self) { + for (c_each(i, Self, *self)) + { i_keydrop(i.ref); } + self->start = 0, self->end = 0; +} + +STC_DEF void +_c_MEMB(_drop)(const Self* cself) { + Self* self = (Self*)cself; + _c_MEMB(_clear)(self); + _i_free_n(self->cbuf, self->capmask + 1); +} + +STC_DEF bool +_c_MEMB(_reserve)(Self* self, const isize cap) { + isize oldpow2 = self->capmask + (self->capmask & 1); // handle capmask = 0 + isize newpow2 = c_next_pow2(cap + 1); + if (newpow2 <= oldpow2) + 
return self->cbuf != NULL; + _m_value* d = (_m_value *)_i_realloc_n(self->cbuf, oldpow2, newpow2); + if (d == NULL) + return false; + isize head = oldpow2 - self->start; + if (self->start <= self->end) // [..S########E....|................] + ; + else if (head < self->end) { // [#######E.....S##|.............s!!] + c_memcpy(d + newpow2 - head, d + self->start, head*c_sizeof *d); + self->start = newpow2 - head; + } else { // [##E.....S#######|!!e.............] + c_memcpy(d + oldpow2, d, self->end*c_sizeof *d); + self->end += oldpow2; + } + self->capmask = newpow2 - 1; + self->cbuf = d; + return true; +} +/* Append value at the back, growing the buffer when it is full; returns NULL if growing fails. */ +STC_DEF _m_value* +_c_MEMB(_push)(Self* self, _m_value value) { // push_back + isize end = (self->end + 1) & self->capmask; + if (end == self->start) { // full + if (!_c_MEMB(_reserve)(self, self->capmask + 3)) // => 2x expand + return NULL; + end = (self->end + 1) & self->capmask; + } + _m_value *v = self->cbuf + self->end; + self->end = end; + *v = value; + return v; +} +/* Compact the live elements into the first c_next_pow2(size + 1) slots, then shrink the allocation to that many slots. */ +STC_DEF void +_c_MEMB(_shrink_to_fit)(Self *self) { + isize sz = _c_MEMB(_size)(self); + isize newpow2 = c_next_pow2(sz + 1); + if (newpow2 > self->capmask) + return; + if (self->start <= self->end) { + c_memmove(self->cbuf, self->cbuf + self->start, sz*c_sizeof *self->cbuf); + self->start = 0, self->end = sz; + } else { + isize n = self->capmask - self->start + 1; + c_memmove(self->cbuf + (newpow2 - n), self->cbuf + self->start, n*c_sizeof *self->cbuf); + self->start = newpow2 - n; + } + _m_value* d = (_m_value *)_i_realloc_n(self->cbuf, self->capmask + 1, newpow2); if (d != NULL) self->cbuf = d; // a failed realloc leaves the old block intact: keep using it + self->capmask = newpow2 - 1; +} + +#if !defined i_no_clone +STC_DEF Self +_c_MEMB(_clone)(Self q) { + Self out = q, *self = &out; (void)self; // may be used by _i_new_n/i_keyclone via i_aux. 
+ out.start = 0; out.end = _c_MEMB(_size)(&q); + out.capmask = c_next_pow2(out.end + 1) - 1; + out.cbuf = _i_new_n(_m_value, out.capmask + 1); + isize i = 0; + if (out.cbuf) + for (c_each(it, Self, q)) + out.cbuf[i++] = i_keyclone((*it.ref)); + return out; +} +#endif // i_no_clone + +#if defined _i_has_eq +STC_DEF bool +_c_MEMB(_eq)(const Self* self, const Self* other) { + if (_c_MEMB(_size)(self) != _c_MEMB(_size)(other)) return false; + for (_m_iter i = _c_MEMB(_begin)(self), j = _c_MEMB(_begin)(other); + i.ref; _c_MEMB(_next)(&i), _c_MEMB(_next)(&j)) + { + const _m_raw _rx = i_keytoraw(i.ref), _ry = i_keytoraw(j.ref); + if (!(i_eq((&_rx), (&_ry)))) return false; + } + return true; +} +#endif // _i_has_eq +#endif // IMPLEMENTATION diff --git a/stc/priv/sort_prv.h b/stc/priv/sort_prv.h new file mode 100644 index 0000000..6a9f509 --- /dev/null +++ b/stc/priv/sort_prv.h @@ -0,0 +1,136 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +// IWYU pragma: private +#ifdef _i_is_list + #define i_at(self, idx) (&((_m_value *)(self)->last)[idx]) + #define i_at_mut i_at +#elif !defined i_at + #define i_at(self, idx) _c_MEMB(_at)(self, idx) + #define i_at_mut(self, idx) _c_MEMB(_at_mut)(self, idx) +#endif + +STC_API void _c_MEMB(_sort_lowhigh)(Self* self, isize lo, isize hi); + +#ifdef _i_is_array +STC_API isize _c_MEMB(_lower_bound_range)(const Self* self, const _m_raw raw, isize start, isize end); +STC_API isize _c_MEMB(_binary_search_range)(const Self* self, const _m_raw raw, isize start, isize end); + +static inline void _c_MEMB(_sort)(Self* arr, isize n) + { _c_MEMB(_sort_lowhigh)(arr, 0, n - 1); } + +static inline isize // c_NPOS = not found +_c_MEMB(_lower_bound)(const Self* arr, const _m_raw raw, isize n) + { return _c_MEMB(_lower_bound_range)(arr, raw, 0, n); } + +static inline isize // c_NPOS = not found +_c_MEMB(_binary_search)(const Self* arr, const _m_raw raw, isize n) + { return _c_MEMB(_binary_search_range)(arr, raw, 0, n); } + +#elif !defined _i_is_list +STC_API isize _c_MEMB(_lower_bound_range)(const Self* self, const _m_raw raw, isize start, isize end); +STC_API isize _c_MEMB(_binary_search_range)(const Self* self, const _m_raw raw, isize start, isize end); + +static inline void _c_MEMB(_sort)(Self* self) + { _c_MEMB(_sort_lowhigh)(self, 0, _c_MEMB(_size)(self) - 1); } + +static inline isize // c_NPOS = not found +_c_MEMB(_lower_bound)(const Self* self, const _m_raw raw) + { return _c_MEMB(_lower_bound_range)(self, raw, 0, _c_MEMB(_size)(self)); } + +static inline isize // c_NPOS = not found +_c_MEMB(_binary_search)(const Self* self, const _m_raw raw) + { return _c_MEMB(_binary_search_range)(self, raw, 0, 
_c_MEMB(_size)(self)); } +#endif + +/* -------------------------- IMPLEMENTATION ------------------------- */ +#if defined i_implement +/* Insertion sort of the closed index range [lo, hi]; never reads or writes a slot below lo. */ +static void _c_MEMB(_insertsort_lowhigh)(Self* self, isize lo, isize hi) { + for (isize j = lo, i = lo + 1; i <= hi; j = i, ++i) { + _m_value x = *i_at(self, i); + _m_raw rx = i_keytoraw((&x)); + while (j >= lo) { // stay inside [lo, hi]: slots below lo are not part of this sort + _m_raw ry = i_keytoraw(i_at(self, j)); + if (!(i_less((&rx), (&ry)))) break; + *i_at_mut(self, j + 1) = *i_at(self, j); + --j; + } + *i_at_mut(self, j + 1) = x; + } +} +/* Sort the closed index range [lo, hi]: partition around a pivot, handle the smaller side first (recursively, or by insertion sort when j - lo <= 64), then loop on the larger side. */ +STC_DEF void _c_MEMB(_sort_lowhigh)(Self* self, isize lo, isize hi) { + isize i = lo, j; + while (lo < hi) { + _m_raw pivot = i_keytoraw(i_at(self, (isize)(lo + (hi - lo)*7LL/16))), rx; + j = hi; + do { + do { rx = i_keytoraw(i_at(self, i)); } while ((i_less((&rx), (&pivot))) && ++i); + do { rx = i_keytoraw(i_at(self, j)); } while ((i_less((&pivot), (&rx))) && --j); + if (i > j) break; + c_swap(i_at_mut(self, i), i_at_mut(self, j)); + ++i; --j; + } while (i <= j); + + if (j - lo > hi - i) { + c_swap(&lo, &i); + c_swap(&hi, &j); + } + if (j - lo > 64) _c_MEMB(_sort_lowhigh)(self, lo, j); + else if (j > lo) _c_MEMB(_insertsort_lowhigh)(self, lo, j); + lo = i; + } +} + +#ifndef _i_is_list +STC_DEF isize // c_NPOS = not found +_c_MEMB(_lower_bound_range)(const Self* self, const _m_raw raw, isize start, isize end) { + isize count = end - start, step = count/2; + while (count > 0) { + const _m_raw rx = i_keytoraw(i_at(self, start + step)); + if (i_less((&rx), (&raw))) { + start += step + 1; + count -= step + 1; + step = count*7/8; + } else { + count = step; + step = count/8; + } + } + return start >= end ? 
c_NPOS : start; +} + +STC_DEF isize // c_NPOS = not found +_c_MEMB(_binary_search_range)(const Self* self, const _m_raw raw, isize start, isize end) { + isize res = _c_MEMB(_lower_bound_range)(self, raw, start, end); + if (res != c_NPOS) { + const _m_raw rx = i_keytoraw(i_at(self, res)); + if (i_less((&raw), (&rx))) res = c_NPOS; + } + return res; +} +#endif // !_i_is_list +#endif // IMPLEMENTATION +#undef i_at +#undef i_at_mut diff --git a/stc/priv/template.h b/stc/priv/template.h new file mode 100644 index 0000000..cc34c6c --- /dev/null +++ b/stc/priv/template.h @@ -0,0 +1,297 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +// IWYU pragma: private +#ifndef _i_template +#define _i_template + +#ifndef STC_TEMPLATE_H_INCLUDED +#define STC_TEMPLATE_H_INCLUDED + + #define _c_MEMB(name) c_JOIN(Self, name) + #define _c_DEFTYPES(macro, SELF, ...) 
macro(SELF, __VA_ARGS__) + #define _m_value _c_MEMB(_value) + #define _m_key _c_MEMB(_key) + #define _m_mapped _c_MEMB(_mapped) + #define _m_rmapped _c_MEMB(_rmapped) + #define _m_raw _c_MEMB(_raw) + #define _m_keyraw _c_MEMB(_keyraw) + #define _m_iter _c_MEMB(_iter) + #define _m_result _c_MEMB(_result) + #define _m_node _c_MEMB(_node) + + #define c_OPTION(flag) ((i_opt) & (flag)) + #define c_declared (1<<0) + #define c_no_atomic (1<<1) + #define c_arc2 (1<<2) + #define c_no_clone (1<<3) + #define c_no_hash (1<<4) + #define c_use_cmp (1<<5) + #define c_use_eq (1<<6) + #define c_cmpclass (1<<7) + #define c_keyclass (1<<8) + #define c_valclass (1<<9) + #define c_keypro (1<<10) + #define c_valpro (1<<11) +#endif + +#if defined i_rawclass // [deprecated] + #define i_cmpclass i_rawclass +#endif + +#if defined T && !defined i_type + #define i_type T +#endif +#if defined i_type && c_NUMARGS(i_type) > 1 + #define Self c_GETARG(1, i_type) + #define i_key c_GETARG(2, i_type) + #if c_NUMARGS(i_type) == 3 + #if defined _i_is_map + #define i_val c_GETARG(3, i_type) + #else + #define i_opt c_GETARG(3, i_type) + #endif + #elif c_NUMARGS(i_type) == 4 + #define i_val c_GETARG(3, i_type) + #define i_opt c_GETARG(4, i_type) + #endif +#elif !defined Self && defined i_type + #define Self i_type +#elif !defined Self + #define Self c_JOIN(_i_prefix, i_tag) +#endif + +#if defined i_aux && c_NUMARGS(i_aux) == 2 + // shorthand for defining i_aux AND i_allocator as a one-liner combo. 
+ #define _i_aux_alloc + #define _i_aux_def c_GETARG(1, i_aux) aux; + #undef i_allocator // override: + #define i_allocator c_GETARG(2, i_aux) +#elif defined i_aux + #define _i_aux_def i_aux aux; +#else + #define _i_aux_def +#endif + +#if c_OPTION(c_declared) + #define i_declared +#endif +#if c_OPTION(c_no_hash) + #define i_no_hash +#endif +#if c_OPTION(c_use_cmp) + #define i_use_cmp +#endif +#if c_OPTION(c_use_eq) + #define i_use_eq +#endif +#if c_OPTION(c_no_clone) || defined _i_is_arc + #define i_no_clone +#endif +#if c_OPTION(c_keyclass) + #define i_keyclass i_key +#endif +#if c_OPTION(c_valclass) + #define i_valclass i_val +#endif +#if c_OPTION(c_cmpclass) + #define i_cmpclass i_key + #define i_use_cmp +#endif +#if c_OPTION(c_keypro) + #define i_keypro i_key +#endif +#if c_OPTION(c_valpro) + #define i_valpro i_val +#endif + +#if defined i_keypro + #define i_keyclass i_keypro + #define i_cmpclass c_JOIN(i_keypro, _raw) +#endif + +#if defined i_cmpclass + #define i_keyraw i_cmpclass + #if !(defined i_key || defined i_keyclass) + #define i_key i_cmpclass + #endif +#elif defined i_keyclass && !defined i_keyraw + // Special: When only i_keyclass is defined, also define i_cmpclass to the same. + // Do not define i_keyraw here, otherwise _from() / _toraw() is expected to exist. + #define i_cmpclass i_key +#endif + +// Bind to i_key "class members": _clone, _drop, _from and _toraw (when conditions are met). 
+#if defined i_keyclass + #ifndef i_key + #define i_key i_keyclass + #endif + #if !defined i_keyclone && !defined i_no_clone + #define i_keyclone c_JOIN(i_keyclass, _clone) + #endif + #ifndef i_keydrop + #define i_keydrop c_JOIN(i_keyclass, _drop) + #endif + #if !defined i_keyfrom && defined i_keyraw + #define i_keyfrom c_JOIN(i_keyclass, _from) + #endif + #if !defined i_keytoraw && defined i_keyraw + #define i_keytoraw c_JOIN(i_keyclass, _toraw) + #endif +#endif + +// Define when container has support for sorting (cmp) and linear search (eq) +#if defined i_use_cmp || defined i_cmp || defined i_less + #define _i_has_cmp +#endif +#if defined i_use_cmp || defined i_cmp || defined i_use_eq || defined i_eq + #define _i_has_eq +#endif + +// Bind to i_cmpclass "class members": _cmp, _eq and _hash (when conditions are met). +#if defined i_cmpclass + #if !(defined i_cmp || defined i_less) && (defined i_use_cmp || defined _i_sorted) + #define i_cmp c_JOIN(i_cmpclass, _cmp) + #endif + #if !defined i_eq && (defined i_use_eq || defined i_hash || defined _i_is_hash) + #define i_eq c_JOIN(i_cmpclass, _eq) + #endif + #if !(defined i_hash || defined i_no_hash) + #define i_hash c_JOIN(i_cmpclass, _hash) + #endif +#endif + +#if !defined i_key + #error "No i_key defined" +#elif defined i_keyraw && !(c_OPTION(c_cmpclass) || defined i_keytoraw) + #error "If i_cmpclass / i_keyraw is defined, i_keytoraw must be defined too" +#elif !defined i_no_clone && (defined i_keyclone ^ defined i_keydrop) + #error "Both i_keyclone and i_keydrop must be defined, if any (unless i_no_clone defined)." +#elif defined i_from || defined i_drop + #error "i_from / i_drop not supported. Use i_keyfrom/i_keydrop" +#elif defined i_keyto || defined i_valto + #error i_keyto / i_valto not supported. Use i_keytoraw / i_valtoraw +#elif defined i_keyraw && defined i_use_cmp && !defined _i_has_cmp + #error "For smap / sset / pqueue, i_cmp or i_less must be defined when i_keyraw is defined." 
+#endif + +// Fill in missing i_eq, i_less, i_cmp functions with defaults. +#if !defined i_eq && defined i_cmp + #define i_eq(x, y) (i_cmp(x, y)) == 0 +#elif !defined i_eq + #define i_eq(x, y) *x == *y // works for integral types +#endif +#if !defined i_less && defined i_cmp + #define i_less(x, y) (i_cmp(x, y)) < 0 +#elif !defined i_less + #define i_less(x, y) *x < *y // works for integral types +#endif +#if !defined i_cmp && defined i_less + #define i_cmp(x, y) (i_less(y, x)) - (i_less(x, y)) +#endif +#if !(defined i_hash || defined i_no_hash) + #define i_hash c_default_hash +#endif + +#define i_no_emplace + +#ifndef i_tag + #define i_tag i_key +#endif +#if !defined i_keyfrom + #define i_keyfrom c_default_clone +#else + #undef i_no_emplace +#endif +#ifndef i_keyraw + #define i_keyraw i_key +#endif +#ifndef i_keytoraw + #define i_keytoraw c_default_toraw +#endif +#ifndef i_keyclone + #define i_keyclone c_default_clone +#endif +#ifndef i_keydrop + #define i_keydrop c_default_drop +#endif + +#if defined _i_is_map // ---- process hashmap/sortedmap value i_val, ... 
---- + +#if defined i_valpro + #define i_valclass i_valpro + #define i_valraw c_JOIN(i_valpro, _raw) +#endif + +#ifdef i_valclass + #ifndef i_val + #define i_val i_valclass + #endif + #if !defined i_valclone && !defined i_no_clone + #define i_valclone c_JOIN(i_valclass, _clone) + #endif + #ifndef i_valdrop + #define i_valdrop c_JOIN(i_valclass, _drop) + #endif + #if !defined i_valfrom && defined i_valraw + #define i_valfrom c_JOIN(i_valclass, _from) + #endif + #if !defined i_valtoraw && defined i_valraw + #define i_valtoraw c_JOIN(i_valclass, _toraw) + #endif +#endif + +#ifndef i_val + #error "i_val* must be defined for maps" +#elif defined i_valraw && !defined i_valtoraw + #error "If i_valraw is defined, i_valtoraw must be defined too" +#elif !defined i_no_clone && (defined i_valclone ^ defined i_valdrop) + #error "Both i_valclone and i_valdrop must be defined, if any" +#endif + +#if !defined i_valfrom + #define i_valfrom c_default_clone +#else + #undef i_no_emplace +#endif +#ifndef i_valraw + #define i_valraw i_val +#endif +#ifndef i_valtoraw + #define i_valtoraw c_default_toraw +#endif +#ifndef i_valclone + #define i_valclone c_default_clone +#endif +#ifndef i_valdrop + #define i_valdrop c_default_drop +#endif + +#endif // !_i_is_map + +#ifndef i_val + #define i_val i_key +#endif +#ifndef i_valraw + #define i_valraw i_keyraw +#endif +#endif // STC_TEMPLATE_H_INCLUDED diff --git a/stc/priv/template2.h b/stc/priv/template2.h new file mode 100644 index 0000000..c5a506b --- /dev/null +++ b/stc/priv/template2.h @@ -0,0 +1,71 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the 
Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +// IWYU pragma: private +#undef T // alias for i_type +#undef i_type +#undef i_class +#undef i_tag +#undef i_opt +#undef i_capacity + +#undef i_key +#undef i_keypro // Replaces next two +#undef i_key_str // [deprecated] +#undef i_key_arcbox // [deprecated] +#undef i_keyclass +#undef i_cmpclass // define i_keyraw, and bind i_cmp, i_eq, i_hash "class members" +#undef i_rawclass // [deprecated] for i_cmpclass +#undef i_keyclone +#undef i_keydrop +#undef i_keyraw +#undef i_keyfrom +#undef i_keytoraw +#undef i_cmp +#undef i_less +#undef i_eq +#undef i_hash + +#undef i_val +#undef i_valpro // Replaces next two +#undef i_val_str // [deprecated] +#undef i_val_arcbox // [deprecated] +#undef i_valclass +#undef i_valclone +#undef i_valdrop +#undef i_valraw +#undef i_valfrom +#undef i_valtoraw + +#undef i_use_cmp +#undef i_use_eq +#undef i_no_hash +#undef i_no_clone +#undef i_no_emplace +#undef i_declared + +#undef _i_aux_def +#undef _i_has_cmp +#undef _i_has_eq +#undef _i_prefix +#undef _i_template +#undef Self diff --git a/stc/priv/ucd_prv.c b/stc/priv/ucd_prv.c new file mode 100644 index 0000000..2d8044c --- /dev/null +++ b/stc/priv/ucd_prv.c @@ -0,0 +1,482 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any 
person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef STC_UCD_PRV_C_INCLUDED +#define STC_UCD_PRV_C_INCLUDED + +#include + +// ------------------------------------------------------ +// The following requires linking with utf8 symbols. +// To call them, either define i_import before including +// one of cstr, csview, zsview, or link with src/libstc.o. 
+ +enum { /* indices into _utf8_unicode_groups[]; U8G_SIZE is the number of groups */ + U8G_Cc, U8G_Lt, U8G_Nd, U8G_Nl, + U8G_Pc, U8G_Pd, U8G_Pf, U8G_Pi, + U8G_Sc, U8G_Zl, U8G_Zp, U8G_Zs, + U8G_Arabic, U8G_Bengali, U8G_Cyrillic, + U8G_Devanagari, U8G_Georgian, U8G_Greek, + U8G_Han, U8G_Hiragana, U8G_Katakana, + U8G_Latin, U8G_Thai, + U8G_SIZE +}; + +static bool utf8_isgroup(int group, uint32_t c); /* defined further down, after the range tables it searches */ + +static bool utf8_isalpha(uint32_t c) { /* alphabetic: isalpha() for c < 128, otherwise a member of any group listed in groups[] */ + static int16_t groups[] = {U8G_Latin, U8G_Nl, U8G_Cyrillic, U8G_Han, U8G_Devanagari, + U8G_Arabic, U8G_Bengali, U8G_Hiragana, U8G_Katakana, + U8G_Thai, U8G_Greek, U8G_Georgian}; + if (c < 128) return isalpha((int)c) != 0; + for (int j=0; j < (int)(sizeof groups/sizeof groups[0]); ++j) + if (utf8_isgroup(groups[j], c)) + return true; + return false; +} + +static bool utf8_iscased(uint32_t c) { /* cased: isalpha() for c < 128, otherwise lower-case, upper-case or in the title-case group (Lt) */ + if (c < 128) return isalpha((int)c) != 0; + return utf8_islower(c) || utf8_isupper(c) || + utf8_isgroup(U8G_Lt, c); +} + +static bool utf8_isalnum(uint32_t c) { /* alphanumeric: isalnum() for c < 128, otherwise alphabetic or in the decimal-number group (Nd) */ + if (c < 128) return isalnum((int)c) != 0; + return utf8_isalpha(c) || utf8_isgroup(U8G_Nd, c); +} + +static bool utf8_isword(uint32_t c) { /* word character: isalnum() or '_' for c < 128, otherwise alphabetic, decimal number (Nd) or connector punctuation (Pc) */ + if (c < 128) return (isalnum((int)c) != 0) | (c == '_'); + return utf8_isalpha(c) || utf8_isgroup(U8G_Nd, c) || + utf8_isgroup(U8G_Pc, c); +} + +static bool utf8_isblank(uint32_t c) { /* blank: space or tab for c < 128, otherwise in the space-separator group (Zs) */ + if (c < 128) return (c == ' ') | (c == '\t'); + return utf8_isgroup(U8G_Zs, c); +} + +static bool utf8_isspace(uint32_t c) { /* whitespace: isspace() for c < 128, otherwise U+2028 (8232), U+2029 (8233) or in the space-separator group (Zs) */ + if (c < 128) return isspace((int)c) != 0; + return ((c == 8232) | (c == 8233)) || utf8_isgroup(U8G_Zs, c); +} + +/* The tables below are extracted from the RE2 library */ +typedef struct { /* inclusive codepoint range [lo, hi] */ + uint16_t lo; + uint16_t hi; +} URange16; + +static const URange16 Cc_range16[] = { // Control + { 0, 31 }, + { 127, 159 }, +}; + +static const URange16 Lt_range16[] = { // Title case + { 453, 453 }, + { 456, 456 }, + { 459, 459 }, + { 498, 498 }, + { 8072, 8079 }, + { 8088, 8095 }, + { 8104, 8111 }, + { 8124, 8124 }, + { 8140, 8140 }, + { 8188, 8188 }, +}; + +static const URange16 Nd_range16[] = { // Decimal number + { 
48, 57 }, + { 1632, 1641 }, + { 1776, 1785 }, + { 1984, 1993 }, + { 2406, 2415 }, + { 2534, 2543 }, + { 2662, 2671 }, + { 2790, 2799 }, + { 2918, 2927 }, + { 3046, 3055 }, + { 3174, 3183 }, + { 3302, 3311 }, + { 3430, 3439 }, + { 3558, 3567 }, + { 3664, 3673 }, + { 3792, 3801 }, + { 3872, 3881 }, + { 4160, 4169 }, + { 4240, 4249 }, + { 6112, 6121 }, + { 6160, 6169 }, + { 6470, 6479 }, + { 6608, 6617 }, + { 6784, 6793 }, + { 6800, 6809 }, + { 6992, 7001 }, + { 7088, 7097 }, + { 7232, 7241 }, + { 7248, 7257 }, + { 42528, 42537 }, + { 43216, 43225 }, + { 43264, 43273 }, + { 43472, 43481 }, + { 43504, 43513 }, + { 43600, 43609 }, + { 44016, 44025 }, + { 65296, 65305 }, +}; + +static const URange16 Nl_range16[] = { // Number letter + { 5870, 5872 }, + { 8544, 8578 }, + { 8581, 8584 }, + { 12295, 12295 }, + { 12321, 12329 }, + { 12344, 12346 }, + { 42726, 42735 }, +}; + +static const URange16 Pc_range16[] = { // Connector punctuation + { 95, 95 }, + { 8255, 8256 }, + { 8276, 8276 }, + { 65075, 65076 }, + { 65101, 65103 }, + { 65343, 65343 }, +}; + +static const URange16 Pd_range16[] = { // Dash punctuation + { 45, 45 }, + { 1418, 1418 }, + { 1470, 1470 }, + { 5120, 5120 }, + { 6150, 6150 }, + { 8208, 8213 }, + { 11799, 11799 }, + { 11802, 11802 }, + { 11834, 11835 }, + { 11840, 11840 }, + { 11869, 11869 }, + { 12316, 12316 }, + { 12336, 12336 }, + { 12448, 12448 }, + { 65073, 65074 }, + { 65112, 65112 }, + { 65123, 65123 }, + { 65293, 65293 }, +}; + +static const URange16 Pf_range16[] = { // Final punctuation + { 187, 187 }, + { 8217, 8217 }, + { 8221, 8221 }, + { 8250, 8250 }, + { 11779, 11779 }, + { 11781, 11781 }, + { 11786, 11786 }, + { 11789, 11789 }, + { 11805, 11805 }, + { 11809, 11809 }, +}; + +static const URange16 Pi_range16[] = { // Initial punctuation + { 171, 171 }, + { 8216, 8216 }, + { 8219, 8220 }, + { 8223, 8223 }, + { 8249, 8249 }, + { 11778, 11778 }, + { 11780, 11780 }, + { 11785, 11785 }, + { 11788, 11788 }, + { 11804, 11804 }, + { 11808, 11808 }, +}; 
+ +static const URange16 Sc_range16[] = { // Currency symbol + { 36, 36 }, + { 162, 165 }, + { 1423, 1423 }, + { 1547, 1547 }, + { 2046, 2047 }, + { 2546, 2547 }, + { 2555, 2555 }, + { 2801, 2801 }, + { 3065, 3065 }, + { 3647, 3647 }, + { 6107, 6107 }, + { 8352, 8384 }, + { 43064, 43064 }, + { 65020, 65020 }, + { 65129, 65129 }, + { 65284, 65284 }, + { 65504, 65505 }, + { 65509, 65510 }, +}; + +static const URange16 Zl_range16[] = { // Line separator + { 8232, 8232 }, +}; + +static const URange16 Zp_range16[] = { // Paragraph separator + { 8233, 8233 }, +}; + +static const URange16 Zs_range16[] = { // Space separator + { 32, 32 }, + { 160, 160 }, + { 5760, 5760 }, + { 8192, 8202 }, + { 8239, 8239 }, + { 8287, 8287 }, + { 12288, 12288 }, +}; + +static const URange16 Arabic_range16[] = { + { 1536, 1540 }, + { 1542, 1547 }, + { 1549, 1562 }, + { 1564, 1566 }, + { 1568, 1599 }, + { 1601, 1610 }, + { 1622, 1647 }, + { 1649, 1756 }, + { 1758, 1791 }, + { 1872, 1919 }, + { 2160, 2190 }, + { 2192, 2193 }, + { 2200, 2273 }, + { 2275, 2303 }, + { 64336, 64450 }, + { 64467, 64829 }, + { 64832, 64911 }, + { 64914, 64967 }, + { 64975, 64975 }, + { 65008, 65023 }, + { 65136, 65140 }, + { 65142, 65276 }, +}; + +static const URange16 Bengali_range16[] = { + { 2432, 2435 }, + { 2437, 2444 }, + { 2447, 2448 }, + { 2451, 2472 }, + { 2474, 2480 }, + { 2482, 2482 }, + { 2486, 2489 }, + { 2492, 2500 }, + { 2503, 2504 }, + { 2507, 2510 }, + { 2519, 2519 }, + { 2524, 2525 }, + { 2527, 2531 }, + { 2534, 2558 }, +}; + +static const URange16 Cyrillic_range16[] = { + { 1024, 1156 }, + { 1159, 1327 }, + { 7296, 7304 }, + { 7467, 7467 }, + { 7544, 7544 }, + { 11744, 11775 }, + { 42560, 42655 }, + { 65070, 65071 }, +}; + +static const URange16 Devanagari_range16[] = { + { 2304, 2384 }, + { 2389, 2403 }, + { 2406, 2431 }, + { 43232, 43263 }, +}; + +static const URange16 Georgian_range16[] = { + { 4256, 4293 }, + { 4295, 4295 }, + { 4301, 4301 }, + { 4304, 4346 }, + { 4348, 4351 }, + { 7312, 7354 
}, + { 7357, 7359 }, + { 11520, 11557 }, + { 11559, 11559 }, + { 11565, 11565 }, +}; + +static const URange16 Greek_range16[] = { + { 880, 883 }, + { 885, 887 }, + { 890, 893 }, + { 895, 895 }, + { 900, 900 }, + { 902, 902 }, + { 904, 906 }, + { 908, 908 }, + { 910, 929 }, + { 931, 993 }, + { 1008, 1023 }, + { 7462, 7466 }, + { 7517, 7521 }, + { 7526, 7530 }, + { 7615, 7615 }, + { 7936, 7957 }, + { 7960, 7965 }, + { 7968, 8005 }, + { 8008, 8013 }, + { 8016, 8023 }, + { 8025, 8025 }, + { 8027, 8027 }, + { 8029, 8029 }, + { 8031, 8061 }, + { 8064, 8116 }, + { 8118, 8132 }, + { 8134, 8147 }, + { 8150, 8155 }, + { 8157, 8175 }, + { 8178, 8180 }, + { 8182, 8190 }, + { 8486, 8486 }, + { 43877, 43877 }, +}; + +static const URange16 Han_range16[] = { + { 11904, 11929 }, + { 11931, 12019 }, + { 12032, 12245 }, + { 12293, 12293 }, + { 12295, 12295 }, + { 12321, 12329 }, + { 12344, 12347 }, + { 13312, 19903 }, + { 19968, 40959 }, + { 63744, 64109 }, + { 64112, 64217 }, +}; + +static const URange16 Hiragana_range16[] = { + { 12353, 12438 }, + { 12445, 12447 }, +}; + +static const URange16 Katakana_range16[] = { + { 12449, 12538 }, + { 12541, 12543 }, + { 12784, 12799 }, + { 13008, 13054 }, + { 13056, 13143 }, + { 65382, 65391 }, + { 65393, 65437 }, +}; + +static const URange16 Latin_range16[] = { + { 65, 90 }, + { 97, 122 }, + { 170, 170 }, + { 186, 186 }, + { 192, 214 }, + { 216, 246 }, + { 248, 696 }, + { 736, 740 }, + { 7424, 7461 }, + { 7468, 7516 }, + { 7522, 7525 }, + { 7531, 7543 }, + { 7545, 7614 }, + { 7680, 7935 }, + { 8305, 8305 }, + { 8319, 8319 }, + { 8336, 8348 }, + { 8490, 8491 }, + { 8498, 8498 }, + { 8526, 8526 }, + { 8544, 8584 }, + { 11360, 11391 }, + { 42786, 42887 }, + { 42891, 42954 }, + { 42960, 42961 }, + { 42963, 42963 }, + { 42965, 42969 }, + { 42994, 43007 }, + { 43824, 43866 }, + { 43868, 43876 }, + { 43878, 43881 }, + { 64256, 64262 }, + { 65313, 65338 }, + { 65345, 65370 }, +}; + +static const URange16 Thai_range16[] = { + { 3585, 3642 }, + { 
3648, 3675 }, +}; + +#ifdef __cplusplus + #define _e_arg(k, v) v /* C++ branch: plain positional initializers, so the entries below must stay in enum order */ +#else + #define _e_arg(k, v) [k] = v /* C branch: designated initializer keyed by the U8G_* enumerator */ +#endif +#define UNI_ENTRY(Code) { Code##_range16, sizeof(Code##_range16)/sizeof(URange16) } /* { range table, number of ranges in it } */ + +typedef struct { + const URange16 *r16; /* array of codepoint ranges */ + int nr16; /* number of entries in r16 */ +} UGroup; + +static const UGroup _utf8_unicode_groups[U8G_SIZE] = { /* one entry per U8G_* enumerator */ + _e_arg(U8G_Cc, UNI_ENTRY(Cc)), + _e_arg(U8G_Lt, UNI_ENTRY(Lt)), + _e_arg(U8G_Nd, UNI_ENTRY(Nd)), + _e_arg(U8G_Nl, UNI_ENTRY(Nl)), + _e_arg(U8G_Pc, UNI_ENTRY(Pc)), + _e_arg(U8G_Pd, UNI_ENTRY(Pd)), + _e_arg(U8G_Pf, UNI_ENTRY(Pf)), + _e_arg(U8G_Pi, UNI_ENTRY(Pi)), + _e_arg(U8G_Sc, UNI_ENTRY(Sc)), + _e_arg(U8G_Zl, UNI_ENTRY(Zl)), + _e_arg(U8G_Zp, UNI_ENTRY(Zp)), + _e_arg(U8G_Zs, UNI_ENTRY(Zs)), + _e_arg(U8G_Arabic, UNI_ENTRY(Arabic)), + _e_arg(U8G_Bengali, UNI_ENTRY(Bengali)), + _e_arg(U8G_Cyrillic, UNI_ENTRY(Cyrillic)), + _e_arg(U8G_Devanagari, UNI_ENTRY(Devanagari)), + _e_arg(U8G_Georgian, UNI_ENTRY(Georgian)), + _e_arg(U8G_Greek, UNI_ENTRY(Greek)), + _e_arg(U8G_Han, UNI_ENTRY(Han)), + _e_arg(U8G_Hiragana, UNI_ENTRY(Hiragana)), + _e_arg(U8G_Katakana, UNI_ENTRY(Katakana)), + _e_arg(U8G_Latin, UNI_ENTRY(Latin)), + _e_arg(U8G_Thai, UNI_ENTRY(Thai)), +}; + +static bool utf8_isgroup(int group, uint32_t c) { /* true if c lies inside one of the ranges of _utf8_unicode_groups[group]; group is not range-checked */ + for (int j=0; j<_utf8_unicode_groups[group].nr16; ++j) { + if (c < _utf8_unicode_groups[group].r16[j].lo) /* c is below this range: stop early, which presumes the ranges are sorted ascending */ + return false; + if (c <= _utf8_unicode_groups[group].r16[j].hi) + return true; + } + return false; +} + +#endif // STC_UCD_PRV_C_INCLUDED diff --git a/stc/priv/utf8_prv.c b/stc/priv/utf8_prv.c new file mode 100644 index 0000000..adc4d51 --- /dev/null +++ b/stc/priv/utf8_prv.c @@ -0,0 +1,177 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute,
sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef STC_UTF8_PRV_C_INCLUDED +#define STC_UTF8_PRV_C_INCLUDED + +#include "utf8_tab.c" + +const uint8_t utf8_dtab[] = { /* entries 0..255: class of each byte value; entries from index 256 on: state transitions, read as utf8_dtab[256 + state + class] by utf8_decode() */ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, + 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12, + 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12, + 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12, + 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12, + 12,36,12,12,12,12,12,12,12,12,12,12, +}; + +int utf8_encode(char *out, uint32_t c) { /* writes the utf8 encoding of c to out; returns its length 1..4, or 0 with nothing written for surrogates and c >= 0x110000 */ + if (c < 0x80U) { + out[0] = (char) c; + return 1; + } else if (c < 0x0800U) { + out[0] = (char) ((c>>6 & 0x1F) | 0xC0); + out[1] = (char) ((c 
& 0x3F) | 0x80); + return 2; + } else if (c < 0x010000U) { + if ((c < 0xD800U) | (c >= 0xE000U)) { /* surrogates 0xD800..0xDFFF are skipped here and end up at return 0 */ + out[0] = (char) ((c>>12 & 0x0F) | 0xE0); + out[1] = (char) ((c>>6 & 0x3F) | 0x80); + out[2] = (char) ((c & 0x3F) | 0x80); + return 3; + } + } else if (c < 0x110000U) { + out[0] = (char) ((c>>18 & 0x07) | 0xF0); + out[1] = (char) ((c>>12 & 0x3F) | 0x80); + out[2] = (char) ((c>>6 & 0x3F) | 0x80); + out[3] = (char) ((c & 0x3F) | 0x80); + return 4; + } + return 0; /* c is a surrogate or >= 0x110000: nothing was written */ +} + +uint32_t utf8_peek_at(const char* s, isize offset) { /* codepoint decoded at the position utf8_offset(s, offset) */ + return utf8_peek(utf8_offset(s, offset)); +} + +bool utf8_valid(const char* s) { /* true if the NUL-terminated s runs through the decoder without a reject and ends in the accept state */ + utf8_decode_t d = {.state=0}; + while ((utf8_decode(&d, (uint8_t)*s) != utf8_REJECT) & (*s != '\0')) + ++s; + return d.state == utf8_ACCEPT; +} + +bool utf8_valid_n(const char* s, isize nbytes) { /* as utf8_valid(), but examines at most nbytes bytes; a NUL byte also ends the scan */ + utf8_decode_t d = {.state=0}; + for (; nbytes-- != 0; ++s) + if ((utf8_decode(&d, (uint8_t)*s) == utf8_REJECT) | (*s == '\0')) + break; + return d.state == utf8_ACCEPT; +} + +#define _binsearch(c, at, N, ret) do { /* lower bound: ret = first index in [0, N) whose at(index) >= c, or N if there is none; at() must be ascending */ \ + int _n = N, _i = 0, _mid = _n/2; \ + while (_n > 0) { \ + if (at(_i + _mid) < c) { \ + _i += _mid + 1; \ + _n -= _mid + 1; \ + _mid = _n*7/8; \ + } else { \ + _n = _mid; \ + _mid = _n/8; \ + } \ + } \ + ret = (_i >= N || at(_i) < c) ? 
N : _i; \ +} while (0) + +uint32_t utf8_casefold(uint32_t c) { /* case-folded form of c, or c itself when no entry among the first casefold_len casemappings covers it */ + #define _at_fold(idx) casemappings[idx].c2 /* search key: upper end c2 of each source range [c1, c2] */ + int i; + _binsearch(c, _at_fold, casefold_len, i); + if (i < casefold_len && casemappings[i].c1 <= c && c <= casemappings[i].c2) { + const struct CaseMapping entry = casemappings[i]; + int d = entry.m2 - entry.c2; + if (d == 1) return c + ((entry.c2 & 1U) == (c & 1U)); /* d == 1: only codepoints with the same parity as c2 are mapped, each to c + 1 */ + return (uint32_t)((int)c + d); /* otherwise every codepoint of the range is shifted by d = m2 - c2 */ + } + return c; +} + +uint32_t utf8_tolower(uint32_t c) { /* lower-case form of c, or c if unmapped; casemappings is searched through the upcase_ind index */ + #define _at_upper(idx) casemappings[upcase_ind[idx]].c2 + int i, n = c_countof(upcase_ind); + _binsearch(c, _at_upper, n, i); + if (i < n) { + const struct CaseMapping entry = casemappings[upcase_ind[i]]; + if (entry.c1 <= c && c <= entry.c2) { + int d = entry.m2 - entry.c2; + if (d == 1) return c + ((entry.c2 & 1U) == (c & 1U)); /* same parity rule as in utf8_casefold() */ + return (uint32_t)((int)c + d); + } + } + return c; +} + +uint32_t utf8_toupper(uint32_t c) { /* upper-case form of c, or c if unmapped; searches the mapped ends m2 through the lowcase_ind index */ + #define _at_lower(idx) casemappings[lowcase_ind[idx]].m2 + int i, n = c_countof(lowcase_ind); + _binsearch(c, _at_lower, n, i); + if (i < n) { + const struct CaseMapping entry = casemappings[lowcase_ind[i]]; + int d = entry.m2 - entry.c2; + if (entry.c1 + (uint32_t)d <= c && c <= entry.m2) { /* is c inside the mapped range [c1 + d, m2]? */ + if (d == 1) return c - ((entry.m2 & 1U) == (c & 1U)); /* d == 1: only codepoints with the same parity as m2 are mapped, each to c - 1 */ + return (uint32_t)((int)c - d); + } + } + return c; +} + +int utf8_decode_codepoint(utf8_decode_t* d, const char* s, const char* end) { /* decodes one codepoint from [s, end) into *d and returns the number of bytes to advance */ // s < end + const char* start = s; + do switch (utf8_decode(d, (uint8_t)*s++)) { + case utf8_ACCEPT: return (int)(s - start); + case utf8_REJECT: goto recover; + } while (s != end); + + recover: // non-complete utf8 is also treated as utf8_REJECT + d->state = utf8_ACCEPT; /* reset so the next call starts on a fresh codepoint */ + d->codep = 0xFFFD; /* report U+FFFD for invalid or incomplete input */ + //return 1; + int n = (int)(s - start); /* bytes consumed so far */ + return n > 2 ? 
n - 1 : 1; +} + +int utf8_icompare(const csview s1, const csview s2) { /* case-folded comparison of two utf8 views: returns <0, 0 or >0; a NUL byte in s2 ends the comparison */ + utf8_decode_t d1 = {.state=0}, d2 = {.state=0}; + const char *e1 = s1.buf + s1.size, *e2 = s2.buf + s2.size; + isize j1 = 0, j2 = 0; + while ((j1 < s1.size) & (j2 < s2.size)) { + if (s2.buf[j2] == '\0') return (uint8_t)s1.buf[j1]; /* as unsigned: with a signed char a non-ASCII byte left in s1 would wrongly yield < 0 */ + + j1 += utf8_decode_codepoint(&d1, s1.buf + j1, e1); + j2 += utf8_decode_codepoint(&d2, s2.buf + j2, e2); + + int32_t c = (int32_t)utf8_casefold(d1.codep) - (int32_t)utf8_casefold(d2.codep); + if (c != 0) return (int)c; + } + return (j1 < s1.size) - (j2 < s2.size); /* the side with input left is greater; total byte sizes are not compared because case-fold-equal codepoints can differ in encoded length */ +} + +#endif // STC_UTF8_PRV_C_INCLUDED diff --git a/stc/priv/utf8_prv.h b/stc/priv/utf8_prv.h new file mode 100644 index 0000000..af260f9 --- /dev/null +++ b/stc/priv/utf8_prv.h @@ -0,0 +1,127 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +// IWYU pragma: private, include "stc/utf8.h" +#ifndef STC_UTF8_PRV_H_INCLUDED +#define STC_UTF8_PRV_H_INCLUDED + +// The following functions assume valid utf8 strings: + +/* number of bytes in the utf8 codepoint from s */ +STC_INLINE int utf8_chr_size(const char *s) { + unsigned b = (uint8_t)*s; + if (b < 0x80) return 1; + /*if (b < 0xC2) return 0;*/ + if (b < 0xE0) return 2; + if (b < 0xF0) return 3; + /*if (b < 0xF5)*/ return 4; + /*return 0;*/ +} + +/* number of codepoints in the utf8 string s */ +STC_INLINE isize utf8_count(const char *s) { + isize size = 0; + while (*s) + size += (*++s & 0xC0) != 0x80; + return size; +} + +STC_INLINE isize utf8_count_n(const char *s, isize nbytes) { /* number of codepoints that start within the first nbytes bytes of s; stops early at a NUL byte */ + isize size = 0; + while ((nbytes-- != 0) && (*s != 0)) { /* && (not &) so that s[nbytes] is never read: s need not be NUL-terminated */ + size += (*s++ & 0xC0) != 0x80; /* count every byte inside the window that is not a continuation byte (10xxxxxx) */ + } + return size; +} + +STC_INLINE const char* utf8_at(const char *s, isize u8pos) { + while ((u8pos > 0) & (*s != 0)) + u8pos -= (*++s & 0xC0) != 0x80; + return s; +} + +STC_INLINE const char* utf8_offset(const char* s, isize u8pos) { + int inc = 1; + if (u8pos < 0) u8pos = -u8pos, inc = -1; + while (u8pos && *s) + u8pos -= (*(s += inc) & 0xC0) != 0x80; + return s; +} + +STC_INLINE isize utf8_to_index(const char* s, isize u8pos) + { return utf8_at(s, u8pos) - s; } + +STC_INLINE csview utf8_subview(const char *s, isize u8pos, isize u8len) { + csview span; + span.buf = utf8_at(s, u8pos); + span.size = utf8_to_index(span.buf, u8len); + return span; +} + +// ------------------------------------------------------ +// The following requires linking with utf8 symbols. +// To call them, either define i_import before including +// one of cstr, csview, zsview, or link with src/libstc.o. + +/* decode next utf8 codepoint. 
https://bjoern.hoehrmann.de/utf-8/decoder/dfa */ +typedef struct { uint32_t state, codep; } utf8_decode_t; /* state: current decoder state; codep: codepoint assembled so far */ +extern const uint8_t utf8_dtab[]; /* defined in utf8_prv.c */ +#define utf8_ACCEPT 0 /* state after a complete codepoint */ +#define utf8_REJECT 12 /* state after invalid input */ + +extern bool utf8_valid(const char* s); +extern bool utf8_valid_n(const char* s, isize nbytes); +extern int utf8_encode(char *out, uint32_t c); +extern int utf8_decode_codepoint(utf8_decode_t* d, const char* s, const char* end); +extern int utf8_icompare(const csview s1, const csview s2); +extern uint32_t utf8_peek_at(const char* s, isize u8offset); +extern uint32_t utf8_casefold(uint32_t c); +extern uint32_t utf8_tolower(uint32_t c); +extern uint32_t utf8_toupper(uint32_t c); + +STC_INLINE bool utf8_isupper(uint32_t c) /* upper-case iff utf8_tolower() changes c */ + { return utf8_tolower(c) != c; } + +STC_INLINE bool utf8_islower(uint32_t c) /* lower-case iff utf8_toupper() changes c */ + { return utf8_toupper(c) != c; } + +STC_INLINE uint32_t utf8_decode(utf8_decode_t* d, const uint32_t byte) { /* feeds one byte to the decoder, updates d->codep and returns the new d->state; byte indexes utf8_dtab unchecked, so it must be 0..255 */ + const uint32_t type = utf8_dtab[byte]; /* class of the byte, from the first 256 table entries */ + d->codep = d->state ? (byte & 0x3fu) | (d->codep << 6) /* inside a sequence: append the low 6 bits */ + : (0xffU >> type) & byte; /* first byte: keep the payload bits left by the class mask */ + return d->state = utf8_dtab[256 + d->state + type]; /* transition entries start at index 256 */ +} + +STC_INLINE uint32_t utf8_peek(const char* s) { /* codepoint encoded at s, or U+FFFD if the bytes there are not valid utf8 */ + utf8_decode_t d = {.state=0}; + do { + utf8_decode(&d, (uint8_t)*s++); + } while (d.state > utf8_REJECT); /* loop ends on utf8_ACCEPT (0) or utf8_REJECT (12); presumably higher states mean a sequence is still in progress */ + return d.state == utf8_ACCEPT ? 
d.codep : 0xFFFD; +} + +/* case-insensitive utf8 string comparison */ +STC_INLINE int utf8_icmp(const char* s1, const char* s2) { + return utf8_icompare(c_sv(s1, INTPTR_MAX), c_sv(s2, INTPTR_MAX)); +} + +#endif // STC_UTF8_PRV_H_INCLUDED diff --git a/stc/priv/utf8_tab.c b/stc/priv/utf8_tab.c new file mode 100644 index 0000000..f854254 --- /dev/null +++ b/stc/priv/utf8_tab.c @@ -0,0 +1,250 @@ + +struct CaseMapping { uint16_t c1, c2, m2; }; + +static struct CaseMapping casemappings[] = { + {0x0041, 0x005A, 0x007A}, // A a (26) LATIN CAPITAL LETTER A + {0x00B5, 0x00B5, 0x03BC}, // µ μ ( 1) MICRO SIGN + {0x00C0, 0x00D6, 0x00F6}, // À à (23) LATIN CAPITAL LETTER A WITH GRAVE + {0x00D8, 0x00DE, 0x00FE}, // Ø ø ( 7) LATIN CAPITAL LETTER O WITH STROKE + {0x0100, 0x012E, 0x012F}, // Ā ā (24) LATIN CAPITAL LETTER A WITH MACRON + {0x0132, 0x0136, 0x0137}, // IJ ij ( 3) LATIN CAPITAL LIGATURE IJ + {0x0139, 0x0147, 0x0148}, // Ĺ ĺ ( 8) LATIN CAPITAL LETTER L WITH ACUTE + {0x014A, 0x0176, 0x0177}, // Ŋ ŋ (23) LATIN CAPITAL LETTER ENG + {0x0178, 0x0178, 0x00FF}, // Ÿ ÿ ( 1) LATIN CAPITAL LETTER Y WITH DIAERESIS + {0x0179, 0x017D, 0x017E}, // Ź ź ( 3) LATIN CAPITAL LETTER Z WITH ACUTE + {0x017F, 0x017F, 0x0073}, // ſ s ( 1) LATIN SMALL LETTER LONG S + {0x0181, 0x0181, 0x0253}, // Ɓ ɓ ( 1) LATIN CAPITAL LETTER B WITH HOOK + {0x0182, 0x0184, 0x0185}, // Ƃ ƃ ( 2) LATIN CAPITAL LETTER B WITH TOPBAR + {0x0186, 0x0186, 0x0254}, // Ɔ ɔ ( 1) LATIN CAPITAL LETTER OPEN O + {0x0187, 0x0187, 0x0188}, // Ƈ ƈ ( 1) LATIN CAPITAL LETTER C WITH HOOK + {0x0189, 0x018A, 0x0257}, // Ɖ ɖ ( 2) LATIN CAPITAL LETTER AFRICAN D + {0x018B, 0x018B, 0x018C}, // Ƌ ƌ ( 1) LATIN CAPITAL LETTER D WITH TOPBAR + {0x018E, 0x018E, 0x01DD}, // Ǝ ǝ ( 1) LATIN CAPITAL LETTER REVERSED E + {0x018F, 0x018F, 0x0259}, // Ə ə ( 1) LATIN CAPITAL LETTER SCHWA + {0x0190, 0x0190, 0x025B}, // Ɛ ɛ ( 1) LATIN CAPITAL LETTER OPEN E + {0x0191, 0x0191, 0x0192}, // Ƒ ƒ ( 1) LATIN CAPITAL LETTER F WITH HOOK + {0x0193, 0x0193, 0x0260}, // 
Ɠ ɠ ( 1) LATIN CAPITAL LETTER G WITH HOOK + {0x0194, 0x0194, 0x0263}, // Ɣ ɣ ( 1) LATIN CAPITAL LETTER GAMMA + {0x0196, 0x0196, 0x0269}, // Ɩ ɩ ( 1) LATIN CAPITAL LETTER IOTA + {0x0197, 0x0197, 0x0268}, // Ɨ ɨ ( 1) LATIN CAPITAL LETTER I WITH STROKE + {0x0198, 0x0198, 0x0199}, // Ƙ ƙ ( 1) LATIN CAPITAL LETTER K WITH HOOK + {0x019C, 0x019C, 0x026F}, // Ɯ ɯ ( 1) LATIN CAPITAL LETTER TURNED M + {0x019D, 0x019D, 0x0272}, // Ɲ ɲ ( 1) LATIN CAPITAL LETTER N WITH LEFT HOOK + {0x019F, 0x019F, 0x0275}, // Ɵ ɵ ( 1) LATIN CAPITAL LETTER O WITH MIDDLE TILDE + {0x01A0, 0x01A4, 0x01A5}, // Ơ ơ ( 3) LATIN CAPITAL LETTER O WITH HORN + {0x01A6, 0x01A6, 0x0280}, // Ʀ ʀ ( 1) LATIN LETTER YR + {0x01A7, 0x01A7, 0x01A8}, // Ƨ ƨ ( 1) LATIN CAPITAL LETTER TONE TWO + {0x01A9, 0x01A9, 0x0283}, // Ʃ ʃ ( 1) LATIN CAPITAL LETTER ESH + {0x01AC, 0x01AC, 0x01AD}, // Ƭ ƭ ( 1) LATIN CAPITAL LETTER T WITH HOOK + {0x01AE, 0x01AE, 0x0288}, // Ʈ ʈ ( 1) LATIN CAPITAL LETTER T WITH RETROFLEX HOOK + {0x01AF, 0x01AF, 0x01B0}, // Ư ư ( 1) LATIN CAPITAL LETTER U WITH HORN + {0x01B1, 0x01B2, 0x028B}, // Ʊ ʊ ( 2) LATIN CAPITAL LETTER UPSILON + {0x01B3, 0x01B5, 0x01B6}, // Ƴ ƴ ( 2) LATIN CAPITAL LETTER Y WITH HOOK + {0x01B7, 0x01B7, 0x0292}, // Ʒ ʒ ( 1) LATIN CAPITAL LETTER EZH + {0x01B8, 0x01B8, 0x01B9}, // Ƹ ƹ ( 1) LATIN CAPITAL LETTER EZH REVERSED + {0x01BC, 0x01BC, 0x01BD}, // Ƽ ƽ ( 1) LATIN CAPITAL LETTER TONE FIVE + {0x01C4, 0x01C4, 0x01C6}, // DŽ dž ( 1) LATIN CAPITAL LETTER DZ WITH CARON + {0x01C5, 0x01C5, 0x01C6}, // Dž dž ( 1) LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON + {0x01C7, 0x01C7, 0x01C9}, // LJ lj ( 1) LATIN CAPITAL LETTER LJ + {0x01C8, 0x01C8, 0x01C9}, // Lj lj ( 1) LATIN CAPITAL LETTER L WITH SMALL LETTER J + {0x01CA, 0x01CA, 0x01CC}, // NJ nj ( 1) LATIN CAPITAL LETTER NJ + {0x01CB, 0x01DB, 0x01DC}, // Nj nj ( 9) LATIN CAPITAL LETTER N WITH SMALL LETTER J + {0x01DE, 0x01EE, 0x01EF}, // Ǟ ǟ ( 9) LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON + {0x01F1, 0x01F1, 0x01F3}, // DZ dz ( 
1) LATIN CAPITAL LETTER DZ + {0x01F2, 0x01F4, 0x01F5}, // Dz dz ( 2) LATIN CAPITAL LETTER D WITH SMALL LETTER Z + {0x01F6, 0x01F6, 0x0195}, // Ƕ ƕ ( 1) LATIN CAPITAL LETTER HWAIR + {0x01F7, 0x01F7, 0x01BF}, // Ƿ ƿ ( 1) LATIN CAPITAL LETTER WYNN + {0x01F8, 0x021E, 0x021F}, // Ǹ ǹ (20) LATIN CAPITAL LETTER N WITH GRAVE + {0x0220, 0x0220, 0x019E}, // Ƞ ƞ ( 1) LATIN CAPITAL LETTER N WITH LONG RIGHT LEG + {0x0222, 0x0232, 0x0233}, // Ȣ ȣ ( 9) LATIN CAPITAL LETTER OU + {0x023A, 0x023A, 0x2C65}, // Ⱥ ⱥ ( 1) LATIN CAPITAL LETTER A WITH STROKE + {0x023B, 0x023B, 0x023C}, // Ȼ ȼ ( 1) LATIN CAPITAL LETTER C WITH STROKE + {0x023D, 0x023D, 0x019A}, // Ƚ ƚ ( 1) LATIN CAPITAL LETTER L WITH BAR + {0x023E, 0x023E, 0x2C66}, // Ⱦ ⱦ ( 1) LATIN CAPITAL LETTER T WITH DIAGONAL STROKE + {0x0241, 0x0241, 0x0242}, // Ɂ ɂ ( 1) LATIN CAPITAL LETTER GLOTTAL STOP + {0x0243, 0x0243, 0x0180}, // Ƀ ƀ ( 1) LATIN CAPITAL LETTER B WITH STROKE + {0x0244, 0x0244, 0x0289}, // Ʉ ʉ ( 1) LATIN CAPITAL LETTER U BAR + {0x0245, 0x0245, 0x028C}, // Ʌ ʌ ( 1) LATIN CAPITAL LETTER TURNED V + {0x0246, 0x024E, 0x024F}, // Ɇ ɇ ( 5) LATIN CAPITAL LETTER E WITH STROKE + {0x0345, 0x0345, 0x03B9}, // ͅ ι ( 1) COMBINING GREEK YPOGEGRAMMENI + {0x0370, 0x0372, 0x0373}, // Ͱ ͱ ( 2) GREEK CAPITAL LETTER HETA + {0x0376, 0x0376, 0x0377}, // Ͷ ͷ ( 1) GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA + {0x037F, 0x037F, 0x03F3}, // Ϳ ϳ ( 1) GREEK CAPITAL LETTER YOT + {0x0386, 0x0386, 0x03AC}, // Ά ά ( 1) GREEK CAPITAL LETTER ALPHA WITH TONOS + {0x0388, 0x038A, 0x03AF}, // Έ έ ( 3) GREEK CAPITAL LETTER EPSILON WITH TONOS + {0x038C, 0x038C, 0x03CC}, // Ό ό ( 1) GREEK CAPITAL LETTER OMICRON WITH TONOS + {0x038E, 0x038F, 0x03CE}, // Ύ ύ ( 2) GREEK CAPITAL LETTER UPSILON WITH TONOS + {0x0391, 0x03A1, 0x03C1}, // Α α (17) GREEK CAPITAL LETTER ALPHA + {0x03A3, 0x03AB, 0x03CB}, // Σ σ ( 9) GREEK CAPITAL LETTER SIGMA + {0x03C2, 0x03C2, 0x03C3}, // ς σ ( 1) GREEK SMALL LETTER FINAL SIGMA + {0x03CF, 0x03CF, 0x03D7}, // Ϗ ϗ ( 1) GREEK CAPITAL KAI 
SYMBOL + {0x03D0, 0x03D0, 0x03B2}, // ϐ β ( 1) GREEK BETA SYMBOL + {0x03D1, 0x03D1, 0x03B8}, // ϑ θ ( 1) GREEK THETA SYMBOL + {0x03D5, 0x03D5, 0x03C6}, // ϕ φ ( 1) GREEK PHI SYMBOL + {0x03D6, 0x03D6, 0x03C0}, // ϖ π ( 1) GREEK PI SYMBOL + {0x03D8, 0x03EE, 0x03EF}, // Ϙ ϙ (12) GREEK LETTER ARCHAIC KOPPA + {0x03F0, 0x03F0, 0x03BA}, // ϰ κ ( 1) GREEK KAPPA SYMBOL + {0x03F1, 0x03F1, 0x03C1}, // ϱ ρ ( 1) GREEK RHO SYMBOL + {0x03F4, 0x03F4, 0x03B8}, // ϴ θ ( 1) GREEK CAPITAL THETA SYMBOL + {0x03F5, 0x03F5, 0x03B5}, // ϵ ε ( 1) GREEK LUNATE EPSILON SYMBOL + {0x03F7, 0x03F7, 0x03F8}, // Ϸ ϸ ( 1) GREEK CAPITAL LETTER SHO + {0x03F9, 0x03F9, 0x03F2}, // Ϲ ϲ ( 1) GREEK CAPITAL LUNATE SIGMA SYMBOL + {0x03FA, 0x03FA, 0x03FB}, // Ϻ ϻ ( 1) GREEK CAPITAL LETTER SAN + {0x03FD, 0x03FF, 0x037D}, // Ͻ ͻ ( 3) GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL + {0x0400, 0x040F, 0x045F}, // Ѐ ѐ (16) CYRILLIC CAPITAL LETTER IE WITH GRAVE + {0x0410, 0x042F, 0x044F}, // А а (32) CYRILLIC CAPITAL LETTER A + {0x0460, 0x0480, 0x0481}, // Ѡ ѡ (17) CYRILLIC CAPITAL LETTER OMEGA + {0x048A, 0x04BE, 0x04BF}, // Ҋ ҋ (27) CYRILLIC CAPITAL LETTER SHORT I WITH TAIL + {0x04C0, 0x04C0, 0x04CF}, // Ӏ ӏ ( 1) CYRILLIC LETTER PALOCHKA + {0x04C1, 0x04CD, 0x04CE}, // Ӂ ӂ ( 7) CYRILLIC CAPITAL LETTER ZHE WITH BREVE + {0x04D0, 0x052E, 0x052F}, // Ӑ ӑ (48) CYRILLIC CAPITAL LETTER A WITH BREVE + {0x0531, 0x0556, 0x0586}, // Ա ա (38) ARMENIAN CAPITAL LETTER AYB + {0x10A0, 0x10C5, 0x2D25}, // Ⴀ ⴀ (38) GEORGIAN CAPITAL LETTER AN + {0x10C7, 0x10C7, 0x2D27}, // Ⴧ ⴧ ( 1) GEORGIAN CAPITAL LETTER YN + {0x10CD, 0x10CD, 0x2D2D}, // Ⴭ ⴭ ( 1) GEORGIAN CAPITAL LETTER AEN + {0x13F8, 0x13FD, 0x13F5}, // ᏸ Ᏸ ( 6) CHEROKEE SMALL LETTER YE + {0x1C80, 0x1C80, 0x0432}, // ᲀ в ( 1) CYRILLIC SMALL LETTER ROUNDED VE + {0x1C81, 0x1C81, 0x0434}, // ᲁ д ( 1) CYRILLIC SMALL LETTER LONG-LEGGED DE + {0x1C82, 0x1C82, 0x043E}, // ᲂ о ( 1) CYRILLIC SMALL LETTER NARROW O + {0x1C83, 0x1C84, 0x0442}, // ᲃ с ( 2) CYRILLIC SMALL LETTER WIDE ES + {0x1C85, 
0x1C85, 0x0442}, // ᲅ т ( 1) CYRILLIC SMALL LETTER THREE-LEGGED TE + {0x1C86, 0x1C86, 0x044A}, // ᲆ ъ ( 1) CYRILLIC SMALL LETTER TALL HARD SIGN + {0x1C87, 0x1C87, 0x0463}, // ᲇ ѣ ( 1) CYRILLIC SMALL LETTER TALL YAT + {0x1C88, 0x1C88, 0xA64B}, // ᲈ ꙋ ( 1) CYRILLIC SMALL LETTER UNBLENDED UK + {0x1C90, 0x1CBA, 0x10FA}, // Ა ა (43) GEORGIAN MTAVRULI CAPITAL LETTER AN + {0x1CBD, 0x1CBF, 0x10FF}, // Ჽ ჽ ( 3) GEORGIAN MTAVRULI CAPITAL LETTER AEN + {0x1E00, 0x1E94, 0x1E95}, // Ḁ ḁ (75) LATIN CAPITAL LETTER A WITH RING BELOW + {0x1E9B, 0x1E9B, 0x1E61}, // ẛ ṡ ( 1) LATIN SMALL LETTER LONG S WITH DOT ABOVE + {0x1E9E, 0x1E9E, 0x00DF}, // ẞ ß ( 1) LATIN CAPITAL LETTER SHARP S + {0x1EA0, 0x1EFE, 0x1EFF}, // Ạ ạ (48) LATIN CAPITAL LETTER A WITH DOT BELOW + {0x1F08, 0x1F0F, 0x1F07}, // Ἀ ἀ ( 8) GREEK CAPITAL LETTER ALPHA WITH PSILI + {0x1F18, 0x1F1D, 0x1F15}, // Ἐ ἐ ( 6) GREEK CAPITAL LETTER EPSILON WITH PSILI + {0x1F28, 0x1F2F, 0x1F27}, // Ἠ ἠ ( 8) GREEK CAPITAL LETTER ETA WITH PSILI + {0x1F38, 0x1F3F, 0x1F37}, // Ἰ ἰ ( 8) GREEK CAPITAL LETTER IOTA WITH PSILI + {0x1F48, 0x1F4D, 0x1F45}, // Ὀ ὀ ( 6) GREEK CAPITAL LETTER OMICRON WITH PSILI + {0x1F59, 0x1F5F, 0x1F57}, // Ὑ ὑ ( 7) GREEK CAPITAL LETTER UPSILON WITH DASIA + {0x1F68, 0x1F6F, 0x1F67}, // Ὠ ὠ ( 8) GREEK CAPITAL LETTER OMEGA WITH PSILI + {0x1F88, 0x1F8F, 0x1F87}, // ᾈ ᾀ ( 8) GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI + {0x1F98, 0x1F9F, 0x1F97}, // ᾘ ᾐ ( 8) GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI + {0x1FA8, 0x1FAF, 0x1FA7}, // ᾨ ᾠ ( 8) GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI + {0x1FB8, 0x1FB9, 0x1FB1}, // Ᾰ ᾰ ( 2) GREEK CAPITAL LETTER ALPHA WITH VRACHY + {0x1FBA, 0x1FBB, 0x1F71}, // Ὰ ὰ ( 2) GREEK CAPITAL LETTER ALPHA WITH VARIA + {0x1FBC, 0x1FBC, 0x1FB3}, // ᾼ ᾳ ( 1) GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI + {0x1FBE, 0x1FBE, 0x03B9}, // ι ι ( 1) GREEK PROSGEGRAMMENI + {0x1FC8, 0x1FCB, 0x1F75}, // Ὲ ὲ ( 4) GREEK CAPITAL LETTER EPSILON WITH VARIA + {0x1FCC, 0x1FCC, 
0x1FC3}, // ῌ ῃ ( 1) GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI + {0x1FD8, 0x1FD9, 0x1FD1}, // Ῐ ῐ ( 2) GREEK CAPITAL LETTER IOTA WITH VRACHY + {0x1FDA, 0x1FDB, 0x1F77}, // Ὶ ὶ ( 2) GREEK CAPITAL LETTER IOTA WITH VARIA + {0x1FE8, 0x1FE9, 0x1FE1}, // Ῠ ῠ ( 2) GREEK CAPITAL LETTER UPSILON WITH VRACHY + {0x1FEA, 0x1FEB, 0x1F7B}, // Ὺ ὺ ( 2) GREEK CAPITAL LETTER UPSILON WITH VARIA + {0x1FEC, 0x1FEC, 0x1FE5}, // Ῥ ῥ ( 1) GREEK CAPITAL LETTER RHO WITH DASIA + {0x1FF8, 0x1FF9, 0x1F79}, // Ὸ ὸ ( 2) GREEK CAPITAL LETTER OMICRON WITH VARIA + {0x1FFA, 0x1FFB, 0x1F7D}, // Ὼ ὼ ( 2) GREEK CAPITAL LETTER OMEGA WITH VARIA + {0x1FFC, 0x1FFC, 0x1FF3}, // ῼ ῳ ( 1) GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI + {0x2126, 0x2126, 0x03C9}, // Ω ω ( 1) OHM SIGN + {0x212A, 0x212A, 0x006B}, // K k ( 1) KELVIN SIGN + {0x212B, 0x212B, 0x00E5}, // Å å ( 1) ANGSTROM SIGN + {0x2132, 0x2132, 0x214E}, // Ⅎ ⅎ ( 1) TURNED CAPITAL F + {0x2160, 0x216F, 0x217F}, // Ⅰ ⅰ (16) ROMAN NUMERAL ONE + {0x2183, 0x2183, 0x2184}, // Ↄ ↄ ( 1) ROMAN NUMERAL REVERSED ONE HUNDRED + {0x24B6, 0x24CF, 0x24E9}, // Ⓐ ⓐ (26) CIRCLED LATIN CAPITAL LETTER A + {0x2C00, 0x2C2F, 0x2C5F}, // Ⰰ ⰰ (48) GLAGOLITIC CAPITAL LETTER AZU + {0x2C60, 0x2C60, 0x2C61}, // Ⱡ ⱡ ( 1) LATIN CAPITAL LETTER L WITH DOUBLE BAR + {0x2C62, 0x2C62, 0x026B}, // Ɫ ɫ ( 1) LATIN CAPITAL LETTER L WITH MIDDLE TILDE + {0x2C63, 0x2C63, 0x1D7D}, // Ᵽ ᵽ ( 1) LATIN CAPITAL LETTER P WITH STROKE + {0x2C64, 0x2C64, 0x027D}, // Ɽ ɽ ( 1) LATIN CAPITAL LETTER R WITH TAIL + {0x2C67, 0x2C6B, 0x2C6C}, // Ⱨ ⱨ ( 3) LATIN CAPITAL LETTER H WITH DESCENDER + {0x2C6D, 0x2C6D, 0x0251}, // Ɑ ɑ ( 1) LATIN CAPITAL LETTER ALPHA + {0x2C6E, 0x2C6E, 0x0271}, // Ɱ ɱ ( 1) LATIN CAPITAL LETTER M WITH HOOK + {0x2C6F, 0x2C6F, 0x0250}, // Ɐ ɐ ( 1) LATIN CAPITAL LETTER TURNED A + {0x2C70, 0x2C70, 0x0252}, // Ɒ ɒ ( 1) LATIN CAPITAL LETTER TURNED ALPHA + {0x2C72, 0x2C72, 0x2C73}, // Ⱳ ⱳ ( 1) LATIN CAPITAL LETTER W WITH HOOK + {0x2C75, 0x2C75, 0x2C76}, // Ⱶ ⱶ ( 1) LATIN CAPITAL LETTER HALF H 
+ {0x2C7E, 0x2C7F, 0x0240}, // Ȿ ȿ ( 2) LATIN CAPITAL LETTER S WITH SWASH TAIL + {0x2C80, 0x2CE2, 0x2CE3}, // Ⲁ ⲁ (50) COPTIC CAPITAL LETTER ALFA + {0x2CEB, 0x2CED, 0x2CEE}, // Ⳬ ⳬ ( 2) COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI + {0x2CF2, 0x2CF2, 0x2CF3}, // Ⳳ ⳳ ( 1) COPTIC CAPITAL LETTER BOHAIRIC KHEI + {0xA640, 0xA66C, 0xA66D}, // Ꙁ ꙁ (23) CYRILLIC CAPITAL LETTER ZEMLYA + {0xA680, 0xA69A, 0xA69B}, // Ꚁ ꚁ (14) CYRILLIC CAPITAL LETTER DWE + {0xA722, 0xA72E, 0xA72F}, // Ꜣ ꜣ ( 7) LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF + {0xA732, 0xA76E, 0xA76F}, // Ꜳ ꜳ (31) LATIN CAPITAL LETTER AA + {0xA779, 0xA77B, 0xA77C}, // Ꝺ ꝺ ( 2) LATIN CAPITAL LETTER INSULAR D + {0xA77D, 0xA77D, 0x1D79}, // Ᵹ ᵹ ( 1) LATIN CAPITAL LETTER INSULAR G + {0xA77E, 0xA786, 0xA787}, // Ꝿ ꝿ ( 5) LATIN CAPITAL LETTER TURNED INSULAR G + {0xA78B, 0xA78B, 0xA78C}, // Ꞌ ꞌ ( 1) LATIN CAPITAL LETTER SALTILLO + {0xA78D, 0xA78D, 0x0265}, // Ɥ ɥ ( 1) LATIN CAPITAL LETTER TURNED H + {0xA790, 0xA792, 0xA793}, // Ꞑ ꞑ ( 2) LATIN CAPITAL LETTER N WITH DESCENDER + {0xA796, 0xA7A8, 0xA7A9}, // Ꞗ ꞗ (10) LATIN CAPITAL LETTER B WITH FLOURISH + {0xA7AA, 0xA7AA, 0x0266}, // Ɦ ɦ ( 1) LATIN CAPITAL LETTER H WITH HOOK + {0xA7AB, 0xA7AB, 0x025C}, // Ɜ ɜ ( 1) LATIN CAPITAL LETTER REVERSED OPEN E + {0xA7AC, 0xA7AC, 0x0261}, // Ɡ ɡ ( 1) LATIN CAPITAL LETTER SCRIPT G + {0xA7AD, 0xA7AD, 0x026C}, // Ɬ ɬ ( 1) LATIN CAPITAL LETTER L WITH BELT + {0xA7AE, 0xA7AE, 0x026A}, // Ɪ ɪ ( 1) LATIN CAPITAL LETTER SMALL CAPITAL I + {0xA7B0, 0xA7B0, 0x029E}, // Ʞ ʞ ( 1) LATIN CAPITAL LETTER TURNED K + {0xA7B1, 0xA7B1, 0x0287}, // Ʇ ʇ ( 1) LATIN CAPITAL LETTER TURNED T + {0xA7B2, 0xA7B2, 0x029D}, // Ʝ ʝ ( 1) LATIN CAPITAL LETTER J WITH CROSSED-TAIL + {0xA7B3, 0xA7B3, 0xAB53}, // Ꭓ ꭓ ( 1) LATIN CAPITAL LETTER CHI + {0xA7B4, 0xA7C2, 0xA7C3}, // Ꞵ ꞵ ( 8) LATIN CAPITAL LETTER BETA + {0xA7C4, 0xA7C4, 0xA794}, // Ꞔ ꞔ ( 1) LATIN CAPITAL LETTER C WITH PALATAL HOOK + {0xA7C5, 0xA7C5, 0x0282}, // Ʂ ʂ ( 1) LATIN CAPITAL LETTER S WITH HOOK + {0xA7C6, 0xA7C6, 
0x1D8E}, // Ᶎ ᶎ ( 1) LATIN CAPITAL LETTER Z WITH PALATAL HOOK + {0xA7C7, 0xA7C9, 0xA7CA}, // Ꟈ ꟈ ( 2) LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY + {0xA7D0, 0xA7D0, 0xA7D1}, // Ꟑ ꟑ ( 1) LATIN CAPITAL LETTER CLOSED INSULAR G + {0xA7D6, 0xA7D8, 0xA7D9}, // Ꟗ ꟗ ( 2) LATIN CAPITAL LETTER MIDDLE SCOTS S + {0xA7F5, 0xA7F5, 0xA7F6}, // Ꟶ ꟶ ( 1) LATIN CAPITAL LETTER REVERSED HALF H + {0xAB70, 0xABBF, 0x13EF}, // ꭰ Ꭰ (80) CHEROKEE SMALL LETTER A + {0xFF21, 0xFF3A, 0xFF5A}, // A a (26) FULLWIDTH LATIN CAPITAL LETTER A + {0x0130, 0x0130, 0x0069}, // İ i ( 1) LATIN CAPITAL LETTER I WITH DOT ABOVE + {0x01CD, 0x01DB, 0x01DC}, // Ǎ ǎ ( 8) LATIN CAPITAL LETTER A WITH CARON + {0x01F4, 0x01F4, 0x01F5}, // Ǵ ǵ ( 1) LATIN CAPITAL LETTER G WITH ACUTE + {0x13A0, 0x13EF, 0xABBF}, // Ꭰ ꭰ (80) CHEROKEE LETTER A + {0x13F0, 0x13F5, 0x13FD}, // Ᏸ ᏸ ( 6) CHEROKEE LETTER YE + {0x039C, 0x039C, 0x00B5}, // Μ µ ( 1) + {0x0049, 0x0049, 0x0131}, // I ı ( 1) + {0x0053, 0x0053, 0x017F}, // S ſ ( 1) + {0x03A3, 0x03A3, 0x03C2}, // Σ ς ( 1) + {0x0392, 0x0392, 0x03D0}, // Β ϐ ( 1) + {0x0398, 0x0398, 0x03D1}, // Θ ϑ ( 1) + {0x03A6, 0x03A6, 0x03D5}, // Φ ϕ ( 1) + {0x03A0, 0x03A0, 0x03D6}, // Π ϖ ( 1) + {0x039A, 0x039A, 0x03F0}, // Κ ϰ ( 1) + {0x03A1, 0x03A1, 0x03F1}, // Ρ ϱ ( 1) + {0x0395, 0x0395, 0x03F5}, // Ε ϵ ( 1) + {0x0412, 0x0412, 0x1C80}, // В ᲀ ( 1) + {0x0414, 0x0414, 0x1C81}, // Д ᲁ ( 1) + {0x041E, 0x041E, 0x1C82}, // О ᲂ ( 1) + {0x0421, 0x0422, 0x1C84}, // С ᲃ ( 2) + {0x0422, 0x0422, 0x1C85}, // Т ᲅ ( 1) + {0x042A, 0x042A, 0x1C86}, // Ъ ᲆ ( 1) + {0x0462, 0x0462, 0x1C87}, // Ѣ ᲇ ( 1) + {0xA64A, 0xA64A, 0x1C88}, // Ꙋ ᲈ ( 1) + {0x1E60, 0x1E60, 0x1E9B}, // Ṡ ẛ ( 1) + {0x0399, 0x0399, 0x1FBE}, // Ι ι ( 1) +}; // 218 + +enum { casefold_len = 192 }; + +static uint8_t upcase_ind[162] = { + 0, 2, 3, 4, 192, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 43, 45, 193, 47, 48, 194, 50, 51, 52, 53, 54, 55, 56, 
57, 58, 59, 60, 61, 62, + 63, 65, 66, 67, 68, 69, 70, 71, 72, 73, 75, 80, 83, 85, 86, 87, 88, 89, 90, 91, + 92, 93, 94, 95, 96, 97, 98, 99, 195, 196, 109, 110, 111, 113, 114, 115, 116, 117, 118, 119, + 120, 121, 125, 126, 129, 131, 132, 133, 134, 135, 136, 137, 139, 140, 141, 142, 144, 146, 147, 148, + 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, + 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, + 189, 191, +}; + +static uint8_t lowcase_ind[184] = { + 0, 197, 113, 2, 3, 8, 4, 198, 5, 6, 7, 9, 199, 60, 12, 14, 16, 20, 50, 25, + 57, 53, 29, 31, 33, 35, 37, 39, 40, 51, 41, 43, 45, 193, 17, 47, 48, 194, 52, 54, + 56, 158, 59, 63, 154, 152, 155, 11, 13, 15, 18, 19, 174, 21, 175, 22, 170, 173, 24, 23, + 177, 148, 176, 26, 153, 27, 28, 150, 30, 184, 32, 179, 34, 61, 36, 62, 38, 180, 178, 65, + 66, 88, 68, 69, 72, 200, 73, 70, 71, 201, 202, 203, 204, 75, 80, 205, 206, 86, 67, 207, + 85, 87, 90, 89, 91, 92, 94, 93, 95, 96, 109, 110, 196, 208, 209, 210, 211, 212, 213, 214, + 215, 167, 149, 185, 111, 216, 114, 115, 116, 117, 118, 119, 120, 121, 126, 129, 132, 136, 134, 137, + 122, 123, 124, 125, 127, 217, 130, 131, 133, 135, 138, 142, 144, 146, 147, 55, 58, 151, 156, 157, + 159, 160, 161, 97, 98, 99, 162, 163, 164, 165, 166, 168, 169, 171, 183, 172, 182, 186, 187, 188, + 189, 181, 195, 191, +}; diff --git a/stc/queue.h b/stc/queue.h new file mode 100644 index 0000000..507cf8a --- /dev/null +++ b/stc/queue.h @@ -0,0 +1,39 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is 
+ * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +// Queue. Implemented as a ring buffer. +#include "priv/linkage.h" +#include "types.h" + +#ifndef STC_QUEUE_H_INCLUDED +#define STC_QUEUE_H_INCLUDED +#include "common.h" +#include +#endif // STC_QUEUE_H_INCLUDED + +#ifndef _i_prefix + #define _i_prefix queue_ +#endif +#include "priv/template.h" +#include "priv/queue_prv.h" +#include "sys/finalize.h" diff --git a/stc/random.h b/stc/random.h new file mode 100644 index 0000000..33ddda1 --- /dev/null +++ b/stc/random.h @@ -0,0 +1,251 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. +*/ +#define i_header // external linkage of normal_dist by default. +#include "priv/linkage.h" + +#ifndef STC_RANDOM_H_INCLUDED +#define STC_RANDOM_H_INCLUDED + +#include "common.h" + +// ===== crand64 =================================== + +typedef struct { + uint64_t data[4]; +} crand64; + +typedef struct { + double mean, stddev; + double _next; + int _has_next; +} crand64_normal_dist; + +STC_API double crand64_normal(crand64_normal_dist* d); +STC_API double crand64_normal_r(crand64* rng, uint64_t stream, crand64_normal_dist* d); + +#if INTPTR_MAX == INT64_MAX + #define crandWS crand64 +#else + #define crandWS crand32 +#endif + +#define c_shuffle_seed(s) \ + c_JOIN(crandWS, _seed)(s) + +#define c_shuffle_array(array, n) do { \ + typedef struct { char d[sizeof 0[array]]; } _etype; \ + _etype* _arr = (_etype *)(array); \ + for (isize _i = (n) - 1; _i > 0; --_i) { \ + isize _j = (isize)(c_JOIN(crandWS, _uint)() % (_i + 1)); \ + c_swap(_arr + _i, _arr + _j); \ + } \ +} while (0) + +// Compiles with vec, stack, and deque container types: +#define c_shuffle(CntType, self) do { \ + CntType* _self = self; \ + for (isize _i = CntType##_size(_self) - 1; _i > 0; --_i) { \ + isize _j = (isize)(c_JOIN(crandWS, _uint)() % (_i + 1)); \ + c_swap(CntType##_at_mut(_self, _i), CntType##_at_mut(_self, _j)); \ + } \ +} while (0) + +STC_INLINE void crand64_seed_r(crand64* rng, uint64_t seed) { + uint64_t* s = rng->data; + s[0] = seed*0x9e3779b97f4a7c15; s[0] ^= s[0] >> 30; + s[1] = 
s[0]*0xbf58476d1ce4e5b9; s[1] ^= s[1] >> 27; + s[2] = s[1]*0x94d049bb133111eb; s[2] ^= s[2] >> 31; + s[3] = seed; +} + +// Minimum period length 2^64 per stream. 2^63 streams (odd numbers only) +STC_INLINE uint64_t crand64_uint_r(crand64* rng, uint64_t stream) { + uint64_t* s = rng->data; + const uint64_t result = (s[0] ^ (s[3] += stream)) + s[1]; + s[0] = s[1] ^ (s[1] >> 11); + s[1] = s[2] + (s[2] << 3); + s[2] = ((s[2] << 24) | (s[2] >> 40)) + result; + return result; +} + +STC_INLINE double crand64_real_r(crand64* rng, uint64_t stream) + { return (double)(crand64_uint_r(rng, stream) >> 11) * 0x1.0p-53; } + +STC_INLINE crand64* _stc64(void) { + static crand64 rng = {{0x9e3779bb07979af0,0x6f682616bae3641a,0xe220a8397b1dcdaf,0x1}}; + return &rng; +} + +STC_INLINE void crand64_seed(uint64_t seed) + { crand64_seed_r(_stc64(), seed); } + +STC_INLINE crand64 crand64_from(uint64_t seed) + { crand64 rng; crand64_seed_r(&rng, seed); return rng; } + +STC_INLINE uint64_t crand64_uint(void) + { return crand64_uint_r(_stc64(), 1); } + +STC_INLINE double crand64_real(void) + { return crand64_real_r(_stc64(), 1); } + +// --- crand64_uniform --- + +typedef struct { + int64_t low; + uint64_t range, threshold; +} crand64_uniform_dist; + +STC_INLINE crand64_uniform_dist +crand64_make_uniform(int64_t low, int64_t high) { + crand64_uniform_dist d = {low, (uint64_t)(high - low + 1)}; + d.threshold = (uint64_t)(0 - d.range) % d.range; + return d; +} + +// 128-bit multiplication +#if defined(__SIZEOF_INT128__) + #define c_umul128(a, b, lo, hi) \ + do { __uint128_t _z = (__uint128_t)(a)*(b); \ + *(lo) = (uint64_t)_z, *(hi) = (uint64_t)(_z >> 64U); } while(0) +#elif defined(_MSC_VER) && defined(_WIN64) + #include + #define c_umul128(a, b, lo, hi) ((void)(*(lo) = _umul128(a, b, hi))) +#elif defined(__x86_64__) + #define c_umul128(a, b, lo, hi) \ + asm("mulq %3" : "=a"(*(lo)), "=d"(*(hi)) : "a"(a), "rm"(b)) +#endif + +STC_INLINE int64_t +crand64_uniform_r(crand64* rng, uint64_t stream, 
crand64_uniform_dist* d) { + uint64_t lo, hi; + #ifdef c_umul128 + do { c_umul128(crand64_uint_r(rng, stream), d->range, &lo, &hi); } while (lo < d->threshold); + #else + do { lo = crand64_uint_r(rng, stream); hi = lo % d->range; } while (lo - hi > -d->range); + #endif + return d->low + (int64_t)hi; +} + +STC_INLINE int64_t crand64_uniform(crand64_uniform_dist* d) + { return crand64_uniform_r(_stc64(), 1, d); } + +// ===== crand32 =================================== + +typedef struct { uint32_t data[4]; } crand32; + +STC_INLINE void crand32_seed_r(crand32* rng, uint32_t seed) { + uint32_t* s = rng->data; + s[0] = seed*0x9e3779b9; s[0] ^= s[0] >> 16; + s[1] = s[0]*0x21f0aaad; s[1] ^= s[1] >> 15; + s[2] = s[1]*0x735a2d97; s[2] ^= s[2] >> 15; + s[3] = seed; +} + +// Minimum period length 2^32 per stream. 2^31 streams (odd numbers only) +STC_INLINE uint32_t crand32_uint_r(crand32* rng, uint32_t stream) { + uint32_t* s = rng->data; + const uint32_t result = (s[0] ^ (s[3] += stream)) + s[1]; + s[0] = s[1] ^ (s[1] >> 9); + s[1] = s[2] + (s[2] << 3); + s[2] = ((s[2] << 21) | (s[2] >> 11)) + result; + return result; +} + +STC_INLINE double crand32_real_r(crand32* rng, uint32_t stream) + { return crand32_uint_r(rng, stream) * 0x1.0p-32; } + +STC_INLINE crand32* _stc32(void) { + static crand32 rng = {{0x9e37e78e,0x6eab1ba1,0x64625032,0x1}}; + return &rng; +} + +STC_INLINE void crand32_seed(uint32_t seed) + { crand32_seed_r(_stc32(), seed); } + +STC_INLINE crand32 crand32_from(uint32_t seed) + { crand32 rng; crand32_seed_r(&rng, seed); return rng; } + +STC_INLINE uint32_t crand32_uint(void) + { return crand32_uint_r(_stc32(), 1); } + +STC_INLINE double crand32_real(void) + { return crand32_real_r(_stc32(), 1); } + +// --- crand32_uniform --- + +typedef struct { + int32_t low; + uint32_t range, threshold; +} crand32_uniform_dist; + +STC_INLINE crand32_uniform_dist +crand32_make_uniform(int32_t low, int32_t high) { + crand32_uniform_dist d = {low, (uint32_t)(high - low + 1)}; + 
d.threshold = (uint32_t)(0 - d.range) % d.range; + return d; +} + +STC_INLINE int32_t +crand32_uniform_r(crand32* rng, uint32_t stream, crand32_uniform_dist* d) { + uint64_t r; + do { + r = crand32_uint_r(rng, stream) * (uint64_t)d->range; + } while ((uint32_t)r < d->threshold); + return d->low + (int32_t)(r >> 32); +} + +STC_INLINE int64_t crand32_uniform(crand32_uniform_dist* d) + { return crand32_uniform_r(_stc32(), 1, d); } + +#endif // STC_RANDOM_H_INCLUDED + +/* -------------------------- IMPLEMENTATION ------------------------- */ +#if defined i_implement + +#ifndef STC_RANDOM_C_INCLUDED +#define STC_RANDOM_C_INCLUDED +#include + +STC_DEF double +crand64_normal_r(crand64* rng, uint64_t stream, crand64_normal_dist* d) { + double v1, v2, sq, rt; + if (d->_has_next++ & 1) + return d->_next*d->stddev + d->mean; + do { + // range (-1.0, 1.0): + v1 = (double)((int64_t)crand64_uint_r(rng, stream) >> 11) * 0x1.0p-52; + v2 = (double)((int64_t)crand64_uint_r(rng, stream) >> 11) * 0x1.0p-52; + + sq = v1*v1 + v2*v2; + } while (sq >= 1.0 || sq == 0.0); + rt = sqrt(-2.0 * log(sq) / sq); + d->_next = v2*rt; + return (v1*rt)*d->stddev + d->mean; +} + +STC_DEF double crand64_normal(crand64_normal_dist* d) + { return crand64_normal_r(_stc64(), 1, d); } + +#endif // STC_RANDOM_C_INCLUDED +#endif // i_implement +#include "priv/linkage2.h" diff --git a/stc/rc.h b/stc/rc.h new file mode 100644 index 0000000..98ec6e0 --- /dev/null +++ b/stc/rc.h @@ -0,0 +1,38 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The
above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +// rc type - arc.h instantiated with i_no_atomic and the rc_ prefix (presumably a non-atomic reference-counted pointer). +/* +#define T IRefc, int +#include +#include + +int main(void) { + IRefc rc = IRefc_make(42); + IRefc_drop(&rc); +} +*/ + +#define i_no_atomic +#define _i_prefix rc_ +#include "arc.h" diff --git a/stc/smap.h b/stc/smap.h new file mode 100644 index 0000000..ed1e4ff --- /dev/null +++ b/stc/smap.h @@ -0,0 +1,606 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +// Sorted/Ordered set and map - implemented as an AA-tree. +/* +#include +#include + +#define T SMap, cstr, double, (c_keypro) // Sorted map +#include + +int main(void) { + SMap m = {0}; + SMap_emplace(&m, "Testing one", 1.234); + SMap_emplace(&m, "Testing two", 12.34); + SMap_emplace(&m, "Testing three", 123.4); + + SMap_value *v = SMap_get(&m, "Testing five"); // NULL + double num = *SMap_at(&m, "Testing one"); + SMap_emplace_or_assign(&m, "Testing three", 1000.0); // update + SMap_erase(&m, "Testing two"); + + for (c_each(i, SMap, m)) + printf("map %s: %g\n", cstr_str(&i.ref->first), i.ref->second); + + SMap_drop(&m); +} +*/ +#include "priv/linkage.h" +#include "types.h" + +#ifndef STC_SMAP_H_INCLUDED +#define STC_SMAP_H_INCLUDED +#include "common.h" +#include +#endif // STC_SMAP_H_INCLUDED + +#ifndef _i_prefix + #define _i_prefix smap_ +#endif +#ifndef _i_is_set + #define _i_is_map + #define _i_MAP_ONLY c_true + #define _i_SET_ONLY c_false + #define _i_keyref(vp) (&(vp)->first) +#else + #define _i_MAP_ONLY c_false + #define _i_SET_ONLY c_true + #define _i_keyref(vp) (vp) +#endif +#define _i_sorted +#include "priv/template.h" +#ifndef i_declared + _c_DEFTYPES(_declare_aatree, Self, i_key, i_val, _i_MAP_ONLY, _i_SET_ONLY, _i_aux_def); +#endif + +_i_MAP_ONLY( struct _m_value { + _m_key first; + _m_mapped second; +}; ) +struct _m_node { + int32_t link[2]; + int8_t level; + _m_value value; +}; + +typedef i_keyraw _m_keyraw; +typedef i_valraw _m_rmapped; +typedef _i_SET_ONLY( _m_keyraw ) + _i_MAP_ONLY( struct { _m_keyraw first; _m_rmapped second; } ) + _m_raw; + +#if !defined i_no_emplace +STC_API _m_result _c_MEMB(_emplace)(Self* self, _m_keyraw rkey _i_MAP_ONLY(, _m_rmapped rmapped)); 
+#endif // !i_no_emplace +#if !defined i_no_clone +STC_API Self _c_MEMB(_clone)(Self tree); +#endif // !i_no_clone +STC_API void _c_MEMB(_drop)(const Self* cself); +STC_API bool _c_MEMB(_reserve)(Self* self, isize cap); +STC_API _m_value* _c_MEMB(_find_it)(const Self* self, _m_keyraw rkey, _m_iter* out); +STC_API _m_iter _c_MEMB(_lower_bound)(const Self* self, _m_keyraw rkey); +STC_API _m_value* _c_MEMB(_front)(const Self* self); +STC_API _m_value* _c_MEMB(_back)(const Self* self); +STC_API int _c_MEMB(_erase)(Self* self, _m_keyraw rkey); +STC_API _m_iter _c_MEMB(_erase_at)(Self* self, _m_iter it); +STC_API _m_iter _c_MEMB(_erase_range)(Self* self, _m_iter it1, _m_iter it2); +STC_API _m_iter _c_MEMB(_begin)(const Self* self); +STC_API void _c_MEMB(_next)(_m_iter* it); + +STC_INLINE bool _c_MEMB(_is_empty)(const Self* self) { return self->size == 0; } +STC_INLINE isize _c_MEMB(_size)(const Self* self) { return self->size; } +STC_INLINE isize _c_MEMB(_capacity)(const Self* self) { return self->capacity; } +STC_INLINE _m_iter _c_MEMB(_find)(const Self* self, _m_keyraw rkey) + { _m_iter it; _c_MEMB(_find_it)(self, rkey, &it); return it; } +STC_INLINE bool _c_MEMB(_contains)(const Self* self, _m_keyraw rkey) + { _m_iter it; return _c_MEMB(_find_it)(self, rkey, &it) != NULL; } +STC_INLINE const _m_value* _c_MEMB(_get)(const Self* self, _m_keyraw rkey) + { _m_iter it; return _c_MEMB(_find_it)(self, rkey, &it); } +STC_INLINE _m_value* _c_MEMB(_get_mut)(Self* self, _m_keyraw rkey) + { _m_iter it; return _c_MEMB(_find_it)(self, rkey, &it); } + +STC_INLINE _m_raw _c_MEMB(_value_toraw)(const _m_value* val) { + return _i_SET_ONLY( i_keytoraw(val) ) + _i_MAP_ONLY( c_literal(_m_raw){i_keytoraw((&val->first)), + i_valtoraw((&val->second))} ); +} + +STC_INLINE void _c_MEMB(_value_drop)(const Self* self, _m_value* val) { + (void)self; + i_keydrop(_i_keyref(val)); + _i_MAP_ONLY( i_valdrop((&val->second)); ) +} + +STC_INLINE Self _c_MEMB(_move)(Self *self) { + Self m = *self; + 
self->capacity = self->size = self->root = self->disp = self->head = 0; + self->nodes = NULL; + return m; +} + +STC_INLINE void _c_MEMB(_clear)(Self* self) { + _c_MEMB(_drop)(self); + (void)_c_MEMB(_move)(self); +} + +STC_INLINE void _c_MEMB(_take)(Self *self, Self unowned) { + _c_MEMB(_drop)(self); + *self = unowned; +} + +#if !defined i_no_clone +STC_INLINE _m_value _c_MEMB(_value_clone)(const Self* self, _m_value _val) { + (void)self; + *_i_keyref(&_val) = i_keyclone((*_i_keyref(&_val))); + _i_MAP_ONLY( _val.second = i_valclone(_val.second); ) + return _val; +} + +STC_INLINE void _c_MEMB(_copy)(Self *self, const Self* other) { + if (self == other) + return; + _c_MEMB(_drop)(self); + *self = _c_MEMB(_clone)(*other); +} + +STC_INLINE void _c_MEMB(_shrink_to_fit)(Self *self) { + Self tmp = _c_MEMB(_clone)(*self); + _c_MEMB(_drop)(self); *self = tmp; +} +#endif // !i_no_clone + +STC_API _m_result _c_MEMB(_insert_entry_)(Self* self, _m_keyraw rkey); + +#ifdef _i_is_map + STC_API _m_result _c_MEMB(_insert_or_assign)(Self* self, _m_key key, _m_mapped mapped); + #ifndef i_no_emplace + STC_API _m_result _c_MEMB(_emplace_or_assign)(Self* self, _m_keyraw rkey, _m_rmapped rmapped); + #endif + + STC_INLINE const _m_mapped* _c_MEMB(_at)(const Self* self, _m_keyraw rkey) + { _m_iter it; return &_c_MEMB(_find_it)(self, rkey, &it)->second; } + + STC_INLINE _m_mapped* _c_MEMB(_at_mut)(Self* self, _m_keyraw rkey) + { _m_iter it; return &_c_MEMB(_find_it)(self, rkey, &it)->second; } +#endif // _i_is_map + +STC_INLINE _m_iter _c_MEMB(_end)(const Self* self) { + _m_iter it; (void)self; + it.ref = NULL, it._top = 0, it._tn = 0; + return it; +} + +STC_INLINE _m_iter _c_MEMB(_advance)(_m_iter it, size_t n) { + while (n-- && it.ref) + _c_MEMB(_next)(&it); + return it; +} + +#if defined _i_has_eq +STC_INLINE bool +_c_MEMB(_eq)(const Self* self, const Self* other) { + if (_c_MEMB(_size)(self) != _c_MEMB(_size)(other)) return false; + _m_iter i = _c_MEMB(_begin)(self), j = 
_c_MEMB(_begin)(other); + for (; i.ref; _c_MEMB(_next)(&i), _c_MEMB(_next)(&j)) { + const _m_keyraw _rx = i_keytoraw(_i_keyref(i.ref)), _ry = i_keytoraw(_i_keyref(j.ref)); + if (!(i_eq((&_rx), (&_ry)))) return false; + } + return true; +} +#endif + +STC_INLINE _m_result +_c_MEMB(_insert)(Self* self, _m_key _key _i_MAP_ONLY(, _m_mapped _mapped)) { + _m_result _res = _c_MEMB(_insert_entry_)(self, i_keytoraw((&_key))); + if (_res.inserted) + { *_i_keyref(_res.ref) = _key; _i_MAP_ONLY( _res.ref->second = _mapped; )} + else + { i_keydrop((&_key)); _i_MAP_ONLY( i_valdrop((&_mapped)); )} + return _res; +} + +STC_INLINE _m_value* _c_MEMB(_push)(Self* self, _m_value _val) { + _m_result _res = _c_MEMB(_insert_entry_)(self, i_keytoraw(_i_keyref(&_val))); + if (_res.inserted) + *_res.ref = _val; + else + _c_MEMB(_value_drop)(self, &_val); + return _res.ref; +} + +#ifdef _i_is_map +STC_INLINE _m_result _c_MEMB(_put)(Self* self, _m_keyraw rkey, _m_rmapped rmapped) { + #ifdef i_no_emplace + return _c_MEMB(_insert_or_assign)(self, rkey, rmapped); + #else + return _c_MEMB(_emplace_or_assign)(self, rkey, rmapped); + #endif +} +#endif + +STC_INLINE void _c_MEMB(_put_n)(Self* self, const _m_raw* raw, isize n) { + while (n--) + #if defined _i_is_set && defined i_no_emplace + _c_MEMB(_insert)(self, *raw++); + #elif defined _i_is_set + _c_MEMB(_emplace)(self, *raw++); + #else + _c_MEMB(_put)(self, raw->first, raw->second), ++raw; + #endif +} + +#ifndef _i_aux_alloc +STC_INLINE Self _c_MEMB(_init)(void) + { Self cx = {0}; return cx; } + +STC_INLINE Self _c_MEMB(_from_n)(const _m_raw* raw, isize n) + { Self cx = {0}; _c_MEMB(_put_n)(&cx, raw, n); return cx; } + +STC_INLINE Self _c_MEMB(_with_capacity)(const isize cap) + { Self cx = {0}; _c_MEMB(_reserve)(&cx, cap); return cx; } +#endif + +/* -------------------------- IMPLEMENTATION ------------------------- */ +#if defined i_implement + +STC_DEF void +_c_MEMB(_next)(_m_iter *it) { + int32_t tn = it->_tn; + if (it->_top || tn) { + while 
(tn) { + it->_st[it->_top++] = tn; + tn = it->_d[tn].link[0]; + } + tn = it->_st[--it->_top]; + it->_tn = it->_d[tn].link[1]; + it->ref = &it->_d[tn].value; + } else + it->ref = NULL; +} + +STC_DEF _m_iter +_c_MEMB(_begin)(const Self* self) { + _m_iter it; + it.ref = NULL; + it._d = self->nodes, it._top = 0; + it._tn = self->root; + if (it._tn) + _c_MEMB(_next)(&it); + return it; +} + +STC_DEF bool +_c_MEMB(_reserve)(Self* self, const isize cap) { + if (cap <= self->capacity) + return false; + _m_node* nodes = (_m_node*)_i_realloc_n(self->nodes, self->capacity + 1, cap + 1); + if (nodes == NULL) + return false; + nodes[0] = c_literal(_m_node){0}; + self->nodes = nodes; + self->capacity = (int32_t)cap; + return true; +} + +STC_DEF _m_value* +_c_MEMB(_front)(const Self* self) { + _m_node *d = self->nodes; + int32_t tn = self->root; + while (d[tn].link[0]) + tn = d[tn].link[0]; + return &d[tn].value; +} + +STC_DEF _m_value* +_c_MEMB(_back)(const Self* self) { + _m_node *d = self->nodes; + int32_t tn = self->root; + while (d[tn].link[1]) + tn = d[tn].link[1]; + return &d[tn].value; +} + +static int32_t +_c_MEMB(_new_node_)(Self* self, int level) { + int32_t tn; + if (self->disp != 0) { + tn = self->disp; + self->disp = self->nodes[tn].link[1]; + } else { + if (self->head == self->capacity) + if (!_c_MEMB(_reserve)(self, self->head*3/2 + 4)) + return 0; + tn = ++self->head; /* start with 1, 0 is nullnode. */ + } + _m_node* dn = &self->nodes[tn]; + dn->link[0] = dn->link[1] = 0; dn->level = (int8_t)level; + return tn; +} + +#ifdef _i_is_map + STC_DEF _m_result + _c_MEMB(_insert_or_assign)(Self* self, _m_key _key, _m_mapped _mapped) { + _m_result _res = _c_MEMB(_insert_entry_)(self, i_keytoraw((&_key))); + _m_mapped* _mp = _res.ref ? 
&_res.ref->second : &_mapped; + if (_res.inserted) + _res.ref->first = _key; + else + { i_keydrop((&_key)); i_valdrop(_mp); } + *_mp = _mapped; + return _res; + } + + #if !defined i_no_emplace + STC_DEF _m_result + _c_MEMB(_emplace_or_assign)(Self* self, _m_keyraw rkey, _m_rmapped rmapped) { + _m_result _res = _c_MEMB(_insert_entry_)(self, rkey); + if (_res.inserted) + _res.ref->first = i_keyfrom(rkey); + else { + if (_res.ref == NULL) return _res; + i_valdrop((&_res.ref->second)); + } + _res.ref->second = i_valfrom(rmapped); + return _res; + } + #endif // !i_no_emplace +#endif // _i_is_map + +STC_DEF _m_value* +_c_MEMB(_find_it)(const Self* self, _m_keyraw rkey, _m_iter* out) { + int32_t tn = self->root; + _m_node *d = out->_d = self->nodes; + out->_top = 0; + while (tn) { + int c; const _m_keyraw _raw = i_keytoraw(_i_keyref(&d[tn].value)); + if ((c = i_cmp((&_raw), (&rkey))) < 0) + tn = d[tn].link[1]; + else if (c > 0) + { out->_st[out->_top++] = tn; tn = d[tn].link[0]; } /* going left: remember this larger node on the iterator stack */ + else + { out->_tn = d[tn].link[1]; return (out->ref = &d[tn].value); } + } + return (out->ref = NULL); +} + +STC_DEF _m_iter +_c_MEMB(_lower_bound)(const Self* self, _m_keyraw rkey) { + _m_iter it; + _c_MEMB(_find_it)(self, rkey, &it); + if (it.ref == NULL && it._top != 0) { /* no exact match: use the most recently stacked node, whose key compared greater than rkey */ + int32_t tn = it._st[--it._top]; + it._tn = it._d[tn].link[1]; + it.ref = &it._d[tn].value; + } + return it; +} + +STC_DEF int32_t +_c_MEMB(_skew_)(_m_node *d, int32_t tn) { + if (tn != 0 && d[d[tn].link[0]].level == d[tn].level) { /* left child on the same level: rotate right */ + int32_t tmp = d[tn].link[0]; + d[tn].link[0] = d[tmp].link[1]; + d[tmp].link[1] = tn; + tn = tmp; + } + return tn; +} + +STC_DEF int32_t +_c_MEMB(_split_)(_m_node *d, int32_t tn) { + if (d[d[d[tn].link[1]].link[1]].level == d[tn].level) { /* right-right grandchild on the same level: rotate left and raise the new subtree root one level */ + int32_t tmp = d[tn].link[1]; + d[tn].link[1] = d[tmp].link[0]; + d[tmp].link[0] = tn; + tn = tmp; + ++d[tn].level; + } + return tn; +} + +STC_DEF int32_t +_c_MEMB(_insert_entry_i_)(Self* self, int32_t tn, const _m_keyraw* rkey, _m_result* _res) { +
int32_t up[64], tx = tn; + _m_node* d = self->nodes; + int c, top = 0, dir = 0; + while (tx) { + up[top++] = tx; + const _m_keyraw _raw = i_keytoraw(_i_keyref(&d[tx].value)); + if ((c = i_cmp((&_raw), rkey)) == 0) + { _res->ref = &d[tx].value; return tn; } + dir = (c < 0); + tx = d[tx].link[dir]; + } + if ((tx = _c_MEMB(_new_node_)(self, 1)) == 0) + return 0; + d = self->nodes; + _res->ref = &d[tx].value; + _res->inserted = true; + if (top == 0) + return tx; + d[up[top - 1]].link[dir] = tx; + while (top--) { + if (top != 0) + dir = (d[up[top - 1]].link[1] == up[top]); + up[top] = _c_MEMB(_skew_)(d, up[top]); + up[top] = _c_MEMB(_split_)(d, up[top]); + if (top) + d[up[top - 1]].link[dir] = up[top]; + } + return up[0]; +} + +STC_DEF _m_result +_c_MEMB(_insert_entry_)(Self* self, _m_keyraw rkey) { + _m_result res = {0}; + int32_t tn = _c_MEMB(_insert_entry_i_)(self, self->root, &rkey, &res); + self->root = tn; + self->size += res.inserted; + return res; +} + +STC_DEF int32_t +_c_MEMB(_erase_r_)(Self *self, int32_t tn, const _m_keyraw* rkey, int *erased) { + _m_node *d = self->nodes; + if (tn == 0) + return 0; + _m_keyraw raw = i_keytoraw(_i_keyref(&d[tn].value)); + int32_t tx; int c = i_cmp((&raw), rkey); + if (c != 0) + d[tn].link[c < 0] = _c_MEMB(_erase_r_)(self, d[tn].link[c < 0], rkey, erased); + else { + if ((*erased)++ == 0) + _c_MEMB(_value_drop)(self, &d[tn].value); // drop first time, not second. 
+ if (d[tn].link[0] && d[tn].link[1]) { + tx = d[tn].link[0]; + while (d[tx].link[1]) + tx = d[tx].link[1]; + d[tn].value = d[tx].value; /* move */ + raw = i_keytoraw(_i_keyref(&d[tn].value)); + d[tn].link[0] = _c_MEMB(_erase_r_)(self, d[tn].link[0], &raw, erased); + } else { /* unlink node */ + tx = tn; + tn = d[tn].link[ d[tn].link[0] == 0 ]; + /* move it to disposed nodes list */ + d[tx].link[1] = self->disp; + self->disp = tx; + } + } + tx = d[tn].link[1]; + if (d[d[tn].link[0]].level < d[tn].level - 1 || d[tx].level < d[tn].level - 1) { + if (d[tx].level > --d[tn].level) + d[tx].level = d[tn].level; + tn = _c_MEMB(_skew_)(d, tn); + tx = d[tn].link[1] = _c_MEMB(_skew_)(d, d[tn].link[1]); + d[tx].link[1] = _c_MEMB(_skew_)(d, d[tx].link[1]); + tn = _c_MEMB(_split_)(d, tn); + d[tn].link[1] = _c_MEMB(_split_)(d, d[tn].link[1]); + } + return tn; +} + +STC_DEF int +_c_MEMB(_erase)(Self* self, _m_keyraw rkey) { + int erased = 0; + int32_t root = _c_MEMB(_erase_r_)(self, self->root, &rkey, &erased); + if (erased == 0) + return 0; + self->root = root; + --self->size; + return 1; +} + +STC_DEF _m_iter +_c_MEMB(_erase_at)(Self* self, _m_iter it) { + _m_keyraw raw = i_keytoraw(_i_keyref(it.ref)); + _c_MEMB(_next)(&it); + if (it.ref != NULL) { + _m_keyraw nxt = i_keytoraw(_i_keyref(it.ref)); + _c_MEMB(_erase)(self, raw); + _c_MEMB(_find_it)(self, nxt, &it); + } else + _c_MEMB(_erase)(self, raw); + return it; +} + +STC_DEF _m_iter +_c_MEMB(_erase_range)(Self* self, _m_iter it1, _m_iter it2) { + if (it2.ref == NULL) { + while (it1.ref != NULL) + it1 = _c_MEMB(_erase_at)(self, it1); + return it1; + } + _m_key k1 = *_i_keyref(it1.ref), k2 = *_i_keyref(it2.ref); + _m_keyraw r1 = i_keytoraw((&k1)); + for (;;) { + if (memcmp(&k1, &k2, sizeof k1) == 0) + return it1; + _c_MEMB(_next)(&it1); + k1 = *_i_keyref(it1.ref); + _c_MEMB(_erase)(self, r1); + r1 = i_keytoraw((&k1)); + _c_MEMB(_find_it)(self, r1, &it1); + } +} + +#if !defined i_no_clone +STC_DEF int32_t 
+_c_MEMB(_clone_r_)(Self* self, _m_node* src, int32_t sn) { + if (sn == 0) + return 0; + int32_t tx, tn = _c_MEMB(_new_node_)(self, src[sn].level); + self->nodes[tn].value = _c_MEMB(_value_clone)(self, src[sn].value); + tx = _c_MEMB(_clone_r_)(self, src, src[sn].link[0]); self->nodes[tn].link[0] = tx; /* result goes through tx because the recursive call can reallocate self->nodes */ + tx = _c_MEMB(_clone_r_)(self, src, src[sn].link[1]); self->nodes[tn].link[1] = tx; + return tn; +} + +STC_DEF Self +_c_MEMB(_clone)(Self tree) { /* deep copy: every value is cloned into newly reserved node storage */ + Self out = tree; + out.root = out.disp = out.head = out.size = out.capacity = 0; /* reset storage bookkeeping; remaining fields stay as copied from tree */ + out.nodes = NULL; _c_MEMB(_reserve)(&out, tree.size); + out.root = _c_MEMB(_clone_r_)(&out, tree.nodes, tree.root); out.size = tree.size; /* size was zeroed above and _clone_r_ never counts nodes, so restore it here */ + return out; +} +#endif // !i_no_clone + +#if !defined i_no_emplace +STC_DEF _m_result +_c_MEMB(_emplace)(Self* self, _m_keyraw rkey _i_MAP_ONLY(, _m_rmapped rmapped)) { + _m_result res = _c_MEMB(_insert_entry_)(self, rkey); + if (res.inserted) { + *_i_keyref(res.ref) = i_keyfrom(rkey); + _i_MAP_ONLY(res.ref->second = i_valfrom(rmapped);) + } + return res; +} +#endif // !i_no_emplace + +static void +_c_MEMB(_drop_r_)(Self* s, int32_t tn) { + if (tn != 0) { + _c_MEMB(_drop_r_)(s, s->nodes[tn].link[0]); + _c_MEMB(_drop_r_)(s, s->nodes[tn].link[1]); + _c_MEMB(_value_drop)(s, &s->nodes[tn].value); + } +} + +STC_DEF void +_c_MEMB(_drop)(const Self* cself) { + Self* self = (Self*)cself; + if (self->capacity != 0) { + _c_MEMB(_drop_r_)(self, self->root); + _i_free_n(self->nodes, self->capacity + 1); + } +} + +#endif // i_implement +#undef _i_is_set +#undef _i_is_map +#undef _i_sorted +#undef _i_keyref +#undef _i_MAP_ONLY +#undef _i_SET_ONLY +#include "sys/finalize.h" diff --git a/stc/sort.h b/stc/sort.h new file mode 100644 index 0000000..a2f95a5 --- /dev/null +++ b/stc/sort.h @@ -0,0 +1,109 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software
without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +/* Generic Quicksort in C, performs as fast as c++ std::sort(), and more robust. +template params: +#define i_key keytype - [required] (or use i_type, see below) +#define i_less(xp, yp) - optional less function. default: *xp < *yp +#define i_cmp(xp, yp) - alternative 3-way comparison. c_default_cmp(xp, yp) +#define T name - optional, defines {name}_sort(), else {i_key}s_sort(). +#define T name, key - alternative one-liner to define both i_type and i_key. + +// ex1: +#include +#define i_key int +#include + +int main(void) { + int nums[] = {23, 321, 5434, 25, 245, 1, 654, 33, 543, 21}; + + ints_sort(nums, c_arraylen(nums)); + + for (int i = 0; i < c_arraylen(nums); i++) + printf(" %d", nums[i]); + puts(""); + + isize idx = ints_binary_search(nums, 25, c_arraylen(nums)); + if (idx != c_NPOS) printf("found: %d\n", nums[idx]); + + idx = ints_lower_bound(nums, 200, c_arraylen(nums)); + if (idx != c_NPOS) printf("found lower 200: %d\n", nums[idx]); +} + +// ex2: Test on a deque !! 
+#include +#define T IDeq, int, (c_use_cmp) // enable comparison functions +#include + +int main(void) { + IDeq nums = c_make(IDeq, {5434, 25, 245, 1, 654, 33, 543, 21}); + IDeq_push_front(&nums, 23); + IDeq_push_front(&nums, 321); + + IDeq_sort(&nums); + + for (c_each (i, IDeq, nums)) + printf(" %d", *i.ref); + puts(""); + + isize idx = IDeq_binary_search(&nums, 25); + if (idx != c_NPOS) printf("found: %d\n", *IDeq_at(&nums, idx)); + + idx = IDeq_lower_bound(&nums, 200); + if (idx != c_NPOS) printf("found lower 200: %d\n", *IDeq_at(&nums, idx)); + + IDeq_drop(&nums); +} +*/ +#ifndef _i_template + #include "priv/linkage.h" + #include "common.h" + + #define _i_is_array + #if defined T && !defined i_type + #define i_type T + #endif + #if defined i_type && !defined i_key + #define Self c_GETARG(1, i_type) + #define i_key c_GETARG(2, i_type) + #elif defined i_type + #define Self i_type + #else + #define Self c_JOIN(i_key, s) + #endif + + typedef i_key Self; + typedef Self c_JOIN(Self, _value), c_JOIN(Self, _raw); + #define i_at(arr, idx) (&(arr)[idx]) + #define i_at_mut i_at + #include "priv/template.h" // IWYU pragma: keep +#endif + +#include "priv/sort_prv.h" + +#ifdef _i_is_array + #undef _i_is_array + #include "priv/linkage2.h" + #include "priv/template2.h" +#endif +#undef i_at +#undef i_at_mut diff --git a/stc/sortedmap.h b/stc/sortedmap.h new file mode 100644 index 0000000..f293122 --- /dev/null +++ b/stc/sortedmap.h @@ -0,0 +1,46 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above 
copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +// Sorted map - implemented as an AA-tree (balanced binary tree). +/* +#include + +#define T Intmap, int, int +#include // sorted map of int + +int main(void) { + Intmap map = {0}; + Intmap_insert(&map, 5, 25); + Intmap_insert(&map, 8, 38); + Intmap_insert(&map, 3, 43); + Intmap_insert(&map, 5, 55); + + for (c_each_kv(k, v, Intmap, map)) + printf(" %d -> %d\n", *k, *v); + + Intmap_drop(&map); +} +*/ + +#define _i_prefix smap_ +#include "smap.h" diff --git a/stc/sortedset.h b/stc/sortedset.h new file mode 100644 index 0000000..17c847e --- /dev/null +++ b/stc/sortedset.h @@ -0,0 +1,47 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +// Sorted set - implemented as an AA-tree (balanced binary tree). +/* +#include + +#define T Intset, int +#include // sorted set of int + +int main(void) { + Intset set = {0}; + Intset_insert(&set, 5); + Intset_insert(&set, 8); + Intset_insert(&set, 3); + Intset_insert(&set, 5); + + for (c_each(k, Intset, set)) + printf(" %d\n", *k.ref); + + Intset_drop(&set); +} +*/ + +#define _i_prefix sset_ +#define _i_is_set +#include "smap.h" diff --git a/stc/sset.h b/stc/sset.h new file mode 100644 index 0000000..6558a1a --- /dev/null +++ b/stc/sset.h @@ -0,0 +1,46 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +// Sorted set - implemented as an AA-tree (balanced binary tree). +/* +#include + +#define T Intset, int +#include // sorted set of int + +int main(void) { + Intset s = {0}; + Intset_insert(&s, 5); + Intset_insert(&s, 8); + Intset_insert(&s, 3); + Intset_insert(&s, 5); + + for (c_each(k, Intset, s)) + printf("set %d\n", *k.ref); + Intset_drop(&s); +} +*/ + +#define _i_prefix sset_ +#define _i_is_set +#include "smap.h" diff --git a/stc/stack.h b/stc/stack.h new file mode 100644 index 0000000..2d891e1 --- /dev/null +++ b/stc/stack.h @@ -0,0 +1,282 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "priv/linkage.h" +#include "types.h" + +// Stack - a simplified vec type without linear search and insert/erase inside the stack. + +#ifndef STC_STACK_H_INCLUDED +#define STC_STACK_H_INCLUDED +#include "common.h" +#include +#endif // STC_STACK_H_INCLUDED + +#ifndef _i_prefix + #define _i_prefix stack_ +#endif +#include "priv/template.h" +#ifndef i_declared +#if c_NUMARGS(i_type) == 4 + #define i_capacity i_val +#endif +#ifdef i_capacity + #define i_no_clone + _c_DEFTYPES(declare_stack_fixed, Self, i_key, i_capacity); +#else + _c_DEFTYPES(_declare_stack, Self, i_key, _i_aux_def); +#endif +#endif +typedef i_keyraw _m_raw; + +#ifdef i_capacity +STC_INLINE void _c_MEMB(_init)(Self* news) + { news->size = 0; } + +STC_INLINE isize _c_MEMB(_capacity)(const Self* self) + { (void)self; return i_capacity; } + +STC_INLINE bool _c_MEMB(_reserve)(Self* self, isize n) + { (void)self; return n <= i_capacity; } + +#else + +STC_INLINE Self _c_MEMB(_move)(Self *self) { + Self m = *self; + self->capacity = self->size = 0; + self->data = NULL; + return m; +} + +STC_INLINE isize _c_MEMB(_capacity)(const Self* self) + { return self->capacity; } + +STC_INLINE bool _c_MEMB(_reserve)(Self* self, isize n) { + if (n > self->capacity || (n && n == self->size)) { + _m_value *d = (_m_value *)_i_realloc_n(self->data, self->capacity, n); + if (d == NULL) + return false; + self->data = d; + self->capacity = n; + } + return self->data != NULL; +} +#endif // i_capacity + +STC_INLINE void _c_MEMB(_clear)(Self* self) { + if (self->size == 0) return; + _m_value *p = self->data + self->size; + while (p-- != self->data) { i_keydrop(p); } + self->size = 0; +} + +STC_INLINE void _c_MEMB(_drop)(const Self* cself) { + Self* self = (Self*)cself; + _c_MEMB(_clear)(self); +#ifndef i_capacity + _i_free_n(self->data, self->capacity); +#endif +} + +STC_INLINE void _c_MEMB(_take)(Self *self, Self unowned) { + _c_MEMB(_drop)(self); + *self = unowned; +} + +STC_INLINE isize _c_MEMB(_size)(const Self* 
self) + { return self->size; } + +STC_INLINE bool _c_MEMB(_is_empty)(const Self* self) + { return !self->size; } + +STC_INLINE void _c_MEMB(_value_drop)(const Self* self, _m_value* val) + { (void)self; i_keydrop(val); } + +STC_INLINE _m_value* _c_MEMB(_append_uninit)(Self *self, isize n) { + isize len = self->size; + if (len + n >= _c_MEMB(_capacity)(self)) + if (!_c_MEMB(_reserve)(self, len*3/2 + n)) + return NULL; + self->size += n; + return self->data + len; +} + +STC_INLINE void _c_MEMB(_shrink_to_fit)(Self* self) + { _c_MEMB(_reserve)(self, self->size); } + +STC_INLINE const _m_value* _c_MEMB(_front)(const Self* self) + { return &self->data[0]; } +STC_INLINE _m_value* _c_MEMB(_front_mut)(Self* self) + { return &self->data[0]; } + +STC_INLINE const _m_value* _c_MEMB(_back)(const Self* self) + { return &self->data[self->size - 1]; } +STC_INLINE _m_value* _c_MEMB(_back_mut)(Self* self) + { return &self->data[self->size - 1]; } + +STC_INLINE const _m_value* _c_MEMB(_top)(const Self* self) + { return _c_MEMB(_back)(self); } +STC_INLINE _m_value* _c_MEMB(_top_mut)(Self* self) + { return _c_MEMB(_back_mut)(self); } + +STC_INLINE _m_value* _c_MEMB(_push)(Self* self, _m_value val) { + if (self->size == _c_MEMB(_capacity)(self)) + if (!_c_MEMB(_reserve)(self, self->size*3/2 + 4)) + return NULL; + _m_value* vp = self->data + self->size++; + *vp = val; return vp; +} + +STC_INLINE void _c_MEMB(_pop)(Self* self) + { c_assert(self->size); _m_value* p = &self->data[--self->size]; i_keydrop(p); } + +STC_INLINE _m_value _c_MEMB(_pull)(Self* self) + { c_assert(self->size); return self->data[--self->size]; } + +STC_INLINE void _c_MEMB(_put_n)(Self* self, const _m_raw* raw, isize n) + { while (n--) _c_MEMB(_push)(self, i_keyfrom((*raw))), ++raw; } + +#if !defined _i_aux_alloc && !defined i_capacity +STC_INLINE Self _c_MEMB(_init)(void) + { Self out = {0}; return out; } + +STC_INLINE Self _c_MEMB(_with_capacity)(isize cap) + { Self out = {_i_new_n(_m_value, cap), 0, cap}; return 
out; } + +STC_INLINE Self _c_MEMB(_with_size_uninit)(isize size) + { Self out = {_i_new_n(_m_value, size), size, size}; return out; } + +STC_INLINE Self _c_MEMB(_with_size)(isize size, _m_raw default_raw) { + Self out = {_i_new_n(_m_value, size), size, size}; + while (size) out.data[--size] = i_keyfrom(default_raw); + return out; +} +STC_INLINE Self _c_MEMB(_from_n)(const _m_raw* raw, isize n) { + Self out = _c_MEMB(_with_capacity)(n); + _c_MEMB(_put_n)(&out, raw, n); return out; +} +#endif + +STC_INLINE const _m_value* _c_MEMB(_at)(const Self* self, isize idx) + { c_assert(c_uless(idx, self->size)); return self->data + idx; } + +STC_INLINE _m_value* _c_MEMB(_at_mut)(Self* self, isize idx) + { c_assert(c_uless(idx, self->size)); return self->data + idx; } + +#if !defined i_no_emplace +STC_INLINE _m_value* _c_MEMB(_emplace)(Self* self, _m_raw raw) + { return _c_MEMB(_push)(self, i_keyfrom(raw)); } +#endif // !i_no_emplace + +#if !defined i_no_clone +STC_INLINE Self _c_MEMB(_clone)(Self stk) { + Self out = stk, *self = &out; (void)self; // i_keyclone may use self via i_aux + out.data = NULL; out.size = out.capacity = 0; + _c_MEMB(_reserve)(&out, stk.size); + out.size = stk.size; + for (c_range(i, stk.size)) + out.data[i] = i_keyclone(stk.data[i]); + return out; +} + +STC_INLINE void _c_MEMB(_copy)(Self *self, const Self* other) { + if (self == other) return; + _c_MEMB(_clear)(self); + _c_MEMB(_reserve)(self, other->size); + for (c_range(i, other->size)) + self->data[self->size++] = i_keyclone((other->data[i])); +} + +STC_INLINE _m_value _c_MEMB(_value_clone)(const Self* self, _m_value val) + { (void)self; return i_keyclone(val); } + +STC_INLINE _m_raw _c_MEMB(_value_toraw)(const _m_value* val) + { return i_keytoraw(val); } +#endif // !i_no_clone + +// iteration + +STC_INLINE _m_iter _c_MEMB(_begin)(const Self* self) { + _m_iter it = {(_m_value*)self->data, (_m_value*)self->data}; + if (self->size) it.end += self->size; + else it.ref = NULL; + return it; +} + 
+STC_INLINE _m_iter _c_MEMB(_rbegin)(const Self* self) { + _m_iter it = {(_m_value*)self->data, (_m_value*)self->data}; + if (self->size) { it.ref += self->size - 1; it.end -= 1; } + else it.ref = NULL; + return it; +} + +STC_INLINE _m_iter _c_MEMB(_end)(const Self* self) + { (void)self; _m_iter it = {0}; return it; } + +STC_INLINE _m_iter _c_MEMB(_rend)(const Self* self) + { (void)self; _m_iter it = {0}; return it; } + +STC_INLINE void _c_MEMB(_next)(_m_iter* it) + { if (++it->ref == it->end) it->ref = NULL; } + +STC_INLINE void _c_MEMB(_rnext)(_m_iter* it) + { if (--it->ref == it->end) it->ref = NULL; } + +STC_INLINE _m_iter _c_MEMB(_advance)(_m_iter it, size_t n) + { if ((it.ref += n) >= it.end) it.ref = NULL ; return it; } + +STC_INLINE isize _c_MEMB(_index)(const Self* self, _m_iter it) + { return (it.ref - self->data); } + +STC_INLINE void _c_MEMB(_adjust_end_)(Self* self, isize n) + { self->size += n; } + +#if defined _i_has_cmp +#include "priv/sort_prv.h" +#endif // _i_has_cmp + +#if defined _i_has_eq +STC_INLINE _m_iter _c_MEMB(_find_in)(const Self* self, _m_iter i1, _m_iter i2, _m_raw raw) { + (void)self; + const _m_value* p2 = i2.ref ? 
i2.ref : i1.end; + for (; i1.ref != p2; ++i1.ref) { + const _m_raw r = i_keytoraw(i1.ref); + if (i_eq((&raw), (&r))) + return i1; + } + i2.ref = NULL; + return i2; +} + +STC_INLINE _m_iter _c_MEMB(_find)(const Self* self, _m_raw raw) + { return _c_MEMB(_find_in)(self, _c_MEMB(_begin)(self), _c_MEMB(_end)(self), raw); } + +STC_INLINE bool _c_MEMB(_eq)(const Self* self, const Self* other) { + if (self->size != other->size) return false; + for (isize i = 0; i < self->size; ++i) { + const _m_raw _rx = i_keytoraw((self->data+i)), _ry = i_keytoraw((other->data+i)); + if (!(i_eq((&_rx), (&_ry)))) return false; + } + return true; +} +#endif +#include "sys/finalize.h" diff --git a/stc/sys/crange.h b/stc/sys/crange.h new file mode 100644 index 0000000..c71cef6 --- /dev/null +++ b/stc/sys/crange.h @@ -0,0 +1,118 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+*/ +/* +#include +#include + +int main(void) +{ + crange r1 = crange_make(80, 90); + for (c_each(i, crange, r1)) + printf(" %d", *i.ref); + puts(""); + + c_filter(crange, c_iota(100, INT_MAX, 10), true + && c_flt_skip(25) + && c_flt_take(3) + && printf(" %d", *value) + ); + puts(""); +} +*/ +// IWYU pragma: private, include "stc/algorithm.h" +#ifndef STC_CRANGE_H_INCLUDED +#define STC_CRANGE_H_INCLUDED + +#include "../priv/linkage.h" +#include "../common.h" + +// crange: isize range ----- + +typedef isize crange_value; +typedef struct { crange_value start, end, step, value; } crange; +typedef struct { crange_value *ref, end, step; } crange_iter; + +STC_INLINE crange crange_make_3(crange_value start, crange_value stop, crange_value step) + { crange r = {start, stop - (step > 0), step}; return r; } + +#define crange_make(...) c_MACRO_OVERLOAD(crange_make, __VA_ARGS__) +#define crange_make_1(stop) crange_make_3(0, stop, 1) // NB! arg is stop +#define crange_make_2(start, stop) crange_make_3(start, stop, 1) + +STC_INLINE crange_iter crange_begin(crange* self) { + self->value = self->start; + crange_iter it = {&self->value, self->end, self->step}; + return it; +} + +STC_INLINE void crange_next(crange_iter* it) { + if ((it->step > 0) == ((*it->ref += it->step) > it->end)) + it->ref = NULL; +} + +STC_INLINE crange_iter crange_advance(crange_iter it, size_t n) { + if ((it.step > 0) == ((*it.ref += it.step*(isize)n) > it.end)) + it.ref = NULL; + return it; +} + +// iota: c++-like std::iota, use in iterations on-the-fly ----- +// Note: c_iota() does not compile with c++, crange does. +#define c_iota(...) c_MACRO_OVERLOAD(c_iota, __VA_ARGS__) +#define c_iota_1(start) c_iota_3(start, INTPTR_MAX, 1) // NB! arg is start. 
+#define c_iota_2(start, stop) c_iota_3(start, stop, 1) +#define c_iota_3(start, stop, step) ((crange[]){crange_make_3(start, stop, step)})[0] + + +// crange32 ----- + +typedef int32_t crange32_value; +typedef struct { crange32_value start, end, step, value; } crange32; +typedef struct { crange32_value *ref, end, step; } crange32_iter; + +STC_INLINE crange32 crange32_make_3(crange32_value start, crange32_value stop, crange32_value step) + { crange32 r = {start, stop - (step > 0), step}; return r; } /* for positive steps the stored end is stop - 1, i.e. inclusive */ + +#define crange32_make(...) c_MACRO_OVERLOAD(crange32_make, __VA_ARGS__) +#define crange32_make_1(stop) crange32_make_3(0, stop, 1) // NB! arg is stop +#define crange32_make_2(start, stop) crange32_make_3(start, stop, 1) + +STC_INLINE crange32_iter crange32_begin(crange32* self) { + self->value = self->start; + crange32_iter it = {&self->value, self->end, self->step}; + return it; +} + +STC_INLINE void crange32_next(crange32_iter* it) { + if ((it->step > 0) == ((*it->ref += it->step) > it->end)) /* advance in place, then test against end in the direction of travel */ + it->ref = NULL; +} + +STC_INLINE crange32_iter crange32_advance(crange32_iter it, uint32_t n) { + if ((it.step > 0) == ((*it.ref += it.step*(int32_t)n) > it.end)) + it.ref = NULL; + return it; +} + +#include "../priv/linkage2.h" +#endif // STC_CRANGE_H_INCLUDED diff --git a/stc/sys/filter.h b/stc/sys/filter.h new file mode 100644 index 0000000..512e68d --- /dev/null +++ b/stc/sys/filter.h @@ -0,0 +1,185 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice
shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. +*/ +/* +#include +#define T Vec, int +#include +#include + +int main(void) +{ + Vec vec = c_make(Vec, {1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 9, 10, 11, 12, 5}); + + c_filter(Vec, vec, true + && c_flt_skipwhile(*value < 3) // skip leading values < 3 + && (*value & 1) == 1 // then use odd values only + && c_flt_map(*value * 2) // multiply by 2 + && c_flt_takewhile(*value < 20) // stop if mapped *value >= 20 + && printf(" %d", *value) // print value + ); + // 6 10 14 2 6 18 + puts(""); + Vec_drop(&vec); +} +*/ +// IWYU pragma: private, include "stc/algorithm.h" +#ifndef STC_FILTER_H_INCLUDED +#define STC_FILTER_H_INCLUDED + +#include "../common.h" + +// ------- c_filter -------- +#define c_flt_take(n) _flt_take(&fltbase, n) +#define c_flt_skip(n) (c_flt_counter() > (n)) +#define c_flt_takewhile(pred) _flt_takewhile(&fltbase, pred) +#define c_flt_skipwhile(pred) (fltbase.sb[fltbase.sb_top++] |= !(pred)) +#define c_flt_counter() (++fltbase.sn[++fltbase.sn_top]) +#define c_flt_getcount() (fltbase.sn[fltbase.sn_top]) +#define c_flt_map(expr) (_mapped = (expr), value = &_mapped) +#define c_flt_src _it.ref + +#define c_filter(C, cnt, pred) \ + _c_filter(C, C##_begin(&cnt), _, pred) + +#define c_filter_from(C, start, pred) \ + _c_filter(C, start, _, pred) + +#define c_filter_reverse(C, cnt, pred) \ + _c_filter(C, C##_rbegin(&cnt), _r, pred) + +#define c_filter_reverse_from(C, start, pred) \ + _c_filter(C, start, _r, 
pred) + +#define _c_filter(C, start, rev, pred) do { \ + struct _flt_base fltbase = {0}; \ + C##_iter _it = start; \ + C##_value *value = _it.ref, _mapped = {0}; \ + for ((void)_mapped ; !fltbase.done & (_it.ref != NULL) ; \ + C##rev##next(&_it), value = _it.ref, fltbase.sn_top=0, fltbase.sb_top=0) \ + (void)(pred); \ +} while (0) + +// ------- c_filter_zip -------- +#define c_filter_zip(...) c_MACRO_OVERLOAD(c_filter_zip, __VA_ARGS__) +#define c_filter_zip_4(C, cnt1, cnt2, pred) \ + c_filter_zip_5(C, cnt1, C, cnt2, pred) +#define c_filter_zip_5(C1, cnt1, C2, cnt2, pred) \ + _c_filter_zip(C1, C1##_begin(&cnt1), C2, C2##_begin(&cnt2), _, pred) + +#define c_filter_reverse_zip(...) c_MACRO_OVERLOAD(c_filter_reverse_zip, __VA_ARGS__) +#define c_filter_reverse_zip_4(C, cnt1, cnt2, pred) \ + c_filter_reverse_zip_5(C, cnt1, C, cnt2, pred) +#define c_filter_reverse_zip_5(C1, cnt1, C2, cnt2, pred) \ + _c_filter_zip(C1, C1##_rbegin(&cnt1), C2, C2##_rbegin(&cnt2), _r, pred) + +#define c_filter_pairwise(C, cnt, pred) \ + _c_filter_zip(C, C##_begin(&cnt), C, C##_advance(_it1, 1), _, pred) + +#define c_flt_map1(expr) (_mapped1 = (expr), value1 = &_mapped1) +#define c_flt_map2(expr) (_mapped2 = (expr), value2 = &_mapped2) +#define c_flt_src1 _it1.ref +#define c_flt_src2 _it2.ref + +#define _c_filter_zip(C1, start1, C2, start2, rev, pred) do { \ + struct _flt_base fltbase = {0}; \ + C1##_iter _it1 = start1; \ + C2##_iter _it2 = start2; \ + C1##_value* value1 = _it1.ref, _mapped1; (void)_mapped1; \ + C2##_value* value2 = _it2.ref, _mapped2; (void)_mapped2; \ + for (; !fltbase.done & (_it1.ref != NULL) & (_it2.ref != NULL); \ + C1##rev##next(&_it1), value1 = _it1.ref, C2##rev##next(&_it2), value2 = _it2.ref, \ + fltbase.sn_top=0, fltbase.sb_top=0) \ + (void)(pred); \ +} while (0) + +// ------- c_ffilter -------- +// c_ffilter allows to execute imperative statements for each element +// in a for-loop, e.g., calling nested generic statements instead +// of defining a 
function/expression for it: +/* + Vec vec = ..., vec2 = ...; + for (c_ffilter(i, Vec, vec, true + && c_fflt_skipwhile(i, *i.ref < 3) // skip leading values < 3 + && (*i.ref & 1) == 1 // then use odd values only + && c_fflt_map(i, *i.ref * 2) // multiply by 2 + && c_fflt_takewhile(i, *i.ref < 20) // stop if mapped *i.ref >= 20 + )){ + c_eraseremove_if(Vec, &vec2, *value == *i.ref); + } +*/ +#define c_fflt_take(i, n) _flt_take(&i.base, n) +#define c_fflt_skip(i, n) (c_fflt_counter(i) > (n)) +#define c_fflt_takewhile(i, pred) _flt_takewhile(&i.base, pred) +#define c_fflt_skipwhile(i, pred) (i.base.sb[i.base.sb_top++] |= !(pred)) +#define c_fflt_counter(i) (++i.base.sn[++i.base.sn_top]) +#define c_fflt_getcount(i) (i.base.sn[i.base.sn_top]) +#define c_fflt_map(i, expr) (i.mapped = (expr), i.ref = &i.mapped) +#define c_fflt_src(i) i.iter.ref + +#define c_forfilter(...) for (c_ffilter(__VA_ARGS__)) +#define c_forfilter_from(...) for (c_ffilter_from(__VA_ARGS__)) +#define c_forfilter_reverse(...) for (c_ffilter_reverse(__VA_ARGS__)) +#define c_forfilter_reverse_from(...) 
for (c_ffilter_reverse_from(__VA_ARGS__)) + +#define c_ffilter(i, C, cnt, pred) \ + _c_ffilter(i, C, C##_begin(&cnt), _, pred) + +#define c_ffilter_from(i, C, start, pred) \ + _c_ffilter(i, C, start, _, pred) + +#define c_ffilter_reverse(i, C, cnt,pred) \ + _c_ffilter(i, C, C##_rbegin(&cnt), _r, pred) + +#define c_ffilter_reverse_from(i, C, start, pred) \ + _c_ffilter(i, C, start, _r, pred) + +#define _c_ffilter(i, C, start, rev, pred) \ + struct {C##_iter iter; C##_value *ref, mapped; struct _flt_base base;} \ + i = {.iter=start, .ref=i.iter.ref} ; !i.base.done & (i.iter.ref != NULL) ; \ + C##rev##next(&i.iter), i.ref = i.iter.ref, i.base.sn_top=0, i.base.sb_top=0) \ + if (!(pred)) ; else if (1 + +// ------------------------ private ------------------------- +#ifndef c_NFILTERS +#define c_NFILTERS 20 +#endif + +struct _flt_base { + uint8_t sn_top, sb_top; + bool done, sb[c_NFILTERS]; + uint32_t sn[c_NFILTERS]; +}; + +static inline bool _flt_take(struct _flt_base* base, uint32_t n) { + uint32_t k = ++base->sn[++base->sn_top]; + base->done |= (k >= n); + return n > 0; +} + +static inline bool _flt_takewhile(struct _flt_base* base, bool pred) { + bool skip = (base->sb[base->sb_top++] |= !pred); + base->done |= skip; + return !skip; +} + +#endif // STC_FILTER_H_INCLUDED diff --git a/stc/sys/finalize.h b/stc/sys/finalize.h new file mode 100644 index 0000000..e7271fa --- /dev/null +++ b/stc/sys/finalize.h @@ -0,0 +1,5 @@ +#ifndef i_extend + #include "../priv/linkage2.h" + #include "../priv/template2.h" +#endif +#undef i_extend diff --git a/stc/sys/sumtype.h b/stc/sys/sumtype.h new file mode 100644 index 0000000..9f08495 --- /dev/null +++ b/stc/sys/sumtype.h @@ -0,0 +1,171 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the 
rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +/* +// https://stackoverflow.com/questions/70935435/how-to-create-variants-in-rust +#include +#include +#include + +c_sumtype (Action, + (ActionSpeak, cstr), + (ActionQuit, bool), + (ActionRunFunc, struct { + int32_t (*func)(int32_t, int32_t); + int32_t v1, v2; + }) +); + +void Action_drop(Action* self) { + if (c_is(self, ActionSpeak, s)) + cstr_drop(s); +} + +void action(Action* action) { + c_when (action) { + c_is(ActionSpeak, s) { + printf("Asked to speak: %s\n", cstr_str(s)); + } + c_is(ActionQuit) { + printf("Asked to quit!\n"); + } + c_is(ActionRunFunc, r) { + int32_t res = r->func(r->v1, r->v2); + printf("v1: %d, v2: %d, res: %d\n", r->v1, r->v2, res); + } + c_otherwise assert(!"no match"); + } +} + +int32_t add(int32_t a, int32_t b) { + return a + b; +} + +int main(void) { + Action act1 = c_variant(ActionSpeak, cstr_from("Hello")); + Action act2 = c_variant(ActionQuit, 1); + Action act3 = c_variant(ActionRunFunc, {add, 5, 6}); + + action(&act1); + action(&act2); + action(&act3); + + c_drop(Action, &act1, &act2, &act3); +} +*/ +#ifndef STC_SUMTYPE_H_INCLUDED +#define STC_SUMTYPE_H_INCLUDED + +#include "../common.h" + 
+#define _c_EMPTY() +#define _c_LOOP_INDIRECTION() c_LOOP +#define _c_LOOP_END_1 ,_c_LOOP1 +#define _c_LOOP0(f,T,x,...) f c_EXPAND((T, c_EXPAND x)) _c_LOOP_INDIRECTION _c_EMPTY()()(f,T,__VA_ARGS__) +#define _c_LOOP1(...) +#define _c_CHECK(x,...) c_TUPLE_AT_1(__VA_ARGS__,x,) +#define _c_E0(...) __VA_ARGS__ +#define _c_E1(...) _c_E0(_c_E0(_c_E0(_c_E0(_c_E0(_c_E0(__VA_ARGS__)))))) +#define _c_E2(...) _c_E1(_c_E1(_c_E1(_c_E1(_c_E1(_c_E1(__VA_ARGS__)))))) +#define c_EVAL(...) _c_E2(_c_E2(_c_E2(__VA_ARGS__))) // currently supports up to 130 variants +#define c_LOOP(f,T,x,...) _c_CHECK(_c_LOOP0, c_JOIN(_c_LOOP_END_, c_NUMARGS(c_EXPAND x)))(f,T,x,__VA_ARGS__) + + +#define _c_enum_1(x,...) (x=__LINE__*1000, __VA_ARGS__) +#define _c_vartuple_tag(T, Tag, ...) Tag, +#define _c_vartuple_type(T, Tag, ...) typedef __VA_ARGS__ Tag##_type; typedef T Tag##_sumtype; +#define _c_vartuple_var(T, Tag, ...) struct { enum enum_##T tag; Tag##_type get; } Tag; + +#define c_sumtype(T, ...) \ + typedef union T T; \ + enum enum_##T { c_EVAL(c_LOOP(_c_vartuple_tag, T, _c_enum_1 __VA_ARGS__, (0),)) }; \ + c_EVAL(c_LOOP(_c_vartuple_type, T, __VA_ARGS__, (0),)) \ + union T { \ + struct { enum enum_##T tag; } _any_; \ + c_EVAL(c_LOOP(_c_vartuple_var, T, __VA_ARGS__, (0),)) \ + } + +#if defined STC_HAS_TYPEOF && STC_HAS_TYPEOF + #define c_when(varptr) \ + for (__typeof__(varptr) _vp1 = (varptr); _vp1; _vp1 = NULL) \ + switch (_vp1->_any_.tag) + + #define c_is_2(Tag, x) \ + break; case Tag: \ + for (__typeof__(_vp1->Tag.get)* x = &_vp1->Tag.get; x; x = NULL) + + #define c_is_3(varptr, Tag, x) \ + false) ; else for (__typeof__(varptr) _vp2 = (varptr); _vp2; _vp2 = NULL) \ + if (c_is_variant(_vp2, Tag)) \ + for (__typeof__(_vp2->Tag.get) *x = &_vp2->Tag.get; x; x = NULL +#else + typedef union { struct { int tag; } _any_; } _c_any_variant; + #define c_when(varptr) \ + for (_c_any_variant* _vp1 = (_c_any_variant *)(varptr); \ + _vp1; _vp1 = NULL, (void)sizeof((varptr)->_any_.tag)) \ + switch 
(_vp1->_any_.tag) + + #define c_is_2(Tag, x) \ + break; case Tag: \ + for (Tag##_type *x = &((Tag##_sumtype *)_vp1)->Tag.get; x; x = NULL) + + #define c_is_3(varptr, Tag, x) \ + false) ; else for (Tag##_sumtype* _vp2 = c_const_cast(Tag##_sumtype*, varptr); _vp2; _vp2 = NULL) \ + if (c_is_variant(_vp2, Tag)) \ + for (Tag##_type *x = &_vp2->Tag.get; x; x = NULL +#endif + +// Handling multiple tags with different payloads: +#define c_is(...) c_MACRO_OVERLOAD(c_is, __VA_ARGS__) +#define c_is_1(Tag) \ + break; case Tag: + +#define c_or_is(Tag) \ + ; case Tag: + +// Type checked multiple tags with same payload: +#define c_is_same(...) c_MACRO_OVERLOAD(c_is_same, __VA_ARGS__) +#define _c_chk(Tag1, Tag2) \ + case 1 ? Tag1 : sizeof((Tag1##_type*)0 == (Tag2##_type*)0): +#define c_is_same_2(Tag1, Tag2) \ + break; _c_chk(Tag1, Tag2) case Tag2: +#define c_is_same_3(Tag1, Tag2, Tag3) \ + break; _c_chk(Tag1, Tag2) _c_chk(Tag2, Tag3) case Tag3: +#define c_is_same_4(Tag1, Tag2, Tag3, Tag4) \ + break; _c_chk(Tag1, Tag2) _c_chk(Tag2, Tag3) _c_chk(Tag3, Tag4) case Tag4: + +#define c_otherwise \ + break; default: + +#define c_variant(Tag, ...) \ + (c_literal(Tag##_sumtype){.Tag={.tag=Tag, .get=__VA_ARGS__}}) + +#define c_is_variant(varptr, Tag) \ + ((varptr)->Tag.tag == Tag) + +#define c_get_if(varptr, Tag) \ + (c_is_variant(varptr, Tag) ? 
&(varptr)->Tag.get : NULL) + +#define c_variant_index(varptr) \ + ((int)(varptr)->_any_.tag) + +#endif // STC_SUMTYPE_H_INCLUDED diff --git a/stc/sys/utility.h b/stc/sys/utility.h new file mode 100644 index 0000000..b4ed1b6 --- /dev/null +++ b/stc/sys/utility.h @@ -0,0 +1,188 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. +*/ +// IWYU pragma: private, include "stc/algorithm.h" +#ifndef STC_UTILITY_H_INCLUDED +#define STC_UTILITY_H_INCLUDED + +// -------------------------------- +// c_find_if, c_find_reverse_if +// -------------------------------- + +#define c_find_if(...) c_MACRO_OVERLOAD(c_find_if, __VA_ARGS__) +#define c_find_if_4(C, cnt, outit_ptr, pred) \ + _c_find(C, C##_begin(&cnt), NULL, _, outit_ptr, pred) + +#define c_find_if_5(C, start, finish, outit_ptr, pred) \ + _c_find(C, start, (finish).ref, _, outit_ptr, pred) + +#define c_find_reverse_if(...) 
c_MACRO_OVERLOAD(c_find_reverse_if, __VA_ARGS__) +#define c_find_reverse_if_4(C, cnt, outit_ptr, pred) \ + _c_find(C, C##_rbegin(&cnt), NULL, _r, outit_ptr, pred) + +#define c_find_reverse_if_5(C, rstart, rfinish, outit_ptr, pred) \ + _c_find(C, rstart, (rfinish).ref, _r, outit_ptr, pred) + +// private +#define _c_find(C, start, endref, rev, outit_ptr, pred) do { \ + C##_iter* _out = outit_ptr; \ + const C##_value *value, *_endref = endref; \ + for (*_out = start; (value = _out->ref) != _endref; C##rev##next(_out)) \ + if (pred) goto c_JOIN(findif_, __LINE__); \ + _out->ref = NULL; c_JOIN(findif_, __LINE__):; \ +} while (0) + +// -------------------------------- +// c_reverse +// -------------------------------- + +#define c_reverse_array(array, n) do { \ + typedef struct { char d[sizeof 0[array]]; } _etype; \ + _etype* _arr = (_etype *)(array); \ + for (isize _i = 0, _j = (n) - 1; _i < _j; ++_i, --_j) \ + c_swap(_arr + _i, _arr + _j); \ +} while (0) + +// Compiles with vec, stack, and deque, and cspan container types: +#define c_reverse(CntType, self) do { \ + CntType* _self = self; \ + for (isize _i = 0, _j = CntType##_size(_self) - 1; _i < _j; ++_i, --_j) \ + c_swap(CntType##_at_mut(_self, _i), CntType##_at_mut(_self, _j)); \ +} while (0) + +// -------------------------------- +// c_erase_if +// -------------------------------- + +// Use with: list, hashmap, hashset, sortedmap, sortedset: +#define c_erase_if(C, cnt_ptr, pred) do { \ + C* _cnt = cnt_ptr; \ + const C##_value* value; \ + for (C##_iter _it = C##_begin(_cnt); (value = _it.ref); ) { \ + if (pred) _it = C##_erase_at(_cnt, _it); \ + else C##_next(&_it); \ + } \ +} while (0) + +// -------------------------------- +// c_eraseremove_if +// -------------------------------- + +// Use with: stack, vec, deque, queue: +#define c_eraseremove_if(C, cnt_ptr, pred) do { \ + C* _cnt = cnt_ptr; \ + isize _n = 0; \ + const C##_value* value; \ + C##_iter _i, _it = C##_begin(_cnt); \ + while ((value = _it.ref) && 
!(pred)) \ + C##_next(&_it); \ + for (_i = _it; (value = _it.ref); C##_next(&_it)) { \ + if (pred) C##_value_drop(_cnt, _it.ref), ++_n; \ + else *_i.ref = *_it.ref, C##_next(&_i); \ + } \ + C##_adjust_end_(_cnt, -_n); \ +} while (0) + +// -------------------------------- +// c_copy_to, c_copy_if +// -------------------------------- + +#define c_copy_to(...) c_MACRO_OVERLOAD(c_copy_to, __VA_ARGS__) +#define c_copy_to_3(C, outcnt_ptr, cnt) \ + _c_copy_if(C, outcnt_ptr, _, C, cnt, true) + +#define c_copy_to_4(C_out, outcnt_ptr, C, cnt) \ + _c_copy_if(C_out, outcnt_ptr, _, C, cnt, true) + +#define c_copy_if(...) c_MACRO_OVERLOAD(c_copy_if, __VA_ARGS__) +#define c_copy_if_4(C, outcnt_ptr, cnt, pred) \ + _c_copy_if(C, outcnt_ptr, _, C, cnt, pred) + +#define c_copy_if_5(C_out, outcnt_ptr, C, cnt, pred) \ + _c_copy_if(C_out, outcnt_ptr, _, C, cnt, pred) + +// private +#define _c_copy_if(C_out, outcnt_ptr, rev, C, cnt, pred) do { \ + C_out *_out = outcnt_ptr; \ + C _cnt = cnt; \ + const C##_value* value; \ + for (C##_iter _it = C##rev##begin(&_cnt); (value = _it.ref); C##rev##next(&_it)) \ + if (pred) C_out##_push(_out, C_out##_value_clone(_out, *_it.ref)); \ +} while (0) + +// -------------------------------- +// c_all_of, c_any_of, c_none_of +// -------------------------------- + +#define c_all_of(C, cnt, outbool_ptr, pred) do { \ + C##_iter _it; \ + c_find_if_4(C, cnt, &_it, !(pred)); \ + *(outbool_ptr) = _it.ref == NULL; \ +} while (0) + +#define c_any_of(C, cnt, outbool_ptr, pred) do { \ + C##_iter _it; \ + c_find_if_4(C, cnt, &_it, pred); \ + *(outbool_ptr) = _it.ref != NULL; \ +} while (0) + +#define c_none_of(C, cnt, outbool_ptr, pred) do { \ + C##_iter _it; \ + c_find_if_4(C, cnt, &_it, pred); \ + *(outbool_ptr) = _it.ref == NULL; \ +} while (0) + +// -------------------------------- +// c_min, c_max, c_min_n, c_max_n +// -------------------------------- +#define _c_minmax_call(fn, T, ...) 
\ + fn(c_make_array(T, {__VA_ARGS__}), c_sizeof((T[]){__VA_ARGS__})/c_sizeof(T)) + +#define c_min(...) _c_minmax_call(c_min_n, isize, __VA_ARGS__) +#define c_umin(...) _c_minmax_call(c_umin_n, size_t, __VA_ARGS__) +#define c_min32(...) _c_minmax_call(c_min32_n, int32_t, __VA_ARGS__) +#define c_fmin(...) _c_minmax_call(c_fmin_n, float, __VA_ARGS__) +#define c_dmin(...) _c_minmax_call(c_dmin_n, double, __VA_ARGS__) +#define c_max(...) _c_minmax_call(c_max_n, isize, __VA_ARGS__) +#define c_umax(...) _c_minmax_call(c_umax_n, size_t, __VA_ARGS__) +#define c_max32(...) _c_minmax_call(c_max32_n, int32_t, __VA_ARGS__) +#define c_fmax(...) _c_minmax_call(c_fmax_n, float, __VA_ARGS__) +#define c_dmax(...) _c_minmax_call(c_dmax_n, double, __VA_ARGS__) + +#define _c_minmax_def(fn, T, opr) \ + static inline T fn(const T a[], isize n) { \ + T x = a[0]; \ + for (isize i = 1; i < n; ++i) if (a[i] opr x) x = a[i]; \ + return x; \ + } +_c_minmax_def(c_min32_n, int32_t, <) +_c_minmax_def(c_min_n, isize, <) +_c_minmax_def(c_umin_n, size_t, <) +_c_minmax_def(c_fmin_n, float, <) +_c_minmax_def(c_dmin_n, double, <) +_c_minmax_def(c_max32_n, int32_t, >) +_c_minmax_def(c_max_n, isize, >) +_c_minmax_def(c_umax_n, size_t, >) +_c_minmax_def(c_fmax_n, float, >) +_c_minmax_def(c_dmax_n, double, >) + +#endif // STC_UTILITY_H_INCLUDED diff --git a/stc/types.h b/stc/types.h new file mode 100644 index 0000000..73af610 --- /dev/null +++ b/stc/types.h @@ -0,0 +1,223 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above 
copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef STC_TYPES_H_INCLUDED +#define STC_TYPES_H_INCLUDED + +#include +#include +#include + +#define declare_rc(C, KEY) declare_arc(C, KEY) +#define declare_list(C, KEY) _declare_list(C, KEY,) +#define declare_stack(C, KEY) _declare_stack(C, KEY,) +#define declare_vec(C, KEY) _declare_stack(C, KEY,) +#define declare_pqueue(C, KEY) _declare_stack(C, KEY,) +#define declare_queue(C, KEY) _declare_queue(C, KEY,) +#define declare_deque(C, KEY) _declare_queue(C, KEY,) +#define declare_hashmap(C, KEY, VAL) _declare_htable(C, KEY, VAL, c_true, c_false,) +#define declare_hashset(C, KEY) _declare_htable(C, KEY, KEY, c_false, c_true,) +#define declare_sortedmap(C, KEY, VAL) _declare_aatree(C, KEY, VAL, c_true, c_false,) +#define declare_sortedset(C, KEY) _declare_aatree(C, KEY, KEY, c_false, c_true,) + +#define declare_list_aux(C, KEY, AUX) _declare_list(C, KEY, AUX aux;) +#define declare_stack_aux(C, KEY, AUX) _declare_stack(C, KEY, AUX aux;) +#define declare_vec_aux(C, KEY, AUX) _declare_stack(C, KEY, AUX aux;) +#define declare_pqueue_aux(C, KEY, AUX) _declare_stack(C, KEY, AUX aux;) +#define declare_queue_aux(C, KEY, AUX) _declare_queue(C, KEY, AUX aux;) +#define declare_deque_aux(C, KEY, AUX) _declare_queue(C, KEY, AUX aux;) +#define declare_hashmap_aux(C, KEY, VAL, AUX) _declare_htable(C, KEY, VAL, c_true, c_false, AUX aux;) +#define declare_hashset_aux(C, 
KEY, AUX) _declare_htable(C, KEY, KEY, c_false, c_true, AUX aux;) +#define declare_sortedmap_aux(C, KEY, VAL, AUX) _declare_aatree(C, KEY, VAL, c_true, c_false, AUX aux;) +#define declare_sortedset_aux(C, KEY, AUX) _declare_aatree(C, KEY, KEY, c_false, c_true, AUX aux;) + +// csview : non-null terminated string view +typedef const char csview_value; +typedef struct csview { + csview_value* buf; + ptrdiff_t size; +} csview; + +typedef union { + csview_value* ref; + csview chr; + struct { csview chr; csview_value* end; } u8; +} csview_iter; + +#define c_sv(...) c_MACRO_OVERLOAD(c_sv, __VA_ARGS__) +#define c_sv_1(literal) c_sv_2(literal, c_litstrlen(literal)) +#define c_sv_2(str, n) (c_literal(csview){str, n}) +#define c_svfmt "%.*s" +#define c_svarg(sv) (int)(sv).size, (sv).buf // printf(c_svfmt "\n", c_svarg(sv)); + +// zsview : zero-terminated string view +typedef csview_value zsview_value; +typedef struct zsview { + zsview_value* str; + ptrdiff_t size; +} zsview; + +typedef union { + zsview_value* ref; + csview chr; +} zsview_iter; + +#define c_zv(literal) (c_literal(zsview){literal, c_litstrlen(literal)}) + +// cstr : zero-terminated owning string (short string optimized - sso) +typedef char cstr_value; +typedef struct { cstr_value* data; intptr_t size, cap; } cstr_buf; +typedef union cstr { + struct { cstr_buf *a, *b, *c; } _dummy; + struct { cstr_value* data; uintptr_t size; uintptr_t ncap; } lon; + struct { cstr_value data[ sizeof(cstr_buf) - 1 ]; uint8_t size; } sml; +} cstr; + +typedef union { + csview chr; // utf8 character/codepoint + const cstr_value* ref; +} cstr_iter; + +#define c_true(...) __VA_ARGS__ +#define c_false(...) 
+ +#define declare_arc(SELF, VAL) \ + typedef VAL SELF##_value; \ + typedef struct SELF##_ctrl SELF##_ctrl; \ +\ + typedef union SELF { \ + SELF##_value* get; \ + SELF##_ctrl* ctrl; \ + } SELF + +#define declare_arc2(SELF, VAL) \ + typedef VAL SELF##_value; \ + typedef struct SELF##_ctrl SELF##_ctrl; \ +\ + typedef struct SELF { \ + SELF##_value* get; \ + SELF##_ctrl* ctrl2; \ + } SELF + +#define declare_box(SELF, VAL) \ + typedef VAL SELF##_value; \ +\ + typedef struct SELF { \ + SELF##_value* get; \ + } SELF + +#define _declare_queue(SELF, VAL, AUXDEF) \ + typedef VAL SELF##_value; \ +\ + typedef struct SELF { \ + SELF##_value *cbuf; \ + ptrdiff_t start, end, capmask; \ + AUXDEF \ + } SELF; \ +\ + typedef struct { \ + SELF##_value *ref; \ + ptrdiff_t pos; \ + const SELF* _s; \ + } SELF##_iter + +#define _declare_list(SELF, VAL, AUXDEF) \ + typedef VAL SELF##_value; \ + typedef struct SELF##_node SELF##_node; \ +\ + typedef struct { \ + SELF##_value *ref; \ + SELF##_node *const *_last, *prev; \ + } SELF##_iter; \ +\ + typedef struct SELF { \ + SELF##_node *last; \ + AUXDEF \ + } SELF + +#define _declare_htable(SELF, KEY, VAL, MAP_ONLY, SET_ONLY, AUXDEF) \ + typedef KEY SELF##_key; \ + typedef VAL SELF##_mapped; \ +\ + typedef SET_ONLY( SELF##_key ) \ + MAP_ONLY( struct SELF##_value ) \ + SELF##_value, SELF##_entry; \ +\ + typedef struct { \ + SELF##_value *ref; \ + size_t idx; \ + bool inserted; \ + uint8_t hashx; \ + uint16_t dist; \ + } SELF##_result; \ +\ + typedef struct { \ + SELF##_value *ref, *_end; \ + struct hmap_meta *_mref; \ + } SELF##_iter; \ +\ + typedef struct SELF { \ + SELF##_value* table; \ + struct hmap_meta* meta; \ + ptrdiff_t size, bucket_count; \ + AUXDEF \ + } SELF + +#define _declare_aatree(SELF, KEY, VAL, MAP_ONLY, SET_ONLY, AUXDEF) \ + typedef KEY SELF##_key; \ + typedef VAL SELF##_mapped; \ + typedef struct SELF##_node SELF##_node; \ +\ + typedef SET_ONLY( SELF##_key ) \ + MAP_ONLY( struct SELF##_value ) \ + SELF##_value, SELF##_entry; 
\ +\ + typedef struct { \ + SELF##_value *ref; \ + bool inserted; \ + } SELF##_result; \ +\ + typedef struct { \ + SELF##_value *ref; \ + SELF##_node *_d; \ + int _top; \ + int32_t _tn, _st[36]; \ + } SELF##_iter; \ +\ + typedef struct SELF { \ + SELF##_node *nodes; \ + int32_t root, disp, head, size, capacity; \ + AUXDEF \ + } SELF + +#define declare_stack_fixed(SELF, VAL, CAP) \ + typedef VAL SELF##_value; \ + typedef struct { SELF##_value *ref, *end; } SELF##_iter; \ + typedef struct SELF { SELF##_value data[CAP]; ptrdiff_t size; } SELF + +#define _declare_stack(SELF, VAL, AUXDEF) \ + typedef VAL SELF##_value; \ + typedef struct { SELF##_value *ref, *end; } SELF##_iter; \ + typedef struct SELF { SELF##_value *data; ptrdiff_t size, capacity; AUXDEF } SELF + +#endif // STC_TYPES_H_INCLUDED diff --git a/stc/utf8.h b/stc/utf8.h new file mode 100644 index 0000000..3f91b65 --- /dev/null +++ b/stc/utf8.h @@ -0,0 +1,37 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "priv/linkage.h" + +#ifndef STC_UTF8_H_INCLUDED +#define STC_UTF8_H_INCLUDED + +#include "common.h" // IWYU pragma: keep +#include "types.h" +#include "priv/utf8_prv.h" // IWYU pragma: keep + +#endif // STC_UTF8_H_INCLUDED + +#if defined i_implement + #include "priv/utf8_prv.c" +#endif +#include "priv/linkage2.h" diff --git a/stc/vec.h b/stc/vec.h new file mode 100644 index 0000000..468ff78 --- /dev/null +++ b/stc/vec.h @@ -0,0 +1,392 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +/* +#define i_implement +#include +#include + +declare_vec(vec_i32, int); + +typedef struct MyStruct { + vec_i32 int_vec; + cstr name; +} MyStruct; + +#define i_key float +#include + +#define i_keypro cstr // cstr is a "pro"-type +#include + +#define T vec_i32, int32_t, (c_declared) +#include + +int main(void) { + vec_i32 vec = {0}; + vec_i32_push(&vec, 123); + vec_i32_drop(&vec); + + vec_float fvec = {0}; + vec_float_push(&fvec, 123.3); + vec_float_drop(&fvec); + + vec_cstr svec = {0}; + vec_cstr_emplace(&svec, "Hello, friend"); + vec_cstr_drop(&svec); +} +*/ +#include "priv/linkage.h" +#include "types.h" + +#ifndef STC_VEC_H_INCLUDED +#define STC_VEC_H_INCLUDED +#include "common.h" +#include + +#define _it2_ptr(it1, it2) (it1.ref && !it2.ref ? it1.end : it2.ref) +#define _it_ptr(it) (it.ref ? it.ref : it.end) +#endif // STC_VEC_H_INCLUDED + +#ifndef _i_prefix + #define _i_prefix vec_ +#endif +#include "priv/template.h" + +#ifndef i_declared + _c_DEFTYPES(_declare_stack, Self, i_key, _i_aux_def); +#endif +typedef i_keyraw _m_raw; +STC_API void _c_MEMB(_drop)(const Self* cself); +STC_API void _c_MEMB(_clear)(Self* self); +STC_API bool _c_MEMB(_reserve)(Self* self, isize cap); +STC_API bool _c_MEMB(_resize)(Self* self, isize size, _m_value null); +STC_API _m_iter _c_MEMB(_erase_n)(Self* self, isize idx, isize n); +STC_API _m_iter _c_MEMB(_insert_uninit)(Self* self, isize idx, isize n); +#if defined _i_has_eq +STC_API _m_iter _c_MEMB(_find_in)(const Self* self, _m_iter it1, _m_iter it2, _m_raw raw); +#endif // _i_has_eq + +STC_INLINE void _c_MEMB(_value_drop)(const Self* self, _m_value* val) + { (void)self; i_keydrop(val); } + +STC_INLINE Self _c_MEMB(_move)(Self *self) { + Self m = *self; + self->capacity = self->size = 0; + self->data = NULL; + return m; +} + +STC_INLINE void _c_MEMB(_take)(Self *self, Self unowned) { + _c_MEMB(_drop)(self); + *self = unowned; +} + +STC_INLINE _m_value* _c_MEMB(_push)(Self* self, _m_value value) { + if (self->size == 
self->capacity) + if (!_c_MEMB(_reserve)(self, self->size*2 + 4)) + return NULL; + _m_value *v = self->data + self->size++; + *v = value; + return v; +} + +STC_INLINE void _c_MEMB(_put_n)(Self* self, const _m_raw* raw, isize n) + { while (n--) _c_MEMB(_push)(self, i_keyfrom((*raw))), ++raw; } + +#if !defined i_no_emplace +STC_API _m_iter _c_MEMB(_emplace_n)(Self* self, isize idx, const _m_raw raw[], isize n); + +STC_INLINE _m_value* _c_MEMB(_emplace)(Self* self, _m_raw raw) + { return _c_MEMB(_push)(self, i_keyfrom(raw)); } + +STC_INLINE _m_value* _c_MEMB(_emplace_back)(Self* self, _m_raw raw) + { return _c_MEMB(_push)(self, i_keyfrom(raw)); } + +STC_INLINE _m_iter _c_MEMB(_emplace_at)(Self* self, _m_iter it, _m_raw raw) + { return _c_MEMB(_emplace_n)(self, _it_ptr(it) - self->data, &raw, 1); } +#endif // !i_no_emplace + +#if !defined i_no_clone +STC_API void _c_MEMB(_copy)(Self* self, const Self* other); +STC_API _m_iter _c_MEMB(_copy_to)(Self* self, isize idx, const _m_value arr[], isize n); +STC_API Self _c_MEMB(_clone)(Self vec); +STC_INLINE _m_value _c_MEMB(_value_clone)(const Self* self, _m_value val) + { (void)self; return i_keyclone(val); } +#endif // !i_no_clone + +STC_INLINE isize _c_MEMB(_size)(const Self* self) { return self->size; } +STC_INLINE isize _c_MEMB(_capacity)(const Self* self) { return self->capacity; } +STC_INLINE bool _c_MEMB(_is_empty)(const Self* self) { return !self->size; } +STC_INLINE _m_raw _c_MEMB(_value_toraw)(const _m_value* val) { return i_keytoraw(val); } +STC_INLINE const _m_value* _c_MEMB(_front)(const Self* self) { return self->data; } +STC_INLINE _m_value* _c_MEMB(_front_mut)(Self* self) { return self->data; } +STC_INLINE const _m_value* _c_MEMB(_back)(const Self* self) { return &self->data[self->size - 1]; } +STC_INLINE _m_value* _c_MEMB(_back_mut)(Self* self) { return &self->data[self->size - 1]; } + +STC_INLINE void _c_MEMB(_pop)(Self* self) + { c_assert(self->size); _m_value* p = &self->data[--self->size]; i_keydrop(p); } 
+STC_INLINE _m_value _c_MEMB(_pull)(Self* self) + { c_assert(self->size); return self->data[--self->size]; } +STC_INLINE _m_value* _c_MEMB(_push_back)(Self* self, _m_value value) + { return _c_MEMB(_push)(self, value); } +STC_INLINE void _c_MEMB(_pop_back)(Self* self) { _c_MEMB(_pop)(self); } + +#ifndef _i_aux_alloc +STC_INLINE Self _c_MEMB(_init)(void) + { return c_literal(Self){0}; } + +STC_INLINE Self _c_MEMB(_with_capacity)(isize cap) + { Self out = {_i_new_n(_m_value, cap), 0, cap}; return out; } + +STC_INLINE Self _c_MEMB(_with_size_uninit)(isize size) + { Self out = {_i_new_n(_m_value, size), size, size}; return out; } + +STC_INLINE Self _c_MEMB(_with_size)(isize size, _m_raw default_raw) { + Self out = {_i_new_n(_m_value, size), size, size}; + while (size) out.data[--size] = i_keyfrom(default_raw); + return out; +} +STC_INLINE Self _c_MEMB(_from_n)(const _m_raw* raw, isize n) { + Self out = _c_MEMB(_with_capacity)(n); + _c_MEMB(_put_n)(&out, raw, n); return out; +} +#endif + +STC_INLINE void _c_MEMB(_shrink_to_fit)(Self* self) + { _c_MEMB(_reserve)(self, _c_MEMB(_size)(self)); } + +STC_INLINE _m_iter +_c_MEMB(_insert_n)(Self* self, const isize idx, const _m_value arr[], const isize n) { + _m_iter it = _c_MEMB(_insert_uninit)(self, idx, n); + if (it.ref) + c_memcpy(it.ref, arr, n*c_sizeof *arr); + return it; +} + +STC_INLINE _m_iter _c_MEMB(_insert_at)(Self* self, _m_iter it, const _m_value value) { + return _c_MEMB(_insert_n)(self, _it_ptr(it) - self->data, &value, 1); +} + +STC_INLINE _m_iter _c_MEMB(_erase_at)(Self* self, _m_iter it) { + return _c_MEMB(_erase_n)(self, it.ref - self->data, 1); +} + +STC_INLINE _m_iter _c_MEMB(_erase_range)(Self* self, _m_iter i1, _m_iter i2) { + return _c_MEMB(_erase_n)(self, i1.ref - self->data, _it2_ptr(i1, i2) - i1.ref); +} + +STC_INLINE const _m_value* _c_MEMB(_at)(const Self* self, const isize idx) { + c_assert(c_uless(idx, self->size)); return self->data + idx; +} + +STC_INLINE _m_value* _c_MEMB(_at_mut)(Self* self, 
const isize idx) { + c_assert(c_uless(idx, self->size)); return self->data + idx; +} + +// iteration + +STC_INLINE _m_iter _c_MEMB(_begin)(const Self* self) { + _m_iter it = {(_m_value*)self->data, (_m_value*)self->data}; + if (self->size) it.end += self->size; + else it.ref = NULL; + return it; +} + +STC_INLINE _m_iter _c_MEMB(_rbegin)(const Self* self) { + _m_iter it = {(_m_value*)self->data, (_m_value*)self->data}; + if (self->size) { it.ref += self->size - 1; it.end -= 1; } + else it.ref = NULL; + return it; +} + +STC_INLINE _m_iter _c_MEMB(_end)(const Self* self) + { (void)self; _m_iter it = {0}; return it; } + +STC_INLINE _m_iter _c_MEMB(_rend)(const Self* self) + { (void)self; _m_iter it = {0}; return it; } + +STC_INLINE void _c_MEMB(_next)(_m_iter* it) + { if (++it->ref == it->end) it->ref = NULL; } + +STC_INLINE void _c_MEMB(_rnext)(_m_iter* it) + { if (--it->ref == it->end) it->ref = NULL; } + +STC_INLINE _m_iter _c_MEMB(_advance)(_m_iter it, size_t n) { + if ((it.ref += n) >= it.end) it.ref = NULL; + return it; +} + +STC_INLINE isize _c_MEMB(_index)(const Self* self, _m_iter it) + { return (it.ref - self->data); } + +STC_INLINE void _c_MEMB(_adjust_end_)(Self* self, isize n) + { self->size += n; } + +#if defined _i_has_eq +STC_INLINE _m_iter _c_MEMB(_find)(const Self* self, _m_raw raw) { + return _c_MEMB(_find_in)(self, _c_MEMB(_begin)(self), _c_MEMB(_end)(self), raw); +} + +STC_INLINE bool _c_MEMB(_eq)(const Self* self, const Self* other) { + if (self->size != other->size) return false; + for (isize i = 0; i < self->size; ++i) { + const _m_raw _rx = i_keytoraw((self->data+i)), _ry = i_keytoraw((other->data+i)); + if (!(i_eq((&_rx), (&_ry)))) return false; + } + return true; +} +#endif // _i_has_eq + +#if defined _i_has_cmp +#include "priv/sort_prv.h" +#endif // _i_has_cmp + +/* -------------------------- IMPLEMENTATION ------------------------- */ +#if defined i_implement + +STC_DEF void +_c_MEMB(_clear)(Self* self) { + if (self->size == 0) return; + 
_m_value *p = self->data + self->size; + while (p-- != self->data) { i_keydrop(p); } + self->size = 0; +} + +STC_DEF void +_c_MEMB(_drop)(const Self* cself) { + Self* self = (Self*)cself; + if (self->capacity == 0) + return; + _c_MEMB(_clear)(self); + _i_free_n(self->data, self->capacity); +} + +STC_DEF bool +_c_MEMB(_reserve)(Self* self, const isize cap) { + if (cap > self->capacity || (cap && cap == self->size)) { + _m_value* d = (_m_value*)_i_realloc_n(self->data, self->capacity, cap); + if (d == NULL) + return false; + self->data = d; + self->capacity = cap; + } + return self->data != NULL; +} + +STC_DEF bool +_c_MEMB(_resize)(Self* self, const isize len, _m_value null) { + if (!_c_MEMB(_reserve)(self, len)) + return false; + const isize n = self->size; + for (isize i = len; i < n; ++i) + { i_keydrop((self->data + i)); } + for (isize i = n; i < len; ++i) + self->data[i] = null; + self->size = len; + return true; +} + +STC_DEF _m_iter +_c_MEMB(_insert_uninit)(Self* self, const isize idx, const isize n) { + if (self->size + n >= self->capacity) + if (!_c_MEMB(_reserve)(self, self->size*3/2 + n)) + return _c_MEMB(_end)(self); + + _m_value *pos = self->data + idx; + c_memmove(pos + n, pos, (self->size - idx)*c_sizeof *pos); + self->size += n; + return c_literal(_m_iter){pos, self->data + self->size}; +} + +STC_DEF _m_iter +_c_MEMB(_erase_n)(Self* self, const isize idx, const isize len) { + c_assert(idx + len <= self->size); + _m_value* d = self->data + idx, *p = d, *end = self->data + self->size; + for (isize i = 0; i < len; ++i, ++p) + { i_keydrop(p); } + memmove(d, p, (size_t)(end - p)*sizeof *d); + self->size -= len; + return c_literal(_m_iter){p == end ? 
NULL : d, end - len}; +} + +#if !defined i_no_clone +STC_DEF void +_c_MEMB(_copy)(Self* self, const Self* other) { + if (self == other) return; + _c_MEMB(_clear)(self); + _c_MEMB(_reserve)(self, other->size); + self->size = other->size; + for (c_range(i, other->size)) + self->data[i] = i_keyclone((other->data[i])); +} + +STC_DEF Self +_c_MEMB(_clone)(Self vec) { + Self out = vec, *self = &out; (void)self; + out.data = NULL; out.size = out.capacity = 0; + _c_MEMB(_reserve)(&out, vec.size); + out.size = vec.size; + for (c_range(i, vec.size)) + out.data[i] = i_keyclone(vec.data[i]); + return out; +} + +STC_DEF _m_iter +_c_MEMB(_copy_to)(Self* self, const isize idx, + const _m_value arr[], const isize n) { + _m_iter it = _c_MEMB(_insert_uninit)(self, idx, n); + if (it.ref) + for (_m_value* p = it.ref, *q = p + n; p != q; ++arr) + *p++ = i_keyclone((*arr)); + return it; +} +#endif // !i_no_clone + +#if !defined i_no_emplace +STC_DEF _m_iter +_c_MEMB(_emplace_n)(Self* self, const isize idx, const _m_raw raw[], isize n) { + _m_iter it = _c_MEMB(_insert_uninit)(self, idx, n); + if (it.ref) + for (_m_value* p = it.ref; n--; ++raw, ++p) + *p = i_keyfrom((*raw)); + return it; +} +#endif // !i_no_emplace + +#if defined _i_has_eq +STC_DEF _m_iter +_c_MEMB(_find_in)(const Self* self, _m_iter i1, _m_iter i2, _m_raw raw) { + (void)self; + const _m_value* p2 = _it2_ptr(i1, i2); + for (; i1.ref != p2; ++i1.ref) { + const _m_raw r = i_keytoraw(i1.ref); + if (i_eq((&raw), (&r))) + return i1; + } + i2.ref = NULL; + return i2; +} +#endif // _i_has_eq +#endif // i_implement +#include "sys/finalize.h" diff --git a/stc/zsview.h b/stc/zsview.h new file mode 100644 index 0000000..8afb73a --- /dev/null +++ b/stc/zsview.h @@ -0,0 +1,173 @@ +/* MIT License + * + * Copyright (c) 2025 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without 
restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +// zsview is a zero-terminated string view. + +#ifndef STC_ZSVIEW_H_INCLUDED +#define STC_ZSVIEW_H_INCLUDED + +#include "common.h" +#include "types.h" +#include "priv/utf8_prv.h" + +#define zsview_init() c_zv("") +#define zsview_clone(zs) c_default_clone(zs) +#define zsview_drop(self) c_default_drop(self) +#define zsview_toraw(self) (self)->str + +STC_INLINE zsview zsview_from(const char* str) + { return c_literal(zsview){str, c_strlen(str)}; } +STC_INLINE void zsview_clear(zsview* self) { *self = c_zv(""); } +STC_INLINE csview zsview_sv(zsview zs) { return c_sv_2(zs.str, zs.size); } + +STC_INLINE isize zsview_size(zsview zs) { return zs.size; } +STC_INLINE bool zsview_is_empty(zsview zs) { return zs.size == 0; } + +STC_INLINE bool zsview_equals(zsview zs, const char* str) { + isize n = c_strlen(str); + return zs.size == n && !c_memcmp(zs.str, str, n); +} + +STC_INLINE isize zsview_find(zsview zs, const char* search) { + const char* res = strstr(zs.str, search); + return res ? 
(res - zs.str) : c_NPOS; +} + +STC_INLINE bool zsview_contains(zsview zs, const char* str) + { return zsview_find(zs, str) != c_NPOS; } + +STC_INLINE bool zsview_starts_with(zsview zs, const char* str) { + isize n = c_strlen(str); + return n <= zs.size && !c_memcmp(zs.str, str, n); +} + +STC_INLINE bool zsview_ends_with(zsview zs, const char* str) { + isize n = c_strlen(str); + return n <= zs.size && !c_memcmp(zs.str + zs.size - n, str, n); +} + +STC_INLINE zsview zsview_from_pos(zsview zs, isize pos) { + if (pos > zs.size) pos = zs.size; + zs.str += pos; zs.size -= pos; return zs; +} + +STC_INLINE csview zsview_subview(zsview zs, isize pos, isize len) { + c_assert(((size_t)pos <= (size_t)zs.size) & (len >= 0)); + if (pos + len > zs.size) len = zs.size - pos; + return c_literal(csview){zs.str + pos, len}; +} + +STC_INLINE zsview zsview_tail(zsview zs, isize len) { + c_assert(len >= 0); + if (len > zs.size) len = zs.size; + zs.str += zs.size - len; zs.size = len; + return zs; +} + +/* utf8 */ + +STC_INLINE zsview zsview_u8_from_pos(zsview zs, isize u8pos) + { return zsview_from_pos(zs, utf8_to_index(zs.str, u8pos)); } + +STC_INLINE zsview zsview_u8_tail(zsview zs, isize u8len) { + const char* p = &zs.str[zs.size]; + while (u8len && p != zs.str) + u8len -= (*--p & 0xC0) != 0x80; + zs.size -= p - zs.str, zs.str = p; + return zs; +} + +STC_INLINE csview zsview_u8_subview(zsview zs, isize u8pos, isize u8len) + { return utf8_subview(zs.str, u8pos, u8len); } + +STC_INLINE zsview_iter zsview_u8_at(zsview zs, isize u8pos) { + csview sv; + sv.buf = utf8_at(zs.str, u8pos); + sv.size = utf8_chr_size(sv.buf); + return c_literal(zsview_iter){.chr = sv}; +} + +STC_INLINE isize zsview_u8_size(zsview zs) + { return utf8_count(zs.str); } + +STC_INLINE bool zsview_u8_valid(zsview zs) // requires linking with utf8 symbols + { return utf8_valid_n(zs.str, zs.size); } + +/* utf8 iterator */ + +STC_INLINE zsview_iter zsview_begin(const zsview* self) { + zsview_iter it = {.chr = 
{self->str, utf8_chr_size(self->str)}}; return it; +} + +STC_INLINE zsview_iter zsview_end(const zsview* self) { + (void)self; zsview_iter it = {0}; return it; +} + +STC_INLINE void zsview_next(zsview_iter* it) { + it->ref += it->chr.size; + it->chr.size = utf8_chr_size(it->ref); + if (*it->ref == '\0') it->ref = NULL; +} + +STC_INLINE zsview_iter zsview_advance(zsview_iter it, isize u8pos) { + it.ref = utf8_offset(it.ref, u8pos); + it.chr.size = utf8_chr_size(it.ref); + if (*it.ref == '\0') it.ref = NULL; + return it; +} + +/* ---- Container helper functions ---- */ + +STC_INLINE size_t zsview_hash(const zsview *self) + { return c_hash_str(self->str); } + +STC_INLINE int zsview_cmp(const zsview* x, const zsview* y) + { return strcmp(x->str, y->str); } + +STC_INLINE bool zsview_eq(const zsview* x, const zsview* y) + { return x->size == y->size && !c_memcmp(x->str, y->str, x->size); } + +STC_INLINE int zsview_icmp(const zsview* x, const zsview* y) + { return utf8_icmp(x->str, y->str); } + +STC_INLINE bool zsview_ieq(const zsview* x, const zsview* y) + { return x->size == y->size && !utf8_icmp(x->str, y->str); } + +/* ---- case insensitive ---- */ + +STC_INLINE bool zsview_iequals(zsview zs, const char* str) + { return c_strlen(str) == zs.size && !utf8_icmp(zs.str, str); } + +STC_INLINE bool zsview_istarts_with(zsview zs, const char* str) + { return c_strlen(str) <= zs.size && !utf8_icmp(zs.str, str); } + +STC_INLINE bool zsview_iends_with(zsview zs, const char* str) { + isize n = c_strlen(str); + return n <= zs.size && !utf8_icmp(zs.str + zs.size - n, str); +} + +#endif // STC_ZSVIEW_H_INCLUDED + +#if defined i_import + #include "priv/utf8_prv.c" +#endif diff --git a/str.h b/str.h new file mode 100644 index 0000000..cd30247 --- /dev/null +++ b/str.h @@ -0,0 +1,31 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef struct LString { + bool ref; + size_t length; + uint8_t data[]; +} LString; 
+ +static inline LString *lstring_new(size_t len, uint8_t *data) { + LString *lstr = calloc(1, sizeof(*lstr) + len); + lstr->length = len; + memcpy(lstr->data, data, len); + return lstr; +} + +static inline LString *lstring_newz(const char *data) { + return lstring_new(strlen(data), (uint8_t*) data); +} + +#define lstring_free free \ No newline at end of file diff --git a/table.h b/table.h new file mode 100644 index 0000000..6ed90e7 --- /dev/null +++ b/table.h @@ -0,0 +1,107 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include"value.h" + +typedef struct LTableEntry { + LValue key; + LValue val; +} LTableEntry; + +typedef struct LTableBuckets { + size_t capacity; + LTableEntry data[]; +} LTableBuckets; + +typedef struct LTable { + bool ref; + LTableBuckets *buckets; +} LTable; + +static inline LTable *ltable_new(size_t capacity) { + assert(capacity >= 8 && (capacity & (capacity - 1)) == 0); + + LTable *tbl = calloc(1, sizeof(*tbl)); + tbl->ref = false; + tbl->buckets = calloc(1, sizeof(LTableBuckets) + sizeof(LTableEntry) * capacity); + tbl->buckets->capacity = capacity; + + for(int i = 0; i < tbl->buckets->capacity; i++) { + tbl->buckets->data[i] = (LTableEntry) { + .key = {.u = LTAG_NIL}, + .val = {.u = LTAG_NIL}, + }; + } + + return tbl; +} + +static inline bool ltablebuckets_set(LTableBuckets *self, LValue key, LValue val) { + size_t idx = lvalue_hash(key); + + int probe_limit = __builtin_ctz(self->capacity); + probe_limit += probe_limit >> 1; + + LTableEntry *current = self->data; + + while(1) { + idx &= self->capacity - 1; + + LValue prevKey = {.u = LTAG_NIL}; + atomic_compare_exchange_strong(¤t[idx].key.u, &prevKey.u, key.u); + + if(prevKey.u == LTAG_NIL || prevKey.u == key.u) { + atomic_store(¤t[idx].val.u, val.u); + break; + } + + idx++; + + probe_limit--; + if(probe_limit == 0) { + return false; + } + } + + return true; +} + +static inline void ltable_set(LTable *self, LValue key, LValue 
/* NaN-boxing layout: the upper 16 bits select the type, the lower 48 bits
   carry the payload (an int32 or a pointer). */
#define LTAG_MASK 0xFFFF000000000000ULL
#define LTAG_NAN 0xFFF8000000000001ULL
#define LTAG_NIL 0xFFF8000000000002ULL
#define LTAG_FALSE 0xFFF8000000000003ULL
#define LTAG_TRUE 0xFFF8000000000004ULL
#define LTAG_I32 0xFFF9000000000000ULL
#define LTAG_TABLE 0xFFFA000000000000ULL
#define LTAG_USERDATA 0xFFFB000000000000ULL
#define LTAG_FUNCTION 0xFFFC000000000000ULL
#define LTAG_FLOAT 0xFFFD000000000000ULL
#define LTAG_STRING 0xFFFE000000000000ULL

typedef enum {
	LT_NIL,
	LT_STRING,
	LT_NUMBER,
	LT_INTEGER,
	LT_TABLE,
	LT_USERDATA,
	LT_BOOLEAN,
	LT_FUNCTION,
} LType;

/* A boxed value: either a plain double or a tagged 64-bit pattern. */
typedef union {
	double f;
	uint64_t u;
} LValue;

/* 64-bit integer mixer: alternating xor-shift and multiply steps. */
static inline uint64_t lhash64(uint64_t val) {
	val ^= (val >> 33);
	val *= 0xff51afd7ed558ccdULL;
	val ^= (val >> 33);
	val *= 0xc4ceb9fe1a85ec53ULL;
	val ^= (val >> 33);
	return val;
}

/*
 * Box a double.
 *
 * Every NaN is collapsed to the single LTAG_NAN pattern so that an
 * arbitrary NaN payload can never be mistaken for one of the tagged
 * encodings above. All other doubles are stored unchanged.
 */
static inline LValue lvalue_from_double(double f) {
	if(isnan(f)) {
		return (LValue) {.u = LTAG_NAN};
	}

	return (LValue) {.f = f};
}
(LValue) {.u = LTAG_I32 | (uint32_t) i}; +} + +static inline LValue lvalue_from_bool(bool b) { + return (LValue) {.u = b ? LTAG_TRUE : LTAG_FALSE}; +} + +static inline LValue lvalue_from_nil() { + return (LValue) {.u = LTAG_NIL}; +} + +struct LTable; +static inline LValue lvalue_from_table(struct LTable *tbl) { + return (LValue) {.u = LTAG_TABLE | (uintptr_t) tbl}; +} + +struct LString; +static inline LValue lvalue_from_string(struct LString *str) { + return (LValue) {.u = LTAG_STRING | (uintptr_t) str}; +} + +struct LFunc; +static inline LValue lvalue_from_func(struct LFunc *f) { + return (LValue) {.u = LTAG_FUNCTION | (uintptr_t) f}; +} + +static inline int32_t lvalue_to_int32(LValue v) { + return v.u & ~LTAG_MASK; +} + +static inline uint64_t lvalue_tag(LValue v) { + return v.u & LTAG_MASK; +} + +static inline LValue lvalue_raw(uint64_t tag, uint64_t rawval) { + assert(rawval == (rawval & ~LTAG_MASK)); + return (LValue) {.u = tag | rawval}; +} + +static inline uint64_t lvalue_hash(LValue lv) { + if(lvalue_tag(lv) == LTAG_STRING) { + LString *str = (LString*) (lv.u & ~LTAG_MASK); + + uint64_t h = 0; + + for(size_t i = 0; i < (str->length & ~7); i += 8) { + h = lhash64(h ^ *(uint64_t*) &str->data[i]); + } + + uint64_t last = 0; + memcpy(&last, str->data + (str->length & ~7), str->length - (str->length & ~7)); + + h = lhash64(h ^ last); + + return h; + } + + return lhash64(lv.u); +} + +bool lvalue_eq(LValue, LValue); diff --git a/vm.c b/vm.c new file mode 100644 index 0000000..2c24aef --- /dev/null +++ b/vm.c @@ -0,0 +1,326 @@ +#define i_implement +#include"vm.h" +#undef i_implement + +#include"str.h" + +#include + +size_t lvm_run(LVM *L, LFunc *func, size_t arg_count, LValue *regs) { + if(func->is_native) { + return func->native_func(L, func->ud, arg_count, regs); + } + + static void *dispatch_table[] = { + [L_GETGLOBAL] = &&do_getglobal, + [L_SETGLOBAL] = &&do_setglobal, + [L_SETINT16] = &&do_setint16, + [L_SETINT32] = &&do_setint32, + [L_SETFLOAT] = 
&&do_setfloat, + [L_SETSTR] = &&do_setstr, + [L_SETTABLE] = &&do_settable, + [L_SETBOOL] = &&do_setbool, + [L_SETNIL] = &&do_setnil, + [L_SETFUNC] = &&do_setfunc, + [L_ADD] = &&do_add, + [L_SUB] = &&do_sub, + [L_MUL] = &&do_mul, + [L_DIV] = &&do_div, + [L_MOD] = &&do_mod, + [L_RET] = &&do_ret, + [L_JNOTCOND] = &&do_jnotcond, + [L_MOVE] = &&do_move, + [L_CALL] = &&do_call, + [L_JUMP] = &&do_jump, + [L_ADVANCETEST] = &&do_advancetest, + [L_COND_EQ] = &&do_cond_eq, + [L_COND_NEQ] = &&do_cond_neq, + [L_SETFIELD] = &&do_setfield, + [L_GETFIELD] = &&do_getfield, + }; + + LUnit *unit = func->unit; + + LInst *inst = func->lua_instrs; + #define DISPATCH() goto *dispatch_table[(++inst)->opcode] + + inst--; + DISPATCH(); + +do_getglobal:; + { + uint8_t *area = unit->abyss + inst->bc; + size_t len = *(uint16_t*) area; + area += 2; + + LString *str = lstring_new(len, area); + regs[inst->a] = ltable_get(func->env, lvalue_from_string(str)); + lstring_free(str); + } + DISPATCH(); + +do_setglobal:; + { + uint8_t *area = unit->abyss + inst->bc; + size_t len = *(uint16_t*) area; + area += 2; + + LString *str = lstring_new(len, area); + ltable_set(func->env, lvalue_from_string(str), regs[inst->a]); + lvm_gc_add(L, lvalue_from_string(str)); + } + DISPATCH(); + +do_setint16:; + regs[inst->a] = lvalue_from_int32((int16_t) inst->bc); + DISPATCH(); + +do_setint32:; + regs[inst->a] = lvalue_from_int32(*(int32_t*) &unit->abyss[inst->bc]); + DISPATCH(); + +do_setfloat:; + DISPATCH(); + +do_setstr:; + { + uint8_t *area = unit->abyss + inst->bc; + size_t len = *(uint16_t*) area; + area += 2; + + regs[inst->a] = lvalue_raw(LTAG_STRING, (uintptr_t) lstring_new(len, area)); + lvm_gc_add(L, regs[inst->a]); + } + DISPATCH(); + +do_settable:; + { + LTable *tbl = ltable_new(inst->bc); + lvm_gc_add(L, lvalue_from_table(tbl)); + regs[inst->a] = lvalue_from_table(tbl); + } + DISPATCH(); + +do_setbool:; + regs[inst->a] = lvalue_from_bool(inst->b); + DISPATCH(); + +do_setnil:; + regs[inst->a] = 
lvalue_from_nil(); + DISPATCH(); + +do_setfunc:; + regs[inst->a] = lvalue_from_func(&func->unit->funcs[inst->bc]); + DISPATCH(); + +do_add:; + { + LValue x = regs[inst->b]; + LValue y = regs[inst->c]; + if(lvalue_tag(x) == LTAG_I32 && lvalue_tag(y) == LTAG_FLOAT) { + regs[inst->a] = lvalue_from_double(lvalue_to_int32(x) + y.f); + } else if(lvalue_tag(x) == LTAG_FLOAT && lvalue_tag(y) == LTAG_I32) { + regs[inst->a] = lvalue_from_double(x.f + lvalue_to_int32(y)); + } else if(lvalue_tag(x) == LTAG_I32 && lvalue_tag(y) == LTAG_I32) { + regs[inst->a] = lvalue_from_int32(lvalue_to_int32(x) + lvalue_to_int32(y)); + } else goto err; + } + DISPATCH(); + +do_sub:; + { + LValue x = regs[inst->b]; + LValue y = regs[inst->c]; + if(lvalue_tag(x) == LTAG_I32 && lvalue_tag(y) == LTAG_FLOAT) { + regs[inst->a] = lvalue_from_double(lvalue_to_int32(x) - y.f); + } else if(lvalue_tag(x) == LTAG_FLOAT && lvalue_tag(y) == LTAG_I32) { + regs[inst->a] = lvalue_from_double(x.f - lvalue_to_int32(y)); + } else if(lvalue_tag(x) == LTAG_I32 && lvalue_tag(y) == LTAG_I32) { + regs[inst->a] = lvalue_from_int32(lvalue_to_int32(x) - lvalue_to_int32(y)); + } else goto err; + } + DISPATCH(); + +do_mul:; + { + LValue x = regs[inst->b]; + LValue y = regs[inst->c]; + if(lvalue_tag(x) == LTAG_I32 && lvalue_tag(y) == LTAG_FLOAT) { + regs[inst->a] = lvalue_from_double(lvalue_to_int32(x) * y.f); + } else if(lvalue_tag(x) == LTAG_FLOAT && lvalue_tag(y) == LTAG_I32) { + regs[inst->a] = lvalue_from_double(x.f * lvalue_to_int32(y)); + } else if(lvalue_tag(x) == LTAG_I32 && lvalue_tag(y) == LTAG_I32) { + regs[inst->a] = lvalue_from_int32(lvalue_to_int32(x) * lvalue_to_int32(y)); + } else goto err; + } + DISPATCH(); + +do_div:; + { + LValue x = regs[inst->b]; + LValue y = regs[inst->c]; + if(lvalue_tag(x) == LTAG_I32 && lvalue_tag(y) == LTAG_FLOAT) { + regs[inst->a] = lvalue_from_double(lvalue_to_int32(x) / y.f); + } else if(lvalue_tag(x) == LTAG_FLOAT && lvalue_tag(y) == LTAG_I32) { + regs[inst->a] = 
lvalue_from_double(x.f / lvalue_to_int32(y)); + } else if(lvalue_tag(x) == LTAG_I32 && lvalue_tag(y) == LTAG_I32) { + int32_t yv = lvalue_to_int32(y); + if(yv == 0) { + regs[inst->a] = lvalue_from_nil(); + } else { + regs[inst->a] = lvalue_from_int32(lvalue_to_int32(x) / yv); + } + } else goto err; + } + DISPATCH(); + +do_mod:; + { + LValue x = regs[inst->b]; + LValue y = regs[inst->c]; + if(lvalue_tag(x) == LTAG_I32 && lvalue_tag(y) == LTAG_FLOAT) { + regs[inst->a] = lvalue_from_double(fmod(fmod(lvalue_to_int32(x), y.f) + y.f, y.f)); + } else if(lvalue_tag(x) == LTAG_FLOAT && lvalue_tag(y) == LTAG_I32) { + int32_t yv = lvalue_to_int32(y); + regs[inst->a] = lvalue_from_double(fmod(fmod(x.f, yv) + yv, yv)); + } else if(lvalue_tag(x) == LTAG_I32 && lvalue_tag(y) == LTAG_I32) { + int32_t yv = lvalue_to_int32(y); + if(yv == 0) { + goto err; + } else { + regs[inst->a] = lvalue_from_int32((lvalue_to_int32(x) % yv + yv) % yv); + } + } else goto err; + } + DISPATCH(); + +do_jump:; + inst += (int16_t) inst->bc; + DISPATCH(); + +do_jnotcond:; + { + LValue v = regs[inst->a]; + if(v.u == LTAG_NIL || v.u == LTAG_FALSE) { + inst += (int16_t) inst->bc; + } + } + DISPATCH(); + +do_call:; + { + if(lvalue_tag(regs[inst->a]) != LTAG_FUNCTION) { + goto err; + } + + uint8_t *abyss_data = unit->abyss + inst->bc; + + uint8_t ret_vreg = abyss_data[0]; + uint8_t arg_count = abyss_data[1]; + uint8_t *args = &abyss_data[2]; + + LValue regs2[256]; + lvm_reset_regs(regs2); + for(int i = 0; i < arg_count; i++) { + regs2[i] = regs[args[i]]; + } + size_t returned_count = lvm_run(L, (LFunc*) (regs[inst->a].u & ~LTAG_MASK), arg_count, regs2); + + if(returned_count) { + // TODO: more than 1 return + regs[ret_vreg] = regs2[0]; + } + } + DISPATCH(); + +do_move:; + regs[inst->a] = regs[inst->b]; + DISPATCH(); + +do_advancetest:; + { + int64_t a = lvalue_to_int32(regs[inst->a]); + int64_t b = lvalue_to_int32(regs[inst->b]); + int64_t c = lvalue_to_int32(regs[inst->c]); + if(!((c >= 0 && a > b) || (c < 0 
&& a < b))) { + inst++; + } + } + DISPATCH(); + +do_cond_eq:; + regs[inst->a] = lvalue_from_bool(lvalue_eq(regs[inst->b], regs[inst->c])); + DISPATCH(); + +do_cond_neq:; + regs[inst->a] = lvalue_from_bool(!lvalue_eq(regs[inst->b], regs[inst->c])); + DISPATCH(); + +do_setfield:; + { + if(lvalue_tag(regs[inst->a]) != LTAG_TABLE) { + goto err; + } + + if(lvalue_tag(regs[inst->b]) == LTAG_NIL) { + goto err; + } + + LTable *tbl = (void*) (regs[inst->a].u & ~LTAG_MASK); + + ltable_set(tbl, regs[inst->b], regs[inst->c]); + } + DISPATCH(); + +do_getfield:; + { + if(lvalue_tag(regs[inst->a]) != LTAG_TABLE) { + goto err; + } + + LTable *tbl = (void*) (regs[inst->b].u & ~LTAG_MASK); + + regs[inst->a] = ltable_get(tbl, regs[inst->c]); + } + DISPATCH(); + +err:; +do_ret:; + return 0; + +} + +void lvm_gc_add(LVM *L, LValue lvalue) { + set_LValueU_insert(&L->gc_objects, lvalue.u); +} + +LFunc *lvm_func_from_native(LFuncCallback cb, void *ud) { + LFunc *f = calloc(1, sizeof(*f)); + f->is_native = true; + f->ud = ud; + f->native_func = cb; + return f; +} + +bool lvalue_eq(LValue a, LValue b) { + if(a.u == b.u) { + return true; + } + + if(lvalue_tag(a) == LTAG_I32 && lvalue_tag(b) == LTAG_FLOAT) { + return (a.u & ~LTAG_MASK) == b.f; + } else if(lvalue_tag(a) == LTAG_FLOAT && lvalue_tag(b) == LTAG_I32) { + return (b.u & ~LTAG_MASK) == a.f; + } else if(lvalue_tag(a) == LTAG_STRING && lvalue_tag(b) == LTAG_STRING) { + LString *sa = (LString*) (a.u & ~LTAG_MASK); + LString *sb = (LString*) (b.u & ~LTAG_MASK); + + if(sa->length != sb->length) { + return false; + } + + return !memcmp(sa->data, sb->data, sa->length); + } + + return false; +} diff --git a/vm.h b/vm.h new file mode 100644 index 0000000..60c6520 --- /dev/null +++ b/vm.h @@ -0,0 +1,117 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include"table.h" + +typedef enum { + L_GETGLOBAL, + L_SETGLOBAL, + L_SETINT16, + L_SETFLOAT, + L_SETSTR, + L_SETTABLE, + L_SETBOOL, + L_SETNIL, + 
L_SETFUNC, + L_ADD, + L_SUB, + L_MUL, + L_DIV, + L_IDIV, + L_MOD, + L_POW, + L_BOR, + L_BAND, + L_BXOR, + L_COND_EQ, + L_COND_NEQ, + L_JUMP, + L_JNOTCOND, + L_RET, + L_MOVE, + L_CALL, + L_ADVANCETEST, + L_SETFIELD, + L_GETFIELD, + L_SETINT32, +} LOp; + +typedef union __attribute__((packed)) { + struct { + uint8_t opcode; + union { + uint8_t argb[3]; + struct __attribute__((packed)) { + uint8_t a; + union { + uint16_t bc; + struct __attribute__((packed)) { + uint8_t b; + uint8_t c; + }; + }; + }; + }; + }; + struct { + uint32_t hahalol; + } extension; +} LInst; + +struct LUnit; +struct LVM; + +typedef size_t(*LFuncCallback)(struct LVM*, void *ud, size_t argn, LValue *args); +typedef struct LFunc { + struct LUnit *unit; + bool is_native; + uint8_t upvalue_count; + LValue *upvalues; + union { + struct { + LFuncCallback native_func; + void *ud; + }; + struct { + LInst *lua_instrs; + LTable *env; + }; + }; +} LFunc; + +typedef struct LUnit { + uint8_t *abyss; + + size_t func_count; + LFunc *funcs; +} LUnit; + +#define i_header +#define T set_LValueU, uint64_t +#include"stc/hashset.h" +#undef i_header + +typedef struct LVM { + size_t unit_count; + LUnit *units; + + set_LValueU gc_objects; +} LVM; + +size_t lvm_run(LVM *L, LFunc *func, size_t arg_count, LValue *regs); +void lvm_gc_add(LVM *L, LValue lvalue); + +LFunc *lvm_func_from_native(LFuncCallback, void *ud); + +static inline void lvm_reset_regs(LValue *regs) { + for(int i = 0; i < 256; i++) { + regs[i] = lvalue_from_nil(); + } +}