Compare commits

...

11 Commits

Author SHA1 Message Date
Mid
3cdd88a52d Update README.md 2025-09-06 16:24:03 +03:00
Mid
eca0ac6fe4 Expand table.insert test 2025-09-06 16:23:51 +03:00
Mid
3d8b09167b table.insert test 2025-09-06 15:45:54 +03:00
Mid
3edff81d89 finish table_insert interface 2025-09-06 15:45:47 +03:00
Mid
319779a16c secondary_thread test 2025-09-06 15:06:23 +03:00
Mid
2bf96c0f76 Yield because maybe good??? 2025-09-06 15:06:14 +03:00
Mid
fa67393723 threads.run 2025-09-06 15:06:03 +03:00
Mid
c2cd319d0c Bug fix 2025-09-06 14:58:10 +03:00
Mid
eb5a23761d Bug fixes 2025-09-06 13:59:14 +03:00
Mid
5809a89eae Basic tests 2025-09-05 21:16:09 +03:00
Mid
65af565668 oops forgot this 2025-09-05 21:16:01 +03:00
11 changed files with 232 additions and 19 deletions

View File

@@ -17,9 +17,33 @@ Impotent is still work-in-progress:
5. Most operators are still missing
6. The user API is completely different from that of PoC Lua
7. Being lock-free, tables are not split to "array" and "hash" parts
8. Userdata is not yet supported.
Impotent requires C11 and an architecture with 8-byte atomic operations, but otherwise it is completely cross-platform.
Performance-wise, it's surprisingly competitive with PoC Lua, considering how quickly it was made up to the point of writing this README (~2 weeks). By far the worst bottleneck is the GC, since it requires all threads and their heaps to synchronize.
Certain Lua idioms become impossible under Impotent. For example, the idiom of appending to tables (`t[#t + 1] = x`) isn't atomic, therefore `table.insert` should be used instead.
## Additions
Obviously, threading. Any thread can access any value from any other thread, including for reading or writing. Operations such as getting or setting are lock-free in the best case scenario, but other operations (such as the `#` operator) must lock the table temporarily.
Besides this, I have no intent to greatly deviate from standard Lua, to keep source-level compatibility as best I can. The only addition to the standard library is the `threads` global, with two methods as of now.
### `threads.run`
Runs a function in a newly created thread. Does not block the caller.
threads.run(function()
-- Do something expensive
end)
### `threads.parallel`
Runs a function in n parallel threads. Blocks the caller until all threads finish.
threads.parallel(8, function()
-- Do something parallelizable.
end)

10
atomicity.lua Normal file
View File

@@ -0,0 +1,10 @@
t = {}
threads.parallel(8, function(no)
for i = 1, 1250 do
table.insert(t, i)
end
end)
print("Expected: 10000")
print(#t)

63
lrwl.h Normal file
View File

@@ -0,0 +1,63 @@
#pragma omp
#define LRWL_WRITE_ACTIVE (1UL<<31)
#define LRWL_MASK (LRWL_WRITE_ACTIVE - 1)
#define LRWL_MAX_READERS 1024
typedef struct LRWL {
_Atomic uint32_t data;
} LRWL;
static inline void lrwl_read_lock(LRWL *self) {
while(1) {
uint32_t data = atomic_load(&self->data);
if((data & LRWL_WRITE_ACTIVE) || (data & LRWL_MASK) >= LRWL_MAX_READERS) {
continue;
}
if(atomic_compare_exchange_weak(&self->data, &data, data + 1)) {
return;
}
}
}
static inline void lrwl_read_unlock(LRWL *self) {
while(1) {
uint32_t data = atomic_load(&self->data);
assert((data & LRWL_MASK) > 0);
if(atomic_compare_exchange_weak(&self->data, &data, data - 1)) {
return;
}
}
}
static inline void lrwl_write_lock(LRWL *self) {
while(1) {
uint32_t data = atomic_load(&self->data);
if(data & LRWL_WRITE_ACTIVE) {
continue;
}
if(atomic_compare_exchange_weak(&self->data, &data, data | LRWL_WRITE_ACTIVE)) {
while((atomic_load(&self->data) & LRWL_MASK) > 0);
return;
}
}
}
static inline void lrwl_write_unlock(LRWL *self) {
while(1) {
uint32_t data = atomic_load(&self->data);
assert(data & LRWL_WRITE_ACTIVE);
if(atomic_compare_exchange_weak(&self->data, &data, data & ~LRWL_WRITE_ACTIVE)) {
return;
}
}
}

84
main.c
View File

@@ -32,25 +32,83 @@ static size_t native_print(LVM *lvm, void *ud, size_t argn, set_LValueU *heap, L
static size_t table_insert(LVM *lvm, void *ud, size_t argn, set_LValueU *heap, LRegSet *regset) {
LTable *tbl = (LTable*) (regset->regs[0].u & ~LTAG_MASK);
ltable_insert(tbl, regset->regs[1], 0);
if(argn == 2) {
ltable_insert(tbl, regset->regs[1], 0);
} else if(argn > 2) {
size_t idx = lvalue_to_int32(regset->regs[1]);
ltable_insert(tbl, regset->regs[2], idx);
}
return 0;
}
// This function is intended for small-medium runtimes, since the caller thread's heap is not touchable during this call.
struct ThreadsParallelCtx {
LVM *L;
LFunc *func;
_Atomic uint32_t finished;
};
static int threads_parallel_worker(void *arg) {
struct ThreadsParallelCtx *ctx = arg;
LRegSet regset = {};
lvm_reset_regs(&regset);
lvm_run(ctx->L, ctx->func, 0, &regset);
atomic_fetch_add(&ctx->finished, 1);
return 0;
}
static size_t threads_parallel(LVM *lvm, void *ud, size_t argn, set_LValueU *heap, LRegSet *regset) {
size_t no = lvalue_to_int32(regset->regs[0]);
LFunc *func = (LFunc*) (regset->regs[1].u & ~LTAG_MASK);
atomic_fetch_sub(&lvm->active_thread_count, 1);
#pragma omp parallel for
for(size_t i = 0; i < no; i++) {
LRegSet regset = {};
lvm_reset_regs(&regset);
lvm_run(lvm, func, 0, &regset);
if(no == 0) {
return 0;
}
atomic_fetch_add(&lvm->active_thread_count, 1);
struct ThreadsParallelCtx ctx = {.L = lvm, .func = func, .finished = 0};
thrd_t thrds[no];
for(size_t i = 1; i < no; i++) {
thrd_create(&thrds[i], threads_parallel_worker, &ctx);
}
LRegSet set = {.parent = regset};
lvm_reset_regs(&set);
lvm_call(lvm, func, 0, heap, &set);
// This thread must still respond to the GC
while(atomic_load(&ctx.finished) != no - 1) {
lvm->safepoint_func(lvm, heap, regset);
thrd_yield();
}
return 0;
}
struct ThreadsRunCtx {
LVM *L;
LFunc *func;
};
static int threads_run_worker(void *arg) {
struct ThreadsRunCtx *ctx = arg;
LRegSet regs = {};
lvm_reset_regs(&regs);
lvm_run(ctx->L, ctx->func, 0, &regs);
free(ctx);
return 0;
}
static size_t threads_run(LVM *lvm, void *ud, size_t argn, set_LValueU *heap, LRegSet *regset) {
LFunc *func = (LFunc*) (regset->regs[0].u & ~LTAG_MASK);
struct ThreadsRunCtx *ctx = calloc(1, sizeof(*ctx));
ctx->L = lvm;
ctx->func = func;
thrd_t thrd;
thrd_create(&thrd, threads_run_worker, ctx);
return 0;
}
@@ -89,6 +147,9 @@ int main(int argc, char **argv) {
ltable_set(threads,
lvalue_from_string(lstring_newz("parallel")),
lvalue_from_func(lvm_func_from_native(threads_parallel, NULL)));
ltable_set(threads,
lvalue_from_string(lstring_newz("run")),
lvalue_from_func(lvm_func_from_native(threads_run, NULL)));
}
ltable_set(env,
lvalue_from_string(lstring_newz("threads")),
@@ -122,6 +183,9 @@ int main(int argc, char **argv) {
LVM lvm = {};
lvm_init(&lvm);
lvm.unit_count = 1;
lvm.units = calloc(1, sizeof(*lvm.units));
lvm.units[0] = unit;
LRegSet regset = {.parent = NULL};
lvm_reset_regs(&regset);

11
multifizz.lua Normal file
View File

@@ -0,0 +1,11 @@
threads.parallel(1, function(no)
for i = 1, 100000 do
print(i)
if i % 3 == 0 then
print("Fizz")
end
if i % 5 == 0 then
print("Buzz")
end
end
end)

View File

@@ -1072,6 +1072,7 @@ bool parse_stat(Parser *P) {
Scope *new_scope = calloc(1, sizeof(*new_scope));
new_scope->parent = P->scope;
P->scope = new_scope;
expect(P, TOK_DO);
parse_chunk(P);
@@ -1195,7 +1196,7 @@ LUnit *lparse(size_t sz, Token *tokens, LTable *environment) {
P.unit_functions.data[0].lua_instrs = P.current_chunk.instrs.data;
unit->abyss = P.abyss;
unit->func_count = 1;
unit->func_count = P.unit_functions.size;
unit->funcs = P.unit_functions.data;
for(Expr *e = last_desc; e;) {

11
secondary_thread.lua Normal file
View File

@@ -0,0 +1,11 @@
done = false
threads.run(function()
for i = 1, 100000 do
print(i)
end
done = true
end)
while done == false do
end

17
table.h
View File

@@ -241,9 +241,20 @@ static inline size_t ltable_len(LTable *self) {
return ret;
}
static inline void ltable_insert(LTable *self, LValue val, size_t index) {
static inline bool ltable_insert(LTable *self, LValue val, size_t index) {
lrwl_write_lock(&self->lock);
index = ltable_len_nolock(self) + 1;
ltable_set_nolock(self, lvalue_from_int32(index), val);
size_t len = ltable_len_nolock(self);
if(index == 0) {
index = len + 1;
}
bool success = false;
if(index <= len + 1) {
for(size_t i = len; i >= index; i--) {
ltable_set_nolock(self, lvalue_from_int32(i + 1), ltable_get_nolock(self, lvalue_from_int32(i)));
}
ltable_set_nolock(self, lvalue_from_int32(index), val);
success = true;
}
lrwl_write_unlock(&self->lock);
return success;
}

17
table_insert.lua Normal file
View File

@@ -0,0 +1,17 @@
t = {}
for i = 50, 1, -1 do
table.insert(t, 1, i)
end
for i = 50, 25, -1 do
table.insert(t, #t + 1, i)
end
for i = 24, 1, -1 do
table.insert(t, i)
end
for i = 1, #t do
print(t[i])
end

7
vm.c
View File

@@ -63,9 +63,10 @@ do_getglobal:;
regset->regs[inst->a] = ltable_get(func->env, lvalue_from_string(str));
set_LValueU_insert(heap, lvalue_from_string(str).u);
free(str);
lvm_gc_alert(L, &privates, sizeof(*str) + len);
//set_LValueU_insert(heap, lvalue_from_string(str).u);
//lvm_gc_alert(L, &privates, sizeof(*str) + len);
}
DISPATCH();
@@ -421,7 +422,7 @@ static void gc_mark(LValue v) {
}
static void gc_mark_units(LVM *L) {
for(size_t u = 0; u < L->unit_count; u++) {
LUnit *unit = &L->units[u];
LUnit *unit = L->units[u];
for(size_t f = 0; f < unit->func_count; f++) {
LFunc *func = &unit->funcs[f];
gc_mark(lvalue_from_table(func->env));

2
vm.h
View File

@@ -113,7 +113,7 @@ typedef struct LThreadPrivates {
#define L_THREADS_MAX 32
typedef struct LVM {
size_t unit_count;
LUnit *units;
LUnit **units;
// The following is all used for GC
_Atomic bool gcInProgress;