Compare commits

...

11 Commits

Author SHA1 Message Date
Mid
3cdd88a52d Update README.md 2025-09-06 16:24:03 +03:00
Mid
eca0ac6fe4 Expand table.insert test 2025-09-06 16:23:51 +03:00
Mid
3d8b09167b table.insert test 2025-09-06 15:45:54 +03:00
Mid
3edff81d89 finish table_insert interface 2025-09-06 15:45:47 +03:00
Mid
319779a16c secondary_thread test 2025-09-06 15:06:23 +03:00
Mid
2bf96c0f76 Yield because maybe good??? 2025-09-06 15:06:14 +03:00
Mid
fa67393723 threads.run 2025-09-06 15:06:03 +03:00
Mid
c2cd319d0c Bug fix 2025-09-06 14:58:10 +03:00
Mid
eb5a23761d Bug fixes 2025-09-06 13:59:14 +03:00
Mid
5809a89eae Basic tests 2025-09-05 21:16:09 +03:00
Mid
65af565668 oops forgot this 2025-09-05 21:16:01 +03:00
11 changed files with 232 additions and 19 deletions

View File

@@ -17,9 +17,33 @@ Impotent is still work-in-progress:
5. Most operators are still missing
6. The user API is completely different from that of PoC Lua
7. Being lock-free, tables are not split to "array" and "hash" parts
8. Userdata is not yet supported.
Impotent requires C11 and an architecture with 8-byte atomic operations, but otherwise it is completely cross-platform.
Performance-wise, it's surprisingly competitive with PoC Lua, considering how quickly it was made up to the point of writing this README (~2 weeks). By far the worst bottleneck is the GC, since it requires all threads and their heaps to synchronize.
Certain Lua idioms become impossible under Impotent. For example, the idiom of appending to tables (`t[#t + 1] = x`) isn't atomic, therefore `table.insert` should be used instead.
## Additions
Obviously, threading. Any thread can access any value from any other thread, including for reading or writing. Operations such as getting or setting are lock-free in the best case scenario, but other operations (such as the `#` operator) must lock the table temporarily.
Besides this, I have no intent to greatly deviate from standard Lua, to keep source-level compatibility as best I can. The only addition to the standard library is the `threads` global, with two methods as of now.
### `threads.run`
Runs a function in a newly created thread. Does not block the caller.
threads.run(function()
-- Do something expensive
end)
### `threads.parallel`
Runs a function in n parallel threads. Blocks the caller until all threads finish.
threads.parallel(8, function()
-- Do something parallelizable.
end)

10
atomicity.lua Normal file
View File

@@ -0,0 +1,10 @@
t = {}
threads.parallel(8, function(no)
for i = 1, 1250 do
table.insert(t, i)
end
end)
print("Expected: 10000")
print(#t)

63
lrwl.h Normal file
View File

@@ -0,0 +1,63 @@
#pragma omp
#define LRWL_WRITE_ACTIVE (1UL<<31)
#define LRWL_MASK (LRWL_WRITE_ACTIVE - 1)
#define LRWL_MAX_READERS 1024
typedef struct LRWL {
_Atomic uint32_t data;
} LRWL;
static inline void lrwl_read_lock(LRWL *self) {
while(1) {
uint32_t data = atomic_load(&self->data);
if((data & LRWL_WRITE_ACTIVE) || (data & LRWL_MASK) >= LRWL_MAX_READERS) {
continue;
}
if(atomic_compare_exchange_weak(&self->data, &data, data + 1)) {
return;
}
}
}
static inline void lrwl_read_unlock(LRWL *self) {
while(1) {
uint32_t data = atomic_load(&self->data);
assert((data & LRWL_MASK) > 0);
if(atomic_compare_exchange_weak(&self->data, &data, data - 1)) {
return;
}
}
}
static inline void lrwl_write_lock(LRWL *self) {
while(1) {
uint32_t data = atomic_load(&self->data);
if(data & LRWL_WRITE_ACTIVE) {
continue;
}
if(atomic_compare_exchange_weak(&self->data, &data, data | LRWL_WRITE_ACTIVE)) {
while((atomic_load(&self->data) & LRWL_MASK) > 0);
return;
}
}
}
static inline void lrwl_write_unlock(LRWL *self) {
while(1) {
uint32_t data = atomic_load(&self->data);
assert(data & LRWL_WRITE_ACTIVE);
if(atomic_compare_exchange_weak(&self->data, &data, data & ~LRWL_WRITE_ACTIVE)) {
return;
}
}
}

84
main.c
View File

@@ -32,25 +32,83 @@ static size_t native_print(LVM *lvm, void *ud, size_t argn, set_LValueU *heap, L
static size_t table_insert(LVM *lvm, void *ud, size_t argn, set_LValueU *heap, LRegSet *regset) {
LTable *tbl = (LTable*) (regset->regs[0].u & ~LTAG_MASK);
ltable_insert(tbl, regset->regs[1], 0);
if(argn == 2) {
ltable_insert(tbl, regset->regs[1], 0);
} else if(argn > 2) {
size_t idx = lvalue_to_int32(regset->regs[1]);
ltable_insert(tbl, regset->regs[2], idx);
}
return 0;
}
// This function is intended for small-medium runtimes, since the caller thread's heap is not touchable during this call.
struct ThreadsParallelCtx {
LVM *L;
LFunc *func;
_Atomic uint32_t finished;
};
static int threads_parallel_worker(void *arg) {
struct ThreadsParallelCtx *ctx = arg;
LRegSet regset = {};
lvm_reset_regs(&regset);
lvm_run(ctx->L, ctx->func, 0, &regset);
atomic_fetch_add(&ctx->finished, 1);
return 0;
}
static size_t threads_parallel(LVM *lvm, void *ud, size_t argn, set_LValueU *heap, LRegSet *regset) {
size_t no = lvalue_to_int32(regset->regs[0]);
LFunc *func = (LFunc*) (regset->regs[1].u & ~LTAG_MASK);
atomic_fetch_sub(&lvm->active_thread_count, 1);
#pragma omp parallel for
for(size_t i = 0; i < no; i++) {
LRegSet regset = {};
lvm_reset_regs(&regset);
lvm_run(lvm, func, 0, &regset);
if(no == 0) {
return 0;
}
atomic_fetch_add(&lvm->active_thread_count, 1);
struct ThreadsParallelCtx ctx = {.L = lvm, .func = func, .finished = 0};
thrd_t thrds[no];
for(size_t i = 1; i < no; i++) {
thrd_create(&thrds[i], threads_parallel_worker, &ctx);
}
LRegSet set = {.parent = regset};
lvm_reset_regs(&set);
lvm_call(lvm, func, 0, heap, &set);
// This thread must still respond to the GC
while(atomic_load(&ctx.finished) != no - 1) {
lvm->safepoint_func(lvm, heap, regset);
thrd_yield();
}
return 0;
}
struct ThreadsRunCtx {
LVM *L;
LFunc *func;
};
static int threads_run_worker(void *arg) {
struct ThreadsRunCtx *ctx = arg;
LRegSet regs = {};
lvm_reset_regs(&regs);
lvm_run(ctx->L, ctx->func, 0, &regs);
free(ctx);
return 0;
}
static size_t threads_run(LVM *lvm, void *ud, size_t argn, set_LValueU *heap, LRegSet *regset) {
LFunc *func = (LFunc*) (regset->regs[0].u & ~LTAG_MASK);
struct ThreadsRunCtx *ctx = calloc(1, sizeof(*ctx));
ctx->L = lvm;
ctx->func = func;
thrd_t thrd;
thrd_create(&thrd, threads_run_worker, ctx);
return 0;
}
@@ -89,6 +147,9 @@ int main(int argc, char **argv) {
ltable_set(threads,
lvalue_from_string(lstring_newz("parallel")),
lvalue_from_func(lvm_func_from_native(threads_parallel, NULL)));
ltable_set(threads,
lvalue_from_string(lstring_newz("run")),
lvalue_from_func(lvm_func_from_native(threads_run, NULL)));
}
ltable_set(env,
lvalue_from_string(lstring_newz("threads")),
@@ -122,6 +183,9 @@ int main(int argc, char **argv) {
LVM lvm = {};
lvm_init(&lvm);
lvm.unit_count = 1;
lvm.units = calloc(1, sizeof(*lvm.units));
lvm.units[0] = unit;
LRegSet regset = {.parent = NULL};
lvm_reset_regs(&regset);

11
multifizz.lua Normal file
View File

@@ -0,0 +1,11 @@
threads.parallel(1, function(no)
for i = 1, 100000 do
print(i)
if i % 3 == 0 then
print("Fizz")
end
if i % 5 == 0 then
print("Buzz")
end
end
end)

View File

@@ -1072,6 +1072,7 @@ bool parse_stat(Parser *P) {
Scope *new_scope = calloc(1, sizeof(*new_scope));
new_scope->parent = P->scope;
P->scope = new_scope;
expect(P, TOK_DO);
parse_chunk(P);
@@ -1195,7 +1196,7 @@ LUnit *lparse(size_t sz, Token *tokens, LTable *environment) {
P.unit_functions.data[0].lua_instrs = P.current_chunk.instrs.data;
unit->abyss = P.abyss;
unit->func_count = 1;
unit->func_count = P.unit_functions.size;
unit->funcs = P.unit_functions.data;
for(Expr *e = last_desc; e;) {

11
secondary_thread.lua Normal file
View File

@@ -0,0 +1,11 @@
done = false
threads.run(function()
for i = 1, 100000 do
print(i)
end
done = true
end)
while done == false do
end

17
table.h
View File

@@ -241,9 +241,20 @@ static inline size_t ltable_len(LTable *self) {
return ret;
}
static inline void ltable_insert(LTable *self, LValue val, size_t index) {
static inline bool ltable_insert(LTable *self, LValue val, size_t index) {
lrwl_write_lock(&self->lock);
index = ltable_len_nolock(self) + 1;
ltable_set_nolock(self, lvalue_from_int32(index), val);
size_t len = ltable_len_nolock(self);
if(index == 0) {
index = len + 1;
}
bool success = false;
if(index <= len + 1) {
for(size_t i = len; i >= index; i--) {
ltable_set_nolock(self, lvalue_from_int32(i + 1), ltable_get_nolock(self, lvalue_from_int32(i)));
}
ltable_set_nolock(self, lvalue_from_int32(index), val);
success = true;
}
lrwl_write_unlock(&self->lock);
return success;
}

17
table_insert.lua Normal file
View File

@@ -0,0 +1,17 @@
t = {}
for i = 50, 1, -1 do
table.insert(t, 1, i)
end
for i = 50, 25, -1 do
table.insert(t, #t + 1, i)
end
for i = 24, 1, -1 do
table.insert(t, i)
end
for i = 1, #t do
print(t[i])
end

7
vm.c
View File

@@ -63,9 +63,10 @@ do_getglobal:;
regset->regs[inst->a] = ltable_get(func->env, lvalue_from_string(str));
set_LValueU_insert(heap, lvalue_from_string(str).u);
free(str);
lvm_gc_alert(L, &privates, sizeof(*str) + len);
//set_LValueU_insert(heap, lvalue_from_string(str).u);
//lvm_gc_alert(L, &privates, sizeof(*str) + len);
}
DISPATCH();
@@ -421,7 +422,7 @@ static void gc_mark(LValue v) {
}
static void gc_mark_units(LVM *L) {
for(size_t u = 0; u < L->unit_count; u++) {
LUnit *unit = &L->units[u];
LUnit *unit = L->units[u];
for(size_t f = 0; f < unit->func_count; f++) {
LFunc *func = &unit->funcs[f];
gc_mark(lvalue_from_table(func->env));

2
vm.h
View File

@@ -113,7 +113,7 @@ typedef struct LThreadPrivates {
#define L_THREADS_MAX 32
typedef struct LVM {
size_t unit_count;
LUnit *units;
LUnit **units;
// The following is all used for GC
_Atomic bool gcInProgress;