summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Mikhail Burakov <mburakov@mailbox.org>, 2023-09-15 16:54:15 +0200
committer: Mikhail Burakov <mburakov@mailbox.org>, 2023-09-16 12:08:16 +0200
commit: a15fb43722e7b75a52077f6185f4d7b922c397d6 (patch)
tree: 7e1c736bd8182f66435f9c7a907f79e6cf1fef74
parent: 2841b851e95cc799c50e7a2e843d53dd4f349693 (diff)
Major refactoring and aarch64 implementation for gthread (HEAD, master)
Finished refactoring and x86-64 implementation for gthread
-rw-r--r-- gthread.c 418
1 files changed, 290 insertions, 128 deletions
diff --git a/gthread.c b/gthread.c
index 024273c..0e98553 100644
--- a/gthread.c
+++ b/gthread.c
@@ -22,17 +22,28 @@
#include <stdlib.h>
#include <string.h>
+#if !defined(__x86_64__) && !defined(__aarch64__)
+#error Unsupported architecture!
+#endif
+
+extern void GThreadTrampoline(void);
+extern bool GThreadSwapContexts(struct GThread* previous_thread,
+ struct GThread* next_thread);
+
struct GThread {
struct GThread* prev;
struct {
- uint64_t rbx; // 0x08
- uint64_t rbp; // 0x10
- uint64_t rsp; // 0x18
- uint64_t r12; // 0x20
- uint64_t r13; // 0x28
- uint64_t r14; // 0x30
- uint64_t r15; // 0x38
+ uint64_t sp; // 0x08
+#if defined(__x86_64__)
+ // mburakov: Frame pointer is saved on the stack.
+ uint64_t rbx; // 0x10
+ uint64_t r[4]; // 0x18
+#elif defined(__aarch64__)
+ // mburakov: Frame pointer and link register are saved on the stack.
+ uint64_t x[10]; // 0x10
+ uint64_t d[8]; // 0x60
+#endif
} registers;
struct {
@@ -60,15 +71,29 @@ static struct GThread* GThreadAlloc(size_t stack_alloc) {
return thread;
}
-static void __attribute__((naked)) Trampoline(void) {
- __asm__(
- "pop %rdi\n"
- "ret\n");
-}
+__asm__(  // Entry stub; GThreadCreateImpl seeds its address into a new thread's stack_data.
+ // TODO(mburakov): Compiling for Aarch64 yields invalid pointer to
+ // GThreadTrampoline function in GThreadCreateImpl call below unless the
+ // former is marked with dot global... Why???
+ ".global GThreadTrampoline\n"
+ ".type GThreadTrampoline,@function\n"
+ "GThreadTrampoline:\n"
+#if defined(__x86_64__)
+ "pop %rdi\n"  // rdi <- top stack slot (expected to be `user` from stack_data).
+ "pop %rax\n"  // rax <- next stack slot (expected to be `proc` from stack_data).
+ "push $0\n"  // Two zero qwords are pushed before the jump, so a plain
+ "push $0\n"  // return from the jump target would pop a null address.
+ "jmp *%rax\n"  // Jump (not call) to the address held in rax.
+#elif defined(__aarch64__)
+ "ldp x0, x1, [sp], #16\n"  // x0, x1 <- two stack slots; sp advances by 16.
+ "mov lr, 0\n"  // Zero the link register before branching.
+ "br x1\n"  // Branch without link to the address held in x1.
+#endif
+);
static struct GThread* __attribute__((used))
GThreadCreateImpl(void (*proc)(void*), void* user, uint64_t stack_tip) {
- void* stack_data[] = {NULL, (void*)Trampoline, user, (void*)proc, NULL};
+ void* stack_data[] = {NULL, (void*)GThreadTrampoline, user, (void*)proc};
struct GThread* thread = GThreadAlloc(sizeof(stack_data));
if (!thread) {
return NULL;
@@ -80,151 +105,288 @@ GThreadCreateImpl(void (*proc)(void*), void* user, uint64_t stack_tip) {
g_master_thread.stack.tip = stack_tip;
}
thread->stack.tip = stack_tip;
- thread->registers.rsp = stack_tip - sizeof(stack_data);
+ thread->registers.sp = stack_tip - sizeof(stack_data);
memcpy(thread->stack.data, stack_data, sizeof(stack_data));
return thread;
}
-static bool __attribute__((used, naked))
-GThreadSaveRegisters(struct GThread* thread) {
- __asm__(
- "pop %rax\n"
- "mov %rbx, 0x08(%rdi)\n"
- "mov %rbp, 0x10(%rdi)\n"
- "mov %rsp, 0x18(%rdi)\n"
- "mov %r12, 0x20(%rdi)\n"
- "mov %r13, 0x28(%rdi)\n"
- "mov %r14, 0x30(%rdi)\n"
- "mov %r15, 0x38(%rdi)\n"
- "push %rax\n"
- "ret\n");
-}
-
-static bool __attribute__((used)) GThreadSaveStack(struct GThread* thread) {
- if (thread->registers.rsp >= thread->stack.tip) {
+static bool __attribute__((used))  // Called by name from the GThreadSwapContexts assembly.
+GThreadVerifyStack(struct GThread* thread, uint64_t stack_tip) {  // Returns false only when malloc fails.
+ if (thread->stack.tip <= stack_tip) {
// mburakov: Stack tip is down the stack, no need to save anything.
return true;
}
- size_t stack_size = thread->stack.tip - thread->registers.rsp;
- if (thread->stack.alloc < stack_size) {
- free(thread->stack.data);
- thread->stack.data = malloc(stack_size);
- thread->stack.alloc = stack_size;
- if (!thread->stack.data) {
- thread->stack.alloc = 0;
- return false;
- }
+ size_t stack_alloc = thread->stack.tip - stack_tip;  // Bytes between the passed address and the thread's tip.
+ if (stack_alloc <= thread->stack.alloc) {
+ // mburakov: All good, stack will fit into current buffer.
+ return true;
+ }
+ free(thread->stack.data);  // Old buffer contents are discarded, not carried over.
+ thread->stack.data = malloc(stack_alloc);
+ if (!thread->stack.data) {
+ thread->stack.alloc = 0;  // Keep the recorded size consistent with the NULL buffer.
+ return false;
}
- memcpy(thread->stack.data, (void*)thread->registers.rsp, stack_size);
+ thread->stack.alloc = stack_alloc;  // Recorded only after the allocation succeeded.
return true;
}
-static void __attribute__((used, naked))
-GThreadRestoreRegisters(struct GThread* thread) {
- __asm__(
- "pop %rax\n"
- "mov 0x08(%rdi), %rbx\n"
- "mov 0x10(%rdi), %rbp\n"
- "mov 0x18(%rdi), %rsp\n"
- "mov 0x20(%rdi), %r12\n"
- "mov 0x28(%rdi), %r13\n"
- "mov 0x30(%rdi), %r14\n"
- "mov 0x38(%rdi), %r15\n"
- "push %rax\n"
- "ret\n");
+static void __attribute__((used)) GThreadSaveStack(struct GThread* thread) {  // Copies the live stack range into thread->stack.data.
+ if (thread->stack.tip > thread->registers.sp) {  // Nothing is copied when sp is at or above the tip.
+ size_t stack_size = thread->stack.tip - thread->registers.sp;  // Range copied is [registers.sp, stack.tip).
+ memcpy(thread->stack.data, (void*)thread->registers.sp, stack_size);  // No check against stack.alloc here; GThreadSwapContexts calls GThreadVerifyStack first.
+ }
}
static void __attribute__((used)) GThreadRestoreStack(struct GThread* thread) {  // Copies thread->stack.data back onto the machine stack.
- if (thread->registers.rsp >= thread->stack.tip) {
- // mburakov: Stack tip is down the stack, no need to restore anything.
- return;
+ if (thread->stack.tip > thread->registers.sp) {  // Nothing is copied when sp is at or above the tip.
+ size_t stack_size = thread->stack.tip - thread->registers.sp;  // Same byte count GThreadSaveStack computes.
+ memcpy((void*)thread->registers.sp, thread->stack.data, stack_size);  // Destination is the range [registers.sp, stack.tip).
}
- size_t stack_size = thread->stack.tip - thread->registers.rsp;
- memcpy((void*)thread->registers.rsp, thread->stack.data, stack_size);
}
+__asm__(
+ // bool GThreadSwapContexts(previous_thread, next_thread): switches execution
+ // from previous_thread to next_thread. Returns false without switching when
+ // GThreadVerifyStack fails. Otherwise stores next_thread in g_current_thread,
+ // saves callee-saved registers and stack of previous_thread, restores those
+ // of next_thread, and returns true on the restored stack.
+ "GThreadSwapContexts:\n"
+#if defined(__x86_64__)
+ // mburakov: Preserve frame pointer. Not really needed, but the stack has to
+ // be aligned on 16 bytes for the magical mmx leprechaun anyway.
+ "push %rbp\n"
+ "mov %rsp, %rbp\n"
+
+ // mburakov: Preserve function arguments, they would be overwritten by the
+ // upcoming thread storage size verification function call.
+ "push %rdi\n"
+ "push %rsi\n"
+ "mov %rbp, %rsi\n"
+ "call GThreadVerifyStack\n"
+ "pop %rsi\n"
+ "pop %rdi\n"
+ "test %al, %al\n"
+ "je escape_swap\n"
+
+ // mburakov: Update g_current_thread with the passed next_thread.
+ "mov %rsi, %fs:g_current_thread@tpoff\n"
+
+ // mburakov: Preserve callee-saved registers of the previous_thread
+ // according to the x86-64 ABI requirements. There's no need to preserve
+ // frame pointer because it would be saved together with the rest of the
+ // previous_thread stack below.
+ "mov %rsp, 0x08(%rdi)\n"
+ "mov %rbx, 0x10(%rdi)\n"
+ "mov %r12, 0x18(%rdi)\n"
+ "mov %r13, 0x20(%rdi)\n"
+ "mov %r14, 0x28(%rdi)\n"
+ "mov %r15, 0x30(%rdi)\n"
+
+ // mburakov: Preserve stack of the previous_thread. Use callee-saved
+ // register to preserve next_thread argument during the call.
+ "mov %rsi, %rbx\n"
+ "call GThreadSaveStack\n"
+ "mov %rbx, %rdi\n"
+
+ // mburakov: Recover callee-saved registers of the next_thread according to
+ // the x86-64 ABI requirements. Frame pointer would be recovered together
+ // with the rest of the next_thread stack below.
+ "mov 0x08(%rdi), %rsp\n"
+ "mov 0x10(%rdi), %rbx\n"
+ "mov 0x18(%rdi), %r12\n"
+ "mov 0x20(%rdi), %r13\n"
+ "mov 0x28(%rdi), %r14\n"
+ "mov 0x30(%rdi), %r15\n"
+
+ // mburakov: Restore stack of the next_thread. Do not use tailcall because
+ // it's still necessary to restore frame pointer.
+ "call GThreadRestoreStack\n"
+ "mov $1, %al\n"
+
+ "escape_swap:\n"
+ "pop %rbp\n"
+ "ret\n"
+#elif defined(__aarch64__)
+ // mburakov: Preserve frame pointer and link register, they would be
+ // overwritten by the upcoming function calls.
+ "stp fp, lr, [sp, -16]!\n"
+ "mov fp, sp\n"
+
+ // mburakov: Preserve function arguments, they would be overwritten by the
+ // upcoming thread storage size verification function call. Stack tip
+ // argument is adjusted on 16 bytes to compensate for saved function args.
+ "stp x0, x1, [sp, -16]!\n"
+ "add x1, sp, 16\n"
+ "bl GThreadVerifyStack\n"
+ "ldp x1, x2, [sp], 16\n"
+ "tbz x0, 0, escape_swap\n"
+
+ // mburakov: Update g_current_thread with the passed next_thread.
+ "mrs x0, tpidr_el0\n"
+ "add x0, x0, :tprel_hi12:g_current_thread, lsl 12\n"
+ "add x0, x0, :tprel_lo12_nc:g_current_thread\n"
+ "str x2, [x0]\n"
+ "mov x0, x1\n"
+
+ // mburakov: Preserve callee-saved registers of the previous_thread
+ // according to the Aarch64 ABI requirements. There's no need to preserve
+ // frame pointer and link registers because they would be saved together
+ // with the rest of the previous_thread stack below.
+ "mov x1, sp\n"
+ "str x1, [x0, 0x08]\n"
+ "stp x19, x20, [x0, 0x10]\n"
+ "stp x21, x22, [x0, 0x20]\n"
+ "stp x23, x24, [x0, 0x30]\n"
+ "stp x25, x26, [x0, 0x40]\n"
+ "stp x27, x28, [x0, 0x50]\n"
+ "stp d8, d9, [x0, 0x60]\n"
+ "stp d10, d11, [x0, 0x70]\n"
+ "stp d12, d13, [x0, 0x80]\n"
+ "stp d14, d15, [x0, 0x90]\n"
+
+ // mburakov: Preserve stack of the previous_thread. Use callee-saved
+ // register to preserve next_thread argument during the call.
+ "mov x19, x2\n"
+ "bl GThreadSaveStack\n"
+ "mov x0, x19\n"
+
+ // mburakov: Recover callee-saved registers of the next_thread according to
+ // the Aarch64 ABI requirements. Frame pointer and link register would be
+ // recovered together with the rest of the next_thread stack below.
+ "ldr x1, [x0, 0x08]\n"
+ "mov sp, x1\n"
+ "ldp x19, x20, [x0, 0x10]\n"
+ "ldp x21, x22, [x0, 0x20]\n"
+ "ldp x23, x24, [x0, 0x30]\n"
+ "ldp x25, x26, [x0, 0x40]\n"
+ "ldp x27, x28, [x0, 0x50]\n"
+ "ldp d8, d9, [x0, 0x60]\n"
+ "ldp d10, d11, [x0, 0x70]\n"
+ "ldp d12, d13, [x0, 0x80]\n"
+ "ldp d14, d15, [x0, 0x90]\n"
+
+ // mburakov: Restore stack of the next_thread. Do not use tailcall because
+ // it's still necessary to restore frame pointer and link register.
+ "bl GThreadRestoreStack\n"
+ "mov w0, 1\n"
+
+ "escape_swap:\n"
+ "ldp fp, lr, [sp], 16\n"
+ "ret\n"
+#endif
+);
+
+
static void GThreadFree(struct GThread* thread) {  // Releases the stack buffer and then the thread record itself.
free(thread->stack.data);  // Must come first: it reads a field of *thread.
free(thread);
}
-struct GThread* __attribute__((naked))
-GThreadCreate(void (*proc)(void*), void* user) {
- __asm__(
- // mburakov: Save stack tip and pass it to the impl. Tip is measured from
- // outside of spawn function (8 bytes offset). On threads switching, stack
- // would be preserved starting from this point till stack pointer.
- "lea 8(%rsp), %rdx\n"
- "jmp GThreadCreateImpl\n");
-}
+__asm__(
+ // mburakov: Save stack tip and pass it to the impl. Tip is measured from
+ // outside of spawn function. On threads switching, stack would be preserved
+ // starting from this tip down to the effective stack pointer.
+ ".global GThreadCreate\n"
+ ".type GThreadCreate,@function\n"
+ "GThreadCreate:\n"
+#if defined(__x86_64__)
+ // For x86-64 function calls return address is pushed to the stack, so it's
+ // necessary to add 8 bytes offset to compensate.
+ "lea 8(%rsp), %rdx\n"  // rdx carries the stack_tip argument of GThreadCreateImpl.
+ "jmp GThreadCreateImpl\n"  // Tail jump: GThreadCreateImpl returns straight to the caller.
+#elif defined(__aarch64__)
+ // For Aarch64 function calls return address is saved in the link register,
+ // so no further adjustments are needed.
+ "mov x2, sp\n"  // x2 carries the stack_tip argument of GThreadCreateImpl.
+ "b GThreadCreateImpl\n"  // Tail branch: lr still points at the original caller.
+#endif
+);
-bool __attribute__((naked)) GThreadWake(struct GThread* thread) {
- __asm__(
- // mburakov: Argument would be overwritten by upcoming function calls.
- "push %rdi\n"
+__asm__(
+ ".global GThreadWake\n"
+ ".type GThreadWake,@function\n"
+ "GThreadWake:\n"
+#if defined(__x86_64__)
+ // mburakov: Forward received argument as next_thread to swapping function,
+ // and load g_current_thread to pass it as a previous_thread.
+ "mov %rdi, %rsi\n"
+ "mov %fs:g_current_thread@tpoff, %rdi\n"
- // mburakov: Preserve registers of the current thread.
- "mov %fs:g_current_thread@tpoff, %rdi\n"
- "call GThreadSaveRegisters\n"
+ // mburakov: Waking the currently active thread has no effect.
+ "cmp %rdi, %rsi\n"
+ "je skip_wake\n"
- // mburakov: Preserve stack of the current thread.
- "call GThreadSaveStack\n"
- "test %al, %al\n"
- "je escape_wake\n"
+ // mburakov: Save current thread to prev field on the activated thread.
+ "mov %rdi, (%rsi)\n"  // prev is the first member of struct GThread, hence offset 0.
- // mburakov: Recover registers of the new thread.
- "mov (%rsp), %rdi\n"
- "call GThreadRestoreRegisters\n"
+ // mburakov: Call swap with the prepared arguments.
+ "jmp GThreadSwapContexts\n"  // Tail jump: the swap's return value goes to the caller.
+ "skip_wake:\n"
+ "ret\n"  // NOTE(review): rax is not set on this path - confirm the intended return value.
+#elif defined(__aarch64__)
+ // mburakov: Forward received argument as next_thread to swapping function,
+ // and load g_current_thread to pass it as a previous_thread.
+ "mov x1, x0\n"
+ "mrs x0, tpidr_el0\n"
+ "add x0, x0, :tprel_hi12:g_current_thread, lsl 12\n"
+ "add x0, x0, :tprel_lo12_nc:g_current_thread\n"
+ "ldr x0, [x0]\n"
- // mburakov: Update prev and current thread pointers.
- "mov %fs:g_current_thread@tpoff, %rsi\n"
- "mov %rdi, %fs:g_current_thread@tpoff\n"
- "mov %rsi, (%rdi)\n"
+ // mburakov: Waking the currently active thread has no effect.
+ "cmp x0, x1\n"
+ "b.eq skip_wake\n"
- // mburakov: Recover stack of the new thread.
- "call GThreadRestoreStack\n"
- "mov $1, %eax\n"
+ // mburakov: Save current thread to prev field on the activated thread.
+ "str x0, [x1]\n"  // prev is the first member of struct GThread, hence offset 0.
- "escape_wake:\n"
- "add $8, %rsp\n"
- "ret\n");
-}
+ // mburakov: Call swap with the prepared arguments.
+ "b GThreadSwapContexts\n"  // Tail branch: the swap's return value goes to the caller.
+ "skip_wake:\n"
+ "ret\n"  // NOTE(review): x0 still holds the current thread pointer here - confirm the intended return value.
+#endif
+);
-bool __attribute__((naked)) GThreadYield(void) {
- __asm__(
- // mburakov: Yielding on master thread has no effect.
- "mov %fs:g_current_thread@tpoff, %rdi\n"
- "mov (%rdi), %rax\n"
- "test %rax, %rax\n"
- "jne proceed\n"
- "ret\n"
- "proceed:\n"
- "push %rax\n"
-
- // mburakov: Preserve registers of the current thread.
- "mov %fs:g_current_thread@tpoff, %rdi\n"
- "call GThreadSaveRegisters\n"
-
- // mburakov: Preserve stack of the current thread.
- "call GThreadSaveStack\n"
- "test %al, %al\n"
- "je escape_yield\n"
-
- // mburakov: Recover registers of the new thread.
- "mov (%rsp), %rdi\n"
- "call GThreadRestoreRegisters\n"
-
- // mburakov: Update current thread pointer.
- "mov %rdi, %fs:g_current_thread@tpoff\n"
-
- // mburakov: Recover stack of the new thread.
- "call GThreadRestoreStack\n"
- "mov $1, %eax\n"
-
- "escape_yield:\n"
- "add $8, %rsp\n"
- "ret\n");
-}
+__asm__(
+ ".global GThreadYield\n"
+ ".type GThreadYield,@function\n"
+ "GThreadYield:\n"
+#if defined(__x86_64__)
+ // mburakov: Load g_current_thread to forward it to swapping function as a
+ // previous_thread argument.
+ "mov %fs:g_current_thread@tpoff, %rdi\n"
+
+ // mburakov: Yielding without a current thread has no effect.
+ "test %rdi, %rdi\n"
+ "jz skip_yield\n"
+
+ // mburakov: Prev field of the current thread would be forwarded as a
+ // next_thread argument to swapping function.
+ "mov (%rdi), %rsi\n"  // prev is the first member of struct GThread, hence offset 0.
+
+ // mburakov: Yielding on the main thread (that does not have prev field
+ // assigned) has no effect. Otherwise swap with the prepared arguments.
+ "test %rsi, %rsi\n"
+ "jz skip_yield\n"
+ "jnz GThreadSwapContexts\n"  // ZF is clear whenever this is reached, so the jump is always taken.
+ "skip_yield:\n"
+ "ret\n"  // NOTE(review): rax is not set on this path - confirm the intended return value.
+#elif defined(__aarch64__)
+ // mburakov: Load g_current_thread to forward it to swapping function as a
+ // previous_thread argument.
+ "mrs x0, tpidr_el0\n"
+ "add x0, x0, :tprel_hi12:g_current_thread, lsl 12\n"
+ "add x0, x0, :tprel_lo12_nc:g_current_thread\n"
+ "ldr x0, [x0]\n"
+
+ // mburakov: Yielding without a current thread has no effect.
+ "cbz x0, skip_yield\n"
+
+ // mburakov: Prev field of the current thread would be forwarded as a
+ // next_thread argument to swapping function.
+ "ldr x1, [x0]\n"  // prev is the first member of struct GThread, hence offset 0.
+
+ // mburakov: Yielding on the main thread (that does not have prev field
+ // assigned) has no effect. Otherwise swap with the prepared arguments.
+ "cbnz x1, GThreadSwapContexts\n"  // Falls through to skip_yield when prev is NULL.
+ "skip_yield:\n"
+ "ret\n"  // NOTE(review): x0 holds either zero or the current thread pointer here - confirm the intended return value.
+#endif
+);
void GThreadDestroy(struct GThread* thread) {
if (thread == &g_master_thread || thread == g_current_thread) {