diff options
author | Mikhail Burakov <mburakov@mailbox.org> | 2023-09-15 16:54:15 +0200 |
---|---|---|
committer | Mikhail Burakov <mburakov@mailbox.org> | 2023-09-16 12:08:16 +0200 |
commit | a15fb43722e7b75a52077f6185f4d7b922c397d6 (patch) | |
tree | 7e1c736bd8182f66435f9c7a907f79e6cf1fef74 | |
parent | 2841b851e95cc799c50e7a2e843d53dd4f349693 (diff) |
Finished refactoring and x86-64 implementation for gthread
-rw-r--r-- | gthread.c | 418 |
1 files changed, 290 insertions, 128 deletions
@@ -22,17 +22,28 @@ #include <stdlib.h> #include <string.h> +#if !defined(__x86_64__) && !defined(__aarch64__) +#error Unsupported architecture! +#endif + +extern void GThreadTrampoline(void); +extern bool GThreadSwapContexts(struct GThread* previous_thread, + struct GThread* next_thread); + struct GThread { struct GThread* prev; struct { - uint64_t rbx; // 0x08 - uint64_t rbp; // 0x10 - uint64_t rsp; // 0x18 - uint64_t r12; // 0x20 - uint64_t r13; // 0x28 - uint64_t r14; // 0x30 - uint64_t r15; // 0x38 + uint64_t sp; // 0x08 +#if defined(__x86_64__) + // mburakov: Frame pointer is saved on the stack. + uint64_t rbx; // 0x10 + uint64_t r[4]; // 0x18 +#elif defined(__aarch64__) + // mburakov: Frame pointer and link register are saved on the stack. + uint64_t x[10]; // 0x10 + uint64_t d[8]; // 0x60 +#endif } registers; struct { @@ -60,15 +71,29 @@ static struct GThread* GThreadAlloc(size_t stack_alloc) { return thread; } -static void __attribute__((naked)) Trampoline(void) { - __asm__( - "pop %rdi\n" - "ret\n"); -} +__asm__( + // TODO(mburakov): Compiling for Aarch64 yields invalid pointer to + // GThreadTrampoline function in GThreadCreateImpl call below unless the + // former is marked with dot global... Why??? 
+ ".global GThreadTrampoline\n" + ".type GThreadTrampoline,@function\n" + "GThreadTrampoline:\n" +#if defined(__x86_64__) + "pop %rdi\n" + "pop %rax\n" + "push $0\n" + "push $0\n" + "jmp *%rax\n" +#elif defined(__aarch64__) + "ldp x0, x1, [sp], #16\n" + "mov lr, 0\n" + "br x1\n" +#endif +); static struct GThread* __attribute__((used)) GThreadCreateImpl(void (*proc)(void*), void* user, uint64_t stack_tip) { - void* stack_data[] = {NULL, (void*)Trampoline, user, (void*)proc, NULL}; + void* stack_data[] = {NULL, (void*)GThreadTrampoline, user, (void*)proc}; struct GThread* thread = GThreadAlloc(sizeof(stack_data)); if (!thread) { return NULL; @@ -80,151 +105,288 @@ GThreadCreateImpl(void (*proc)(void*), void* user, uint64_t stack_tip) { g_master_thread.stack.tip = stack_tip; } thread->stack.tip = stack_tip; - thread->registers.rsp = stack_tip - sizeof(stack_data); + thread->registers.sp = stack_tip - sizeof(stack_data); memcpy(thread->stack.data, stack_data, sizeof(stack_data)); return thread; } -static bool __attribute__((used, naked)) -GThreadSaveRegisters(struct GThread* thread) { - __asm__( - "pop %rax\n" - "mov %rbx, 0x08(%rdi)\n" - "mov %rbp, 0x10(%rdi)\n" - "mov %rsp, 0x18(%rdi)\n" - "mov %r12, 0x20(%rdi)\n" - "mov %r13, 0x28(%rdi)\n" - "mov %r14, 0x30(%rdi)\n" - "mov %r15, 0x38(%rdi)\n" - "push %rax\n" - "ret\n"); -} - -static bool __attribute__((used)) GThreadSaveStack(struct GThread* thread) { - if (thread->registers.rsp >= thread->stack.tip) { +static bool __attribute__((used)) +GThreadVerifyStack(struct GThread* thread, uint64_t stack_tip) { + if (thread->stack.tip <= stack_tip) { // mburakov: Stack tip is down the stack, no need to save anything. 
return true; } - size_t stack_size = thread->stack.tip - thread->registers.rsp; - if (thread->stack.alloc < stack_size) { - free(thread->stack.data); - thread->stack.data = malloc(stack_size); - thread->stack.alloc = stack_size; - if (!thread->stack.data) { - thread->stack.alloc = 0; - return false; - } + size_t stack_alloc = thread->stack.tip - stack_tip; + if (stack_alloc <= thread->stack.alloc) { + // mburakov: All good, stack will fit into current buffer. + return true; + } + free(thread->stack.data); + thread->stack.data = malloc(stack_alloc); + if (!thread->stack.data) { + thread->stack.alloc = 0; + return false; } - memcpy(thread->stack.data, (void*)thread->registers.rsp, stack_size); + thread->stack.alloc = stack_alloc; return true; } -static void __attribute__((used, naked)) -GThreadRestoreRegisters(struct GThread* thread) { - __asm__( - "pop %rax\n" - "mov 0x08(%rdi), %rbx\n" - "mov 0x10(%rdi), %rbp\n" - "mov 0x18(%rdi), %rsp\n" - "mov 0x20(%rdi), %r12\n" - "mov 0x28(%rdi), %r13\n" - "mov 0x30(%rdi), %r14\n" - "mov 0x38(%rdi), %r15\n" - "push %rax\n" - "ret\n"); +static void __attribute__((used)) GThreadSaveStack(struct GThread* thread) { + if (thread->stack.tip > thread->registers.sp) { + size_t stack_size = thread->stack.tip - thread->registers.sp; + memcpy(thread->stack.data, (void*)thread->registers.sp, stack_size); + } } static void __attribute__((used)) GThreadRestoreStack(struct GThread* thread) { - if (thread->registers.rsp >= thread->stack.tip) { - // mburakov: Stack tip is down the stack, no need to restore anything. - return; + if (thread->stack.tip > thread->registers.sp) { + size_t stack_size = thread->stack.tip - thread->registers.sp; + memcpy((void*)thread->registers.sp, thread->stack.data, stack_size); } - size_t stack_size = thread->stack.tip - thread->registers.rsp; - memcpy((void*)thread->registers.rsp, thread->stack.data, stack_size); } +__asm__( + "GThreadSwapContexts:\n" +#if defined(__x86_64__) + // mburakov: Preserve frame pointer. 
Not really needed, but the stack has to + // be aligned on 16 bytes for the magical mmx leprechaun anyway. + "push %rbp\n" + "mov %rsp, %rbp\n" + + // mburakov: Preserve function arguments, they would be overwritten by the + // upcoming thread storage size verification function call. + "push %rdi\n" + "push %rsi\n" + "mov %rbp, %rsi\n" + "call GThreadVerifyStack\n" + "pop %rsi\n" + "pop %rdi\n" + "test %al, %al\n" + "je escape_swap\n" + + // mburakov: Update g_current_thread with the passed next_thread. + "mov %rsi, %fs:g_current_thread@tpoff\n" + + // mburakov: Preserve callee-saved registers of the previous_thread + // according to the x86-64 ABI requirements. There's no need to preserve + // frame pointer because it would be saved together with the rest of the + // previous_thread stack below. + "mov %rsp, 0x08(%rdi)\n" + "mov %rbx, 0x10(%rdi)\n" + "mov %r12, 0x18(%rdi)\n" + "mov %r13, 0x20(%rdi)\n" + "mov %r14, 0x28(%rdi)\n" + "mov %r15, 0x30(%rdi)\n" + + // mburakov: Preserve stack of the previous_thread. Use callee-saved + // register to preserve next_thread argument during the call. + "mov %rsi, %rbx\n" + "call GThreadSaveStack\n" + "mov %rbx, %rdi\n" + + // mburakov: Recover callee-saved registers of the next_thread according to + // the x86-64 ABI requirements. Frame pointer would be recovered together + // with the rest of the next_thread stack below. + "mov 0x08(%rdi), %rsp\n" + "mov 0x10(%rdi), %rbx\n" + "mov 0x18(%rdi), %r12\n" + "mov 0x20(%rdi), %r13\n" + "mov 0x28(%rdi), %r14\n" + "mov 0x30(%rdi), %r15\n" + + // mburakov: Restore stack of the next_thread. Do not use tailcall because + // it's still necessary to restore frame pointer. + "call GThreadRestoreStack\n" + "mov $1, %al\n" + + "escape_swap:\n" + "pop %rbp\n" + "ret\n" +#elif defined(__aarch64__) + // mburakov: Preserve frame pointer and link register, they would be + // overwritten by the upcoming function calls. 
+ "stp fp, lr, [sp, -16]!\n" + "mov fp, sp\n" + + // mburakov: Preserve function arguments, they would be overwritten by the + // upcoming thread storage size verification function call. Stack tip + // argument is adjusted on 16 bytes to compensate for saved function args. + "stp x0, x1, [sp, -16]!\n" + "add x1, sp, 16\n" + "bl GThreadVerifyStack\n" + "ldp x1, x2, [sp], 16\n" + "tbz x0, 0, escape_swap\n" + + // mburakov: Update g_current_thread with the passed next_thread. + "mrs x0, tpidr_el0\n" + "add x0, x0, :tprel_hi12:g_current_thread, lsl 12\n" + "add x0, x0, :tprel_lo12_nc:g_current_thread\n" + "str x2, [x0]\n" + "mov x0, x1\n" + + // mburakov: Preserve callee-saved registers of the previous_thread + // according to the Aarch64 ABI requirements. There's no need to preserve + // frame pointer and link registers because they would be saved together + // with the rest of the previous_thread stack below. + "mov x1, sp\n" + "str x1, [x0, 0x08]\n" + "stp x19, x20, [x0, 0x10]\n" + "stp x21, x22, [x0, 0x20]\n" + "stp x23, x24, [x0, 0x30]\n" + "stp x25, x26, [x0, 0x40]\n" + "stp x27, x28, [x0, 0x50]\n" + // The d[8] array at 0x60 holds d8-d15 in order, one register per slot, + // so the pair stored at 0x70 is d10/d11. + "stp d8, d9, [x0, 0x60]\n" + "stp d10, d11, [x0, 0x70]\n" + "stp d12, d13, [x0, 0x80]\n" + "stp d14, d15, [x0, 0x90]\n" + + // mburakov: Preserve stack of the previous_thread. Use callee-saved + // register to preserve next_thread argument during the call. + "mov x19, x2\n" + "bl GThreadSaveStack\n" + "mov x0, x19\n" + + // mburakov: Recover callee-saved registers of the next_thread according to + // the Aarch64 ABI requirements. Frame pointer and link register would be + // recovered together with the rest of the next_thread stack below. 
+ "ldr x1, [x0, 0x08]\n" + "mov sp, x1\n" + "ldp x19, x20, [x0, 0x10]\n" + "ldp x21, x22, [x0, 0x20]\n" + "ldp x23, x24, [x0, 0x30]\n" + "ldp x25, x26, [x0, 0x40]\n" + "ldp x27, x28, [x0, 0x50]\n" + // Loads mirror the stores above: the pair at 0x70 is d10/d11. + "ldp d8, d9, [x0, 0x60]\n" + "ldp d10, d11, [x0, 0x70]\n" + "ldp d12, d13, [x0, 0x80]\n" + "ldp d14, d15, [x0, 0x90]\n" + + // mburakov: Restore stack of the next_thread. Do not use tailcall because + // it's still necessary to restore frame pointer and link register. + "bl GThreadRestoreStack\n" + "mov w0, 1\n" + + "escape_swap:\n" + "ldp fp, lr, [sp], 16\n" + "ret\n" +#endif +); + static void GThreadFree(struct GThread* thread) { free(thread->stack.data); free(thread); } -struct GThread* __attribute__((naked)) -GThreadCreate(void (*proc)(void*), void* user) { - __asm__( - // mburakov: Save stack tip and pass it to the impl. Tip is measured from - // outside of spawn function (8 bytes offset). On threads switching, stack - // would be preserved starting from this point till stack pointer. - "lea 8(%rsp), %rdx\n" - "jmp GThreadCreateImpl\n"); -} +__asm__( + // mburakov: Save stack tip and pass it to the impl. Tip is measured from + // outside of spawn function. On threads switching, stack would be preserved + // starting from this tip down to the effective stack pointer. + ".global GThreadCreate\n" + ".type GThreadCreate,@function\n" + "GThreadCreate:\n" +#if defined(__x86_64__) + // For x86-64 function calls return address is pushed to the stack, so it's + // necessary to add 8 bytes offset to compensate. + "lea 8(%rsp), %rdx\n" + "jmp GThreadCreateImpl\n" +#elif defined(__aarch64__) + // For Aarch64 function calls return address is saved in the link register, + // so no further adjustments are needed. + "mov x2, sp\n" + "b GThreadCreateImpl\n" +#endif +); -bool __attribute__((naked)) GThreadWake(struct GThread* thread) { - __asm__( - // mburakov: Argument would be overwritten by upcoming function calls. 
- "push %rdi\n" +__asm__( + ".global GThreadWake\n" + ".type GThreadWake,@function\n" + "GThreadWake:\n" +#if defined(__x86_64__) + // mburakov: Forward received argument as next_thread to swapping function, + // and load g_current_thread to pass it as a previous_thread. + "mov %rdi, %rsi\n" + "mov %fs:g_current_thread@tpoff, %rdi\n" - // mburakov: Preserve registers of the current thread. - "mov %fs:g_current_thread@tpoff, %rdi\n" - "call GThreadSaveRegisters\n" + // mburakov: Waking the currently active thread has no effect. + "cmp %rdi, %rsi\n" + "je skip_wake\n" - // mburakov: Preserve stack of the current thread. - "call GThreadSaveStack\n" - "test %al, %al\n" - "je escape_wake\n" + // mburakov: Save current thread to prev field on the activated thread. + "mov %rdi, (%rsi)\n" - // mburakov: Recover registers of the new thread. - "mov (%rsp), %rdi\n" - "call GThreadRestoreRegisters\n" + // mburakov: Call swap with the prepared arguments. + "jmp GThreadSwapContexts\n" + "skip_wake:\n" + "ret\n" +#elif defined(__aarch64__) + // mburakov: Forward received argument as next_thread to swapping function, + // and load g_current_thread to pass it as a previous_thread. + "mov x1, x0\n" + "mrs x0, tpidr_el0\n" + "add x0, x0, :tprel_hi12:g_current_thread, lsl 12\n" + "add x0, x0, :tprel_lo12_nc:g_current_thread\n" + "ldr x0, [x0]\n" - // mburakov: Update prev and current thread pointers. - "mov %fs:g_current_thread@tpoff, %rsi\n" - "mov %rdi, %fs:g_current_thread@tpoff\n" - "mov %rsi, (%rdi)\n" + // mburakov: Waking the currently active thread has no effect. + "cmp x0, x1\n" + "b.eq skip_wake\n" - // mburakov: Recover stack of the new thread. - "call GThreadRestoreStack\n" - "mov $1, %eax\n" + // mburakov: Save current thread to prev field on the activated thread. + "str x0, [x1]\n" - "escape_wake:\n" - "add $8, %rsp\n" - "ret\n"); -} + // mburakov: Call swap with the prepared arguments. 
+ "b GThreadSwapContexts\n" + "skip_wake:\n" + "ret\n" +#endif +); -bool __attribute__((naked)) GThreadYield(void) { - __asm__( - // mburakov: Yielding on master thread has no effect. - "mov %fs:g_current_thread@tpoff, %rdi\n" - "mov (%rdi), %rax\n" - "test %rax, %rax\n" - "jne proceed\n" - "ret\n" - "proceed:\n" - "push %rax\n" - - // mburakov: Preserve registers of the current thread. - "mov %fs:g_current_thread@tpoff, %rdi\n" - "call GThreadSaveRegisters\n" - - // mburakov: Preserve stack of the current thread. - "call GThreadSaveStack\n" - "test %al, %al\n" - "je escape_yield\n" - - // mburakov: Recover registers of the new thread. - "mov (%rsp), %rdi\n" - "call GThreadRestoreRegisters\n" - - // mburakov: Update current thread pointer. - "mov %rdi, %fs:g_current_thread@tpoff\n" - - // mburakov: Recover stack of the new thread. - "call GThreadRestoreStack\n" - "mov $1, %eax\n" - - "escape_yield:\n" - "add $8, %rsp\n" - "ret\n"); -} +__asm__( + ".global GThreadYield\n" + ".type GThreadYield,@function\n" + "GThreadYield:\n" +#if defined(__x86_64__) + // mburakov: Load g_current_thread to forward it to swapping function as a + // previous_thread argument. + "mov %fs:g_current_thread@tpoff, %rdi\n" + + // mburakov: Yielding without a current thread has no effect. + "test %rdi, %rdi\n" + "jz skip_yield\n" + + // mburakov: Prev field of the current thread would be forwarded as a + // next_thread argument to swapping function. + "mov (%rdi), %rsi\n" + + // mburakov: Yielding on the main thread (that does not have prev field + // assigned) has no effect. Otherwise swap with the prepared arguments. + "test %rsi, %rsi\n" + "jz skip_yield\n" + "jnz GThreadSwapContexts\n" + "skip_yield:\n" + "ret\n" +#elif defined(__aarch64__) + // mburakov: Load g_current_thread to forward it to swapping function as a + // previous_thread argument. 
+ "mrs x0, tpidr_el0\n" + "add x0, x0, :tprel_hi12:g_current_thread, lsl 12\n" + "add x0, x0, :tprel_lo12_nc:g_current_thread\n" + "ldr x0, [x0]\n" + + // mburakov: Yielding without a current thread has no effect. + "cbz x0, skip_yield\n" + + // mburakov: Prev field of the current thread would be forwarded as a + // next_thread argument to swapping function. + "ldr x1, [x0]\n" + + // mburakov: Yielding on the main thread (that does not have prev field + // assigned) has no effect. Otherwise swap with the prepared arguments. + "cbnz x1, GThreadSwapContexts\n" + "skip_yield:\n" + "ret\n" +#endif +); void GThreadDestroy(struct GThread* thread) { if (thread == &g_master_thread || thread == g_current_thread) { |