diff options
| -rw-r--r-- | gthread.c | 418 | 
1 files changed, 290 insertions, 128 deletions
| @@ -22,17 +22,28 @@  #include <stdlib.h>  #include <string.h> +#if !defined(__x86_64__) && !defined(__aarch64__) +#error Unsupported architecture! +#endif + +extern void GThreadTrampoline(void); +extern bool GThreadSwapContexts(struct GThread* previous_thread, +                                struct GThread* next_thread); +  struct GThread {    struct GThread* prev;    struct { -    uint64_t rbx;  // 0x08 -    uint64_t rbp;  // 0x10 -    uint64_t rsp;  // 0x18 -    uint64_t r12;  // 0x20 -    uint64_t r13;  // 0x28 -    uint64_t r14;  // 0x30 -    uint64_t r15;  // 0x38 +    uint64_t sp;  // 0x08 +#if defined(__x86_64__) +    // mburakov: Frame pointer is saved on the stack. +    uint64_t rbx;   // 0x10 +    uint64_t r[4];  // 0x18, holds r12..r15 +#elif defined(__aarch64__) +    // mburakov: Frame pointer and link register are saved on the stack. +    uint64_t x[10];  // 0x10, holds x19..x28 +    uint64_t d[8];   // 0x60, holds callee-saved d registers +#endif    } registers;    struct { @@ -60,15 +71,29 @@ static struct GThread* GThreadAlloc(size_t stack_alloc) {    return thread;  } -static void __attribute__((naked)) Trampoline(void) { -  __asm__( -      "pop %rdi\n" -      "ret\n"); -} +__asm__( +    // TODO(mburakov): Compiling for Aarch64 yields invalid pointer to +    // GThreadTrampoline function in GThreadCreateImpl call below unless the +    // former is marked with dot global... Why??? 
+    ".global GThreadTrampoline\n" +    ".type GThreadTrampoline,@function\n" +    "GThreadTrampoline:\n" +#if defined(__x86_64__) +    // On entry the stack holds {user, proc} as laid out by stack_data in +    // GThreadCreateImpl: user becomes the first argument, proc is the jump +    // target, and two zero slots are pushed before jumping to proc. +    // NOTE(review): two 8-byte pushes appear to leave rsp 16-byte aligned at +    // the jump, whereas SysV x86-64 expects rsp+8 to be aligned on function +    // entry - TODO confirm. +    "pop %rdi\n" +    "pop %rax\n" +    "push $0\n" +    "push $0\n" +    "jmp *%rax\n" +#elif defined(__aarch64__) +    // Loads user into x0 and proc into x1, zeroes the link register and +    // branches to proc. +    "ldp x0, x1, [sp], #16\n" +    "mov lr, 0\n" +    "br x1\n" +#endif +);  static struct GThread* __attribute__((used))  GThreadCreateImpl(void (*proc)(void*), void* user, uint64_t stack_tip) { -  void* stack_data[] = {NULL, (void*)Trampoline, user, (void*)proc, NULL}; +  void* stack_data[] = {NULL, (void*)GThreadTrampoline, user, (void*)proc};    struct GThread* thread = GThreadAlloc(sizeof(stack_data));    if (!thread) {      return NULL; @@ -80,151 +105,288 @@ GThreadCreateImpl(void (*proc)(void*), void* user, uint64_t stack_tip) {      g_master_thread.stack.tip = stack_tip;    }    thread->stack.tip = stack_tip; -  thread->registers.rsp = stack_tip - sizeof(stack_data); +  thread->registers.sp = stack_tip - sizeof(stack_data);    memcpy(thread->stack.data, stack_data, sizeof(stack_data));    return thread;  } -static bool __attribute__((used, naked)) -GThreadSaveRegisters(struct GThread* thread) { -  __asm__( -      "pop %rax\n" -      "mov %rbx, 0x08(%rdi)\n" -      "mov %rbp, 0x10(%rdi)\n" -      "mov %rsp, 0x18(%rdi)\n" -      "mov %r12, 0x20(%rdi)\n" -      "mov %r13, 0x28(%rdi)\n" -      "mov %r14, 0x30(%rdi)\n" -      "mov %r15, 0x38(%rdi)\n" -      "push %rax\n" -      "ret\n"); -} - -static bool __attribute__((used)) GThreadSaveStack(struct GThread* thread) { -  if (thread->registers.rsp >= thread->stack.tip) { +// Checks that thread->stack.data can hold the bytes between stack_tip and +// thread->stack.tip, replacing the buffer with a larger one when it cannot. +// The old buffer is freed without copying its contents. Returns false when +// the allocation fails, true otherwise. +static bool __attribute__((used)) +GThreadVerifyStack(struct GThread* thread, uint64_t stack_tip) { +  if (thread->stack.tip <= stack_tip) {      // mburakov: Stack tip is down the stack, no need to save anything.      
return true;    } -  size_t stack_size = thread->stack.tip - thread->registers.rsp; -  if (thread->stack.alloc < stack_size) { -    free(thread->stack.data); -    thread->stack.data = malloc(stack_size); -    thread->stack.alloc = stack_size; -    if (!thread->stack.data) { -      thread->stack.alloc = 0; -      return false; -    } +  size_t stack_alloc = thread->stack.tip - stack_tip; +  if (stack_alloc <= thread->stack.alloc) { +    // mburakov: All good, stack will fit into current buffer. +    return true; +  } +  free(thread->stack.data); +  thread->stack.data = malloc(stack_alloc); +  if (!thread->stack.data) { +    thread->stack.alloc = 0; +    return false;    } -  memcpy(thread->stack.data, (void*)thread->registers.rsp, stack_size); +  thread->stack.alloc = stack_alloc;    return true;  } -static void __attribute__((used, naked)) -GThreadRestoreRegisters(struct GThread* thread) { -  __asm__( -      "pop %rax\n" -      "mov 0x08(%rdi), %rbx\n" -      "mov 0x10(%rdi), %rbp\n" -      "mov 0x18(%rdi), %rsp\n" -      "mov 0x20(%rdi), %r12\n" -      "mov 0x28(%rdi), %r13\n" -      "mov 0x30(%rdi), %r14\n" -      "mov 0x38(%rdi), %r15\n" -      "push %rax\n" -      "ret\n"); +// Copies the bytes between registers.sp and stack.tip into stack.data; does +// nothing when stack.tip is not above registers.sp. +static void __attribute__((used)) GThreadSaveStack(struct GThread* thread) { +  if (thread->stack.tip > thread->registers.sp) { +    size_t stack_size = thread->stack.tip - thread->registers.sp; +    memcpy(thread->stack.data, (void*)thread->registers.sp, stack_size); +  }  } +// Copies stack.data back to the memory between registers.sp and stack.tip.  static void __attribute__((used)) GThreadRestoreStack(struct GThread* thread) { -  if (thread->registers.rsp >= thread->stack.tip) { -    // mburakov: Stack tip is down the stack, no need to restore anything. 
-    return; +  if (thread->stack.tip > thread->registers.sp) { +    size_t stack_size = thread->stack.tip - thread->registers.sp; +    memcpy((void*)thread->registers.sp, thread->stack.data, stack_size);    } -  size_t stack_size = thread->stack.tip - thread->registers.rsp; -  memcpy((void*)thread->registers.rsp, thread->stack.data, stack_size);  } +// GThreadSwapContexts(previous_thread, next_thread): stores the callee-saved +// registers and the stack of previous_thread, makes next_thread current and +// resumes it. Returns false in the calling context when GThreadVerifyStack +// fails, true in the resumed context otherwise. +__asm__( +    "GThreadSwapContexts:\n" +#if defined(__x86_64__) +    // mburakov: Preserve frame pointer. Not really needed, but the stack has to +    // be aligned on 16 bytes for the magical mmx leprechaun anyway. +    "push %rbp\n" +    "mov %rsp, %rbp\n" + +    // mburakov: Preserve function arguments, they would be overwritten by the +    // upcoming thread storage size verification function call. +    "push %rdi\n" +    "push %rsi\n" +    "mov %rbp, %rsi\n" +    "call GThreadVerifyStack\n" +    "pop %rsi\n" +    "pop %rdi\n" +    "test %al, %al\n" +    "je escape_swap\n" + +    // mburakov: Update g_current_thread with the passed next_thread. +    "mov %rsi, %fs:g_current_thread@tpoff\n" + +    // mburakov: Preserve callee-saved registers of the previous_thread +    // according to the x86-64 ABI requirements. There's no need to preserve +    // frame pointer because it would be saved together with the rest of the +    // previous_thread stack below. +    "mov %rsp, 0x08(%rdi)\n" +    "mov %rbx, 0x10(%rdi)\n" +    "mov %r12, 0x18(%rdi)\n" +    "mov %r13, 0x20(%rdi)\n" +    "mov %r14, 0x28(%rdi)\n" +    "mov %r15, 0x30(%rdi)\n" + +    // mburakov: Preserve stack of the previous_thread. Use callee-saved +    // register to preserve next_thread argument during the call. +    "mov %rsi, %rbx\n" +    "call GThreadSaveStack\n" +    "mov %rbx, %rdi\n" + +    // mburakov: Recover callee-saved registers of the next_thread according to +    // the x86-64 ABI requirements. Frame pointer would be recovered together +    // with the rest of the next_thread stack below. 
+    "mov 0x08(%rdi), %rsp\n" +    "mov 0x10(%rdi), %rbx\n" +    "mov 0x18(%rdi), %r12\n" +    "mov 0x20(%rdi), %r13\n" +    "mov 0x28(%rdi), %r14\n" +    "mov 0x30(%rdi), %r15\n" + +    // mburakov: Restore stack of the next_thread. Do not use tailcall because +    // it's still necessary to restore frame pointer. +    "call GThreadRestoreStack\n" +    "mov $1, %al\n" + +    "escape_swap:\n" +    "pop %rbp\n" +    "ret\n" +#elif defined(__aarch64__) +    // mburakov: Preserve frame pointer and link register, they would be +    // overwritten by the upcoming function calls. +    "stp fp, lr, [sp, -16]!\n" +    "mov fp, sp\n" + +    // mburakov: Preserve function arguments, they would be overwritten by the +    // upcoming thread storage size verification function call. Stack tip +    // argument is adjusted on 16 bytes to compensate for saved function args. +    "stp x0, x1, [sp, -16]!\n" +    "add x1, sp, 16\n" +    "bl GThreadVerifyStack\n" +    "ldp x1, x2, [sp], 16\n" +    "tbz x0, 0, escape_swap\n" + +    // mburakov: Update g_current_thread with the passed next_thread. +    "mrs x0, tpidr_el0\n" +    "add x0, x0, :tprel_hi12:g_current_thread, lsl 12\n" +    "add x0, x0, :tprel_lo12_nc:g_current_thread\n" +    "str x2, [x0]\n" +    "mov x0, x1\n" + +    // mburakov: Preserve callee-saved registers of the previous_thread +    // according to the Aarch64 ABI requirements. There's no need to preserve +    // frame pointer and link registers because they would be saved together +    // with the rest of the previous_thread stack below. +    "mov x1, sp\n" +    "str x1, [x0, 0x08]\n" +    "stp x19, x20, [x0, 0x10]\n" +    "stp x21, x22, [x0, 0x20]\n" +    "stp x23, x24, [x0, 0x30]\n" +    "stp x25, x26, [x0, 0x40]\n" +    "stp x27, x28, [x0, 0x50]\n" +    "stp d8, d9, [x0, 0x60]\n" +    "stp d10, d11, [x0, 0x70]\n" +    "stp d12, d13, [x0, 0x80]\n" +    "stp d14, d15, [x0, 0x90]\n" + +    // mburakov: Preserve stack of the previous_thread. 
Use callee-saved +    // register to preserve next_thread argument during the call. +    "mov x19, x2\n" +    "bl GThreadSaveStack\n" +    "mov x0, x19\n" + +    // mburakov: Recover callee-saved registers of the next_thread according to +    // the Aarch64 ABI requirements. Frame pointer and link register would be +    // recovered together with the rest of the next_thread stack below. +    "ldr x1, [x0, 0x08]\n" +    "mov sp, x1\n" +    "ldp x19, x20, [x0, 0x10]\n" +    "ldp x21, x22, [x0, 0x20]\n" +    "ldp x23, x24, [x0, 0x30]\n" +    "ldp x25, x26, [x0, 0x40]\n" +    "ldp x27, x28, [x0, 0x50]\n" +    "ldp d8, d9, [x0, 0x60]\n" +    "ldp d10, d11, [x0, 0x70]\n" +    "ldp d12, d13, [x0, 0x80]\n" +    "ldp d14, d15, [x0, 0x90]\n" + +    // mburakov: Restore stack of the next_thread. Do not use tailcall because +    // it's still necessary to restore frame pointer and link register. +    "bl GThreadRestoreStack\n" +    "mov w0, 1\n" + +    "escape_swap:\n" +    "ldp fp, lr, [sp], 16\n" +    "ret\n" +#endif +); +  static void GThreadFree(struct GThread* thread) {    free(thread->stack.data);    free(thread);  } -struct GThread* __attribute__((naked)) -GThreadCreate(void (*proc)(void*), void* user) { -  __asm__( -      // mburakov: Save stack tip and pass it to the impl. Tip is measured from -      // outside of spawn function (8 bytes offset). On threads switching, stack -      // would be preserved starting from this point till stack pointer. -      "lea 8(%rsp), %rdx\n" -      "jmp GThreadCreateImpl\n"); -} +__asm__( +    // mburakov: Save stack tip and pass it to the impl. Tip is measured from +    // outside of spawn function. On threads switching, stack would be preserved +    // starting from this tip down to the effective stack pointer. 
+    ".global GThreadCreate\n" +    ".type GThreadCreate,@function\n" +    "GThreadCreate:\n" +#if defined(__x86_64__) +    // For x86-64 function calls return address is pushed to the stack, so it's +    // necessary to add 8 bytes offset to compensate. +    "lea 8(%rsp), %rdx\n" +    "jmp GThreadCreateImpl\n" +#elif defined(__aarch64__) +    // For Aarch64 function calls return address is saved in the link register, +    // so no further adjustments are needed. +    "mov x2, sp\n" +    "b GThreadCreateImpl\n" +#endif +); -bool __attribute__((naked)) GThreadWake(struct GThread* thread) { -  __asm__( -      // mburakov: Argument would be overwritten by upcoming function calls. -      "push %rdi\n" +// GThreadWake(thread): records the current thread in the prev field of the +// passed thread and switches to it through GThreadSwapContexts, whose result +// is returned to the caller. +__asm__( +    ".global GThreadWake\n" +    ".type GThreadWake,@function\n" +    "GThreadWake:\n" +#if defined(__x86_64__) +    // mburakov: Forward received argument as next_thread to swapping function, +    // and load g_current_thread to pass it as a previous_thread. +    "mov %rdi, %rsi\n" +    "mov %fs:g_current_thread@tpoff, %rdi\n" -      // mburakov: Preserve registers of the current thread. -      "mov %fs:g_current_thread@tpoff, %rdi\n" -      "call GThreadSaveRegisters\n" +    // mburakov: Waking the currently active thread has no effect. +    "cmp %rdi, %rsi\n" +    "je skip_wake\n" -      // mburakov: Preserve stack of the current thread. -      "call GThreadSaveStack\n" -      "test %al, %al\n" -      "je escape_wake\n" +    // mburakov: Save current thread to prev field on the activated thread. +    "mov %rdi, (%rsi)\n" -      // mburakov: Recover registers of the new thread. -      "mov (%rsp), %rdi\n" -      "call GThreadRestoreRegisters\n" +    // mburakov: Call swap with the prepared arguments. +    "jmp GThreadSwapContexts\n" +    "skip_wake:\n" +    // NOTE(review): the return register is not set on this path although the +    // function was declared to return bool - TODO decide the intended result +    // for waking the active thread. +    "ret\n" +#elif defined(__aarch64__) +    // mburakov: Forward received argument as next_thread to swapping function, +    // and load g_current_thread to pass it as a previous_thread. 
+    "mov x1, x0\n" +    "mrs x0, tpidr_el0\n" +    "add x0, x0, :tprel_hi12:g_current_thread, lsl 12\n" +    "add x0, x0, :tprel_lo12_nc:g_current_thread\n" +    "ldr x0, [x0]\n" -      // mburakov: Update prev and current thread pointers. -      "mov %fs:g_current_thread@tpoff, %rsi\n" -      "mov %rdi, %fs:g_current_thread@tpoff\n" -      "mov %rsi, (%rdi)\n" +    // mburakov: Waking the currently active thread has no effect. +    "cmp x0, x1\n" +    "b.eq skip_wake\n" -      // mburakov: Recover stack of the new thread. -      "call GThreadRestoreStack\n" -      "mov $1, %eax\n" +    // mburakov: Save current thread to prev field on the activated thread. +    "str x0, [x1]\n" -      "escape_wake:\n" -      "add $8, %rsp\n" -      "ret\n"); -} +    // mburakov: Call swap with the prepared arguments. +    "b GThreadSwapContexts\n" +    "skip_wake:\n" +    // NOTE(review): x0 still holds the current thread pointer on this path, +    // so the returned value is not a defined bool - TODO decide the intended +    // result for waking the active thread. +    "ret\n" +#endif +); -bool __attribute__((naked)) GThreadYield(void) { -  __asm__( -      // mburakov: Yielding on master thread has no effect. -      "mov %fs:g_current_thread@tpoff, %rdi\n" -      "mov (%rdi), %rax\n" -      "test %rax, %rax\n" -      "jne proceed\n" -      "ret\n" -      "proceed:\n" -      "push %rax\n" - -      // mburakov: Preserve registers of the current thread. -      "mov %fs:g_current_thread@tpoff, %rdi\n" -      "call GThreadSaveRegisters\n" - -      // mburakov: Preserve stack of the current thread. -      "call GThreadSaveStack\n" -      "test %al, %al\n" -      "je escape_yield\n" - -      // mburakov: Recover registers of the new thread. -      "mov (%rsp), %rdi\n" -      "call GThreadRestoreRegisters\n" - -      // mburakov: Update current thread pointer. -      "mov %rdi, %fs:g_current_thread@tpoff\n" - -      // mburakov: Recover stack of the new thread. 
-      "call GThreadRestoreStack\n" -      "mov $1, %eax\n" - -      "escape_yield:\n" -      "add $8, %rsp\n" -      "ret\n"); -} +// GThreadYield(): switches from the current thread to the thread stored in +// its prev field through GThreadSwapContexts. Returns false without switching +// when there is no current thread or no prev thread to yield to. +__asm__( +    ".global GThreadYield\n" +    ".type GThreadYield,@function\n" +    "GThreadYield:\n" +#if defined(__x86_64__) +    // mburakov: Load g_current_thread to forward it to swapping function as a +    // previous_thread argument. +    "mov %fs:g_current_thread@tpoff, %rdi\n" + +    // mburakov: Yielding without a current thread has no effect. +    "test %rdi, %rdi\n" +    "jz skip_yield\n" + +    // mburakov: Prev field of the current thread would be forwarded as a +    // next_thread argument to swapping function. +    "mov (%rdi), %rsi\n" + +    // mburakov: Yielding on the main thread (that does not have prev field +    // assigned) has no effect. Otherwise swap with the prepared arguments. +    "test %rsi, %rsi\n" +    "jz skip_yield\n" +    "jnz GThreadSwapContexts\n" +    "skip_yield:\n" +    // Nothing was switched: report false explicitly, the return register is +    // otherwise left undefined on this path. +    "xor %eax, %eax\n" +    "ret\n" +#elif defined(__aarch64__) +    // mburakov: Load g_current_thread to forward it to swapping function as a +    // previous_thread argument. +    "mrs x0, tpidr_el0\n" +    "add x0, x0, :tprel_hi12:g_current_thread, lsl 12\n" +    "add x0, x0, :tprel_lo12_nc:g_current_thread\n" +    "ldr x0, [x0]\n" + +    // mburakov: Yielding without a current thread has no effect. +    "cbz x0, skip_yield\n" + +    // mburakov: Prev field of the current thread would be forwarded as a +    // next_thread argument to swapping function. +    "ldr x1, [x0]\n" + +    // mburakov: Yielding on the main thread (that does not have prev field +    // assigned) has no effect. Otherwise swap with the prepared arguments. +    "cbnz x1, GThreadSwapContexts\n" +    "skip_yield:\n" +    // Nothing was switched: report false explicitly, x0 may still hold the +    // current thread pointer on this path. +    "mov w0, 0\n" +    "ret\n" +#endif +);  void GThreadDestroy(struct GThread* thread) {    if (thread == &g_master_thread || thread == g_current_thread) { | 
