1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
|
/*
* Copyright (C) 2023 Mikhail Burakov. This file is part of toolbox.
*
* toolbox is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* toolbox is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with toolbox. If not, see <https://www.gnu.org/licenses/>.
*/
#include "gthread.h"
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#if !defined(__x86_64__) && !defined(__aarch64__)
#error Unsupported architecture!
#endif
extern void GThreadTrampoline(void);
extern bool GThreadSwapContexts(struct GThread* previous_thread,
struct GThread* next_thread);
// Control block of a single green thread. CAUTION: the layout of the
// registers substruct is ABI, not style — the inline assembly below addresses
// its fields by hardcoded byte offsets (0x08, 0x10, ... annotated on each
// field), and GThreadWake/GThreadYield access the prev field as offset 0.
// Do not reorder or resize any member without updating every asm block.
struct GThread {
  // Thread that woke this one; GThreadWake writes it, GThreadYield follows it
  // back. NULL on the master thread (zero-initialized TLS), which is how
  // GThreadYield detects "nothing to yield to".
  struct GThread* prev;
  // Callee-saved register context captured by GThreadSwapContexts.
  struct {
    uint64_t sp;     // 0x08
#if defined(__x86_64__)
    // mburakov: Frame pointer is saved on the stack.
    uint64_t rbx;    // 0x10
    uint64_t r[4];   // 0x18 — r12..r15
#elif defined(__aarch64__)
    // mburakov: Frame pointer and link register are saved on the stack.
    uint64_t x[10];  // 0x10 — x19..x28
    uint64_t d[8];   // 0x60 — d8..d15
#endif
  } registers;
  // Side storage for the thread's stack contents while it is suspended.
  struct {
    uint64_t tip;    // Highest stack address belonging to this thread.
    size_t alloc;    // Capacity of data in bytes.
    void* data;      // Heap buffer holding [registers.sp, tip) when parked.
  } stack;
};
// Per-OS-thread state. The master thread represents the native stack the
// green threads were spawned from; being TLS it is zero-initialized, so its
// prev field is NULL (GThreadYield relies on that to detect it).
static _Thread_local struct GThread g_master_thread;
// Currently running green thread on this OS thread, or NULL before the first
// GThreadCreate. Read and written directly from assembly through TLS
// relocations (%fs:g_current_thread@tpoff / :tprel_hi12:/:tprel_lo12_nc:),
// hence the used attribute to keep it from being discarded.
static _Thread_local struct GThread* __attribute__((used)) g_current_thread;
// Number of live non-master green threads on this OS thread.
static _Thread_local size_t g_threads_count;
// Allocates a thread control block together with a stack storage buffer of
// stack_alloc bytes. Returns NULL on allocation failure without leaking
// either allocation. The caller owns the result (released by GThreadFree).
static struct GThread* GThreadAlloc(size_t stack_alloc) {
  struct GThread* thread = malloc(sizeof(struct GThread));
  if (!thread) {
    return NULL;
  }
  // NOTE(review): zero prev and tip defensively. GThreadWake assigns prev
  // before the thread first runs and GThreadCreateImpl sets tip, but explicit
  // initialization keeps the block from ever carrying indeterminate values
  // should those invariants change.
  thread->prev = NULL;
  thread->stack.tip = 0;
  thread->stack.data = malloc(stack_alloc);
  if (!thread->stack.data) {
    free(thread);
    return NULL;
  }
  // Record the capacity only once the buffer is known to exist.
  thread->stack.alloc = stack_alloc;
  return thread;
}
// Entry shim for freshly created threads. GThreadCreateImpl seeds a new
// thread's stack with {NULL, GThreadTrampoline, user, proc}; the context
// switch epilogue consumes the first two words as frame pointer and return
// address, so control arrives here with {user, proc} on top of the stack.
__asm__(
    // TODO(mburakov): Compiling for Aarch64 yields invalid pointer to
    // GThreadTrampoline function in GThreadCreateImpl call below unless the
    // former is marked with dot global... Why???
    ".global GThreadTrampoline\n"
    ".type GThreadTrampoline,@function\n"
    "GThreadTrampoline:\n"
#if defined(__x86_64__)
    // Pop the user argument into rdi (first argument register) and the thread
    // procedure into rax.
    "pop %rdi\n"
    "pop %rax\n"
    // Push two zero words: a NULL return address terminates backtraces if the
    // procedure ever returns, and the extra word restores the 16-byte stack
    // alignment the ABI expects at function entry.
    "push $0\n"
    "push $0\n"
    "jmp *%rax\n"
#elif defined(__aarch64__)
    // Load user into x0 (first argument register) and the procedure into x1,
    // then clear the link register so backtraces stop here.
    "ldp x0, x1, [sp], #16\n"
    "mov lr, 0\n"
    "br x1\n"
#endif
);
// Backend of GThreadCreate (which captures the caller's stack pointer in
// assembly and forwards it as stack_tip). Builds a suspended thread that will
// start executing proc(user) through GThreadTrampoline when first woken.
// Returns NULL on allocation failure. Called only from the GThreadCreate asm
// stub below, hence the used attribute.
static struct GThread* __attribute__((used))
GThreadCreateImpl(void (*proc)(void*), void* user, uint64_t stack_tip) {
  // Seed image of the new thread's stack. Layout is consumed in two steps:
  // the GThreadSwapContexts epilogue eats {NULL, GThreadTrampoline} as frame
  // pointer + return address, then the trampoline pops {user, proc}. Do not
  // reorder.
  void* stack_data[] = {NULL, (void*)GThreadTrampoline, user, (void*)proc};
  struct GThread* thread = GThreadAlloc(sizeof(stack_data));
  if (!thread) {
    return NULL;
  }
  if (!g_threads_count++) {
    // First green thread on this OS thread: the caller's context becomes the
    // master thread.
    g_current_thread = &g_master_thread;
  }
  if (stack_tip > g_master_thread.stack.tip) {
    // Track the deepest creation point so the master thread's stack region is
    // saved from high enough when it is swapped out.
    g_master_thread.stack.tip = stack_tip;
  }
  // The new thread shares the creator's stack region; initially only the
  // seeded words below the tip belong to it.
  thread->stack.tip = stack_tip;
  thread->registers.sp = stack_tip - sizeof(stack_data);
  memcpy(thread->stack.data, stack_data, sizeof(stack_data));
  return thread;
}
// Ensures the side buffer of thread can hold the stack region between
// stack_tip and the thread's recorded tip. Called from GThreadSwapContexts
// before the outgoing thread's stack is saved. Returns false (leaving an
// empty buffer) when growing the storage fails.
static bool __attribute__((used))
GThreadVerifyStack(struct GThread* thread, uint64_t stack_tip) {
  // Stack tip at or below the recorded tip: nothing above needs saving.
  if (stack_tip >= thread->stack.tip) {
    return true;
  }
  size_t required = thread->stack.tip - stack_tip;
  // Current buffer already large enough.
  if (required <= thread->stack.alloc) {
    return true;
  }
  // Grow the buffer. The old contents are disposable: the live stack is still
  // in place and will be re-copied by GThreadSaveStack.
  free(thread->stack.data);
  void* grown = malloc(required);
  thread->stack.data = grown;
  thread->stack.alloc = grown ? required : 0;
  return grown != NULL;
}
// Copies the live stack region [registers.sp, stack.tip) of thread into its
// side buffer. Called from GThreadSwapContexts for the outgoing thread, after
// GThreadVerifyStack has guaranteed the buffer capacity.
static void __attribute__((used)) GThreadSaveStack(struct GThread* thread) {
  uint64_t sp = thread->registers.sp;
  if (sp < thread->stack.tip) {
    size_t used_bytes = (size_t)(thread->stack.tip - sp);
    memcpy(thread->stack.data, (void*)sp, used_bytes);
  }
}
// Copies the saved stack contents of thread from its side buffer back to the
// real stack region [registers.sp, stack.tip). Called from
// GThreadSwapContexts for the incoming thread, after its sp was restored.
static void __attribute__((used)) GThreadRestoreStack(struct GThread* thread) {
  uint64_t sp = thread->registers.sp;
  if (sp < thread->stack.tip) {
    size_t used_bytes = (size_t)(thread->stack.tip - sp);
    memcpy((void*)sp, thread->stack.data, used_bytes);
  }
}
// GThreadSwapContexts(previous_thread, next_thread): saves the callee-saved
// register state and stack contents of previous_thread, then restores those
// of next_thread and resumes it. Returns false (in al/w0) when growing the
// stack storage of previous_thread fails, true otherwise.
//
// FIX(review): the AArch64 path previously stored "stp d9, d11" / loaded
// "ldp d9, d11" at offset 0x70 — d10 was never preserved and d9 was saved
// twice, corrupting callee-saved FP state across switches. AAPCS64 requires
// d8-d15 to be callee-saved; the pairs are now d8/d9, d10/d11, d12/d13,
// d14/d15.
__asm__(
    "GThreadSwapContexts:\n"
#if defined(__x86_64__)
    // mburakov: Preserve frame pointer. Not really needed, but the stack has to
    // be aligned on 16 bytes for the magical mmx leprechaun anyway.
    "push %rbp\n"
    "mov %rsp, %rbp\n"
    // mburakov: Preserve function arguments, they would be overwritten by the
    // upcoming thread storage size verification function call.
    "push %rdi\n"
    "push %rsi\n"
    "mov %rbp, %rsi\n"
    "call GThreadVerifyStack\n"
    "pop %rsi\n"
    "pop %rdi\n"
    "test %al, %al\n"
    "je escape_swap\n"
    // mburakov: Update g_current_thread with the passed next_thread.
    "mov %rsi, %fs:g_current_thread@tpoff\n"
    // mburakov: Preserve callee-saved registers of the previous_thread
    // according to the x86-64 ABI requirements. There's no need to preserve
    // frame pointer because it would be saved together with the rest of the
    // previous_thread stack below.
    "mov %rsp, 0x08(%rdi)\n"
    "mov %rbx, 0x10(%rdi)\n"
    "mov %r12, 0x18(%rdi)\n"
    "mov %r13, 0x20(%rdi)\n"
    "mov %r14, 0x28(%rdi)\n"
    "mov %r15, 0x30(%rdi)\n"
    // mburakov: Preserve stack of the previous_thread. Use callee-saved
    // register to preserve next_thread argument during the call.
    "mov %rsi, %rbx\n"
    "call GThreadSaveStack\n"
    "mov %rbx, %rdi\n"
    // mburakov: Recover callee-saved registers of the next_thread according to
    // the x86-64 ABI requirements. Frame pointer would be recovered together
    // with the rest of the next_thread stack below.
    "mov 0x08(%rdi), %rsp\n"
    "mov 0x10(%rdi), %rbx\n"
    "mov 0x18(%rdi), %r12\n"
    "mov 0x20(%rdi), %r13\n"
    "mov 0x28(%rdi), %r14\n"
    "mov 0x30(%rdi), %r15\n"
    // mburakov: Restore stack of the next_thread. Do not use tailcall because
    // it's still necessary to restore frame pointer.
    "call GThreadRestoreStack\n"
    "mov $1, %al\n"
    "escape_swap:\n"
    "pop %rbp\n"
    "ret\n"
#elif defined(__aarch64__)
    // mburakov: Preserve frame pointer and link register, they would be
    // overwritten by the upcoming function calls.
    "stp fp, lr, [sp, -16]!\n"
    "mov fp, sp\n"
    // mburakov: Preserve function arguments, they would be overwritten by the
    // upcoming thread storage size verification function call. Stack tip
    // argument is adjusted on 16 bytes to compensate for saved function args.
    "stp x0, x1, [sp, -16]!\n"
    "add x1, sp, 16\n"
    "bl GThreadVerifyStack\n"
    "ldp x1, x2, [sp], 16\n"
    "tbz x0, 0, escape_swap\n"
    // mburakov: Update g_current_thread with the passed next_thread.
    "mrs x0, tpidr_el0\n"
    "add x0, x0, :tprel_hi12:g_current_thread, lsl 12\n"
    "add x0, x0, :tprel_lo12_nc:g_current_thread\n"
    "str x2, [x0]\n"
    "mov x0, x1\n"
    // mburakov: Preserve callee-saved registers of the previous_thread
    // according to the Aarch64 ABI requirements. There's no need to preserve
    // frame pointer and link registers because they would be saved together
    // with the rest of the previous_thread stack below.
    "mov x1, sp\n"
    "str x1, [x0, 0x08]\n"
    "stp x19, x20, [x0, 0x10]\n"
    "stp x21, x22, [x0, 0x20]\n"
    "stp x23, x24, [x0, 0x30]\n"
    "stp x25, x26, [x0, 0x40]\n"
    "stp x27, x28, [x0, 0x50]\n"
    "stp d8, d9, [x0, 0x60]\n"
    "stp d10, d11, [x0, 0x70]\n"
    "stp d12, d13, [x0, 0x80]\n"
    "stp d14, d15, [x0, 0x90]\n"
    // mburakov: Preserve stack of the previous_thread. Use callee-saved
    // register to preserve next_thread argument during the call.
    "mov x19, x2\n"
    "bl GThreadSaveStack\n"
    "mov x0, x19\n"
    // mburakov: Recover callee-saved registers of the next_thread according to
    // the Aarch64 ABI requirements. Frame pointer and link register would be
    // recovered together with the rest of the next_thread stack below.
    "ldr x1, [x0, 0x08]\n"
    "mov sp, x1\n"
    "ldp x19, x20, [x0, 0x10]\n"
    "ldp x21, x22, [x0, 0x20]\n"
    "ldp x23, x24, [x0, 0x30]\n"
    "ldp x25, x26, [x0, 0x40]\n"
    "ldp x27, x28, [x0, 0x50]\n"
    "ldp d8, d9, [x0, 0x60]\n"
    "ldp d10, d11, [x0, 0x70]\n"
    "ldp d12, d13, [x0, 0x80]\n"
    "ldp d14, d15, [x0, 0x90]\n"
    // mburakov: Restore stack of the next_thread. Do not use tailcall because
    // it's still necessary to restore frame pointer and link register.
    "bl GThreadRestoreStack\n"
    "mov w0, 1\n"
    "escape_swap:\n"
    "ldp fp, lr, [sp], 16\n"
    "ret\n"
#endif
);
// Releases a thread control block obtained from GThreadAlloc together with
// its stack storage buffer.
static void GThreadFree(struct GThread* thread) {
  // Grab the stack buffer pointer first so the control block can be released
  // without any subsequent access to it.
  void* stack_data = thread->stack.data;
  free(thread);
  free(stack_data);
}
// Public entry point: captures the caller's stack tip and tail-calls
// GThreadCreateImpl(proc, user, stack_tip). The proc/user arguments pass
// through untouched in the first two argument registers.
__asm__(
    // mburakov: Save stack tip and pass it to the impl. Tip is measured from
    // outside of spawn function. On threads switching, stack would be preserved
    // starting from this tip down to the effective stack pointer.
    ".global GThreadCreate\n"
    ".type GThreadCreate,@function\n"
    "GThreadCreate:\n"
#if defined(__x86_64__)
    // For x86-64 function calls return address is pushed to the stack, so it's
    // necessary to add 8 bytes offset to compensate.
    "lea 8(%rsp), %rdx\n"
    "jmp GThreadCreateImpl\n"
#elif defined(__aarch64__)
    // For Aarch64 function calls return address is saved in the link register,
    // so no further adjustments are needed.
    "mov x2, sp\n"
    "b GThreadCreateImpl\n"
#endif
);
// Public entry point: suspends the caller and switches to the given thread,
// recording the caller in the target's prev field so a later GThreadYield can
// switch back. Tail-calls GThreadSwapContexts, so its bool result (false on
// stack storage growth failure) propagates to the caller of GThreadWake.
__asm__(
    ".global GThreadWake\n"
    ".type GThreadWake,@function\n"
    "GThreadWake:\n"
#if defined(__x86_64__)
    // mburakov: Forward received argument as next_thread to swapping function,
    // and load g_current_thread to pass it as a previous_thread.
    "mov %rdi, %rsi\n"
    "mov %fs:g_current_thread@tpoff, %rdi\n"
    // mburakov: Waking the currently active thread has no effect.
    "cmp %rdi, %rsi\n"
    "je skip_wake\n"
    // mburakov: Save current thread to prev field on the activated thread.
    // (prev is the first struct member, hence the zero-offset store.)
    "mov %rdi, (%rsi)\n"
    // mburakov: Call swap with the prepared arguments.
    "jmp GThreadSwapContexts\n"
    "skip_wake:\n"
    "ret\n"
#elif defined(__aarch64__)
    // mburakov: Forward received argument as next_thread to swapping function,
    // and load g_current_thread to pass it as a previous_thread.
    "mov x1, x0\n"
    "mrs x0, tpidr_el0\n"
    "add x0, x0, :tprel_hi12:g_current_thread, lsl 12\n"
    "add x0, x0, :tprel_lo12_nc:g_current_thread\n"
    "ldr x0, [x0]\n"
    // mburakov: Waking the currently active thread has no effect.
    "cmp x0, x1\n"
    "b.eq skip_wake\n"
    // mburakov: Save current thread to prev field on the activated thread.
    // (prev is the first struct member, hence the zero-offset store.)
    "str x0, [x1]\n"
    // mburakov: Call swap with the prepared arguments.
    "b GThreadSwapContexts\n"
    "skip_wake:\n"
    "ret\n"
#endif
);
// Public entry point: suspends the current thread and resumes the thread that
// woke it (stored in its prev field). No-op when no green thread was ever
// created on this OS thread, or when the current thread is the master thread
// (whose prev is NULL).
//
// FIX(review): the x86-64 path ended the taken branch with "jnz" right after
// "jz skip_yield"; ZF is already known to be clear there, so the conditional
// jump was always taken. Replaced with an unconditional jmp for clarity.
__asm__(
    ".global GThreadYield\n"
    ".type GThreadYield,@function\n"
    "GThreadYield:\n"
#if defined(__x86_64__)
    // mburakov: Load g_current_thread to forward it to swapping function as a
    // previous_thread argument.
    "mov %fs:g_current_thread@tpoff, %rdi\n"
    // mburakov: Yielding without a current thread has no effect.
    "test %rdi, %rdi\n"
    "jz skip_yield\n"
    // mburakov: Prev field of the current thread would be forwarded as a
    // next_thread argument to swapping function.
    "mov (%rdi), %rsi\n"
    // mburakov: Yielding on the main thread (that does not have prev field
    // assigned) has no effect. Otherwise swap with the prepared arguments.
    "test %rsi, %rsi\n"
    "jz skip_yield\n"
    "jmp GThreadSwapContexts\n"
    "skip_yield:\n"
    "ret\n"
#elif defined(__aarch64__)
    // mburakov: Load g_current_thread to forward it to swapping function as a
    // previous_thread argument.
    "mrs x0, tpidr_el0\n"
    "add x0, x0, :tprel_hi12:g_current_thread, lsl 12\n"
    "add x0, x0, :tprel_lo12_nc:g_current_thread\n"
    "ldr x0, [x0]\n"
    // mburakov: Yielding without a current thread has no effect.
    "cbz x0, skip_yield\n"
    // mburakov: Prev field of the current thread would be forwarded as a
    // next_thread argument to swapping function.
    "ldr x1, [x0]\n"
    // mburakov: Yielding on the main thread (that does not have prev field
    // assigned) has no effect. Otherwise swap with the prepared arguments.
    "cbnz x1, GThreadSwapContexts\n"
    "skip_yield:\n"
    "ret\n"
#endif
);
// Destroys a suspended thread previously created with GThreadCreate. Aborts
// when asked to destroy the master thread or the currently running thread.
// When the last non-master thread goes away, the master thread's saved-stack
// buffer is released as well.
void GThreadDestroy(struct GThread* thread) {
  if (thread == &g_master_thread || thread == g_current_thread) {
    // mburakov: It is prohibited to cancel either master or current thread.
    abort();
  }
  GThreadFree(thread);
  if (!--g_threads_count) {
    // mburakov: No more non-master threads. Cleanup master thread stack data.
    g_master_thread.stack.tip = 0;
    free(g_master_thread.stack.data);
    // FIX(review): reset the pointer as well. GThreadVerifyStack frees
    // stack.data before growing it, so leaving a stale pointer here would
    // cause a double free if threads are ever created again on this OS
    // thread after a full teardown.
    g_master_thread.stack.data = NULL;
    g_master_thread.stack.alloc = 0;
  }
}
|