00001 #ifndef FRAY_H
00002 #define FRAY_H
00003 #define COMPILED_WITH_FRAME_POINTERS
00004 #define MAX_FRAY_MEMBERS 8
00005
00006 #define STACK_SIZE_64 4096
00007 #include <stdint.h>
00008
00009
00010 #if defined(DEBUG) || defined(FRAY_STATS)
00011 #include <stdio.h>
00012 #endif
00013 #define blue_doo(x) #x ": "
00014 #define voo_doo(x) blue_doo(x)
00015 #define at_line() voo_doo(__LINE__)
00016
00017 #ifdef __cplusplus
00018 #define EXTERN_C extern "C"
00019 #else
00020 #define EXTERN_C
00021 #endif
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 #ifdef __cplusplus
00039 class fray_block {
00040 public:
00041 volatile int sched;
00042 volatile int alive;
00043 uint64_t stacks[STACK_SIZE_64*MAX_FRAY_MEMBERS];
00044 uint64_t * sptrs[MAX_FRAY_MEMBERS+1];
00045 volatile int s_i;
00046 int int64_per_stack;
00047 int err;
00048 void * data;
00049 #ifdef FRAY_STATS
00050 volatile int64_t num_yields;
00051 volatile int64_t hist[MAX_FRAY_MEMBERS];
00052 #endif
00053 fray_block() {
00054
00055 sptrs[0] = &(stacks[STACK_SIZE_64*MAX_FRAY_MEMBERS - 1]);
00056 sched = alive = 0;
00057 s_i = 0;
00058 err = 0;
00059 #ifdef FRAY_STATS
00060 num_yields = 0;
00061 for (int i = 0; i < MAX_FRAY_MEMBERS; i++) hist[i] = 0;
00062 #endif
00063 }
00064 };
00065 #else
00066 typedef struct {
00067 uint64_t stacks[STACK_SIZE_64*MAX_FRAY_MEMBERS];
00068 uint64_t * sptrs[MAX_FRAY_MEMBERS+1];
00069 volatile int sched;
00070 volatile int alive;
00071 volatile int s_i;
00072 int int64_per_stack;
00073 int err;
00074 void * data;
00075 } fray_block;
00076 #endif
00077
00078 extern fray_block * global_fb;
00079
00080 EXTERN_C void swap_frays(uint64_t **from, uint64_t **to);
00081
00082 static void fray_yield(fray_block *fb) {
00083 #ifdef DEBUG
00084 if (STACK_SIZE_64 >= 1024) {
00085 int i; for(i = 0; i < MAX_FRAY_MEMBERS; i++) fprintf(stderr, "%p ", fb->sptrs[i]); fprintf(stderr, "\n");
00086 fprintf(stderr, "yielding from %d... %p to %p\n", fb->s_i, fb->sptrs[fb->s_i], fb->sptrs[(fb->s_i+1 < fb->sched) ? (fb->s_i+1) : 0]);
00087 } else {
00088 fprintf(stderr, "fray.h fray_yield: fray stack too small to report full diagnostics\n");
00089 }
00090 #endif
00091 int now = fb->s_i;
00092 int next = (now + 1 < fb->sched) ? now + 1 : 0;
00093 fb->s_i = next;
00094 #ifdef FRAY_STATS
00095 fb->num_yields++;
00096 fb->hist[fb->sched-1]++;
00097 #endif
00098 swap_frays(&fb->sptrs[now], &fb->sptrs[next]);
00099 }
00100 static void fray_yield(fray_block *fb) __attribute__((always_inline));
00101
00102 typedef void (*voidfn)(fray_block *, int64_t);
00103
00104 static uint64_t fray_fetch_and_yield(fray_block *fb, const void *p) __attribute__((unused, always_inline));
00105 static uint64_t fray_fetch_and_yield(fray_block *fb, const void *p) {
00106 __builtin_prefetch(p,0,0);
00107 fray_yield(fb);
00108 return *((uint64_t *)p);
00109 }
00110 static void fray_prefetch_and_yield(fray_block *fb, const void *p) __attribute__((unused, always_inline));
00111 static void fray_prefetch_and_yield(fray_block *fb, const void *p) {
00112 __builtin_prefetch(p,0,0);
00113 fray_yield(fb);
00114 }
00115 static void fray_prefetch_L2_and_yield(fray_block *fb, const void *p) __attribute__((unused, always_inline));
00116 static void fray_prefetch_L2_and_yield(fray_block *fb, const void *p) {
00117 __builtin_prefetch(p,0,3);
00118 fray_yield(fb);
00119 }
00120 static void fray_prefetch_rw_and_yield(fray_block *fb, const void *p) __attribute__((unused, always_inline));
00121 static void fray_prefetch_rw_and_yield(fray_block *fb, const void *p) {
00122 __builtin_prefetch(p,1,1);
00123 fray_yield(fb);
00124 }
00125 static void stack_init(uint64_t *sp, voidfn f, uint64_t *arg0, uint64_t *arg1);
00126 static void start_fray(void *rdi, void *rsi, void *rdx, void *rcx, void *r8, void *r9,
00127 voidfn, void *arg0, void *arg1);
00128
/*
 * Word offsets into the initial stack image that stack_init() builds for a
 * new fray.  fray() reserves _REGOFF_N words at the top of each member stack
 * and points the saved stack pointer at offset 0.
 *
 * The first six slots are named after x86-64 registers; stack_init() fills
 * them with the placeholder values 1..6.  (Presumably swap_frays() restores
 * those registers from these slots -- verify against its implementation.)
 */
typedef enum regoff_t {
    _R15      = 0,
    _R14      = 1,
    _R13      = 2,
    _R12      = 3,
    _RBX      = 4,
    _RBP      = 5,
    _RPC      = 6,   /* address of start_fray */
    _OLD_BP   = 7,   /* %rbp value captured inside stack_init() */
    _ARG7     = 8,   /* the fray function (start_fray's parameter f) */
    _ARG8     = 9,   /* start_fray's arg0 */
    _ARG9     = 10,  /* start_fray's arg1 */
    _REGOFF_N = 11   /* number of slots in the image */
} regoff_t;
00143
00144 static void start_fray(void *rdi, void *rsi, void *rdx, void *rcx, void *r8, void *r9,
00145 voidfn f, void *arg0, void *arg1)
00146 {
00147 (*f)((fray_block *) arg0, (int64_t) arg1);
00148 }
00149
00150 static void stack_init(uint64_t *sp, voidfn f,
00151
00152 uint64_t *arg0,
00153 uint64_t *arg1) {
00154
00155 # define CAST(x) (uint64_t) x
00156 sp[_R15] = CAST(1);
00157 sp[_R14] = CAST(2);
00158 sp[_R13] = CAST(3);
00159 sp[_R12] = CAST(4);
00160 sp[_RBX] = CAST(5);
00161 sp[_RBP] = CAST(6);
00162 sp[_RPC] = (uint64_t) start_fray;
00163 uint64_t old_bp;
00164 asm("mov %%rbp, %0" : "=r" (old_bp));
00165 sp[_OLD_BP] = old_bp;
00166
00167 sp[_ARG7] = (uint64_t) f;
00168 sp[_ARG8] = (int64_t) arg0;
00169 sp[_ARG9] = (uint64_t) arg1;
00170 }
00171
00172 static int fray(fray_block *fb, voidfn fn, void * data, int sched) __attribute__((unused));
00173 static int fray(fray_block *fb, voidfn fn, void * data, int sched) {
00174 #ifdef DEBUG
00175 fprintf(stderr, "fray: %p %p %p %d \n", fb, fn, data, sched);
00176 #endif
00177 fb->int64_per_stack = STACK_SIZE_64;
00178 fb-> alive = fb->sched = (sched < MAX_FRAY_MEMBERS) ? sched : MAX_FRAY_MEMBERS;
00179 fb->data = data;
00180 fb->s_i = 0;
00181 fb->err = 0;
00182 int64_t i;
00183 for (i = 0; i < fb->alive; i++) {
00184 fb->sptrs[i] = &(fb->stacks[fb->int64_per_stack*(i+1) - _REGOFF_N]);
00185 stack_init(fb->sptrs[i], (voidfn)(fn), (uint64_t*)(fb),(uint64_t*)(i));
00186 }
00187 swap_frays(&fb->sptrs[MAX_FRAY_MEMBERS], &fb->sptrs[0]);
00188 return fb->err;
00189 }
00190
00191 static void fray_exit(fray_block *fb, int err) __attribute__((unused));
00192 static void fray_exit(fray_block *fb, int err) {
00193 fb->err = err;
00194 swap_frays(&fb->sptrs[fb->s_i], &fb->sptrs[MAX_FRAY_MEMBERS]);
00195 }
00196
00197 static void fray_return(fray_block *fb) __attribute__((unused));
00198 static void fray_return(fray_block *fb) {
00199 #ifdef DEBUG
00200 fprintf(stderr, "fray_return: fb->sptrs[MAX_FRAY_MEMBERS] = %p\n", fb->sptrs[MAX_FRAY_MEMBERS]);
00201 fprintf(stderr, "fray_return: %p %d, %d remain\n", fb, fb->s_i, fb->sched);
00202 #endif
00203 int n = --fb->alive;
00204 if (n) {
00205 int sched_last = --fb->sched;
00206 if (sched_last) {
00207 int s = fb->s_i;
00208 fb->sptrs[s] = fb->sptrs[sched_last];
00209 fb->sptrs[sched_last] = fb->sptrs[n];
00210 s = (s + 1 < sched_last) ? s + 1 : 0;
00211 fb->s_i = s;
00212 swap_frays(&fb->sptrs[fb->alive], &fb->sptrs[s]);
00213 }
00214 else {
00215 fb->sched = n;
00216 fb->sptrs[0] = fb->sptrs[n];
00217 swap_frays(&fb->sptrs[n], &fb->sptrs[0]);
00218 }
00219 }
00220 else fray_exit(fb, fb->err);
00221 }
00222
00223 static int fray_partial(fray_block *fb) __attribute__((unused));
00224 static int fray_partial(fray_block *fb) {
00225 return (fb->sched < MAX_FRAY_MEMBERS ? 1 : 0);
00226 }
00227
00228
00229
00230 static void fray_barrier(fray_block *fb) __attribute__((unused));
00231 #include <stdio.h>
00232 static void fray_barrier(fray_block *fb) {
00233 #ifdef DEBUG
00234 fprintf(stderr, "fray_barrier: barrier %d s_i %d sched %d \n", fb->barrier, fb->s_i, fb->sched);
00235 #endif
00236 int n = --fb->sched;
00237 if (n) {
00238 int s = fb->s_i;
00239 uint64_t * tmp = fb->sptrs[s];
00240 fb->sptrs[s] = fb->sptrs[n];
00241 fb->sptrs[n] = tmp;
00242 s = (s + 1 < fb->sched) ? s + 1 : 0;
00243 fb->s_i = s;
00244 swap_frays(&fb->sptrs[n], &fb->sptrs[s]);
00245 }
00246 else {
00247 fb->sched = fb->alive;
00248
00249 }
00250 }
#ifdef FRAY_STATS
static void fray_stats(fray_block *fb) __attribute__((unused));

/*
 * Print the FRAY_STATS counters of fb to stderr on one line: the total
 * number of yields, followed by the MAX_FRAY_MEMBERS histogram buckets
 * (bucket k counts yields made while sched == k+1).
 *
 * The counters are int64_t; they are cast to long long and printed with
 * %lld so the format matches the argument type on every platform.
 */
static void fray_stats(fray_block *fb) {
    fprintf(stderr, "fray stats: %lld yields; sched ", (long long) fb->num_yields);
    for (int i = 0; i < MAX_FRAY_MEMBERS; i++) {
        fprintf(stderr, "%lld ", (long long) fb->hist[i]);
    }
    fprintf(stderr, "\n");
}
#endif
00261 #endif // FRAY_H