6 #define CMDLINE_BUFFER_SIZE 256
\r
7 CMDLINE_BUFFER(CMDLINE_BUFFER_SIZE);
\r
9 #define POLLUTE_CACHE_SIZE (512 * 1024)
\r
11 #ifdef CONFIG_UART_OXPCIE952
\r
12 #define UART_BASE 0xe010
\r
14 #define UART_BASE 0x3f8
\r
16 #define UART_LSR 0x5
\r
17 #define UART_LSR_THRE 0x20
\r
18 #define UART_IDLE_LOOPS 100
\r
20 //uintstd in jailhouse way
\r
21 #define uint32_t u32
\r
22 #define uint64_t u64
\r
24 #define perror(FUNC) (printk(#FUNC))
\r
26 #define printf printk
\r
28 #define exit(SIG) { printk("exit with %d\n", SIG); asm volatile("hlt");}
\r
30 //-----Time-and-and-randomization-overrides-------------------------------
\r
31 static inline unsigned long time(unsigned long * seconds)
\r
33 u64 time_sec = tsc_read();
\r
34 if (seconds != NULL){
\r
35 (*seconds) = time_sec;
\r
/*
 * Tables of Maximally-Equidistributed Combined Lfsr Generators (1998)
 * by Pierre L'Ecuyer
 * taken from: http://stackoverflow.com/questions/1167253/implementation-of-rand
 */
static unsigned int z1 = 12345, z2 = 12345, z3 = 12345, z4 = 12345;

/* One step of the combined Tausworthe generator (period ~2^113). */
static unsigned int lfsr113_Bits (void)
{
	unsigned int b;

	b  = ((z1 << 6) ^ z1) >> 13;
	z1 = ((z1 & 4294967294U) << 18) ^ b;
	b  = ((z2 << 2) ^ z2) >> 27;
	z2 = ((z2 & 4294967288U) << 2) ^ b;
	b  = ((z3 << 13) ^ z3) >> 21;
	z3 = ((z3 & 4294967280U) << 7) ^ b;
	b  = ((z4 << 3) ^ z4) >> 12;
	z4 = ((z4 & 4294967168U) << 13) ^ b;
	return (z1 ^ z2 ^ z3 ^ z4);
}

/* rand() override: delegates to the LFSR113 generator above. */
static unsigned int rand(void)
{
	return lfsr113_Bits();
}

/* srand() override: seed all four LFSR state words. */
static void srand(unsigned int seed)
{
	/*
	 * Fix: L'Ecuyer's generator requires z1 >= 2, z2 >= 8, z3 >= 16,
	 * z4 >= 128. Seeding below these minima (e.g. srand(0)) degenerates
	 * the stream — an all-zero state makes rand() return 0 forever.
	 * Clamp each word to its minimum; seeds >= 128 behave as before.
	 */
	z1 = (seed < 2) ? 2 : seed;
	z2 = (seed < 8) ? 8 : seed;
	z3 = (seed < 16) ? 16 : seed;
	z4 = (seed < 128) ? 128 : seed;
}
\r
71 //-END:-Time-and-and-randomization-overrides-------------------------------
\r
73 //-----Assertion-overrides-------------------------------------------------
\r
/* Minimal assert(): on failure, report via printk; does not halt. */
# define assert(EX) (void)((EX) || (__assert (#EX, __FILE__, __LINE__),0))

/* Report a failed assertion (expression text, file, line). */
static inline void __assert(const char *msg, const char *file, int line)
{
	/* Fix: 'line' is an int — original used %s, a printf-format/argument
	 * mismatch (undefined behavior); %d is correct. */
	printk("Assertion %s in %s:%d failed.\n", msg, file, line);
}
\r
85 //-END:-Assertion-overrides-------------------------------------------------
\r
87 //------Threads-overrides---------------------------------------------------
\r
94 typedef u32 pthread_t;
\r
95 typedef u32 pthread_barrier_t;
\r
96 typedef u32 pthread_attr_t;
\r
98 static inline void CPU_ZERO(cpu_set_t *set) { set->bits = 0; set->count = 0; }
\r
99 static inline void CPU_SET(int cpu, cpu_set_t *set) { set->bits |= (1 << cpu); set->count++; }
\r
100 static inline void CPU_CLR(int cpu, cpu_set_t *set) { set->bits ^= (1 << cpu); set->count--; }
\r
101 static inline int CPU_ISSET(int cpu, cpu_set_t *set) { return (set->bits & (1 << cpu)); }
\r
102 static inline int CPU_COUNT(cpu_set_t *set) { return (int) set->count; }
\r
104 static inline int pthread_setaffinity_np(pthread_t id, unsigned long affty, cpu_set_t * set) {return 0;}
\r
106 static inline void pthread_barrier_init(pthread_barrier_t * bar, pthread_attr_t * attr, unsigned count) {}
\r
107 static inline void pthread_barrier_wait(pthread_barrier_t * bar) {}
\r
108 static inline void pthread_barrier_destroy(pthread_barrier_t * bar) {}
\r
109 static inline void pthread_join(pthread_t id, void * smth){}
\r
111 static int pthread_create(pthread_t *thread, const pthread_attr_t *attr,
\r
112 void *(*start_routine) (void *), void *arg)
\r
114 start_routine(arg);
\r
118 //-END:-Threads-overrides---------------------------------------------------
\r
120 //just remove fflush
\r
121 #define fflush (void)sizeof
\r
126 #else //END JAILHOUSE
\r
128 #define _GNU_SOURCE
\r
129 #include <assert.h>
\r
130 #include <pthread.h>
\r
132 #include <stdbool.h>
\r
133 #include <stdint.h>
\r
135 #include <stdlib.h>
\r
136 #include <string.h>
\r
137 #include <sys/types.h>
\r
139 #include <unistd.h>
\r
/* Two-level stringification so TOSTRING expands macros (e.g. __LINE__) first. */
#define STRINGIFY(val) #val
#define TOSTRING(val) STRINGIFY(val)
/* "file:line: " prefix for error messages. */
#define LOC __FILE__ ":" TOSTRING(__LINE__) ": "

/* Run cmd once; on a -1 result print the location+command and exit.
 * The statement expression yields cmd's value on success. */
#define CHECK(cmd) ({ int ret = (cmd); if (ret == -1) { perror(LOC #cmd); exit(1); }; ret; })
#define CHECKPTR(cmd) ({ void *ptr = (cmd); if (ptr == (void*)-1) { perror(LOC #cmd); exit(1); }; ptr; })
/* Fix: __typeof__ instead of typeof — the plain keyword is a GNU extension
 * rejected under -std=c11. */
#define CHECKNULL(cmd) ({ __typeof__(cmd) ptr = (cmd); if (ptr == NULL) { perror(LOC #cmd); exit(1); }; ptr; })
//#define CHECKFGETS(s, size, stream) ({ void *ptr = fgets(s, size, stream); if (ptr == NULL) { if (feof(stream)) fprintf(stderr, LOC "fgets(" #s "): Unexpected end of stream\n"); else perror(LOC "fgets(" #s ")"); exit(1); }; ptr; })
/* Fix: parameter renamed 'bool' -> 'cond' — 'bool' is a <stdbool.h> macro
 * and a C23 keyword, so using it as a macro parameter is fragile. */
#define CHECKTRUE(cond, msg) ({ if (!(cond)) { printf("Error: " msg "\n"); exit(1); }; })
\r
156 unsigned num_threads;
\r
157 unsigned read_count;
\r
161 bool use_cycles; /* instead of ns */
\r
166 uint32_t dummy[(64 - sizeof(struct s*))/sizeof(uint32_t)];
\r
169 _Static_assert(sizeof(struct s) == 64, "Struct size differs from cacheline size");
\r
178 #define MRS32(reg) ({ uint32_t v; asm volatile ("mrs %0," # reg : "=r" (v)); v; })
\r
179 #define MRS64(reg) ({ uint64_t v; asm volatile ("mrs %0," # reg : "=r" (v)); v; })
\r
181 #define MSR(reg, v) ({ asm volatile ("msr " # reg ",%0" :: "r" (v)); })
\r
183 static void ccntr_init(void)
\r
185 MSR(PMCNTENSET_EL0, 0x80000000);
\r
186 MSR(PMCR_EL0, MRS32(PMCR_EL0) | 1);
\r
189 static uint64_t ccntr_get(void)
\r
191 return MRS64(PMCCNTR_EL0);
\r
194 static void ccntr_init(void) {}
\r
196 static uint64_t ccntr_get(void)
\r
199 //taken from lib/timing.c
\r
203 asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
\r
204 return (u64)lo | (((u64)hi) << 32);
\r
208 asm volatile("rdtsc" : "=A" (v));
\r
218 static uint64_t get_time(struct cfg *cfg)
\r
220 if (cfg->use_cycles == false) {
\r
227 clock_gettime(CLOCK_MONOTONIC, &t);
\r
228 return (uint64_t)t.tv_sec * 1000000000 + t.tv_nsec;
\r
232 return ccntr_get();
\r
236 #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
\r
238 static void prepare(struct s *array, unsigned size, bool sequential)
\r
241 int count = size / sizeof(struct s);
\r
244 for (i = 0; i < count - 1; i++){
\r
245 array[i].ptr = &array[i+1];
\r
247 array[count - 1].ptr = &array[0];
\r
249 memset(array, 0, size);
\r
250 struct s *p = &array[0];
\r
251 for (i = 0; i < count - 1; i++) {
\r
252 p->ptr = (struct s*)1; /* Mark as occupied to avoid self-loop */
\r
253 for (j = rand() % count;
\r
254 array[j].ptr != NULL;
\r
255 j = (j >= count) ? 0 : j+1);
\r
256 p = p->ptr = &array[j];
\r
258 p->ptr = &array[0];
\r
262 static void do_read(struct s *array, unsigned reads)
\r
264 unsigned i = reads / 32;
\r
265 volatile struct s *p = &array[0];
\r
267 p = p->ptr; /* 0 */
\r
268 p = p->ptr; /* 1 */
\r
269 p = p->ptr; /* 2 */
\r
270 p = p->ptr; /* 3 */
\r
271 p = p->ptr; /* 4 */
\r
272 p = p->ptr; /* 5 */
\r
273 p = p->ptr; /* 6 */
\r
274 p = p->ptr; /* 7 */
\r
275 p = p->ptr; /* 8 */
\r
276 p = p->ptr; /* 9 */
\r
277 p = p->ptr; /* 10 */
\r
278 p = p->ptr; /* 11 */
\r
279 p = p->ptr; /* 12 */
\r
280 p = p->ptr; /* 13 */
\r
281 p = p->ptr; /* 14 */
\r
282 p = p->ptr; /* 15 */
\r
283 p = p->ptr; /* 16 */
\r
284 p = p->ptr; /* 17 */
\r
285 p = p->ptr; /* 18 */
\r
286 p = p->ptr; /* 19 */
\r
287 p = p->ptr; /* 20 */
\r
288 p = p->ptr; /* 21 */
\r
289 p = p->ptr; /* 22 */
\r
290 p = p->ptr; /* 23 */
\r
291 p = p->ptr; /* 24 */
\r
292 p = p->ptr; /* 25 */
\r
293 p = p->ptr; /* 26 */
\r
294 p = p->ptr; /* 27 */
\r
295 p = p->ptr; /* 28 */
\r
296 p = p->ptr; /* 29 */
\r
297 p = p->ptr; /* 30 */
\r
298 p = p->ptr; /* 31 */
\r
302 static void do_write(struct s *array, unsigned accesses, unsigned ofs)
\r
304 unsigned i = accesses / 32;
\r
305 volatile struct s *p = &array[0];
\r
307 p->dummy[ofs]++; p = p->ptr; /* 0 */
\r
308 p->dummy[ofs]++; p = p->ptr; /* 1 */
\r
309 p->dummy[ofs]++; p = p->ptr; /* 2 */
\r
310 p->dummy[ofs]++; p = p->ptr; /* 3 */
\r
311 p->dummy[ofs]++; p = p->ptr; /* 4 */
\r
312 p->dummy[ofs]++; p = p->ptr; /* 5 */
\r
313 p->dummy[ofs]++; p = p->ptr; /* 6 */
\r
314 p->dummy[ofs]++; p = p->ptr; /* 7 */
\r
315 p->dummy[ofs]++; p = p->ptr; /* 8 */
\r
316 p->dummy[ofs]++; p = p->ptr; /* 9 */
\r
317 p->dummy[ofs]++; p = p->ptr; /* 10 */
\r
318 p->dummy[ofs]++; p = p->ptr; /* 11 */
\r
319 p->dummy[ofs]++; p = p->ptr; /* 12 */
\r
320 p->dummy[ofs]++; p = p->ptr; /* 13 */
\r
321 p->dummy[ofs]++; p = p->ptr; /* 14 */
\r
322 p->dummy[ofs]++; p = p->ptr; /* 15 */
\r
323 p->dummy[ofs]++; p = p->ptr; /* 16 */
\r
324 p->dummy[ofs]++; p = p->ptr; /* 17 */
\r
325 p->dummy[ofs]++; p = p->ptr; /* 18 */
\r
326 p->dummy[ofs]++; p = p->ptr; /* 19 */
\r
327 p->dummy[ofs]++; p = p->ptr; /* 20 */
\r
328 p->dummy[ofs]++; p = p->ptr; /* 21 */
\r
329 p->dummy[ofs]++; p = p->ptr; /* 22 */
\r
330 p->dummy[ofs]++; p = p->ptr; /* 23 */
\r
331 p->dummy[ofs]++; p = p->ptr; /* 24 */
\r
332 p->dummy[ofs]++; p = p->ptr; /* 25 */
\r
333 p->dummy[ofs]++; p = p->ptr; /* 26 */
\r
334 p->dummy[ofs]++; p = p->ptr; /* 27 */
\r
335 p->dummy[ofs]++; p = p->ptr; /* 28 */
\r
336 p->dummy[ofs]++; p = p->ptr; /* 29 */
\r
337 p->dummy[ofs]++; p = p->ptr; /* 30 */
\r
338 p->dummy[ofs]++; p = p->ptr; /* 31 */
\r
342 struct benchmark_thread {
\r
345 uint64_t result_integral;
\r
346 uint64_t result_fractional;
\r
350 pthread_barrier_t barrier;
\r
352 struct s array[MAX_CPUS][64*0x100000/sizeof(struct s)] __attribute__ ((aligned (2*1024*1024))) __attribute__ ((section (".bench-array")));
\r
354 struct s array[MAX_CPUS][64*0x100000/sizeof(struct s)] __attribute__ ((aligned (2*1024*1024)));
\r
358 static void *benchmark_thread(void *arg)
\r
360 struct benchmark_thread *me = arg;
\r
364 CPU_SET(me->cpu, &set);
\r
366 if (pthread_setaffinity_np(me->id, sizeof(set), &set) != 0) {
\r
367 perror("pthread_setaffinity_np");
\r
371 prepare(array[me->cpu], me->cfg->size, me->cfg->sequential);
\r
373 pthread_barrier_wait(&barrier);
\r
376 printf("CPU %d starts measurement\n", me->cpu);
\r
379 tic = get_time(me->cfg);
\r
380 if (me->cfg->write == false)
\r
381 do_read(array[me->cpu], me->cfg->read_count);
\r
383 do_write(array[me->cpu], me->cfg->read_count, me->cfg->ofs);
\r
385 tac = get_time(me->cfg);
\r
386 me->result_integral = (tac - tic) / me->cfg->read_count;
\r
387 me->result_fractional = (((tac - tic) * 1000) / me->cfg->read_count) % 1000;
\r
391 static void run_benchmark(struct cfg *cfg)
\r
393 struct benchmark_thread thread[MAX_CPUS];
\r
395 cpu_set_t set = cfg->cpu_set;
\r
396 pthread_barrier_init(&barrier, NULL, cfg->num_threads);
\r
397 for (i = 0; i < cfg->num_threads; i++) {
\r
398 thread[i].cfg = cfg;
\r
399 if (CPU_COUNT(&set) == 0) {
\r
403 for (j = 0; j < MAX_CPUS; j++) {
\r
404 if (CPU_ISSET(j, &set)) {
\r
412 printf( "Running thread %d on CPU %d\n", i, thread[i].cpu);
\r
413 pthread_create(&thread[i].id, NULL, benchmark_thread, &thread[i]);
\r
416 for (i = 0; i < cfg->num_threads; i++) {
\r
417 pthread_join(thread[i].id, NULL);
\r
419 pthread_barrier_destroy(&barrier);
\r
421 printf("%d", cfg->size);
\r
422 for (i = 0; i < cfg->num_threads; i++) {
\r
423 //NOTE: Jailhouse is not able to print doubles.
\r
424 //printf("\t%#.3g", thread[i].result);
\r
425 printf("\t%lu.%03u", thread[i].result_integral, thread[i].result_fractional);
\r
433 const void * image_end;
\r
435 void inmate_main(void)
\r
437 int main(int argc, char *argv[])
\r
441 .sequential = true,
\r
444 .read_count = 0x2000000,
\r
447 .use_cycles = false, // i.e. use nanoseconds /
\r
451 unsigned long tsc_freq;
\r
453 printk_uart_base = UART_BASE;
\r
455 for (n = 0; n < UART_IDLE_LOOPS; n++)
\r
456 if (!(inb(UART_BASE + UART_LSR) & UART_LSR_THRE))
\r
458 } while (n < UART_IDLE_LOOPS);
\r
460 printk("cmdline opts: '%s'\n", cmdline);
\r
461 comm_region->pm_timer_address = 0x408;
\r
463 cfg.read_count = cmdline_parse_int("-c", cfg.read_count);
\r
464 cfg.ofs = cmdline_parse_int("-o", cfg.ofs);
\r
465 cfg.sequential = !cmdline_parse_bool("-r");
\r
466 cfg.size = cmdline_parse_int("-s", cfg.size);
\r
467 if (cmdline_parse_bool("-t")) {
\r
468 printk("Threads are not supported. '-t' was ignored.\n");
\r
470 if (cmdline_parse_bool("-C")) {
\r
471 printk("CPU selection is not supported. '-C' was ignored.\n");
\r
473 cfg.write = cmdline_parse_bool("-w");
\r
474 cfg.use_cycles = cmdline_parse_bool("-y");
\r
475 //initialize timing
\r
476 tsc_freq = tsc_init();
\r
477 printk("Calibrated TSC frequency: %lu.%03u kHz\n", tsc_freq / 1000,
\r
480 u8 * start_memreg = (u8 *) array;
\r
481 u64 end_memreg_addr = (u64) &image_end;
\r
482 while ( ((u64) start_memreg )< end_memreg_addr) {
\r
483 //printk("%p\n",start_memreg);
\r
484 map_range(start_memreg, HUGE_PAGE_SIZE, MAP_CACHED);
\r
485 start_memreg += HUGE_PAGE_SIZE;
\r
489 #else //Linux param's parsing
\r
490 CPU_ZERO(&cfg.cpu_set);
\r
493 while ((opt = getopt(argc, argv, "c:C:o:rs:t:wy")) != -1) {
\r
496 cfg.read_count = atol(optarg);
\r
499 cfg.ofs = atol(optarg);
\r
501 case 'r': // random //
\r
502 cfg.sequential = false;
\r
505 cfg.size = atol(optarg);
\r
506 assert(cfg.size <= sizeof(array[0]));
\r
509 cfg.num_threads = atol(optarg);
\r
512 CPU_SET(atol(optarg), &cfg.cpu_set);
\r
518 cfg.use_cycles = true;
\r
521 fprintf(stderr, "Usage: %s ... TODO\n", argv[0]);
\r
531 assert(cfg.ofs < ARRAY_SIZE(s.dummy));
\r
534 if (cfg.use_cycles)
\r
537 if (cfg.size != 0) {
\r
538 run_benchmark(&cfg);
\r
540 unsigned order, size, step;
\r
541 for (order = 10; order <= 24; order++) {
\r
542 for (step = 0; step < 2; step++) {
\r
548 run_benchmark(&cfg);
\r
556 comm_region->cell_state = JAILHOUSE_CELL_SHUT_DOWN;
\r