--- /dev/null
+#ifdef JAILHOUSE\r
+\r
+#include <inmate.h>\r
+\r
+\r
+#define CMDLINE_BUFFER_SIZE 256\r
+CMDLINE_BUFFER(CMDLINE_BUFFER_SIZE);\r
+\r
+#define POLLUTE_CACHE_SIZE (512 * 1024)\r
+\r
+#ifdef CONFIG_UART_OXPCIE952\r
+#define UART_BASE 0xe010\r
+#else\r
+#define UART_BASE 0x3f8\r
+#endif\r
+#define UART_LSR 0x5\r
+#define UART_LSR_THRE 0x20\r
+#define UART_IDLE_LOOPS 100\r
+\r
//uintstd in jailhouse way
#define uint32_t u32
#define uint64_t u64

/*
 * perror() replacement: no errno in the inmate, so just print the message.
 * The original expanded to printk(#FUNC), which stringified the argument a
 * second time and printed the surrounding quotes verbatim.
 */
#define perror(FUNC) (printk("%s\n", FUNC))

#define printf printk

/*
 * exit() replacement: report the code and halt the CPU.
 * Wrapped in do { } while (0) so "exit(1);" is a single statement and does
 * not break if/else chains (the bare { ... } followed by ';' would).
 */
#define exit(SIG) do { printk("exit with %d\n", SIG); asm volatile("hlt"); } while (0)
+\r
+//-----Time-and-and-randomization-overrides-------------------------------\r
+static inline unsigned long time(unsigned long * seconds)\r
+{\r
+ return (*seconds) = tsc_read();\r
+}\r
+\r
+/*\r
+* Tables of Maximally-Equidistributed Combined Lfsr Generators (1998)\r
+* by Pierre L'Ecuyer\r
+* taken from: http://stackoverflow.com/questions/1167253/implementation-of-rand\r
+*/\r
+static unsigned int z1 = 12345, z2 = 12345, z3 = 12345, z4 = 12345;\r
+\r
+static unsigned int lfsr113_Bits (void)\r
+{\r
+ unsigned int b;\r
+ b = ((z1 << 6) ^ z1) >> 13;\r
+ z1 = ((z1 & 4294967294U) << 18) ^ b;\r
+ b = ((z2 << 2) ^ z2) >> 27;\r
+ z2 = ((z2 & 4294967288U) << 2) ^ b;\r
+ b = ((z3 << 13) ^ z3) >> 21;\r
+ z3 = ((z3 & 4294967280U) << 7) ^ b;\r
+ b = ((z4 << 3) ^ z4) >> 12;\r
+ z4 = ((z4 & 4294967168U) << 13) ^ b;\r
+ return (z1 ^ z2 ^ z3 ^ z4);\r
+}\r
+\r
+static unsigned int rand(void)\r
+{\r
+ return lfsr113_Bits();\r
+}\r
+\r
+static void srand(unsigned int seed)\r
+{\r
+ //seed into z1 z2 z3 z4\r
+ z1 = z2 = z3 = z4 = seed;\r
+}\r
+//-END:-Time-and-and-randomization-overrides-------------------------------\r
+\r
//-----Assertion-overrides-------------------------------------------------
#ifdef NDEBUG
# define assert(EX)
#else
# define assert(EX) (void)((EX) || (__assert (#EX, __FILE__, __LINE__),0))
#endif

/*
 * Report a failed assertion. Unlike libc's __assert this does not abort;
 * execution continues after the message.
 * Fix: `line` is an int, so it must be printed with %d — the original used
 * %s and made printk interpret the line number as a string pointer.
 */
static inline void __assert(const char *msg, const char *file, int line)
{
	printk("Assertion %s in %s:%d failed.\n", msg, file, line);
}
//-END:-Assertion-overrides-------------------------------------------------
+\r
+//------Threads-overrides---------------------------------------------------\r
+ typedef struct\r
+ {\r
+ u8 count;\r
+ u8 bits;\r
+ }cpu_set_t;\r
+\r
+typedef u32 pthread_t;\r
+typedef u32 pthread_barrier_t;\r
+typedef u32 pthread_attr_t;\r
+\r
+static inline void CPU_ZERO(cpu_set_t *set) { set->bits = 0; set->count = 0; }\r
+static inline void CPU_SET(int cpu, cpu_set_t *set) { set->bits |= (1 << cpu); set->count++; }\r
+static inline void CPU_CLR(int cpu, cpu_set_t *set) { set->bits ^= (1 << cpu); set->count--; }\r
+static inline int CPU_ISSET(int cpu, cpu_set_t *set) { return (set->bits & (1 << cpu)); }\r
+static inline int CPU_COUNT(cpu_set_t *set) { return (int) set->count; }\r
+\r
+static inline int pthread_setaffinity_np(pthread_t id, unsigned long affty, cpu_set_t * set) {return 0;}\r
+\r
+static inline void pthread_barrier_init(pthread_barrier_t * bar, pthread_attr_t * attr, unsigned count) {}\r
+static inline void pthread_barrier_wait(pthread_barrier_t * bar) {}\r
+static inline void pthread_barrier_destroy(pthread_barrier_t * bar) {}\r
+static inline void pthread_join(pthread_t id, void * smth){}\r
+\r
+static int pthread_create(pthread_t *thread, const pthread_attr_t *attr,\r
+ void *(*start_routine) (void *), void *arg)\r
+{\r
+ start_routine(arg);\r
+ return 0;\r
+}\r
+\r
+//-END:-Threads-overrides---------------------------------------------------\r
+\r
//just remove fflush
/* fflush(x) expands to (void)sizeof(x): the argument is not evaluated and
 * the whole call compiles away. stdout/stdin/stderr only exist so those
 * calls still parse. */
#define fflush (void)sizeof
#define stdout 0
#define stdin 0
#define stderr 0
+\r
+#else //END JAILHOUSE\r
+\r
+#define _GNU_SOURCE\r
+#include <assert.h>\r
+#include <pthread.h>\r
+#include <sched.h>\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+#include <stdio.h>\r
+#include <stdlib.h>\r
+#include <string.h>\r
+#include <sys/types.h>\r
+#include <time.h>\r
+#include <unistd.h>\r
+\r
+#endif //END Linux\r
+\r
/* LOC expands to a "file:line: " prefix for error messages. */
#define STRINGIFY(val) #val
#define TOSTRING(val) STRINGIFY(val)
#define LOC __FILE__ ":" TOSTRING(__LINE__) ": "

/* Run cmd and exit(1) with a located error message when it fails with -1. */
#define CHECK(cmd) ({ int ret = (cmd); if (ret == -1) { perror(LOC #cmd); exit(1); }; ret; })
/* Same, for calls that signal failure with (void *)-1 (e.g. mmap). */
#define CHECKPTR(cmd) ({ void *ptr = (cmd); if (ptr == (void*)-1) { perror(LOC #cmd); exit(1); }; ptr; })
/* Same, for calls that signal failure with NULL. */
#define CHECKNULL(cmd) ({ typeof(cmd) ptr = (cmd); if (ptr == NULL) { perror(LOC #cmd); exit(1); }; ptr; })
//#define CHECKFGETS(s, size, stream) ({ void *ptr = fgets(s, size, stream); if (ptr == NULL) { if (feof(stream)) fprintf(stderr, LOC "fgets(" #s "): Unexpected end of stream\n"); else perror(LOC "fgets(" #s ")"); exit(1); }; ptr; })
/* Exit with a message when a config condition does not hold. */
#define CHECKTRUE(bool, msg) ({ if (!(bool)) { printf("Error: " msg "\n"); exit(1); }; })
+\r
/* Benchmark configuration, filled from the command line in main(). */
struct cfg {
	bool sequential;	/* true: sequential chain; false: random permutation */
	unsigned size;		/* working-set size in bytes; 0 = sweep 1 KiB..24 MiB */
	unsigned num_threads;	/* number of concurrent benchmark threads */
	unsigned read_count;	/* number of pointer dereferences per measurement */
	cpu_set_t cpu_set;	/* CPUs to pin threads to; empty = thread i on CPU i */
	bool write;		/* true: read-modify-write benchmark instead of read */
	unsigned ofs;		/* index into s.dummy[] incremented in write mode */
	bool use_cycles; /* instead of ns */
};
+\r
/*
 * Pointer-chasing node, padded so one node occupies exactly one 64-byte
 * cache line; every hop in do_read()/do_write() touches a new line.
 */
struct s {
	struct s *ptr;	/* next node in the cyclic chain built by prepare() */
	uint32_t dummy[(64 - sizeof(struct s*))/sizeof(uint32_t)];	/* pad to 64 B; written in write mode */
};

_Static_assert(sizeof(struct s) == 64, "Struct size differs from cacheline size");
+\r
/* Upper bound on benchmark threads / per-CPU arenas. */
#define MAX_CPUS 8

#ifdef __aarch64__
/* Read a 32-/64-bit AArch64 system register by name. */
#define MRS32(reg) ({ uint32_t v; asm volatile ("mrs %0," # reg : "=r" (v)); v; })
#define MRS64(reg) ({ uint64_t v; asm volatile ("mrs %0," # reg : "=r" (v)); v; })

/* Write an AArch64 system register by name. */
#define MSR(reg, v) ({ asm volatile ("msr " # reg ",%0" :: "r" (v)); })
+\r
/* Enable the AArch64 PMU cycle counter so ccntr_get() can read it. */
static void ccntr_init(void)
{
	MSR(PMCNTENSET_EL0, 0x80000000);	/* bit 31: enable the cycle counter */
	MSR(PMCR_EL0, MRS32(PMCR_EL0) | 1);	/* PMCR_EL0.E: start the counters */
}

/* Current CPU cycle count (PMCCNTR_EL0). */
static uint64_t ccntr_get(void)
{
	return MRS64(PMCCNTR_EL0);
}
+#else\r
/* Non-aarch64: the TSC needs no explicit enabling. */
static void ccntr_init(void) {}

/*
 * Cycle count via RDTSC in the JAILHOUSE build; in the plain Linux build
 * on non-aarch64 cycle mode is unsupported and this returns 0.
 */
static uint64_t ccntr_get(void)
{
#ifdef JAILHOUSE
//taken from lib/timing.c
#ifdef __x86_64__
	u32 lo, hi;

	asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
	return (u64)lo | (((u64)hi) << 32);
#else
	/* 32-bit x86: "=A" reads the EDX:EAX pair as one 64-bit value. */
	u64 v;

	asm volatile("rdtsc" : "=A" (v));
	return v;
#endif

#else
	return 0;
#endif
}
+#endif\r
+\r
/*
 * Timestamp in the unit selected by the config: nanoseconds when
 * use_cycles is false, CPU cycles (ccntr_get) when it is true.
 * NOTE(review): the JAILHOUSE "nanoseconds" branch returns tsc_read();
 * presumably the inmate library reports that in ns — confirm.
 */
static uint64_t get_time(struct cfg *cfg)
{
	if (cfg->use_cycles == false) {

#ifdef JAILHOUSE
		return tsc_read();
#else
		struct timespec t;

		clock_gettime(CLOCK_MONOTONIC, &t);
		return (uint64_t)t.tv_sec * 1000000000 + t.tv_nsec;
#endif

	} else {
		return ccntr_get();
	}
}
+\r
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))\r
+\r
+static void prepare(struct s *array, unsigned size, bool sequential)\r
+{\r
+ int i, j;\r
+ int count = size / sizeof(struct s);\r
+\r
+ if (sequential) {\r
+ for (i = 0; i < count - 1; i++)\r
+ array[i].ptr = &array[i+1];\r
+ array[count - 1].ptr = &array[0];\r
+ } else {\r
+ memset(array, 0, size);\r
+ struct s *p = &array[0];\r
+ for (i = 0; i < count - 1; i++) {\r
+ p->ptr = (struct s*)1; /* Mark as occupied to avoid self-loop */\r
+ for (j = rand() % count;\r
+ array[j].ptr != NULL;\r
+ j = (j >= count) ? 0 : j+1);\r
+ p = p->ptr = &array[j];\r
+ }\r
+ p->ptr = &array[0];\r
+ }\r
+}\r
+\r
+static void do_read(struct s *array, unsigned reads)\r
+{\r
+ unsigned i = reads / 32;\r
+ volatile struct s *p = &array[0];\r
+ while (--i) {\r
+ p = p->ptr; /* 0 */\r
+ p = p->ptr; /* 1 */\r
+ p = p->ptr; /* 2 */\r
+ p = p->ptr; /* 3 */\r
+ p = p->ptr; /* 4 */\r
+ p = p->ptr; /* 5 */\r
+ p = p->ptr; /* 6 */\r
+ p = p->ptr; /* 7 */\r
+ p = p->ptr; /* 8 */\r
+ p = p->ptr; /* 9 */\r
+ p = p->ptr; /* 10 */\r
+ p = p->ptr; /* 11 */\r
+ p = p->ptr; /* 12 */\r
+ p = p->ptr; /* 13 */\r
+ p = p->ptr; /* 14 */\r
+ p = p->ptr; /* 15 */\r
+ p = p->ptr; /* 16 */\r
+ p = p->ptr; /* 17 */\r
+ p = p->ptr; /* 18 */\r
+ p = p->ptr; /* 19 */\r
+ p = p->ptr; /* 20 */\r
+ p = p->ptr; /* 21 */\r
+ p = p->ptr; /* 22 */\r
+ p = p->ptr; /* 23 */\r
+ p = p->ptr; /* 24 */\r
+ p = p->ptr; /* 25 */\r
+ p = p->ptr; /* 26 */\r
+ p = p->ptr; /* 27 */\r
+ p = p->ptr; /* 28 */\r
+ p = p->ptr; /* 29 */\r
+ p = p->ptr; /* 30 */\r
+ p = p->ptr; /* 31 */\r
+ }\r
+}\r
+\r
+static void do_write(struct s *array, unsigned accesses, unsigned ofs)\r
+{\r
+ unsigned i = accesses / 32;\r
+ volatile struct s *p = &array[0];\r
+ while (--i) {\r
+ p->dummy[ofs]++; p = p->ptr; /* 0 */\r
+ p->dummy[ofs]++; p = p->ptr; /* 1 */\r
+ p->dummy[ofs]++; p = p->ptr; /* 2 */\r
+ p->dummy[ofs]++; p = p->ptr; /* 3 */\r
+ p->dummy[ofs]++; p = p->ptr; /* 4 */\r
+ p->dummy[ofs]++; p = p->ptr; /* 5 */\r
+ p->dummy[ofs]++; p = p->ptr; /* 6 */\r
+ p->dummy[ofs]++; p = p->ptr; /* 7 */\r
+ p->dummy[ofs]++; p = p->ptr; /* 8 */\r
+ p->dummy[ofs]++; p = p->ptr; /* 9 */\r
+ p->dummy[ofs]++; p = p->ptr; /* 10 */\r
+ p->dummy[ofs]++; p = p->ptr; /* 11 */\r
+ p->dummy[ofs]++; p = p->ptr; /* 12 */\r
+ p->dummy[ofs]++; p = p->ptr; /* 13 */\r
+ p->dummy[ofs]++; p = p->ptr; /* 14 */\r
+ p->dummy[ofs]++; p = p->ptr; /* 15 */\r
+ p->dummy[ofs]++; p = p->ptr; /* 16 */\r
+ p->dummy[ofs]++; p = p->ptr; /* 17 */\r
+ p->dummy[ofs]++; p = p->ptr; /* 18 */\r
+ p->dummy[ofs]++; p = p->ptr; /* 19 */\r
+ p->dummy[ofs]++; p = p->ptr; /* 20 */\r
+ p->dummy[ofs]++; p = p->ptr; /* 21 */\r
+ p->dummy[ofs]++; p = p->ptr; /* 22 */\r
+ p->dummy[ofs]++; p = p->ptr; /* 23 */\r
+ p->dummy[ofs]++; p = p->ptr; /* 24 */\r
+ p->dummy[ofs]++; p = p->ptr; /* 25 */\r
+ p->dummy[ofs]++; p = p->ptr; /* 26 */\r
+ p->dummy[ofs]++; p = p->ptr; /* 27 */\r
+ p->dummy[ofs]++; p = p->ptr; /* 28 */\r
+ p->dummy[ofs]++; p = p->ptr; /* 29 */\r
+ p->dummy[ofs]++; p = p->ptr; /* 30 */\r
+ p->dummy[ofs]++; p = p->ptr; /* 31 */\r
+ }\r
+}\r
+\r
/* Per-thread benchmark state, one entry per thread in run_benchmark(). */
struct benchmark_thread {
	pthread_t id;
	unsigned cpu;		/* CPU this thread pins itself to */
	double result;		/* measured time (ns or cycles) per access */
	struct cfg *cfg;	/* shared configuration */
};

/* Synchronizes all threads so they start measuring together. */
pthread_barrier_t barrier;

/* One 64 MiB pointer-chasing arena per CPU; 2 MiB aligned — presumably for
 * hugepage mapping, TODO confirm. */
struct s array[MAX_CPUS][64*0x100000/sizeof(struct s)] __attribute__ ((aligned (2*1024*1024)));

/* Verbose per-thread output only on the first run_benchmark() call. */
bool print = true;
+\r
+static void *benchmark_thread(void *arg)\r
+{\r
+ struct benchmark_thread *me = arg;\r
+ cpu_set_t set;\r
+\r
+ CPU_ZERO(&set);\r
+ CPU_SET(me->cpu, &set);\r
+\r
+ if (pthread_setaffinity_np(me->id, sizeof(set), &set) != 0) {\r
+ perror("pthread_setaffinity_np");\r
+ exit(1);\r
+ }\r
+\r
+ prepare(array[me->cpu], me->cfg->size, me->cfg->sequential);\r
+\r
+ pthread_barrier_wait(&barrier);\r
+\r
+ if (print)\r
+ printf("CPU %d starts measurement\n", me->cpu);\r
+\r
+ uint64_t tic, tac;\r
+ tic = get_time(me->cfg);\r
+ if (me->cfg->write == false)\r
+ do_read(array[me->cpu], me->cfg->read_count);\r
+ else\r
+ do_write(array[me->cpu], me->cfg->read_count, me->cfg->ofs);\r
+\r
+ tac = get_time(me->cfg);\r
+ me->result = (double)(tac - tic) / me->cfg->read_count;\r
+\r
+ return NULL;\r
+}\r
+\r
+static void run_benchmark(struct cfg *cfg)\r
+{\r
+ struct benchmark_thread thread[MAX_CPUS];\r
+ unsigned i;\r
+ cpu_set_t set = cfg->cpu_set;\r
+ pthread_barrier_init(&barrier, NULL, cfg->num_threads);\r
+ for (i = 0; i < cfg->num_threads; i++) {\r
+ thread[i].cfg = cfg;\r
+ if (CPU_COUNT(&set) == 0) {\r
+ thread[i].cpu = i;\r
+ } else {\r
+ int j;\r
+ for (j = 0; j < MAX_CPUS; j++) {\r
+ if (CPU_ISSET(j, &set)) {\r
+ thread[i].cpu = j;\r
+ CPU_CLR(j, &set);\r
+ break;\r
+ }\r
+ }\r
+ }\r
+ if (print)\r
+ printf( "Running thread %d on CPU %d\n", i, thread[i].cpu);\r
+ pthread_create(&thread[i].id, NULL, benchmark_thread, &thread[i]);\r
+ }\r
+\r
+ for (i = 0; i < cfg->num_threads; i++) {\r
+ pthread_join(thread[i].id, NULL);\r
+ }\r
+ pthread_barrier_destroy(&barrier);\r
+\r
+ printf("%d", cfg->size);\r
+ for (i = 0; i < cfg->num_threads; i++) {\r
+ printf("\t%#.3g", thread[i].result);\r
+ }\r
+ printf("\n");\r
+ fflush(stdout);\r
+ print = false;\r
+}\r
+\r
+#ifdef JAILHOUSE\r
+void inmate_main(void)\r
+#else //Linux\r
+int main(int argc, char *argv[])\r
+#endif\r
+{\r
+ struct cfg cfg = {\r
+ .sequential = true,\r
+ .num_threads = 1,\r
+ .size = 0,\r
+ .read_count = 0x2000000,\r
+ .write = false,\r
+ .ofs = 0,\r
+ .use_cycles = false, /* i.e. use nanoseconds */\r
+ };\r
+\r
+#ifdef JAILHOUSE\r
+ //initialize UART\r
+ unsigned long tsc_freq;\r
+ unsigned int n;\r
+\r
+ printk_uart_base = UART_BASE;\r
+ do {\r
+ for (n = 0; n < UART_IDLE_LOOPS; n++)\r
+ if (!(inb(UART_BASE + UART_LSR) & UART_LSR_THRE))\r
+ break;\r
+ } while (n < UART_IDLE_LOOPS);\r
+ //parse cmdline\r
+ cfg.read_count = cmdline_parse_int("-c", cfg.read_count);\r
+ cfg.ofs = cmdline_parse_int("-o", cfg.ofs);\r
+ cfg.sequential = cmdline_parse_bool("-r");\r
+ cfg.size = cmdline_parse_int("-s", cfg.size);\r
+ if (cmdline_parse_bool("-t")) {\r
+ printk("Threads are not supported. '-t' was ignored.\n");\r
+ }\r
+ if (cmdline_parse_bool("-C")) {\r
+ printk("CPU selection is not supported. '-C' was ignored.\n");\r
+ }\r
+ cfg.write = cmdline_parse_bool("-w");\r
+ cfg.use_cycles = cmdline_parse_bool("-y");\r
+ //initialize timing\r
+ tsc_freq = tsc_init();\r
+ printk("Calibrated TSC frequency: %lu.%03u kHz\n", tsc_freq / 1000,\r
+ tsc_freq % 1000);\r
+#else //Linux param's parsing\r
+ CPU_ZERO(&cfg.cpu_set);\r
+\r
+ int opt;\r
+ while ((opt = getopt(argc, argv, "c:C:o:rs:t:wy")) != -1) {\r
+ switch (opt) {\r
+ case 'c':\r
+ cfg.read_count = atol(optarg);\r
+ break;\r
+ case 'o':\r
+ cfg.ofs = atol(optarg);\r
+ break;\r
+ case 'r': /* random */\r
+ cfg.sequential = false;\r
+ break;\r
+ case 's':\r
+ cfg.size = atol(optarg);\r
+ assert(cfg.size <= sizeof(array[0]));\r
+ break;\r
+ case 't':\r
+ cfg.num_threads = atol(optarg);\r
+ break;\r
+ case 'C':\r
+ CPU_SET(atol(optarg), &cfg.cpu_set);\r
+ break;\r
+ case 'w':\r
+ cfg.write = true;\r
+ break;\r
+ case 'y':\r
+ cfg.use_cycles = true;\r
+ break;\r
+ default: /* '?' */\r
+ fprintf(stderr, "Usage: %s ... TODO\n", argv[0]);\r
+ exit(1);\r
+ }\r
+ }\r
+#endif\r
+\r
+ srand(time(NULL));\r
+\r
+ if (cfg.write) {\r
+ struct s s;\r
+ assert(cfg.ofs < ARRAY_SIZE(s.dummy));\r
+ }\r
+\r
+ if (cfg.use_cycles)\r
+ ccntr_init();\r
+\r
+ if (cfg.size != 0) {\r
+ run_benchmark(&cfg);\r
+ } else {\r
+ unsigned order, size, step;\r
+ for (order = 10; order <= 24; order++) {\r
+ for (step = 0; step < 2; step++) {\r
+ size = 1 << order;\r
+ if (step == 1)\r
+ size += size / 2;\r
+\r
+ cfg.size = size;\r
+ run_benchmark(&cfg);\r
+ }\r
+ }\r
+ }\r
+#ifndef JAILHOUSE\r
+ return 0;\r
+#endif\r
+}\r