]> rtime.felk.cvut.cz Git - jailhouse.git/commitdiff
Add membench inmate. It does not work properly yet.
authorMaxim Baryshnikov <barysmax@fel.cvut.cz>
Sat, 30 Jul 2016 17:04:20 +0000 (19:04 +0200)
committerMaxim Baryshnikov <barysmax@fel.cvut.cz>
Sat, 30 Jul 2016 17:04:20 +0000 (19:04 +0200)
inmates/Makefile
inmates/demos/x86/Makefile
inmates/demos/x86/Membench.c [new file with mode: 0644]

index 0e8f258837f4475479ea90a2ba1b3d8a680bb4a3..502425bbeb31ecdf91e4077226841624245cc750 100644 (file)
@@ -22,7 +22,7 @@ KBUILD_AFLAGS += $(INCLUDES)
 KBUILD_CFLAGS := -g -Os -Wall -Wstrict-prototypes -Wtype-limits \
                 -Wmissing-declarations -Wmissing-prototypes \
                 -fno-strict-aliasing -fomit-frame-pointer -fno-pic \
-                -fno-common -fno-stack-protector $(INCLUDES)
+                -fno-common -fno-stack-protector -DJAILHOUSE $(INCLUDES)
 ifneq ($(wildcard $(src)/../hypervisor/include/jailhouse/config.h),)
 KBUILD_CFLAGS += -include $(src)/../hypervisor/include/jailhouse/config.h
 endif
index ea72fba7f26d984913757a29824fdcae4b62d917..8ffcf7fe6faa87f352fa932bf7a34cd937d570de 100644 (file)
@@ -14,7 +14,7 @@ include $(INMATES_LIB)/Makefile.lib
 
 INMATES := tiny-demo.bin apic-demo.bin ioapic-demo.bin 32-bit-demo.bin \
        pci-demo.bin e1000-demo.bin ivshmem-demo.bin smp-demo.bin \
-       hpet-inmate.bin
+       hpet-inmate.bin Membench.bin
 
 tiny-demo-y    := tiny-demo.o
 apic-demo-y    := apic-demo.o
@@ -24,6 +24,7 @@ e1000-demo-y  := e1000-demo.o
 ivshmem-demo-y := ivshmem-demo.o
 smp-demo-y     := smp-demo.o
 hpet-inmate-y  := hpet-inmate.o
+Membench-y     := Membench.o
 
 $(eval $(call DECLARE_32_BIT,32-bit-demo))
 32-bit-demo-y  := 32-bit-demo.o
diff --git a/inmates/demos/x86/Membench.c b/inmates/demos/x86/Membench.c
new file mode 100644 (file)
index 0000000..021030d
--- /dev/null
@@ -0,0 +1,527 @@
+#ifdef JAILHOUSE\r
+\r
+#include <inmate.h>\r
+\r
+\r
+#define CMDLINE_BUFFER_SIZE    256\r
+CMDLINE_BUFFER(CMDLINE_BUFFER_SIZE);\r
+\r
+#define POLLUTE_CACHE_SIZE     (512 * 1024)\r
+\r
+#ifdef CONFIG_UART_OXPCIE952\r
+#define UART_BASE              0xe010\r
+#else\r
+#define UART_BASE              0x3f8\r
+#endif\r
+#define UART_LSR               0x5\r
+#define UART_LSR_THRE          0x20\r
+#define UART_IDLE_LOOPS                100\r
+\r
+//map the <stdint.h> type names onto Jailhouse's fixed-width types\r
+#define uint32_t u32\r
+#define uint64_t u64\r
+\r
+/* Minimal perror() replacement: prints MSG followed by a newline (there is no\r
+ * errno to report here). MSG is passed as a "%s" argument, so it is\r
+ * macro-expanded like a normal argument and any '%' inside it is never\r
+ * interpreted as a format directive. */\r
+#define perror(MSG) (printk("%s\n", (MSG)))\r
+\r
+#define printf printk\r
+\r
+#define exit(SIG) { printk("exit with %d\n", SIG); asm volatile("hlt");}\r
+\r
+//-----Time-and-randomization-overrides-----------------------------------\r
+/*\r
+ * time() replacement built on tsc_read().\r
+ *\r
+ * Returns the current tsc_read() value and, like the libc function, also\r
+ * stores it through 'seconds' when that pointer is non-NULL. The NULL case\r
+ * must be handled because the benchmark seeds the PRNG with time(NULL).\r
+ */\r
+static inline unsigned long time(unsigned long * seconds)\r
+{\r
+       unsigned long now = tsc_read();\r
+\r
+       if (seconds)\r
+               *seconds = now;\r
+       return now;\r
+}\r
+\r
+/*\r
+*      Tables of Maximally-Equidistributed Combined Lfsr Generators (1998)\r
+*      by Pierre L'Ecuyer\r
+*      taken from: http://stackoverflow.com/questions/1167253/implementation-of-rand\r
+*/\r
+static unsigned int z1 = 12345, z2 = 12345, z3 = 12345, z4 = 12345;\r
+\r
+static unsigned int lfsr113_Bits (void)\r
+{\r
+   unsigned int b;\r
+   b  = ((z1 << 6) ^ z1) >> 13;\r
+   z1 = ((z1 & 4294967294U) << 18) ^ b;\r
+   b  = ((z2 << 2) ^ z2) >> 27;\r
+   z2 = ((z2 & 4294967288U) << 2) ^ b;\r
+   b  = ((z3 << 13) ^ z3) >> 21;\r
+   z3 = ((z3 & 4294967280U) << 7) ^ b;\r
+   b  = ((z4 << 3) ^ z4) >> 12;\r
+   z4 = ((z4 & 4294967168U) << 13) ^ b;\r
+   return (z1 ^ z2 ^ z3 ^ z4);\r
+}\r
+\r
+static unsigned int rand(void)\r
+{\r
+       return lfsr113_Bits();\r
+}\r
+\r
+static void srand(unsigned int seed)\r
+{\r
+   //seed into z1 z2 z3 z4\r
+   z1 = z2 = z3 = z4 = seed;\r
+}\r
+//-END:-Time-and-randomization-overrides-----------------------------------\r
+\r
+//-----Assertion-overrides-------------------------------------------------\r
+#ifdef NDEBUG\r
+# define assert(EX)\r
+#else\r
+# define assert(EX) (void)((EX) || (__assert (#EX, __FILE__, __LINE__),0))\r
+#endif\r
+\r
+/*\r
+ * Failure handler behind the assert() macro: prints the failed expression\r
+ * text together with the source file and line. It only reports; it does not\r
+ * stop execution.\r
+ *\r
+ * msg:  stringified expression that evaluated to false\r
+ * file: source file name (__FILE__)\r
+ * line: source line number (__LINE__), an int and therefore printed with %d\r
+ */\r
+static inline void __assert(const char *msg, const char *file, int line)\r
+{\r
+       printk("Assertion %s in %s:%d failed.\n", msg, file, line);\r
+}\r
+//-END:-Assertion-overrides-------------------------------------------------\r
+\r
+//------Threads-overrides---------------------------------------------------\r
+ /* Small stand-in for the libc cpu_set_t: one bit per CPU in 'bits'\r
+  * (so at most 8 CPUs) plus a running count maintained by the CPU_*\r
+  * helpers below. */\r
+ typedef struct\r
+ {\r
+        u8 count;\r
+        u8 bits;\r
+ }cpu_set_t;\r
+\r
+/* Placeholder pthread types: plain integers, only here so that the benchmark\r
+ * code compiles against the stubbed pthread functions in this file. */\r
+typedef u32 pthread_t;\r
+typedef u32 pthread_barrier_t;\r
+typedef u32 pthread_attr_t;\r
+\r
+static inline void CPU_ZERO(cpu_set_t *set) { set->bits = 0; set->count = 0; }\r
+static inline void CPU_SET(int cpu, cpu_set_t *set) { set->bits |= (1 << cpu); set->count++; }\r
+static inline void CPU_CLR(int cpu, cpu_set_t *set) { set->bits ^= (1 << cpu); set->count--; }\r
+static inline int  CPU_ISSET(int cpu, cpu_set_t *set) { return (set->bits & (1 << cpu)); }\r
+static inline int CPU_COUNT(cpu_set_t *set) { return (int) set->count; }\r
+\r
+static inline int pthread_setaffinity_np(pthread_t id, unsigned long affty, cpu_set_t * set) {return 0;}\r
+\r
+static inline void pthread_barrier_init(pthread_barrier_t * bar, pthread_attr_t * attr, unsigned count) {}\r
+static inline void pthread_barrier_wait(pthread_barrier_t * bar) {}\r
+static inline void pthread_barrier_destroy(pthread_barrier_t * bar) {}\r
+static inline void pthread_join(pthread_t id, void * smth){}\r
+\r
+static int pthread_create(pthread_t *thread, const pthread_attr_t *attr,\r
+                          void *(*start_routine) (void *), void *arg)\r
+{\r
+       start_routine(arg);\r
+       return 0;\r
+}\r
+\r
+//-END:-Threads-overrides---------------------------------------------------\r
+\r
+//just remove fflush\r
+#define fflush (void)sizeof\r
+#define stdout 0\r
+#define stdin 0\r
+#define stderr 0\r
+\r
+#else //END JAILHOUSE\r
+\r
+#define _GNU_SOURCE\r
+#include <assert.h>\r
+#include <pthread.h>\r
+#include <sched.h>\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+#include <stdio.h>\r
+#include <stdlib.h>\r
+#include <string.h>\r
+#include <sys/types.h>\r
+#include <time.h>\r
+#include <unistd.h>\r
+\r
+#endif //END Linux\r
+\r
+#define STRINGIFY(val) #val\r
+#define TOSTRING(val) STRINGIFY(val)\r
+#define LOC __FILE__ ":" TOSTRING(__LINE__) ": "\r
+\r
+#define CHECK(cmd) ({ int ret = (cmd); if (ret == -1) { perror(LOC #cmd); exit(1); }; ret; })\r
+#define CHECKPTR(cmd) ({ void *ptr = (cmd); if (ptr == (void*)-1) { perror(LOC #cmd); exit(1); }; ptr; })\r
+#define CHECKNULL(cmd) ({ typeof(cmd) ptr = (cmd); if (ptr == NULL) { perror(LOC #cmd); exit(1); }; ptr; })\r
+//#define CHECKFGETS(s, size, stream) ({ void *ptr = fgets(s, size, stream); if (ptr == NULL) { if (feof(stream)) fprintf(stderr, LOC "fgets(" #s "): Unexpected end of stream\n"); else perror(LOC "fgets(" #s ")"); exit(1); }; ptr; })\r
+#define CHECKTRUE(bool, msg) ({ if (!(bool)) { printf("Error: " msg "\n"); exit(1); }; })\r
+\r
+/* Benchmark parameters, filled in from the command line by main(). */\r
+struct cfg {\r
+       bool sequential;        /* link elements in array order instead of randomly */\r
+       unsigned size;          /* bytes of the per-CPU array to use; 0 = sweep sizes */\r
+       unsigned num_threads;   /* number of benchmark threads to start */\r
+       unsigned read_count;    /* access count handed to do_read()/do_write() */\r
+       cpu_set_t cpu_set;      /* CPUs to place threads on; empty = thread i on CPU i */\r
+       bool write;             /* run do_write() instead of do_read() */\r
+       unsigned ofs;           /* index into struct s::dummy touched by do_write() */\r
+       bool use_cycles; /* instead of ns */\r
+};\r
+\r
+/* One element of the pointer-chasing chain. 'dummy' pads the struct up to\r
+ * 64 bytes (checked by the _Static_assert that follows) and doubles as the\r
+ * location written by do_write(). */\r
+struct s {\r
+        struct s *ptr;  /* next element in the chain */\r
+        uint32_t dummy[(64 - sizeof(struct s*))/sizeof(uint32_t)];\r
+};\r
+\r
+_Static_assert(sizeof(struct s) == 64, "Struct size differs from cacheline size");\r
+\r
+#define MAX_CPUS 8\r
+\r
+#ifdef __aarch64__\r
+#define MRS32(reg) ({ uint32_t v; asm volatile ("mrs %0," # reg : "=r" (v)); v; })\r
+#define MRS64(reg) ({ uint64_t v; asm volatile ("mrs %0," # reg : "=r" (v)); v; })\r
+\r
+#define MSR(reg, v) ({ asm volatile ("msr " # reg ",%0" :: "r" (v)); })\r
+\r
+/* Set up the counter read by ccntr_get(): writes 0x80000000 to\r
+ * PMCNTENSET_EL0 and then sets bit 0 of PMCR_EL0.\r
+ * NOTE(review): presumably this enables the PMU cycle counter - confirm\r
+ * against the ARMv8 architecture manual. */\r
+static void ccntr_init(void)\r
+{\r
+       MSR(PMCNTENSET_EL0, 0x80000000);\r
+       MSR(PMCR_EL0, MRS32(PMCR_EL0) | 1);\r
+}\r
+\r
+/* Return the current 64-bit value of the PMCCNTR_EL0 system register. */\r
+static uint64_t ccntr_get(void)\r
+{\r
+       return MRS64(PMCCNTR_EL0);\r
+}\r
+#else\r
+/* Nothing to set up on non-aarch64 builds. */\r
+static void ccntr_init(void) {}\r
+\r
+/* Cycle counter for non-aarch64 builds.\r
+ * With JAILHOUSE defined this returns the 64-bit result of RDTSC; in any\r
+ * other build it returns 0, so cycle-based results are meaningless there. */\r
+static uint64_t ccntr_get(void)\r
+{\r
+#ifdef JAILHOUSE\r
+//taken from lib/timing.c\r
+#ifdef __x86_64__\r
+       u32 lo, hi;\r
+\r
+       /* RDTSC delivers the low half in EAX and the high half in EDX */\r
+       asm volatile("rdtsc" : "=a" (lo), "=d" (hi));\r
+       return (u64)lo | (((u64)hi) << 32);\r
+#else\r
+       u64 v;\r
+\r
+       /* "=A" binds the 64-bit value to the EDX:EAX pair on 32-bit x86 */\r
+       asm volatile("rdtsc" : "=A" (v));\r
+       return v;\r
+#endif\r
+\r
+#else\r
+       return 0;\r
+#endif\r
+}\r
+#endif\r
+\r
+/*\r
+ * Take one timestamp in the unit selected by cfg->use_cycles.\r
+ *\r
+ * use_cycles == false: tsc_read() in the Jailhouse build, otherwise\r
+ *                      CLOCK_MONOTONIC converted to nanoseconds.\r
+ * use_cycles == true:  ccntr_get().\r
+ * NOTE(review): tsc_read() is assumed to return nanoseconds as well -\r
+ * confirm against the inmate library.\r
+ */\r
+static uint64_t get_time(struct cfg *cfg)\r
+{\r
+       if (cfg->use_cycles == false) {\r
+\r
+#ifdef JAILHOUSE\r
+               return tsc_read();\r
+#else\r
+        struct timespec t;\r
+\r
+        clock_gettime(CLOCK_MONOTONIC, &t);\r
+        return (uint64_t)t.tv_sec * 1000000000 + t.tv_nsec;\r
+#endif\r
+\r
+       } else {\r
+               return ccntr_get();\r
+       }\r
+}\r
+\r
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))\r
+\r
+static void prepare(struct s *array, unsigned size, bool sequential)\r
+{\r
+       int i, j;\r
+       int count = size / sizeof(struct s);\r
+\r
+       if (sequential) {\r
+               for (i = 0; i < count - 1; i++)\r
+                       array[i].ptr = &array[i+1];\r
+               array[count - 1].ptr = &array[0];\r
+       } else {\r
+               memset(array, 0, size);\r
+               struct s *p = &array[0];\r
+               for (i = 0; i < count - 1; i++) {\r
+                       p->ptr = (struct s*)1; /* Mark as occupied to avoid self-loop */\r
+                       for (j = rand() % count;\r
+                            array[j].ptr != NULL;\r
+                            j = (j >= count) ? 0 : j+1);\r
+                       p = p->ptr = &array[j];\r
+               }\r
+               p->ptr = &array[0];\r
+       }\r
+}\r
+\r
+static void do_read(struct s *array, unsigned reads)\r
+{\r
+       unsigned i = reads / 32;\r
+       volatile struct s *p = &array[0];\r
+       while (--i) {\r
+               p = p->ptr;     /* 0 */\r
+               p = p->ptr;     /* 1 */\r
+               p = p->ptr;     /* 2 */\r
+               p = p->ptr;     /* 3 */\r
+               p = p->ptr;     /* 4 */\r
+               p = p->ptr;     /* 5 */\r
+               p = p->ptr;     /* 6 */\r
+               p = p->ptr;     /* 7 */\r
+               p = p->ptr;     /* 8 */\r
+               p = p->ptr;     /* 9 */\r
+               p = p->ptr;     /* 10 */\r
+               p = p->ptr;     /* 11 */\r
+               p = p->ptr;     /* 12 */\r
+               p = p->ptr;     /* 13 */\r
+               p = p->ptr;     /* 14 */\r
+               p = p->ptr;     /* 15 */\r
+               p = p->ptr;     /* 16 */\r
+               p = p->ptr;     /* 17 */\r
+               p = p->ptr;     /* 18 */\r
+               p = p->ptr;     /* 19 */\r
+               p = p->ptr;     /* 20 */\r
+               p = p->ptr;     /* 21 */\r
+               p = p->ptr;     /* 22 */\r
+               p = p->ptr;     /* 23 */\r
+               p = p->ptr;     /* 24 */\r
+               p = p->ptr;     /* 25 */\r
+               p = p->ptr;     /* 26 */\r
+               p = p->ptr;     /* 27 */\r
+               p = p->ptr;     /* 28 */\r
+               p = p->ptr;     /* 29 */\r
+               p = p->ptr;     /* 30 */\r
+               p = p->ptr;     /* 31 */\r
+       }\r
+}\r
+\r
+static void do_write(struct s *array, unsigned accesses, unsigned ofs)\r
+{\r
+       unsigned i = accesses / 32;\r
+       volatile struct s *p = &array[0];\r
+       while (--i) {\r
+               p->dummy[ofs]++; p = p->ptr; /* 0 */\r
+               p->dummy[ofs]++; p = p->ptr; /* 1 */\r
+               p->dummy[ofs]++; p = p->ptr; /* 2 */\r
+               p->dummy[ofs]++; p = p->ptr; /* 3 */\r
+               p->dummy[ofs]++; p = p->ptr; /* 4 */\r
+               p->dummy[ofs]++; p = p->ptr; /* 5 */\r
+               p->dummy[ofs]++; p = p->ptr; /* 6 */\r
+               p->dummy[ofs]++; p = p->ptr; /* 7 */\r
+               p->dummy[ofs]++; p = p->ptr; /* 8 */\r
+               p->dummy[ofs]++; p = p->ptr; /* 9 */\r
+               p->dummy[ofs]++; p = p->ptr; /* 10 */\r
+               p->dummy[ofs]++; p = p->ptr; /* 11 */\r
+               p->dummy[ofs]++; p = p->ptr; /* 12 */\r
+               p->dummy[ofs]++; p = p->ptr; /* 13 */\r
+               p->dummy[ofs]++; p = p->ptr; /* 14 */\r
+               p->dummy[ofs]++; p = p->ptr; /* 15 */\r
+               p->dummy[ofs]++; p = p->ptr; /* 16 */\r
+               p->dummy[ofs]++; p = p->ptr; /* 17 */\r
+               p->dummy[ofs]++; p = p->ptr; /* 18 */\r
+               p->dummy[ofs]++; p = p->ptr; /* 19 */\r
+               p->dummy[ofs]++; p = p->ptr; /* 20 */\r
+               p->dummy[ofs]++; p = p->ptr; /* 21 */\r
+               p->dummy[ofs]++; p = p->ptr; /* 22 */\r
+               p->dummy[ofs]++; p = p->ptr; /* 23 */\r
+               p->dummy[ofs]++; p = p->ptr; /* 24 */\r
+               p->dummy[ofs]++; p = p->ptr; /* 25 */\r
+               p->dummy[ofs]++; p = p->ptr; /* 26 */\r
+               p->dummy[ofs]++; p = p->ptr; /* 27 */\r
+               p->dummy[ofs]++; p = p->ptr; /* 28 */\r
+               p->dummy[ofs]++; p = p->ptr; /* 29 */\r
+               p->dummy[ofs]++; p = p->ptr; /* 30 */\r
+               p->dummy[ofs]++; p = p->ptr; /* 31 */\r
+       }\r
+}\r
+\r
+/* Per-thread bookkeeping shared between run_benchmark() and the thread body. */\r
+struct benchmark_thread {\r
+       pthread_t id;           /* handle filled in by pthread_create() */\r
+       unsigned cpu;           /* CPU to run on; also selects the row of 'array' */\r
+       double result;          /* measured time divided by cfg->read_count */\r
+       struct cfg *cfg;        /* benchmark parameters */\r
+};\r
+\r
+/* Barrier the threads wait on between preparing their array and measuring. */\r
+pthread_barrier_t barrier;\r
+\r
+/* One row of benchmark elements per CPU: 64*0x100000 bytes each, with the\r
+ * whole array aligned to 2*1024*1024 bytes. */\r
+struct s array[MAX_CPUS][64*0x100000/sizeof(struct s)] __attribute__ ((aligned (2*1024*1024)));\r
+\r
+/* Progress messages are printed while true; run_benchmark() clears it after\r
+ * its first run. */\r
+bool print = true;\r
+\r
+static void *benchmark_thread(void *arg)\r
+{\r
+       struct benchmark_thread *me = arg;\r
+       cpu_set_t set;\r
+\r
+       CPU_ZERO(&set);\r
+       CPU_SET(me->cpu, &set);\r
+\r
+       if (pthread_setaffinity_np(me->id, sizeof(set), &set) != 0) {\r
+               perror("pthread_setaffinity_np");\r
+               exit(1);\r
+       }\r
+\r
+       prepare(array[me->cpu], me->cfg->size, me->cfg->sequential);\r
+\r
+       pthread_barrier_wait(&barrier);\r
+\r
+       if (print)\r
+               printf("CPU %d starts measurement\n", me->cpu);\r
+\r
+       uint64_t tic, tac;\r
+       tic = get_time(me->cfg);\r
+       if (me->cfg->write == false)\r
+               do_read(array[me->cpu], me->cfg->read_count);\r
+       else\r
+               do_write(array[me->cpu], me->cfg->read_count, me->cfg->ofs);\r
+\r
+       tac = get_time(me->cfg);\r
+       me->result = (double)(tac - tic) / me->cfg->read_count;\r
+\r
+       return NULL;\r
+}\r
+\r
+static void run_benchmark(struct cfg *cfg)\r
+{\r
+       struct benchmark_thread thread[MAX_CPUS];\r
+       unsigned i;\r
+       cpu_set_t set = cfg->cpu_set;\r
+       pthread_barrier_init(&barrier, NULL, cfg->num_threads);\r
+       for (i = 0; i < cfg->num_threads; i++) {\r
+               thread[i].cfg = cfg;\r
+               if (CPU_COUNT(&set) == 0) {\r
+                       thread[i].cpu = i;\r
+               } else {\r
+                       int j;\r
+                       for (j = 0; j < MAX_CPUS; j++) {\r
+                               if (CPU_ISSET(j, &set)) {\r
+                                       thread[i].cpu = j;\r
+                                       CPU_CLR(j, &set);\r
+                                       break;\r
+                               }\r
+                       }\r
+               }\r
+               if (print)\r
+                       printf( "Running thread %d on CPU %d\n", i, thread[i].cpu);\r
+               pthread_create(&thread[i].id, NULL, benchmark_thread, &thread[i]);\r
+       }\r
+\r
+       for (i = 0; i < cfg->num_threads; i++) {\r
+               pthread_join(thread[i].id, NULL);\r
+       }\r
+       pthread_barrier_destroy(&barrier);\r
+\r
+       printf("%d", cfg->size);\r
+       for (i = 0; i < cfg->num_threads; i++) {\r
+               printf("\t%#.3g", thread[i].result);\r
+       }\r
+       printf("\n");\r
+       fflush(stdout);\r
+       print = false;\r
+}\r
+\r
+#ifdef JAILHOUSE\r
+void inmate_main(void)\r
+#else //Linux\r
+int main(int argc, char *argv[])\r
+#endif\r
+{\r
+       struct cfg cfg = {\r
+               .sequential = true,\r
+               .num_threads = 1,\r
+               .size = 0,\r
+               .read_count = 0x2000000,\r
+               .write = false,\r
+               .ofs = 0,\r
+               .use_cycles = false, /* i.e. use nanoseconds */\r
+       };\r
+\r
+#ifdef JAILHOUSE\r
+       //initialize UART\r
+       unsigned long tsc_freq;\r
+       unsigned int n;\r
+\r
+       printk_uart_base = UART_BASE;\r
+       do {\r
+               for (n = 0; n < UART_IDLE_LOOPS; n++)\r
+                       if (!(inb(UART_BASE + UART_LSR) & UART_LSR_THRE))\r
+                               break;\r
+       } while (n < UART_IDLE_LOOPS);\r
+       //parse cmdline\r
+       cfg.read_count = cmdline_parse_int("-c", cfg.read_count);\r
+       cfg.ofs = cmdline_parse_int("-o", cfg.ofs);\r
+       cfg.sequential = cmdline_parse_bool("-r");\r
+       cfg.size = cmdline_parse_int("-s", cfg.size);\r
+       if (cmdline_parse_bool("-t")) {\r
+               printk("Threads are not supported. '-t' was ignored.\n");\r
+       }\r
+       if (cmdline_parse_bool("-C")) {\r
+               printk("CPU selection is not supported. '-C' was ignored.\n");\r
+       }\r
+       cfg.write = cmdline_parse_bool("-w");\r
+       cfg.use_cycles = cmdline_parse_bool("-y");\r
+       //initialize timing\r
+       tsc_freq = tsc_init();\r
+       printk("Calibrated TSC frequency: %lu.%03u kHz\n", tsc_freq / 1000,\r
+              tsc_freq % 1000);\r
+#else //Linux param's parsing\r
+       CPU_ZERO(&cfg.cpu_set);\r
+\r
+       int opt;\r
+       while ((opt = getopt(argc, argv, "c:C:o:rs:t:wy")) != -1) {\r
+               switch (opt) {\r
+               case 'c':\r
+                       cfg.read_count = atol(optarg);\r
+                       break;\r
+               case 'o':\r
+                       cfg.ofs = atol(optarg);\r
+                       break;\r
+               case 'r':       /* random */\r
+                       cfg.sequential = false;\r
+                       break;\r
+               case 's':\r
+                       cfg.size = atol(optarg);\r
+                       assert(cfg.size <= sizeof(array[0]));\r
+                       break;\r
+               case 't':\r
+                       cfg.num_threads = atol(optarg);\r
+                       break;\r
+               case 'C':\r
+                       CPU_SET(atol(optarg), &cfg.cpu_set);\r
+                       break;\r
+               case 'w':\r
+                       cfg.write = true;\r
+                       break;\r
+               case 'y':\r
+                       cfg.use_cycles = true;\r
+                       break;\r
+               default: /* '?' */\r
+                       fprintf(stderr, "Usage: %s ... TODO\n", argv[0]);\r
+                       exit(1);\r
+               }\r
+       }\r
+#endif\r
+\r
+       srand(time(NULL));\r
+\r
+       if (cfg.write) {\r
+               struct s s;\r
+               assert(cfg.ofs < ARRAY_SIZE(s.dummy));\r
+       }\r
+\r
+       if (cfg.use_cycles)\r
+               ccntr_init();\r
+\r
+       if (cfg.size != 0) {\r
+               run_benchmark(&cfg);\r
+       } else {\r
+               unsigned order, size, step;\r
+               for (order = 10; order <= 24; order++) {\r
+                       for (step = 0; step < 2; step++) {\r
+                               size = 1 << order;\r
+                               if (step == 1)\r
+                                       size += size / 2;\r
+\r
+                               cfg.size = size;\r
+                               run_benchmark(&cfg);\r
+                       }\r
+               }\r
+       }\r
+#ifndef JAILHOUSE\r
+       return 0;\r
+#endif\r
+}\r