/*
 * Membench -- pointer-chasing memory benchmark that builds either as a
 * Jailhouse x86 inmate (compile with -DJAILHOUSE) or as a Linux program.
 *
 * Provenance (kept from the patch this file was extracted from):
 *
 *   From: Maxim Baryshnikov
 *   Date: Sat, 30 Jul 2016 17:04:20 +0000 (+0200)
 *   Subject: Add membench inmate. It has not worked properly yet.
 *   X-Git-Url: http://rtime.felk.cvut.cz/gitweb/jailhouse.git/commitdiff_plain/3020be39f1e00865383b62d594f00554b4ad22c0
 *
 *   Add membench inmate. It has not worked properly yet.
 *
 * Build-system changes carried by the same patch:
 *
 *   diff --git a/inmates/Makefile b/inmates/Makefile
 *   index 0e8f258..502425b 100644
 *   --- a/inmates/Makefile
 *   +++ b/inmates/Makefile
 *   @@ -22,7 +22,7 @@ KBUILD_AFLAGS += $(INCLUDES)
 *    KBUILD_CFLAGS := -g -Os -Wall -Wstrict-prototypes -Wtype-limits \
 *                     -Wmissing-declarations -Wmissing-prototypes \
 *                     -fno-strict-aliasing -fomit-frame-pointer -fno-pic \
 *   -                 -fno-common -fno-stack-protector $(INCLUDES)
 *   +                 -fno-common -fno-stack-protector -DJAILHOUSE $(INCLUDES)
 *    ifneq ($(wildcard $(src)/../hypervisor/include/jailhouse/config.h),)
 *    KBUILD_CFLAGS += -include $(src)/../hypervisor/include/jailhouse/config.h
 *    endif
 *
 *   diff --git a/inmates/demos/x86/Makefile b/inmates/demos/x86/Makefile
 *   index ea72fba..8ffcf7f 100644
 *   --- a/inmates/demos/x86/Makefile
 *   +++ b/inmates/demos/x86/Makefile
 *   @@ -14,7 +14,7 @@ include $(INMATES_LIB)/Makefile.lib
 *    INMATES := tiny-demo.bin apic-demo.bin ioapic-demo.bin 32-bit-demo.bin \
 *               pci-demo.bin e1000-demo.bin ivshmem-demo.bin smp-demo.bin \
 *   -           hpet-inmate.bin
 *   +           hpet-inmate.bin Membench.bin
 *    tiny-demo-y := tiny-demo.o
 *    apic-demo-y := apic-demo.o
 *   @@ -24,6 +24,7 @@ e1000-demo-y := e1000-demo.o
 *    ivshmem-demo-y := ivshmem-demo.o
 *    smp-demo-y := smp-demo.o
 *    hpet-inmate-y := hpet-inmate.o
 *   +Membench-y := Membench.o
 *    $(eval $(call DECLARE_32_BIT,32-bit-demo))
 *    32-bit-demo-y := 32-bit-demo.o
 *
 *   diff --git a/inmates/demos/x86/Membench.c b/inmates/demos/x86/Membench.c
 *   new file mode 100644
 *   index 0000000..021030d
 *
 * NOTE(review): the targets of all #include directives were lost when the
 * patch was extracted.  They are reconstructed below from the symbols this
 * file uses; the original Linux branch had 11 includes, of which 10 could be
 * identified -- confirm against the upstream commit.
 */

#ifdef JAILHOUSE

/* Presumably the inmate library header (printk, inb, cmdline_parse_*,
 * tsc_init/tsc_read, CMDLINE_BUFFER, u8/u32/u64, bool) -- TODO confirm. */
#include <inmate.h>

#define CMDLINE_BUFFER_SIZE	256
CMDLINE_BUFFER(CMDLINE_BUFFER_SIZE);

#define POLLUTE_CACHE_SIZE	(512 * 1024)

#ifdef CONFIG_UART_OXPCIE952
#define UART_BASE		0xe010
#else
#define UART_BASE		0x3f8
#endif
#define UART_LSR		0x5
#define UART_LSR_THRE		0x20
#define UART_IDLE_LOOPS		100

/* <stdint.h> names expressed with the inmate library's integer types. */
#define uint32_t u32
#define uint64_t u64

/*
 * perror() replacement: print the message itself.  The message is passed as
 * a "%s" argument so that '%' characters in it are never interpreted.
 */
#define perror(MSG) printk("%s\n", (MSG))

#define printf printk

/*
 * exit() replacement: report the code and park the CPU.  "hlt" is executed
 * in a loop because a single hlt resumes after any wake-up event.
 */
#define exit(SIG) \
	do { \
		printk("exit with %d\n", (SIG)); \
		for (;;) \
			__asm__ volatile("hlt"); \
	} while (0)

//-----Time-and-and-randomization-overrides-------------------------------
/*
 * time() replacement based on tsc_read().
 * @seconds: optional output; when non-NULL the value is stored there too.
 * Returns the value read from tsc_read().
 */
static inline unsigned long time(unsigned long *seconds)
{
	unsigned long now = tsc_read();

	/* main() calls time(NULL); never write through a NULL pointer. */
	if (seconds != NULL)
		*seconds = now;
	return now;
}

/*
 * Tables of Maximally-Equidistributed Combined Lfsr Generators (1998)
 * by Pierre L'Ecuyer
 * taken from: http://stackoverflow.com/questions/1167253/implementation-of-rand
 */
static unsigned int z1 = 12345, z2 = 12345, z3 = 12345, z4 = 12345;

/* Advance the four LFSR components and return the combined 32-bit output. */
static unsigned int lfsr113_Bits(void)
{
	unsigned int b;

	b  = ((z1 << 6) ^ z1) >> 13;
	z1 = ((z1 & 4294967294U) << 18) ^ b;
	b  = ((z2 << 2) ^ z2) >> 27;
	z2 = ((z2 & 4294967288U) << 2) ^ b;
	b  = ((z3 << 13) ^ z3) >> 21;
	z3 = ((z3 & 4294967280U) << 7) ^ b;
	b  = ((z4 << 3) ^ z4) >> 12;
	z4 = ((z4 & 4294967168U) << 13) ^ b;
	return (z1 ^ z2 ^ z3 ^ z4);
}

/* rand() replacement backed by the LFSR113 generator above. */
static unsigned int rand(void)
{
	return lfsr113_Bits();
}

/*
 * srand() replacement: seed all four generator components.
 * LFSR113 requires z1 > 1, z2 > 7, z3 > 15 and z4 > 127; smaller seeds are
 * shifted into the valid range so the generator cannot degenerate.
 */
static void srand(unsigned int seed)
{
	if (seed < 128)
		seed += 128;
	z1 = z2 = z3 = z4 = seed;
}
//-END:-Time-and-and-randomization-overrides-------------------------------

//-----Assertion-overrides-------------------------------------------------
/*
 * Report a failed assertion and stop, mirroring what assert() does on a
 * hosted system.  @line is an integer and is printed with %d.
 */
static inline void __assert(const char *msg, const char *file, int line)
{
	printk("Assertion %s in %s:%d failed.\n", msg, file, line);
	exit(1);
}

#ifdef NDEBUG
# define assert(EX)
#else
# define assert(EX) ((void)((EX) || (__assert(#EX, __FILE__, __LINE__), 0)))
#endif
//-END:-Assertion-overrides-------------------------------------------------

//------Threads-overrides---------------------------------------------------
/* Minimal cpu_set_t: one bit per CPU (CPUs 0..7) plus a population count. */
typedef struct
{
	u8 count;
	u8 bits;
} cpu_set_t;

typedef u32 pthread_t;
typedef u32 pthread_barrier_t;
typedef u32 pthread_attr_t;

/* Bit for @cpu inside cpu_set_t.bits, or 0 when @cpu is not representable. */
static inline u8 cpu_mask(int cpu)
{
	return (cpu >= 0 && cpu < 8) ? (u8)(1u << cpu) : 0;
}

static inline void CPU_ZERO(cpu_set_t *set)
{
	set->bits = 0;
	set->count = 0;
}

/* Add @cpu to @set; count only changes when the bit was not set before. */
static inline void CPU_SET(int cpu, cpu_set_t *set)
{
	u8 mask = cpu_mask(cpu);

	if (mask != 0 && !(set->bits & mask)) {
		set->bits |= mask;
		set->count++;
	}
}

/* Remove @cpu from @set (clears the bit; the original XOR toggled it). */
static inline void CPU_CLR(int cpu, cpu_set_t *set)
{
	u8 mask = cpu_mask(cpu);

	if (set->bits & mask) {
		set->bits &= (u8)~mask;
		set->count--;
	}
}

static inline int CPU_ISSET(int cpu, cpu_set_t *set)
{
	return (set->bits & cpu_mask(cpu)) != 0;
}

static inline int CPU_COUNT(cpu_set_t *set)
{
	return (int)set->count;
}

/* Threading is not available in the inmate: the calls below are no-ops. */
static inline pthread_t pthread_self(void) { return 0; }

static inline int pthread_setaffinity_np(pthread_t id, unsigned long affty, cpu_set_t *set) { return 0; }

static inline void pthread_barrier_init(pthread_barrier_t *bar, pthread_attr_t *attr, unsigned count) {}
static inline void pthread_barrier_wait(pthread_barrier_t *bar) {}
static inline void pthread_barrier_destroy(pthread_barrier_t *bar) {}
static inline void pthread_join(pthread_t id, void *smth) {}

/*
 * pthread_create() replacement: runs @start_routine synchronously on the
 * calling CPU and returns 0 once it has finished.
 */
static int pthread_create(pthread_t *thread, const pthread_attr_t *attr,
			  void *(*start_routine) (void *), void *arg)
{
	*thread = 0;	/* give the caller a defined id */
	start_routine(arg);
	return 0;
}

//-END:-Threads-overrides---------------------------------------------------

//just remove fflush
#define fflush (void)sizeof
#define stdout 0
#define stdin 0
#define stderr 0

#else //END JAILHOUSE

#define _GNU_SOURCE
#include <assert.h>
#include <pthread.h>
#include <sched.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#endif //END Linux

#define STRINGIFY(val) #val
#define TOSTRING(val) STRINGIFY(val)
#define LOC __FILE__ ":" TOSTRING(__LINE__) ": "

/* Evaluate @cmd once; print its location and stop when it signals failure. */
#define CHECK(cmd) ({ int ret = (cmd); if (ret == -1) { perror(LOC #cmd); exit(1); }; ret; })
#define CHECKPTR(cmd) ({ void *ptr = (cmd); if (ptr == (void*)-1) { perror(LOC #cmd); exit(1); }; ptr; })
#define CHECKNULL(cmd) ({ __typeof__(cmd) ptr = (cmd); if (ptr == NULL) { perror(LOC #cmd); exit(1); }; ptr; })
//#define CHECKFGETS(s, size, stream) ({ void *ptr = fgets(s, size, stream); if (ptr == NULL) { if (feof(stream)) fprintf(stderr, LOC "fgets(" #s "): Unexpected end of stream\n"); else perror(LOC "fgets(" #s ")"); exit(1); }; ptr; })
/* @msg must be a string literal: it is concatenated into the format. */
#define CHECKTRUE(cond, msg) ({ if (!(cond)) { printf("Error: " msg "\n"); exit(1); }; })

/* Benchmark configuration, filled from the command line. */
struct cfg {
	bool sequential;	/* chain elements in address order (else random) */
	unsigned size;		/* working-set size in bytes; 0 = sweep sizes */
	unsigned num_threads;
	unsigned read_count;	/* number of memory accesses to time */
	cpu_set_t cpu_set;
	bool write;		/* also modify each visited element */
	unsigned ofs;		/* index into s.dummy[] that gets written */
	bool use_cycles;	/* instead of ns */
};

#define CACHE_LINE_SIZE	64
#define DUMMY_WORDS	((CACHE_LINE_SIZE - sizeof(struct s *)) / sizeof(uint32_t))

/* One chain element, padded to exactly one cache line. */
struct s {
	struct s *ptr;
	uint32_t dummy[DUMMY_WORDS];
};

_Static_assert(sizeof(struct s) == 64, "Struct size differs from cacheline size");

#define MAX_CPUS 8

#ifdef __aarch64__
#define MRS32(reg) ({ uint32_t v; __asm__ volatile ("mrs %0," # reg : "=r" (v)); v; })
#define MRS64(reg) ({ uint64_t v; __asm__ volatile ("mrs %0," # reg : "=r" (v)); v; })

#define MSR(reg, v) ({ __asm__ volatile ("msr " # reg ",%0" :: "r" (v)); })

/* Set bit 31 in PMCNTENSET_EL0 and bit 0 in PMCR_EL0. */
static void ccntr_init(void)
{
	MSR(PMCNTENSET_EL0, 0x80000000);
	MSR(PMCR_EL0, MRS32(PMCR_EL0) | 1);
}

/* Read PMCCNTR_EL0. */
static uint64_t ccntr_get(void)
{
	return MRS64(PMCCNTR_EL0);
}
#else
static void ccntr_init(void) {}

/*
 * Cycle counter: RDTSC in the inmate build, always 0 in the Linux build on
 * non-aarch64 targets.
 */
static uint64_t ccntr_get(void)
{
#ifdef JAILHOUSE
//taken from lib/timing.c
#ifdef __x86_64__
	u32 lo, hi;

	__asm__ volatile("rdtsc" : "=a" (lo), "=d" (hi));
	return (u64)lo | (((u64)hi) << 32);
#else
	u64 v;

	__asm__ volatile("rdtsc" : "=A" (v));
	return v;
#endif

#else
	return 0;
#endif
}
#endif

/*
 * Current timestamp in the unit selected by cfg->use_cycles: the cycle
 * counter when set, otherwise tsc_read() (inmate) or CLOCK_MONOTONIC in
 * nanoseconds (Linux).
 * NOTE(review): tsc_read() is presumed to return nanoseconds -- confirm.
 */
static uint64_t get_time(struct cfg *cfg)
{
	if (cfg->use_cycles)
		return ccntr_get();

#ifdef JAILHOUSE
	return tsc_read();
#else
	struct timespec t;

	clock_gettime(CLOCK_MONOTONIC, &t);
	return (uint64_t)t.tv_sec * 1000000000 + t.tv_nsec;
#endif
}

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

/*
 * Link the first size / sizeof(struct s) elements of @array into one cycle
 * that starts and ends at array[0].
 * @size:       working-set size in bytes; must cover at least one element.
 * @sequential: true links elements in address order, false in random order.
 */
static void prepare(struct s *array, unsigned size, bool sequential)
{
	int i, j;
	int count = size / sizeof(struct s);
	struct s *p;

	if (sequential) {
		for (i = 0; i < count - 1; i++)
			array[i].ptr = &array[i + 1];
		array[count - 1].ptr = &array[0];
		return;
	}

	/* A NULL ptr marks an element that is not part of the chain yet. */
	memset(array, 0, size);
	p = &array[0];
	for (i = 0; i < count - 1; i++) {
		p->ptr = (struct s *)1; /* Mark as occupied to avoid self-loop */
		/*
		 * Probe linearly from a random slot for a free element.  Wrap
		 * before the index reaches count: the original test
		 * (j >= count) let j become count and read array[count].
		 */
		j = rand() % count;
		while (array[j].ptr != NULL)
			j = (j + 1 >= count) ? 0 : j + 1;
		p->ptr = &array[j];
		p = p->ptr;
	}
	p->ptr = &array[0];
}

/* Emit a statement 32 times so the timed loops stay manually unrolled. */
#define REPEAT4(stmt)	stmt stmt stmt stmt
#define REPEAT32(stmt)	REPEAT4(stmt) REPEAT4(stmt) REPEAT4(stmt) REPEAT4(stmt) \
			REPEAT4(stmt) REPEAT4(stmt) REPEAT4(stmt) REPEAT4(stmt)

/*
 * Follow the chain for @reads accesses, rounded down to a multiple of 32.
 * The volatile qualifier keeps the compiler from dropping the loads.
 */
static void do_read(struct s *array, unsigned reads)
{
	unsigned i;
	volatile struct s *p = &array[0];

	/*
	 * Counting down from reads / 32 to 0 performs exactly that many
	 * rounds; the original "while (--i)" did one round less and wrapped
	 * around to UINT_MAX rounds when reads < 32.
	 */
	for (i = reads / 32; i > 0; i--) {
		REPEAT32(p = p->ptr;)
	}
}

/*
 * Like do_read(), but increments dummy[@ofs] of every element before moving
 * on.  @ofs must be a valid index into s.dummy[].
 */
static void do_write(struct s *array, unsigned accesses, unsigned ofs)
{
	unsigned i;
	volatile struct s *p = &array[0];

	for (i = accesses / 32; i > 0; i--) {
		REPEAT32(p->dummy[ofs]++; p = p->ptr;)
	}
}

/* Per-thread benchmark state. */
struct benchmark_thread {
	pthread_t id;
	unsigned cpu;		/* CPU to pin to; also selects array[cpu] */
	double result;		/* time per access, in get_time() units */
	struct cfg *cfg;
};

pthread_barrier_t barrier;

/*
 * One 64 MiB working area per CPU.
 * NOTE(review): this is 512 MiB of zero-initialised data; check that the
 * inmate's memory region can hold it.
 */
struct s array[MAX_CPUS][64*0x100000/sizeof(struct s)] __attribute__ ((aligned (2*1024*1024)));

/* Progress messages are only printed during the first run_benchmark(). */
bool print = true;

/*
 * Thread body: pin to me->cpu, build the chain in array[me->cpu], wait on
 * the barrier, then time cfg->read_count accesses and store the time per
 * access in me->result.
 * @arg: pointer to this thread's struct benchmark_thread.
 */
static void *benchmark_thread(void *arg)
{
	struct benchmark_thread *me = arg;
	cpu_set_t set;
	uint64_t tic, tac;

	CPU_ZERO(&set);
	CPU_SET(me->cpu, &set);

	/*
	 * Use pthread_self(): me->id is written by pthread_create() in the
	 * parent and is not guaranteed to be set when this thread starts.
	 */
	if (pthread_setaffinity_np(pthread_self(), sizeof(set), &set) != 0) {
		perror("pthread_setaffinity_np");
		exit(1);
	}

	prepare(array[me->cpu], me->cfg->size, me->cfg->sequential);

	pthread_barrier_wait(&barrier);

	if (print)
		printf("CPU %u starts measurement\n", me->cpu);

	tic = get_time(me->cfg);
	if (me->cfg->write)
		do_write(array[me->cpu], me->cfg->read_count, me->cfg->ofs);
	else
		do_read(array[me->cpu], me->cfg->read_count);
	tac = get_time(me->cfg);

	me->result = (double)(tac - tic) / me->cfg->read_count;

	return NULL;
}

/*
 * Run one measurement with cfg->num_threads threads and print a line
 * "<size>\t<result>..." with one result per thread.
 * NOTE(review): the "%#.3g" conversion relies on floating-point support in
 * printf; confirm that the inmate's printk can format it.
 */
static void run_benchmark(struct cfg *cfg)
{
	struct benchmark_thread thread[MAX_CPUS];
	unsigned i;
	cpu_set_t set = cfg->cpu_set;

	/* thread[] and array[] only have MAX_CPUS entries. */
	CHECKTRUE(cfg->num_threads <= MAX_CPUS,
		  "number of threads exceeds MAX_CPUS");

	pthread_barrier_init(&barrier, NULL, cfg->num_threads);
	for (i = 0; i < cfg->num_threads; i++) {
		int j;

		thread[i].cfg = cfg;
		/*
		 * Default to CPU i; this also covers a CPU set that has no
		 * usable CPU below MAX_CPUS left.
		 */
		thread[i].cpu = i;
		if (CPU_COUNT(&set) != 0) {
			/* Take the lowest CPU still present in the set. */
			for (j = 0; j < MAX_CPUS; j++) {
				if (CPU_ISSET(j, &set)) {
					thread[i].cpu = j;
					CPU_CLR(j, &set);
					break;
				}
			}
		}
		if (print)
			printf("Running thread %u on CPU %u\n", i, thread[i].cpu);
		pthread_create(&thread[i].id, NULL, benchmark_thread, &thread[i]);
	}

	for (i = 0; i < cfg->num_threads; i++)
		pthread_join(thread[i].id, NULL);
	pthread_barrier_destroy(&barrier);

	printf("%u", cfg->size);
	for (i = 0; i < cfg->num_threads; i++)
		printf("\t%#.3g", thread[i].result);
	printf("\n");
	fflush(stdout);
	print = false;
}

/*
 * Entry point.  Options (inmate command line or getopt on Linux):
 *   -c N  number of accesses      -o N  dummy[] index written with -w
 *   -r    random chain order      -s N  working-set size in bytes
 *   -t N  threads (Linux only)    -C N  add CPU N to the set (Linux only)
 *   -w    write while traversing  -y    report cycles instead of ns
 * Without -s, sizes 2^10 .. 1.5 * 2^24 bytes are measured in turn.
 */
#ifdef JAILHOUSE
void inmate_main(void)
#else //Linux
int main(int argc, char *argv[])
#endif
{
	struct cfg cfg = {
		.sequential = true,
		.num_threads = 1,
		.size = 0,
		.read_count = 0x2000000,
		.write = false,
		.ofs = 0,
		.use_cycles = false, /* i.e. use nanoseconds */
	};

#ifdef JAILHOUSE
	//initialize UART
	unsigned long tsc_freq;
	unsigned int n;

	printk_uart_base = UART_BASE;
	do {
		for (n = 0; n < UART_IDLE_LOOPS; n++)
			if (!(inb(UART_BASE + UART_LSR) & UART_LSR_THRE))
				break;
	} while (n < UART_IDLE_LOOPS);
	//parse cmdline
	cfg.read_count = cmdline_parse_int("-c", cfg.read_count);
	cfg.ofs = cmdline_parse_int("-o", cfg.ofs);
	/* "-r" requests the random order, i.e. it switches sequential off. */
	cfg.sequential = !cmdline_parse_bool("-r");
	cfg.size = cmdline_parse_int("-s", cfg.size);
	if (cmdline_parse_bool("-t")) {
		printk("Threads are not supported. '-t' was ignored.\n");
	}
	if (cmdline_parse_bool("-C")) {
		printk("CPU selection is not supported. '-C' was ignored.\n");
	}
	cfg.write = cmdline_parse_bool("-w");
	cfg.use_cycles = cmdline_parse_bool("-y");
	//initialize timing
	tsc_freq = tsc_init();
	printk("Calibrated TSC frequency: %lu.%03u kHz\n", tsc_freq / 1000,
	       tsc_freq % 1000);
#else //Linux param's parsing
	CPU_ZERO(&cfg.cpu_set);

	int opt;
	while ((opt = getopt(argc, argv, "c:C:o:rs:t:wy")) != -1) {
		switch (opt) {
		case 'c':
			cfg.read_count = atol(optarg);
			break;
		case 'o':
			cfg.ofs = atol(optarg);
			break;
		case 'r': /* random */
			cfg.sequential = false;
			break;
		case 's':
			cfg.size = atol(optarg);
			break;
		case 't':
			cfg.num_threads = atol(optarg);
			break;
		case 'C':
			CPU_SET(atol(optarg), &cfg.cpu_set);
			break;
		case 'w':
			cfg.write = true;
			break;
		case 'y':
			cfg.use_cycles = true;
			break;
		default: /* '?' */
			fprintf(stderr, "Usage: %s ... TODO\n", argv[0]);
			exit(1);
		}
	}
#endif

	/*
	 * Validate with CHECKTRUE rather than assert(): the checks guard
	 * array accesses and must stay active in NDEBUG builds and in the
	 * inmate, whose command line was previously not validated at all.
	 */
	if (cfg.size != 0)
		CHECKTRUE(cfg.size >= sizeof(struct s) &&
			  cfg.size <= sizeof(array[0]),
			  "size must cover one cache line and fit the per-CPU array");
	if (cfg.write)
		CHECKTRUE(cfg.ofs < DUMMY_WORDS,
			  "write offset is outside of the dummy[] array");

	srand(time(NULL));

	if (cfg.use_cycles)
		ccntr_init();

	if (cfg.size != 0) {
		run_benchmark(&cfg);
	} else {
		unsigned order, step;

		/* Sweep 2^order and 1.5 * 2^order for order = 10 .. 24. */
		for (order = 10; order <= 24; order++) {
			for (step = 0; step < 2; step++) {
				unsigned size = 1u << order;

				if (step == 1)
					size += size / 2;

				cfg.size = size;
				run_benchmark(&cfg);
			}
		}
	}
#ifndef JAILHOUSE
	return 0;
#endif
}