/*
 * Jailhouse, a Linux-based partitioning hypervisor
 *
 * Copyright (c) Siemens AG, 2013
 *
 * Authors:
 *  Jan Kiszka <jan.kiszka@siemens.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */

#include <jailhouse/entry.h>
#include <jailhouse/control.h>
#include <jailhouse/printk.h>
#include <jailhouse/paging.h>
#include <jailhouse/processor.h>
#include <jailhouse/string.h>
#include <asm/bitops.h>
#include <asm/spinlock.h>

struct jailhouse_system *system_config;

static DEFINE_SPINLOCK(shutdown_lock);
static unsigned int num_cells = 1;

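/*
 * All cells are kept on a NULL-terminated, singly-linked list that is
 * headed by the root cell.
 */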
#define for_each_cell(c)        for ((c) = &root_cell; (c); (c) = (c)->next)
#define for_each_non_root_cell(c) \
        for ((c) = root_cell.next; (c); (c) = (c)->next)

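/*
 * Return the next CPU in cpu_set after cpu, skipping the CPU passed as
 * exception (-1 for none). The result exceeds cpu_set->max_cpu_id once
 * the set is exhausted.
 */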
unsigned int next_cpu(unsigned int cpu, struct cpu_set *cpu_set, int exception)
{
        do
                cpu++;
        while (cpu <= cpu_set->max_cpu_id &&
               (cpu == exception || !test_bit(cpu, cpu_set->bitmap)));
        return cpu;
}

bool cpu_id_valid(unsigned long cpu_id)
{
        const unsigned long *system_cpu_set =
                jailhouse_cell_cpu_set(&system_config->system);

        return (cpu_id < system_config->system.cpu_set_size * 8 &&
                test_bit(cpu_id, system_cpu_set));
}

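/*
 * cell_suspend/cell_resume stop and restart all CPUs of a cell except the
 * one the request is issued from.
 */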
static void cell_suspend(struct cell *cell, struct per_cpu *cpu_data)
{
        unsigned int cpu;

        for_each_cpu_except(cpu, cell->cpu_set, cpu_data->cpu_id)
                arch_suspend_cpu(cpu);
}

static void cell_resume(struct per_cpu *cpu_data)
{
        unsigned int cpu;

        for_each_cpu_except(cpu, cpu_data->cell->cpu_set, cpu_data->cpu_id)
                arch_resume_cpu(cpu);
}

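/*
 * Return the lowest cell ID not yet in use, restarting the scan over the
 * cell list after every collision.
 */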
static unsigned int get_free_cell_id(void)
{
        unsigned int id = 0;
        struct cell *cell;

retry:
        for_each_cell(cell)
                if (cell->id == id) {
                        id++;
                        goto retry;
                }

        return id;
}

/* cell must be zero-initialized */
int cell_init(struct cell *cell, bool copy_cpu_set)
{
        const unsigned long *config_cpu_set =
                jailhouse_cell_cpu_set(cell->config);
        unsigned long cpu_set_size = cell->config->cpu_set_size;
        struct cpu_set *cpu_set;

        cell->id = get_free_cell_id();

        if (cpu_set_size > PAGE_SIZE)
                return -EINVAL;
        else if (cpu_set_size > sizeof(cell->small_cpu_set.bitmap)) {
                cpu_set = page_alloc(&mem_pool, 1);
                if (!cpu_set)
                        return -ENOMEM;
                cpu_set->max_cpu_id =
                        ((PAGE_SIZE - sizeof(unsigned long)) * 8) - 1;
        } else {
                cpu_set = &cell->small_cpu_set;
                cpu_set->max_cpu_id =
                        (sizeof(cell->small_cpu_set.bitmap) * 8) - 1;
        }
        cell->cpu_set = cpu_set;
        if (copy_cpu_set)
                memcpy(cell->cpu_set->bitmap, config_cpu_set, cpu_set_size);

        return 0;
}

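/*
 * Release the CPU set of a cell if it was allocated from the page pool
 * rather than embedded in the cell structure.
 */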
static void destroy_cpu_set(struct cell *cell)
{
        if (cell->cpu_set != &cell->small_cpu_set)
                page_free(&mem_pool, cell->cpu_set, 1);
}

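/*
 * Reject cell configurations that contain memory regions which are not
 * page-aligned or that use unknown flags.
 */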
int check_mem_regions(const struct jailhouse_cell_desc *config)
{
        const struct jailhouse_memory *mem =
                jailhouse_cell_mem_regions(config);
        unsigned int n;

        for (n = 0; n < config->num_memory_regions; n++, mem++) {
                if (mem->phys_start & ~PAGE_MASK ||
                    mem->virt_start & ~PAGE_MASK ||
                    mem->size & ~PAGE_MASK ||
                    mem->flags & ~JAILHOUSE_MEM_VALID_FLAGS) {
                        printk("FATAL: Invalid memory region "
                               "(%p, %p, %p, %x)\n",
                               mem->phys_start, mem->virt_start, mem->size,
                               mem->flags);
                        return -EINVAL;
                }
        }
        return 0;
}

static bool address_in_region(unsigned long addr,
                              const struct jailhouse_memory *region)
{
        return addr >= region->phys_start &&
               addr < (region->phys_start + region->size);
}

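/*
 * Map back into the root cell every part of mem that overlaps one of the
 * root cell's own memory regions. Used when a cell is destroyed or its
 * creation fails.
 */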
static void remap_to_root_cell(const struct jailhouse_memory *mem)
{
        const struct jailhouse_memory *root_mem =
                jailhouse_cell_mem_regions(root_cell.config);
        struct jailhouse_memory overlap;
        unsigned int n;

        for (n = 0; n < root_cell.config->num_memory_regions;
             n++, root_mem++) {
                if (address_in_region(mem->phys_start, root_mem)) {
                        overlap.phys_start = mem->phys_start;
                        overlap.size = root_mem->size -
                                (overlap.phys_start - root_mem->phys_start);
                        if (overlap.size > mem->size)
                                overlap.size = mem->size;
                } else if (address_in_region(root_mem->phys_start, mem)) {
                        overlap.phys_start = root_mem->phys_start;
                        overlap.size = mem->size -
                                (overlap.phys_start - mem->phys_start);
                        if (overlap.size > root_mem->size)
                                overlap.size = root_mem->size;
                } else
                        continue;

                overlap.virt_start = root_mem->virt_start +
                        overlap.phys_start - root_mem->phys_start;
                overlap.flags = root_mem->flags;

                if (arch_map_memory_region(&root_cell, &overlap) != 0)
                        printk("WARNING: Failed to re-assign memory region "
                               "to root cell\n");
        }
}

static int cell_create(struct per_cpu *cpu_data, unsigned long config_address)
{
        unsigned long mapping_addr = TEMPORARY_MAPPING_CPU_BASE(cpu_data);
        unsigned long cfg_page_offs = config_address & ~PAGE_MASK;
        unsigned long cfg_header_size, cfg_total_size;
        const struct jailhouse_memory *mem;
        struct jailhouse_cell_desc *cfg;
        unsigned int cell_pages, cpu, n;
        struct cpu_set *shrinking_set;
        struct jailhouse_memory tmp;
        struct cell *cell, *last;
        int err;

        /* Cell creation is only supported from the root cell. */
        if (cpu_data->cell != &root_cell)
                return -EPERM;

        cell_suspend(&root_cell, cpu_data);

        cfg_header_size = cfg_page_offs + sizeof(struct jailhouse_cell_desc);

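        /* Map the config header first; it tells us the total config size. */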
        err = page_map_create(&hv_paging_structs, config_address & PAGE_MASK,
                              cfg_header_size, mapping_addr,
                              PAGE_READONLY_FLAGS, PAGE_MAP_NON_COHERENT);
        if (err)
                goto err_resume;

        cfg = (struct jailhouse_cell_desc *)(mapping_addr + cfg_page_offs);
        cfg_total_size = jailhouse_cell_config_size(cfg);
        if (cfg_total_size + cfg_page_offs > NUM_TEMPORARY_PAGES * PAGE_SIZE) {
                err = -E2BIG;
                goto err_resume;
        }

        for_each_cell(cell)
                if (strcmp(cell->config->name, cfg->name) == 0) {
                        err = -EEXIST;
                        goto err_resume;
                }

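        /* Now that the full size is known, map the complete configuration. */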
        err = page_map_create(&hv_paging_structs, config_address & PAGE_MASK,
                              cfg_total_size + cfg_page_offs, mapping_addr,
                              PAGE_READONLY_FLAGS, PAGE_MAP_NON_COHERENT);
        if (err)
                goto err_resume;

        err = check_mem_regions(cfg);
        if (err)
                goto err_resume;

        cell_pages = PAGE_ALIGN(sizeof(*cell) + cfg_total_size) / PAGE_SIZE;
        cell = page_alloc(&mem_pool, cell_pages);
        if (!cell) {
                err = -ENOMEM;
                goto err_resume;
        }

        cell->data_pages = cell_pages;
        cell->config = ((void *)cell) + sizeof(*cell);
        memcpy(cell->config, cfg, cfg_total_size);

        err = cell_init(cell, true);
        if (err)
                goto err_free_cell;

        /* don't assign the CPU we are currently running on */
        if (cpu_data->cpu_id <= cell->cpu_set->max_cpu_id &&
            test_bit(cpu_data->cpu_id, cell->cpu_set->bitmap)) {
                err = -EBUSY;
                goto err_free_cpu_set;
        }

        shrinking_set = cpu_data->cell->cpu_set;

        /* the shrinking set must be a superset of the new cell's CPU set */
        if (shrinking_set->max_cpu_id < cell->cpu_set->max_cpu_id) {
                err = -EBUSY;
                goto err_free_cpu_set;
        }
        for_each_cpu(cpu, cell->cpu_set)
                if (!test_bit(cpu, shrinking_set->bitmap)) {
                        err = -EBUSY;
                        goto err_free_cpu_set;
                }

        for_each_cpu(cpu, cell->cpu_set)
                clear_bit(cpu, shrinking_set->bitmap);

        /* unmap the new cell's memory regions from the root cell */
        mem = jailhouse_cell_mem_regions(cell->config);
        for (n = 0; n < cell->config->num_memory_regions; n++, mem++)
                /*
                 * Exceptions:
                 *  - the communication region is not backed by root memory
                 */
                if (!(mem->flags & JAILHOUSE_MEM_COMM_REGION)) {
                        /*
                         * arch_unmap_memory_region uses the virtual address of
                         * the memory region. As only the root cell has a
                         * guaranteed 1:1 mapping, make a copy where we ensure
                         * this.
                         */
                        tmp = *mem;
                        tmp.virt_start = tmp.phys_start;
                        err = arch_unmap_memory_region(&root_cell, &tmp);
                        if (err)
                                goto err_restore_root;
                }

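        /* memory and CPUs are carved out; set up the arch-specific part */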
        err = arch_cell_create(cpu_data, cell);
        if (err)
                goto err_restore_root;

        cell->comm_page.comm_region.cell_state = JAILHOUSE_CELL_SHUT_DOWN;

        last = &root_cell;
        while (last->next)
                last = last->next;
        last->next = cell;
        num_cells++;

        for_each_cpu(cpu, cell->cpu_set) {
                per_cpu(cpu)->cell = cell;
                arch_park_cpu(cpu);
        }

        printk("Created cell \"%s\"\n", cell->config->name);

        page_map_dump_stats("after cell creation");

        cell_resume(cpu_data);

        return cell->id;

err_restore_root:
        mem = jailhouse_cell_mem_regions(cell->config);
        for (n = 0; n < cell->config->num_memory_regions; n++, mem++)
                remap_to_root_cell(mem);
        for_each_cpu(cpu, cell->cpu_set)
                set_bit(cpu, shrinking_set->bitmap);
err_free_cpu_set:
        destroy_cpu_set(cell);
err_free_cell:
        page_free(&mem_pool, cell, cell_pages);

err_resume:
        cell_resume(cpu_data);

        return err;
}

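/*
 * Ask a cell via its communication region whether it is ready to be shut
 * down and spin until it agrees, refuses, or reports a terminal state.
 * Cells with a passive communication region are shut down unconditionally.
 */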
static bool cell_shutdown_ok(struct cell *cell)
{
        volatile u32 *reply = &cell->comm_page.comm_region.reply_from_cell;
        volatile u32 *cell_state = &cell->comm_page.comm_region.cell_state;

        if (cell->config->flags & JAILHOUSE_CELL_PASSIVE_COMMREG)
                return true;

        jailhouse_send_msg_to_cell(&cell->comm_page.comm_region,
                                   JAILHOUSE_MSG_SHUTDOWN_REQUESTED);

        while (*reply != JAILHOUSE_MSG_SHUTDOWN_DENIED) {
                if (*reply == JAILHOUSE_MSG_SHUTDOWN_OK ||
                    *cell_state == JAILHOUSE_CELL_SHUT_DOWN ||
                    *cell_state == JAILHOUSE_CELL_FAILED)
                        return true;
                cpu_relax();
        }
        return false;
}

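/*
 * Common entry code of cell_start and cell_destroy: suspend the root cell,
 * look up the target cell by ID and negotiate its shutdown. On success, the
 * target cell is suspended as well.
 */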
static int cell_management_prologue(struct per_cpu *cpu_data, unsigned long id,
                                    struct cell **cell_ptr)
{
        /* Management commands are only accepted from the root cell. */
        if (cpu_data->cell != &root_cell)
                return -EPERM;

        cell_suspend(&root_cell, cpu_data);

        for_each_cell(*cell_ptr)
                if ((*cell_ptr)->id == id)
                        break;

        if (!*cell_ptr) {
                cell_resume(cpu_data);
                return -ENOENT;
        }

        /* the root cell cannot be managed */
        if (*cell_ptr == &root_cell) {
                cell_resume(cpu_data);
                return -EINVAL;
        }

        if (!cell_shutdown_ok(*cell_ptr)) {
                cell_resume(cpu_data);
                return -EPERM;
        }

        cell_suspend(*cell_ptr, cpu_data);

        return 0;
}

static int cell_start(struct per_cpu *cpu_data, unsigned long id)
{
        struct cell *cell;
        unsigned int cpu;
        int err;

        err = cell_management_prologue(cpu_data, id, &cell);
        if (err)
                return err;

        /* present a consistent Communication Region state to the cell */
        cell->comm_page.comm_region.cell_state = JAILHOUSE_CELL_RUNNING;
        cell->comm_page.comm_region.msg_to_cell = JAILHOUSE_MSG_NONE;

        for_each_cpu(cpu, cell->cpu_set) {
                per_cpu(cpu)->failed = false;
                arch_reset_cpu(cpu);
        }

        printk("Started cell \"%s\"\n", cell->config->name);

        cell_resume(cpu_data);

        return 0;
}

static int cell_destroy(struct per_cpu *cpu_data, unsigned long id)
{
        const struct jailhouse_memory *mem;
        struct cell *cell, *previous;
        unsigned int cpu, n;
        int err;

        err = cell_management_prologue(cpu_data, id, &cell);
        if (err)
                return err;

        printk("Closing cell \"%s\"\n", cell->config->name);

        for_each_cpu(cpu, cell->cpu_set) {
                arch_park_cpu(cpu);

                set_bit(cpu, root_cell.cpu_set->bitmap);
                per_cpu(cpu)->cell = &root_cell;
                per_cpu(cpu)->failed = false;
        }

        mem = jailhouse_cell_mem_regions(cell->config);
        for (n = 0; n < cell->config->num_memory_regions; n++, mem++) {
                /*
                 * This cannot fail. The region was mapped as a whole before,
                 * thus no hugepages need to be broken up to unmap it.
                 */
                arch_unmap_memory_region(cell, mem);
                if (!(mem->flags & JAILHOUSE_MEM_COMM_REGION))
                        remap_to_root_cell(mem);
        }

        arch_cell_destroy(cpu_data, cell);

        previous = &root_cell;
        while (previous->next != cell)
                previous = previous->next;
        previous->next = cell->next;
        num_cells--;

        page_free(&mem_pool, cell, cell->data_pages);
        page_map_dump_stats("after cell destruction");

        cell_resume(cpu_data);

        return 0;
}

static int cell_get_state(struct per_cpu *cpu_data, unsigned long id)
{
        struct cell *cell;

        if (cpu_data->cell != &root_cell)
                return -EPERM;

        /*
         * We do not need explicit synchronization with cell_create/destroy
         * because their cell_suspend(root_cell) will not return before we
         * have left this hypercall.
         */
        for_each_cell(cell)
                if (cell->id == id) {
                        u32 state = cell->comm_page.comm_region.cell_state;

                        switch (state) {
                        case JAILHOUSE_CELL_RUNNING:
                        case JAILHOUSE_CELL_SHUT_DOWN:
                        case JAILHOUSE_CELL_FAILED:
                                return state;
                        default:
                                return -EINVAL;
                        }
                }
        return -ENOENT;
}

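/*
 * Disable the hypervisor. The first root-cell CPU to arrive negotiates the
 * shutdown with all non-root cells and releases every CPU; the remaining
 * root-cell CPUs only pick up the recorded result.
 */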
static int shutdown(struct per_cpu *cpu_data)
{
        unsigned int this_cpu = cpu_data->cpu_id;
        struct cell *cell;
        unsigned int cpu;
        int state, ret;

        /* Shutdown is only supported from the root cell. */
        if (cpu_data->cell != &root_cell)
                return -EPERM;

        spin_lock(&shutdown_lock);

        if (cpu_data->shutdown_state == SHUTDOWN_NONE) {
                state = SHUTDOWN_STARTED;
                for_each_non_root_cell(cell)
                        if (!cell_shutdown_ok(cell))
                                state = -EPERM;

                if (state == SHUTDOWN_STARTED) {
                        printk("Shutting down hypervisor\n");

                        for_each_non_root_cell(cell) {
                                cell_suspend(cell, cpu_data);

                                printk("Closing cell \"%s\"\n",
                                       cell->config->name);

                                for_each_cpu(cpu, cell->cpu_set) {
                                        printk(" Releasing CPU %d\n", cpu);
                                        arch_shutdown_cpu(cpu);
                                }
                        }

                        printk("Closing root cell \"%s\"\n",
                               root_cell.config->name);
                        arch_shutdown();
                }

                for_each_cpu(cpu, root_cell.cpu_set)
                        per_cpu(cpu)->shutdown_state = state;
        }

        if (cpu_data->shutdown_state == SHUTDOWN_STARTED) {
                printk(" Releasing CPU %d\n", this_cpu);
                ret = 0;
        } else
                ret = cpu_data->shutdown_state;
        cpu_data->shutdown_state = SHUTDOWN_NONE;

        spin_unlock(&shutdown_lock);

        return ret;
}

static long hypervisor_get_info(struct per_cpu *cpu_data, unsigned long type)
{
        switch (type) {
        case JAILHOUSE_INFO_MEM_POOL_SIZE:
                return mem_pool.pages;
        case JAILHOUSE_INFO_MEM_POOL_USED:
                return mem_pool.used_pages;
        case JAILHOUSE_INFO_REMAP_POOL_SIZE:
                return remap_pool.pages;
        case JAILHOUSE_INFO_REMAP_POOL_USED:
                return remap_pool.used_pages;
        case JAILHOUSE_INFO_NUM_CELLS:
                return num_cells;
        default:
                return -EINVAL;
        }
}

static int cpu_get_state(struct per_cpu *cpu_data, unsigned long cpu_id)
{
        if (!cpu_id_valid(cpu_id))
                return -EINVAL;

        /*
         * We do not need explicit synchronization with cell_destroy because
         * its cell_suspend(root_cell + this_cell) will not return before we
         * have left this hypercall.
         */
        if (cpu_data->cell != &root_cell &&
            (cpu_id > cpu_data->cell->cpu_set->max_cpu_id ||
             !test_bit(cpu_id, cpu_data->cell->cpu_set->bitmap)))
                return -EPERM;

        return per_cpu(cpu_id)->failed ? JAILHOUSE_CPU_FAILED :
                JAILHOUSE_CPU_RUNNING;
}

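/*
 * Central hypercall dispatcher, invoked from the architecture-specific
 * entry path.
 */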
long hypercall(struct per_cpu *cpu_data, unsigned long code, unsigned long arg)
{
        switch (code) {
        case JAILHOUSE_HC_DISABLE:
                return shutdown(cpu_data);
        case JAILHOUSE_HC_CELL_CREATE:
                return cell_create(cpu_data, arg);
        case JAILHOUSE_HC_CELL_START:
                return cell_start(cpu_data, arg);
        case JAILHOUSE_HC_CELL_DESTROY:
                return cell_destroy(cpu_data, arg);
        case JAILHOUSE_HC_HYPERVISOR_GET_INFO:
                return hypervisor_get_info(cpu_data, arg);
        case JAILHOUSE_HC_CELL_GET_STATE:
                return cell_get_state(cpu_data, arg);
        case JAILHOUSE_HC_CPU_GET_STATE:
                return cpu_get_state(cpu_data, arg);
        default:
                return -ENOSYS;
        }
}

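/* Stop execution of the calling CPU in reaction to an unrecoverable error. */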
void panic_stop(struct per_cpu *cpu_data)
{
        panic_printk("Stopping CPU");
        if (cpu_data) {
                panic_printk(" %d", cpu_data->cpu_id);
                cpu_data->cpu_stopped = true;
        }
        panic_printk("\n");

        if (phys_processor_id() == panic_cpu)
                panic_in_progress = 0;

        arch_panic_stop(cpu_data);
}

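/*
 * Park the calling CPU in reaction to a cell-level failure. If all CPUs of
 * the cell have failed, the cell is marked as failed in its communication
 * region.
 */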
void panic_halt(struct per_cpu *cpu_data)
{
        struct cell *cell = cpu_data->cell;
        bool cell_failed = true;
        unsigned int cpu;

        panic_printk("Parking CPU %d\n", cpu_data->cpu_id);

        cpu_data->failed = true;
        for_each_cpu(cpu, cell->cpu_set)
                if (!per_cpu(cpu)->failed) {
                        cell_failed = false;
                        break;
                }
        if (cell_failed)
                cell->comm_page.comm_region.cell_state = JAILHOUSE_CELL_FAILED;

        arch_panic_halt(cpu_data);

        if (phys_processor_id() == panic_cpu)
                panic_in_progress = 0;
}