/*
 * Jailhouse, a Linux-based partitioning hypervisor
 *
 * Copyright (c) Siemens AG, 2013-2016
 *
 * Authors:
 *  Jan Kiszka <jan.kiszka@siemens.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */
13 #include <jailhouse/entry.h>
14 #include <jailhouse/control.h>
15 #include <jailhouse/mmio.h>
16 #include <jailhouse/printk.h>
17 #include <jailhouse/paging.h>
18 #include <jailhouse/processor.h>
19 #include <jailhouse/string.h>
20 #include <jailhouse/utils.h>
21 #include <asm/bitops.h>
22 #include <asm/spinlock.h>
24 enum msg_type {MSG_REQUEST, MSG_INFORMATION};
25 enum failure_mode {ABORT_ON_ERROR, WARN_ON_ERROR};
26 enum management_task {CELL_START, CELL_SET_LOADABLE, CELL_DESTROY};
28 /** System configuration as used while activating the hypervisor. */
29 struct jailhouse_system *system_config;
30 /** State structure of the root cell. @ingroup Control */
31 struct cell root_cell;
33 static DEFINE_SPINLOCK(shutdown_lock);
34 static unsigned int num_cells = 1;
38 * @param cpu Previous CPU ID.
39 * @param cpu_set CPU set to iterate over.
40 * @param exception CPU ID to skip if it is contained.
42 * @return Next CPU ID in the set.
44 * @note For internal use only. Use for_each_cpu() or for_each_cpu_except()
47 unsigned int next_cpu(unsigned int cpu, struct cpu_set *cpu_set, int exception)
51 while (cpu <= cpu_set->max_cpu_id &&
52 (cpu == exception || !test_bit(cpu, cpu_set->bitmap)));
57 * Check if a CPU ID is contained in the system's CPU set, i.e. the initial CPU
58 * set of the root cell.
59 * @param cpu_id CPU ID to check.
61 * @return True if CPU ID is valid.
63 bool cpu_id_valid(unsigned long cpu_id)
65 const unsigned long *system_cpu_set =
66 jailhouse_cell_cpu_set(&system_config->root_cell);
68 return (cpu_id < system_config->root_cell.cpu_set_size * 8 &&
69 test_bit(cpu_id, system_cpu_set));
72 static void cell_suspend(struct cell *cell, struct per_cpu *cpu_data)
76 for_each_cpu_except(cpu, cell->cpu_set, cpu_data->cpu_id)
77 arch_suspend_cpu(cpu);
80 static void cell_resume(struct per_cpu *cpu_data)
84 for_each_cpu_except(cpu, cpu_data->cell->cpu_set, cpu_data->cpu_id)
89 * Deliver a message to cell and wait for the reply.
90 * @param cell Target cell.
91 * @param message Message code to be sent (JAILHOUSE_MSG_*).
92 * @param type Message type, defines the valid replies.
94 * @return True if a request message was approved or reception of an
95 * informational message was acknowledged by the target cell. It also
96 * returns true if the target cell does not support an active
97 * communication region, is shut down or in failed state. Returns
98 * false on request denial or invalid replies.
100 static bool cell_send_message(struct cell *cell, u32 message,
103 if (cell->config->flags & JAILHOUSE_CELL_PASSIVE_COMMREG)
106 jailhouse_send_msg_to_cell(&cell->comm_page.comm_region, message);
109 u32 reply = cell->comm_page.comm_region.reply_from_cell;
110 u32 cell_state = cell->comm_page.comm_region.cell_state;
112 if (cell_state == JAILHOUSE_CELL_SHUT_DOWN ||
113 cell_state == JAILHOUSE_CELL_FAILED)
116 if ((type == MSG_REQUEST &&
117 reply == JAILHOUSE_MSG_REQUEST_APPROVED) ||
118 (type == MSG_INFORMATION &&
119 reply == JAILHOUSE_MSG_RECEIVED))
122 if (reply != JAILHOUSE_MSG_NONE)
129 static bool cell_reconfig_ok(struct cell *excluded_cell)
133 for_each_non_root_cell(cell)
134 if (cell != excluded_cell &&
135 cell->comm_page.comm_region.cell_state ==
136 JAILHOUSE_CELL_RUNNING_LOCKED)
141 static void cell_reconfig_completed(void)
145 for_each_non_root_cell(cell)
146 cell_send_message(cell, JAILHOUSE_MSG_RECONFIG_COMPLETED,
150 static unsigned int get_free_cell_id(void)
157 if (cell->id == id) {
166 * Initialize a new cell.
167 * @param cell Cell to be initializes.
169 * @return 0 on success, negative error code otherwise.
171 * @note The cell data structure must be zero-initialized.
173 int cell_init(struct cell *cell)
175 const unsigned long *config_cpu_set =
176 jailhouse_cell_cpu_set(cell->config);
177 unsigned long cpu_set_size = cell->config->cpu_set_size;
178 struct cpu_set *cpu_set;
181 cell->id = get_free_cell_id();
183 if (cpu_set_size > PAGE_SIZE)
184 return trace_error(-EINVAL);
185 if (cpu_set_size > sizeof(cell->small_cpu_set.bitmap)) {
186 cpu_set = page_alloc(&mem_pool, 1);
190 cpu_set = &cell->small_cpu_set;
192 cpu_set->max_cpu_id = cpu_set_size * 8 - 1;
193 memcpy(cpu_set->bitmap, config_cpu_set, cpu_set_size);
195 cell->cpu_set = cpu_set;
197 err = mmio_cell_init(cell);
198 if (err && cell->cpu_set != &cell->small_cpu_set)
199 page_free(&mem_pool, cell->cpu_set, 1);
204 static void cell_exit(struct cell *cell)
206 mmio_cell_exit(cell);
208 if (cell->cpu_set != &cell->small_cpu_set)
209 page_free(&mem_pool, cell->cpu_set, 1);
213 * Apply system configuration changes.
214 * @param cell_added_removed Cell that was added or removed to/from the
217 * @see arch_config_commit
218 * @see pci_config_commit
220 void config_commit(struct cell *cell_added_removed)
222 arch_flush_cell_vcpu_caches(&root_cell);
223 if (cell_added_removed && cell_added_removed != &root_cell)
224 arch_flush_cell_vcpu_caches(cell_added_removed);
226 arch_config_commit(cell_added_removed);
229 static bool address_in_region(unsigned long addr,
230 const struct jailhouse_memory *region)
232 return addr >= region->phys_start &&
233 addr < (region->phys_start + region->size);
236 static int unmap_from_root_cell(const struct jailhouse_memory *mem)
239 * arch_unmap_memory_region and mmio_subpage_unregister use the
240 * virtual address of the memory region for their job. As only the root
241 * cell has a guaranteed 1:1 mapping, make a copy where we ensure this.
243 struct jailhouse_memory tmp = *mem;
245 tmp.virt_start = tmp.phys_start;
247 if (JAILHOUSE_MEMORY_IS_SUBPAGE(&tmp)) {
248 mmio_subpage_unregister(&root_cell, &tmp);
252 return arch_unmap_memory_region(&root_cell, &tmp);
255 static int remap_to_root_cell(const struct jailhouse_memory *mem,
256 enum failure_mode mode)
258 const struct jailhouse_memory *root_mem;
259 struct jailhouse_memory overlap;
263 for_each_mem_region(root_mem, root_cell.config, n) {
264 if (address_in_region(mem->phys_start, root_mem)) {
265 overlap.phys_start = mem->phys_start;
266 overlap.size = root_mem->size -
267 (overlap.phys_start - root_mem->phys_start);
268 if (overlap.size > mem->size)
269 overlap.size = mem->size;
270 } else if (address_in_region(root_mem->phys_start, mem)) {
271 overlap.phys_start = root_mem->phys_start;
272 overlap.size = mem->size -
273 (overlap.phys_start - mem->phys_start);
274 if (overlap.size > root_mem->size)
275 overlap.size = root_mem->size;
279 overlap.virt_start = root_mem->virt_start +
280 overlap.phys_start - root_mem->phys_start;
281 overlap.flags = root_mem->flags;
283 if (JAILHOUSE_MEMORY_IS_SUBPAGE(&overlap))
284 err = mmio_subpage_register(&root_cell, &overlap);
286 err = arch_map_memory_region(&root_cell, &overlap);
288 if (mode == ABORT_ON_ERROR)
290 printk("WARNING: Failed to re-assign memory region "
297 static void cell_destroy_internal(struct per_cpu *cpu_data, struct cell *cell)
299 const struct jailhouse_memory *mem;
302 for_each_cpu(cpu, cell->cpu_set) {
305 set_bit(cpu, root_cell.cpu_set->bitmap);
306 per_cpu(cpu)->cell = &root_cell;
307 per_cpu(cpu)->failed = false;
308 memset(per_cpu(cpu)->stats, 0, sizeof(per_cpu(cpu)->stats));
311 for_each_mem_region(mem, cell->config, n) {
312 if (!JAILHOUSE_MEMORY_IS_SUBPAGE(mem))
314 * This cannot fail. The region was mapped as a whole
315 * before, thus no hugepages need to be broken up to
318 arch_unmap_memory_region(cell, mem);
320 if (!(mem->flags & (JAILHOUSE_MEM_COMM_REGION |
321 JAILHOUSE_MEM_ROOTSHARED)))
322 remap_to_root_cell(mem, WARN_ON_ERROR);
325 arch_cell_destroy(cell);
332 static int cell_create(struct per_cpu *cpu_data, unsigned long config_address)
334 unsigned long cfg_page_offs = config_address & ~PAGE_MASK;
335 unsigned int cfg_pages, cell_pages, cpu, n;
336 const struct jailhouse_memory *mem;
337 struct jailhouse_cell_desc *cfg;
338 unsigned long cfg_total_size;
339 struct cell *cell, *last;
343 /* We do not support creation over non-root cells. */
344 if (cpu_data->cell != &root_cell)
347 cell_suspend(&root_cell, cpu_data);
349 if (!cell_reconfig_ok(NULL)) {
354 cfg_pages = PAGES(cfg_page_offs + sizeof(struct jailhouse_cell_desc));
355 cfg_mapping = paging_get_guest_pages(NULL, config_address, cfg_pages,
356 PAGE_READONLY_FLAGS);
362 cfg = (struct jailhouse_cell_desc *)(cfg_mapping + cfg_page_offs);
366 * No bound checking needed, thus strcmp is safe here because
367 * sizeof(cell->config->name) == sizeof(cfg->name) and
368 * cell->config->name is guaranteed to be null-terminated.
370 if (strcmp(cell->config->name, cfg->name) == 0) {
375 cfg_total_size = jailhouse_cell_config_size(cfg);
376 cfg_pages = PAGES(cfg_page_offs + cfg_total_size);
377 if (cfg_pages > NUM_TEMPORARY_PAGES) {
378 err = trace_error(-E2BIG);
382 if (!paging_get_guest_pages(NULL, config_address, cfg_pages,
383 PAGE_READONLY_FLAGS)) {
388 cell_pages = PAGES(sizeof(*cell) + cfg_total_size);
389 cell = page_alloc(&mem_pool, cell_pages);
395 cell->data_pages = cell_pages;
396 cell->config = ((void *)cell) + sizeof(*cell);
397 memcpy(cell->config, cfg, cfg_total_size);
399 err = cell_init(cell);
403 /* don't assign the CPU we are currently running on */
404 if (cell_owns_cpu(cell, cpu_data->cpu_id)) {
405 err = trace_error(-EBUSY);
409 /* the root cell's cpu set must be super-set of new cell's set */
410 for_each_cpu(cpu, cell->cpu_set)
411 if (!cell_owns_cpu(&root_cell, cpu)) {
412 err = trace_error(-EBUSY);
416 err = arch_cell_create(cell);
420 for_each_cpu(cpu, cell->cpu_set) {
423 clear_bit(cpu, root_cell.cpu_set->bitmap);
424 per_cpu(cpu)->cell = cell;
425 memset(per_cpu(cpu)->stats, 0, sizeof(per_cpu(cpu)->stats));
429 * Unmap the cell's memory regions from the root cell and map them to
430 * the new cell instead.
432 for_each_mem_region(mem, cell->config, n) {
435 * - the communication region is not backed by root memory
436 * - regions that may be shared with the root cell
438 if (!(mem->flags & (JAILHOUSE_MEM_COMM_REGION |
439 JAILHOUSE_MEM_ROOTSHARED))) {
440 err = unmap_from_root_cell(mem);
442 goto err_destroy_cell;
445 if (JAILHOUSE_MEMORY_IS_SUBPAGE(mem))
446 err = mmio_subpage_register(cell, mem);
448 err = arch_map_memory_region(cell, mem);
450 goto err_destroy_cell;
455 cell->comm_page.comm_region.cell_state = JAILHOUSE_CELL_SHUT_DOWN;
463 cell_reconfig_completed();
465 printk("Created cell \"%s\"\n", cell->config->name);
467 paging_dump_stats("after cell creation");
469 cell_resume(cpu_data);
474 cell_destroy_internal(cpu_data, cell);
475 /* cell_destroy_internal already calls cell_exit */
480 page_free(&mem_pool, cell, cell_pages);
482 cell_resume(cpu_data);
487 static bool cell_shutdown_ok(struct cell *cell)
489 return cell_send_message(cell, JAILHOUSE_MSG_SHUTDOWN_REQUEST,
493 static int cell_management_prologue(enum management_task task,
494 struct per_cpu *cpu_data, unsigned long id,
495 struct cell **cell_ptr)
497 /* We do not support management commands over non-root cells. */
498 if (cpu_data->cell != &root_cell)
501 cell_suspend(&root_cell, cpu_data);
503 for_each_cell(*cell_ptr)
504 if ((*cell_ptr)->id == id)
508 cell_resume(cpu_data);
512 /* root cell cannot be managed */
513 if (*cell_ptr == &root_cell) {
514 cell_resume(cpu_data);
518 if ((task == CELL_DESTROY && !cell_reconfig_ok(*cell_ptr)) ||
519 !cell_shutdown_ok(*cell_ptr)) {
520 cell_resume(cpu_data);
524 cell_suspend(*cell_ptr, cpu_data);
529 static int cell_start(struct per_cpu *cpu_data, unsigned long id)
531 const struct jailhouse_memory *mem;
536 err = cell_management_prologue(CELL_START, cpu_data, id, &cell);
540 if (cell->loadable) {
541 /* unmap all loadable memory regions from the root cell */
542 for_each_mem_region(mem, cell->config, n)
543 if (mem->flags & JAILHOUSE_MEM_LOADABLE) {
544 err = unmap_from_root_cell(mem);
551 cell->loadable = false;
554 /* present a consistent Communication Region state to the cell */
555 cell->comm_page.comm_region.cell_state = JAILHOUSE_CELL_RUNNING;
556 cell->comm_page.comm_region.msg_to_cell = JAILHOUSE_MSG_NONE;
558 for_each_cpu(cpu, cell->cpu_set) {
559 per_cpu(cpu)->failed = false;
563 printk("Started cell \"%s\"\n", cell->config->name);
566 cell_resume(cpu_data);
571 static int cell_set_loadable(struct per_cpu *cpu_data, unsigned long id)
573 const struct jailhouse_memory *mem;
578 err = cell_management_prologue(CELL_SET_LOADABLE, cpu_data, id, &cell);
582 for_each_cpu(cpu, cell->cpu_set) {
583 per_cpu(cpu)->failed = false;
590 cell->comm_page.comm_region.cell_state = JAILHOUSE_CELL_SHUT_DOWN;
591 cell->loadable = true;
593 /* map all loadable memory regions into the root cell */
594 for_each_mem_region(mem, cell->config, n)
595 if (mem->flags & JAILHOUSE_MEM_LOADABLE) {
596 err = remap_to_root_cell(mem, ABORT_ON_ERROR);
603 printk("Cell \"%s\" can be loaded\n", cell->config->name);
606 cell_resume(cpu_data);
611 static int cell_destroy(struct per_cpu *cpu_data, unsigned long id)
613 struct cell *cell, *previous;
616 err = cell_management_prologue(CELL_DESTROY, cpu_data, id, &cell);
620 printk("Closing cell \"%s\"\n", cell->config->name);
622 cell_destroy_internal(cpu_data, cell);
624 previous = &root_cell;
625 while (previous->next != cell)
626 previous = previous->next;
627 previous->next = cell->next;
630 page_free(&mem_pool, cell, cell->data_pages);
631 paging_dump_stats("after cell destruction");
633 cell_reconfig_completed();
635 cell_resume(cpu_data);
640 static int cell_get_state(struct per_cpu *cpu_data, unsigned long id)
644 if (cpu_data->cell != &root_cell)
648 * We do not need explicit synchronization with cell_create/destroy
649 * because their cell_suspend(root_cell) will not return before we left
653 if (cell->id == id) {
654 u32 state = cell->comm_page.comm_region.cell_state;
657 case JAILHOUSE_CELL_RUNNING:
658 case JAILHOUSE_CELL_RUNNING_LOCKED:
659 case JAILHOUSE_CELL_SHUT_DOWN:
660 case JAILHOUSE_CELL_FAILED:
669 static int shutdown(struct per_cpu *cpu_data)
671 unsigned int this_cpu = cpu_data->cpu_id;
676 /* We do not support shutdown over non-root cells. */
677 if (cpu_data->cell != &root_cell)
680 spin_lock(&shutdown_lock);
682 if (cpu_data->shutdown_state == SHUTDOWN_NONE) {
683 state = SHUTDOWN_STARTED;
684 for_each_non_root_cell(cell)
685 if (!cell_shutdown_ok(cell))
688 if (state == SHUTDOWN_STARTED) {
689 printk("Shutting down hypervisor\n");
691 for_each_non_root_cell(cell) {
692 cell_suspend(cell, cpu_data);
694 printk("Closing cell \"%s\"\n",
697 for_each_cpu(cpu, cell->cpu_set) {
698 printk(" Releasing CPU %d\n", cpu);
699 arch_shutdown_cpu(cpu);
703 printk("Closing root cell \"%s\"\n",
704 root_cell.config->name);
708 for_each_cpu(cpu, root_cell.cpu_set)
709 per_cpu(cpu)->shutdown_state = state;
712 if (cpu_data->shutdown_state == SHUTDOWN_STARTED) {
713 printk(" Releasing CPU %d\n", this_cpu);
716 ret = cpu_data->shutdown_state;
717 cpu_data->shutdown_state = SHUTDOWN_NONE;
719 spin_unlock(&shutdown_lock);
724 static long hypervisor_get_info(struct per_cpu *cpu_data, unsigned long type)
727 case JAILHOUSE_INFO_MEM_POOL_SIZE:
728 return mem_pool.pages;
729 case JAILHOUSE_INFO_MEM_POOL_USED:
730 return mem_pool.used_pages;
731 case JAILHOUSE_INFO_REMAP_POOL_SIZE:
732 return remap_pool.pages;
733 case JAILHOUSE_INFO_REMAP_POOL_USED:
734 return remap_pool.used_pages;
735 case JAILHOUSE_INFO_NUM_CELLS:
742 static int cpu_get_info(struct per_cpu *cpu_data, unsigned long cpu_id,
745 if (!cpu_id_valid(cpu_id))
749 * We do not need explicit synchronization with cell_destroy because
750 * its cell_suspend(root_cell + this_cell) will not return before we
751 * left this hypercall.
753 if (cpu_data->cell != &root_cell &&
754 !cell_owns_cpu(cpu_data->cell, cpu_id))
757 if (type == JAILHOUSE_CPU_INFO_STATE) {
758 return per_cpu(cpu_id)->failed ? JAILHOUSE_CPU_FAILED :
759 JAILHOUSE_CPU_RUNNING;
760 } else if (type >= JAILHOUSE_CPU_INFO_STAT_BASE &&
761 type - JAILHOUSE_CPU_INFO_STAT_BASE < JAILHOUSE_NUM_CPU_STATS) {
762 type -= JAILHOUSE_CPU_INFO_STAT_BASE;
763 return per_cpu(cpu_id)->stats[type] & BIT_MASK(30, 0);
769 * Handle hypercall invoked by a cell.
770 * @param code Hypercall code.
771 * @param arg1 First hypercall argument.
772 * @param arg2 Seconds hypercall argument.
774 * @return Value that shall be passed to the caller of the hypercall on return.
776 * @note If @c arg1 and @c arg2 are valid depends on the hypercall code.
778 long hypercall(unsigned long code, unsigned long arg1, unsigned long arg2)
780 struct per_cpu *cpu_data = this_cpu_data();
782 cpu_data->stats[JAILHOUSE_CPU_STAT_VMEXITS_HYPERCALL]++;
785 case JAILHOUSE_HC_DISABLE:
786 return shutdown(cpu_data);
787 case JAILHOUSE_HC_CELL_CREATE:
788 return cell_create(cpu_data, arg1);
789 case JAILHOUSE_HC_CELL_START:
790 return cell_start(cpu_data, arg1);
791 case JAILHOUSE_HC_CELL_SET_LOADABLE:
792 return cell_set_loadable(cpu_data, arg1);
793 case JAILHOUSE_HC_CELL_DESTROY:
794 return cell_destroy(cpu_data, arg1);
795 case JAILHOUSE_HC_HYPERVISOR_GET_INFO:
796 return hypervisor_get_info(cpu_data, arg1);
797 case JAILHOUSE_HC_CELL_GET_STATE:
798 return cell_get_state(cpu_data, arg1);
799 case JAILHOUSE_HC_CPU_GET_INFO:
800 return cpu_get_info(cpu_data, arg1, arg2);
807 * Stops the current CPU on panic and prevents any execution on it until the
808 * system is rebooted.
810 * @note This service should be used when facing an unrecoverable error of the
815 void __attribute__((noreturn)) panic_stop(void)
817 struct cell *cell = this_cell();
819 panic_printk("Stopping CPU %d (Cell: \"%s\")\n", this_cpu_id(),
820 cell && cell->config ? cell->config->name : "<UNSET>");
822 if (phys_processor_id() == panic_cpu)
823 panic_in_progress = 0;
829 * Parks the current CPU on panic, allowing to restart it by resetting the
832 * @note This service should be used when facing an error of a cell CPU, e.g. a
833 * cell boundary violation.
837 void panic_park(void)
839 struct cell *cell = this_cell();
840 bool cell_failed = true;
843 panic_printk("Parking CPU %d (Cell: \"%s\")\n", this_cpu_id(),
846 this_cpu_data()->failed = true;
847 for_each_cpu(cpu, cell->cpu_set)
848 if (!per_cpu(cpu)->failed) {
853 cell->comm_page.comm_region.cell_state = JAILHOUSE_CELL_FAILED;
857 if (phys_processor_id() == panic_cpu)
858 panic_in_progress = 0;