/*
 * Jailhouse, a Linux-based partitioning hypervisor
 *
 * Copyright (c) Siemens AG, 2013
 *
 * Authors:
 *  Jan Kiszka <jan.kiszka@siemens.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */

#include <jailhouse/entry.h>
#include <jailhouse/control.h>
#include <jailhouse/printk.h>
#include <jailhouse/paging.h>
#include <jailhouse/processor.h>
#include <jailhouse/string.h>
#include <asm/bitops.h>
#include <asm/spinlock.h>

enum msg_type {MSG_REQUEST, MSG_INFORMATION};
enum management_task {CELL_START, CELL_DESTROY};

struct jailhouse_system *system_config;

static DEFINE_SPINLOCK(shutdown_lock);
static unsigned int num_cells = 1;

#define for_each_cell(c)        for ((c) = &root_cell; (c); (c) = (c)->next)
#define for_each_non_root_cell(c) \
        for ((c) = root_cell.next; (c); (c) = (c)->next)

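/**
 * next_cpu - Return the next CPU in a given set
 * @cpu: CPU ID to continue the search after
 * @cpu_set: CPU set to search in
 * @exception: CPU ID to skip during the search
 *
 * Returns the ID of the next CPU contained in the set, or a value larger
 * than cpu_set->max_cpu_id if no further CPU is found.
 */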
unsigned int next_cpu(unsigned int cpu, struct cpu_set *cpu_set, int exception)
{
        do
                cpu++;
        while (cpu <= cpu_set->max_cpu_id &&
               (cpu == exception || !test_bit(cpu, cpu_set->bitmap)));
        return cpu;
}

bool cpu_id_valid(unsigned long cpu_id)
{
        const unsigned long *system_cpu_set =
                jailhouse_cell_cpu_set(&system_config->system);

        return (cpu_id < system_config->system.cpu_set_size * 8 &&
                test_bit(cpu_id, system_cpu_set));
}

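/* Suspend all CPUs assigned to the cell except the one invoking this. */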
static void cell_suspend(struct cell *cell, struct per_cpu *cpu_data)
{
        unsigned int cpu;

        for_each_cpu_except(cpu, cell->cpu_set, cpu_data->cpu_id)
                arch_suspend_cpu(cpu);
}

static void cell_resume(struct per_cpu *cpu_data)
{
        unsigned int cpu;

        for_each_cpu_except(cpu, cpu_data->cell->cpu_set, cpu_data->cpu_id)
                arch_resume_cpu(cpu);
}

/**
 * cell_send_message - Deliver a message to a cell and wait for the reply
 * @cell: target cell
 * @message: message code to be sent (JAILHOUSE_MSG_*)
 * @type: message type, defines the valid replies
 *
 * Returns true if a request message was approved or the reception of an
 * information message was acknowledged by the target cell. It also returns
 * true if the target cell does not support a communication region, is shut
 * down or in a failed state. Returns false on request denial or invalid
 * replies.
 */
static bool cell_send_message(struct cell *cell, u32 message,
                              enum msg_type type)
{
        if (cell->config->flags & JAILHOUSE_CELL_PASSIVE_COMMREG)
                return true;

        jailhouse_send_msg_to_cell(&cell->comm_page.comm_region, message);

        while (1) {
                u32 reply = cell->comm_page.comm_region.reply_from_cell;
                u32 cell_state = cell->comm_page.comm_region.cell_state;

                if (cell_state == JAILHOUSE_CELL_SHUT_DOWN ||
                    cell_state == JAILHOUSE_CELL_FAILED)
                        return true;

                if ((type == MSG_REQUEST &&
                     reply == JAILHOUSE_MSG_REQUEST_APPROVED) ||
                    (type == MSG_INFORMATION &&
                     reply == JAILHOUSE_MSG_RECEIVED))
                        return true;

                if (reply != JAILHOUSE_MSG_NONE)
                        return false;

                cpu_relax();
        }
}

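/*
 * Check whether reconfiguration is currently permitted: it is not while any
 * non-root cell other than the excluded one runs in locked mode.
 */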
static bool cell_reconfig_ok(struct cell *excluded_cell)
{
        struct cell *cell;

        for_each_non_root_cell(cell)
                if (cell != excluded_cell &&
                    cell->comm_page.comm_region.cell_state ==
                                JAILHOUSE_CELL_RUNNING_LOCKED)
                        return false;
        return true;
}

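/* Notify all non-root cells that a reconfiguration has completed. */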
static void cell_reconfig_completed(void)
{
        struct cell *cell;

        for_each_non_root_cell(cell)
                cell_send_message(cell, JAILHOUSE_MSG_RECONFIG_COMPLETED,
                                  MSG_INFORMATION);
}

static unsigned int get_free_cell_id(void)
{
        unsigned int id = 0;
        struct cell *cell;

retry:
        for_each_cell(cell)
                if (cell->id == id) {
                        id++;
                        goto retry;
                }

        return id;
}

/*
 * Assign a free ID to the cell and set up its CPU set, allocating a full
 * page for the bitmap if the configured set does not fit into the embedded
 * small_cpu_set. The cell structure must be zero-initialized.
 */
int cell_init(struct cell *cell, bool copy_cpu_set)
{
        const unsigned long *config_cpu_set =
                jailhouse_cell_cpu_set(cell->config);
        unsigned long cpu_set_size = cell->config->cpu_set_size;
        struct cpu_set *cpu_set;

        cell->id = get_free_cell_id();

        if (cpu_set_size > PAGE_SIZE)
                return -EINVAL;
        else if (cpu_set_size > sizeof(cell->small_cpu_set.bitmap)) {
                cpu_set = page_alloc(&mem_pool, 1);
                if (!cpu_set)
                        return -ENOMEM;
                cpu_set->max_cpu_id =
                        ((PAGE_SIZE - sizeof(unsigned long)) * 8) - 1;
        } else {
                cpu_set = &cell->small_cpu_set;
                cpu_set->max_cpu_id =
                        (sizeof(cell->small_cpu_set.bitmap) * 8) - 1;
        }
        cell->cpu_set = cpu_set;
        if (copy_cpu_set)
                memcpy(cell->cpu_set->bitmap, config_cpu_set, cpu_set_size);

        return 0;
}

static void destroy_cpu_set(struct cell *cell)
{
        if (cell->cpu_set != &cell->small_cpu_set)
                page_free(&mem_pool, cell->cpu_set, 1);
}

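/*
 * Validate the memory regions of a cell configuration: physical and virtual
 * addresses as well as sizes must be page-aligned, and only supported region
 * flags may be set.
 */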
int check_mem_regions(const struct jailhouse_cell_desc *config)
{
        const struct jailhouse_memory *mem =
                jailhouse_cell_mem_regions(config);
        unsigned int n;

        for (n = 0; n < config->num_memory_regions; n++, mem++) {
                if (mem->phys_start & ~PAGE_MASK ||
                    mem->virt_start & ~PAGE_MASK ||
                    mem->size & ~PAGE_MASK ||
                    mem->flags & ~JAILHOUSE_MEM_VALID_FLAGS) {
                        printk("FATAL: Invalid memory region (%p, %p, %p, %x)\n",
                               mem->phys_start, mem->virt_start, mem->size,
                               mem->flags);
                        return -EINVAL;
                }
        }
        return 0;
}

static bool address_in_region(unsigned long addr,
                              const struct jailhouse_memory *region)
{
        return addr >= region->phys_start &&
               addr < (region->phys_start + region->size);
}

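/*
 * Map back the portions of the given memory region that overlap with root
 * cell memory. Used when a cell's memory is handed back to the root cell.
 */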
static void remap_to_root_cell(const struct jailhouse_memory *mem)
{
        const struct jailhouse_memory *root_mem =
                jailhouse_cell_mem_regions(root_cell.config);
        struct jailhouse_memory overlap;
        unsigned int n;

        for (n = 0; n < root_cell.config->num_memory_regions;
             n++, root_mem++) {
                if (address_in_region(mem->phys_start, root_mem)) {
                        overlap.phys_start = mem->phys_start;
                        overlap.size = root_mem->size -
                                (overlap.phys_start - root_mem->phys_start);
                        if (overlap.size > mem->size)
                                overlap.size = mem->size;
                } else if (address_in_region(root_mem->phys_start, mem)) {
                        overlap.phys_start = root_mem->phys_start;
                        overlap.size = mem->size -
                                (overlap.phys_start - mem->phys_start);
                        if (overlap.size > root_mem->size)
                                overlap.size = root_mem->size;
                } else
                        continue;

                overlap.virt_start = root_mem->virt_start +
                        overlap.phys_start - root_mem->phys_start;
                overlap.flags = root_mem->flags;

                if (arch_map_memory_region(&root_cell, &overlap) != 0)
                        printk("WARNING: Failed to re-assign memory region "
                               "to root cell\n");
        }
}

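/**
 * cell_create - Create a new cell on behalf of the root cell
 * @cpu_data: per-CPU data of the calling CPU
 * @config_address: physical address of the cell configuration, as passed
 *                  by the root cell
 *
 * Suspends the root cell, validates and copies the configuration, removes
 * the new cell's CPUs and memory regions from the root cell and parks the
 * assigned CPUs. The new cell starts in the shut-down state.
 *
 * Returns the ID of the new cell on success, a negative error code on
 * failure.
 */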
static int cell_create(struct per_cpu *cpu_data, unsigned long config_address)
{
        unsigned long mapping_addr = TEMPORARY_MAPPING_CPU_BASE(cpu_data);
        unsigned long cfg_page_offs = config_address & ~PAGE_MASK;
        unsigned long cfg_header_size, cfg_total_size;
        const struct jailhouse_memory *mem;
        struct jailhouse_cell_desc *cfg;
        unsigned int cell_pages, cpu, n;
        struct cpu_set *shrinking_set;
        struct jailhouse_memory tmp;
        struct cell *cell, *last;
        int err;

        /* We do not support creation over non-root cells. */
        if (cpu_data->cell != &root_cell)
                return -EPERM;

        cell_suspend(&root_cell, cpu_data);

        if (!cell_reconfig_ok(NULL)) {
                err = -EPERM;
                goto err_resume;
        }

        cfg_header_size = (config_address & ~PAGE_MASK) +
                sizeof(struct jailhouse_cell_desc);

        err = page_map_create(&hv_paging_structs, config_address & PAGE_MASK,
                              cfg_header_size, mapping_addr,
                              PAGE_READONLY_FLAGS, PAGE_MAP_NON_COHERENT);
        if (err)
                goto err_resume;

        cfg = (struct jailhouse_cell_desc *)(mapping_addr + cfg_page_offs);
        cfg_total_size = jailhouse_cell_config_size(cfg);
        if (cfg_total_size + cfg_page_offs > NUM_TEMPORARY_PAGES * PAGE_SIZE) {
                err = -E2BIG;
                goto err_resume;
        }

        for_each_cell(cell)
                if (strcmp(cell->config->name, cfg->name) == 0) {
                        err = -EEXIST;
                        goto err_resume;
                }

        err = page_map_create(&hv_paging_structs, config_address & PAGE_MASK,
                              cfg_total_size + cfg_page_offs, mapping_addr,
                              PAGE_READONLY_FLAGS, PAGE_MAP_NON_COHERENT);
        if (err)
                goto err_resume;

        err = check_mem_regions(cfg);
        if (err)
                goto err_resume;

        cell_pages = PAGE_ALIGN(sizeof(*cell) + cfg_total_size) / PAGE_SIZE;
        cell = page_alloc(&mem_pool, cell_pages);
        if (!cell) {
                err = -ENOMEM;
                goto err_resume;
        }

        cell->data_pages = cell_pages;
        cell->config = ((void *)cell) + sizeof(*cell);
        memcpy(cell->config, cfg, cfg_total_size);

        err = cell_init(cell, true);
        if (err)
                goto err_free_cell;

        /* don't assign the CPU we are currently running on */
        if (cpu_data->cpu_id <= cell->cpu_set->max_cpu_id &&
            test_bit(cpu_data->cpu_id, cell->cpu_set->bitmap)) {
                err = -EBUSY;
                goto err_free_cpu_set;
        }

        shrinking_set = cpu_data->cell->cpu_set;

        /* the shrinking set must be a superset of the new cell's CPU set */
        if (shrinking_set->max_cpu_id < cell->cpu_set->max_cpu_id) {
                err = -EBUSY;
                goto err_free_cpu_set;
        }
        for_each_cpu(cpu, cell->cpu_set)
                if (!test_bit(cpu, shrinking_set->bitmap)) {
                        err = -EBUSY;
                        goto err_free_cpu_set;
                }

        for_each_cpu(cpu, cell->cpu_set)
                clear_bit(cpu, shrinking_set->bitmap);

        /* unmap the new cell's memory regions from the root cell */
        mem = jailhouse_cell_mem_regions(cell->config);
        for (n = 0; n < cell->config->num_memory_regions; n++, mem++)
                /*
                 * Exceptions:
                 *  - the communication region is not backed by root memory
                 */
                if (!(mem->flags & JAILHOUSE_MEM_COMM_REGION)) {
                        /*
                         * arch_unmap_memory_region uses the virtual address of
                         * the memory region. As only the root cell has a
                         * guaranteed 1:1 mapping, make a copy where we ensure
                         * this.
                         */
                        tmp = *mem;
                        tmp.virt_start = tmp.phys_start;
                        err = arch_unmap_memory_region(&root_cell, &tmp);
                        if (err)
                                goto err_restore_root;
                }

        err = arch_cell_create(cpu_data, cell);
        if (err)
                goto err_restore_root;

        cell->comm_page.comm_region.cell_state = JAILHOUSE_CELL_SHUT_DOWN;

        last = &root_cell;
        while (last->next)
                last = last->next;
        last->next = cell;
        num_cells++;

        for_each_cpu(cpu, cell->cpu_set) {
                per_cpu(cpu)->cell = cell;
                arch_park_cpu(cpu);
        }

        cell_reconfig_completed();

        printk("Created cell \"%s\"\n", cell->config->name);

        page_map_dump_stats("after cell creation");

        cell_resume(cpu_data);

        return cell->id;

err_restore_root:
        mem = jailhouse_cell_mem_regions(cell->config);
        for (n = 0; n < cell->config->num_memory_regions; n++, mem++)
                remap_to_root_cell(mem);
        for_each_cpu(cpu, cell->cpu_set)
                set_bit(cpu, shrinking_set->bitmap);
err_free_cpu_set:
        destroy_cpu_set(cell);
err_free_cell:
        page_free(&mem_pool, cell, cell_pages);

err_resume:
        cell_resume(cpu_data);

        return err;
}

static bool cell_shutdown_ok(struct cell *cell)
{
        return cell_send_message(cell, JAILHOUSE_MSG_SHUTDOWN_REQUEST,
                                 MSG_REQUEST);
}

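/*
 * Common entry checks for cell management commands: only the root cell may
 * issue them, the target cell must exist and must not be the root cell, and
 * shutdown (plus, for destruction, reconfiguration) has to be permitted.
 * On success, both the root cell and the target cell are suspended and
 * *cell_ptr points to the target cell.
 */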
static int cell_management_prologue(enum management_task task,
                                    struct per_cpu *cpu_data, unsigned long id,
                                    struct cell **cell_ptr)
{
        /* We do not support management commands over non-root cells. */
        if (cpu_data->cell != &root_cell)
                return -EPERM;

        cell_suspend(&root_cell, cpu_data);

        for_each_cell(*cell_ptr)
                if ((*cell_ptr)->id == id)
                        break;

        if (!*cell_ptr) {
                cell_resume(cpu_data);
                return -ENOENT;
        }

        /* root cell cannot be managed */
        if (*cell_ptr == &root_cell) {
                cell_resume(cpu_data);
                return -EINVAL;
        }

        if ((task == CELL_DESTROY && !cell_reconfig_ok(*cell_ptr)) ||
            !cell_shutdown_ok(*cell_ptr)) {
                cell_resume(cpu_data);
                return -EPERM;
        }

        cell_suspend(*cell_ptr, cpu_data);

        return 0;
}

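/* Set the target cell to the running state and reset its CPUs. */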
static int cell_start(struct per_cpu *cpu_data, unsigned long id)
{
        struct cell *cell;
        unsigned int cpu;
        int err;

        err = cell_management_prologue(CELL_START, cpu_data, id, &cell);
        if (err)
                return err;

        /* present a consistent Communication Region state to the cell */
        cell->comm_page.comm_region.cell_state = JAILHOUSE_CELL_RUNNING;
        cell->comm_page.comm_region.msg_to_cell = JAILHOUSE_MSG_NONE;

        for_each_cpu(cpu, cell->cpu_set) {
                per_cpu(cpu)->failed = false;
                arch_reset_cpu(cpu);
        }

        printk("Started cell \"%s\"\n", cell->config->name);

        cell_resume(cpu_data);

        return 0;
}

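/*
 * Shut down the target cell, return its CPUs and memory regions to the
 * root cell and release its data structures.
 */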
static int cell_destroy(struct per_cpu *cpu_data, unsigned long id)
{
        const struct jailhouse_memory *mem;
        struct cell *cell, *previous;
        unsigned int cpu, n;
        int err;

        err = cell_management_prologue(CELL_DESTROY, cpu_data, id, &cell);
        if (err)
                return err;

        printk("Closing cell \"%s\"\n", cell->config->name);

        for_each_cpu(cpu, cell->cpu_set) {
                arch_park_cpu(cpu);

                set_bit(cpu, root_cell.cpu_set->bitmap);
                per_cpu(cpu)->cell = &root_cell;
                per_cpu(cpu)->failed = false;
        }

        mem = jailhouse_cell_mem_regions(cell->config);
        for (n = 0; n < cell->config->num_memory_regions; n++, mem++) {
                /*
                 * This cannot fail. The region was mapped as a whole before,
                 * thus no hugepages need to be broken up to unmap it.
                 */
                arch_unmap_memory_region(cell, mem);
                if (!(mem->flags & JAILHOUSE_MEM_COMM_REGION))
                        remap_to_root_cell(mem);
        }

        arch_cell_destroy(cpu_data, cell);

        previous = &root_cell;
        while (previous->next != cell)
                previous = previous->next;
        previous->next = cell->next;
        num_cells--;

        page_free(&mem_pool, cell, cell->data_pages);
        page_map_dump_stats("after cell destruction");

        cell_reconfig_completed();

        cell_resume(cpu_data);

        return 0;
}

static int cell_get_state(struct per_cpu *cpu_data, unsigned long id)
{
        struct cell *cell;

        if (cpu_data->cell != &root_cell)
                return -EPERM;

        /*
         * We do not need explicit synchronization with cell_create/destroy
         * because their cell_suspend(root_cell) will not return before we
         * have left this hypercall.
         */
        for_each_cell(cell)
                if (cell->id == id) {
                        u32 state = cell->comm_page.comm_region.cell_state;

                        switch (state) {
                        case JAILHOUSE_CELL_RUNNING:
                        case JAILHOUSE_CELL_RUNNING_LOCKED:
                        case JAILHOUSE_CELL_SHUT_DOWN:
                        case JAILHOUSE_CELL_FAILED:
                                return state;
                        default:
                                return -EINVAL;
                        }
                }
        return -ENOENT;
}

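/*
 * Disable the hypervisor. The first root cell CPU entering this hypercall
 * asks all non-root cells for approval, then shuts down their CPUs and the
 * hypervisor itself; the remaining root cell CPUs only release themselves.
 */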
static int shutdown(struct per_cpu *cpu_data)
{
        unsigned int this_cpu = cpu_data->cpu_id;
        struct cell *cell;
        unsigned int cpu;
        int state, ret;

        /* We do not support shutdown over non-root cells. */
        if (cpu_data->cell != &root_cell)
                return -EPERM;

        spin_lock(&shutdown_lock);

        if (cpu_data->shutdown_state == SHUTDOWN_NONE) {
                state = SHUTDOWN_STARTED;
                for_each_non_root_cell(cell)
                        if (!cell_shutdown_ok(cell))
                                state = -EPERM;

                if (state == SHUTDOWN_STARTED) {
                        printk("Shutting down hypervisor\n");

                        for_each_non_root_cell(cell) {
                                cell_suspend(cell, cpu_data);

                                printk("Closing cell \"%s\"\n",
                                       cell->config->name);

                                for_each_cpu(cpu, cell->cpu_set) {
                                        printk(" Releasing CPU %d\n", cpu);
                                        arch_shutdown_cpu(cpu);
                                }
                        }

                        printk("Closing root cell \"%s\"\n",
                               root_cell.config->name);
                        arch_shutdown();
                }

                for_each_cpu(cpu, root_cell.cpu_set)
                        per_cpu(cpu)->shutdown_state = state;
        }

        if (cpu_data->shutdown_state == SHUTDOWN_STARTED) {
                printk(" Releasing CPU %d\n", this_cpu);
                ret = 0;
        } else
                ret = cpu_data->shutdown_state;
        cpu_data->shutdown_state = SHUTDOWN_NONE;

        spin_unlock(&shutdown_lock);

        return ret;
}

static long hypervisor_get_info(struct per_cpu *cpu_data, unsigned long type)
{
        switch (type) {
        case JAILHOUSE_INFO_MEM_POOL_SIZE:
                return mem_pool.pages;
        case JAILHOUSE_INFO_MEM_POOL_USED:
                return mem_pool.used_pages;
        case JAILHOUSE_INFO_REMAP_POOL_SIZE:
                return remap_pool.pages;
        case JAILHOUSE_INFO_REMAP_POOL_USED:
                return remap_pool.used_pages;
        case JAILHOUSE_INFO_NUM_CELLS:
                return num_cells;
        default:
                return -EINVAL;
        }
}

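/*
 * Report the state of a CPU. Non-root cells may only query CPUs of their
 * own set.
 */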
static int cpu_get_state(struct per_cpu *cpu_data, unsigned long cpu_id)
{
        if (!cpu_id_valid(cpu_id))
                return -EINVAL;

        /*
         * We do not need explicit synchronization with cell_destroy because
         * its cell_suspend(root_cell + this_cell) will not return before we
         * have left this hypercall.
         */
        if (cpu_data->cell != &root_cell &&
            (cpu_id > cpu_data->cell->cpu_set->max_cpu_id ||
             !test_bit(cpu_id, cpu_data->cell->cpu_set->bitmap)))
                return -EPERM;

        return per_cpu(cpu_id)->failed ? JAILHOUSE_CPU_FAILED :
                JAILHOUSE_CPU_RUNNING;
}

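/**
 * hypercall - Entry point for all hypercalls issued by cells
 * @cpu_data: per-CPU data of the calling CPU
 * @code: hypercall code (JAILHOUSE_HC_*)
 * @arg: hypercall argument
 *
 * Returns the result of the invoked handler or -ENOSYS for unknown codes.
 */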
long hypercall(struct per_cpu *cpu_data, unsigned long code, unsigned long arg)
{
        switch (code) {
        case JAILHOUSE_HC_DISABLE:
                return shutdown(cpu_data);
        case JAILHOUSE_HC_CELL_CREATE:
                return cell_create(cpu_data, arg);
        case JAILHOUSE_HC_CELL_START:
                return cell_start(cpu_data, arg);
        case JAILHOUSE_HC_CELL_DESTROY:
                return cell_destroy(cpu_data, arg);
        case JAILHOUSE_HC_HYPERVISOR_GET_INFO:
                return hypervisor_get_info(cpu_data, arg);
        case JAILHOUSE_HC_CELL_GET_STATE:
                return cell_get_state(cpu_data, arg);
        case JAILHOUSE_HC_CPU_GET_STATE:
                return cpu_get_state(cpu_data, arg);
        default:
                return -ENOSYS;
        }
}

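/* Stop the calling CPU on a fatal error. cpu_data may be NULL. */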
void panic_stop(struct per_cpu *cpu_data)
{
        panic_printk("Stopping CPU");
        if (cpu_data) {
                panic_printk(" %d", cpu_data->cpu_id);
                cpu_data->cpu_stopped = true;
        }
        panic_printk("\n");

        if (phys_processor_id() == panic_cpu)
                panic_in_progress = 0;

        arch_panic_stop(cpu_data);
}

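/*
 * Park the calling CPU on a fatal cell error. Marks the cell as failed
 * once all of its CPUs have failed.
 */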
void panic_halt(struct per_cpu *cpu_data)
{
        struct cell *cell = cpu_data->cell;
        bool cell_failed = true;
        unsigned int cpu;

        panic_printk("Parking CPU %d\n", cpu_data->cpu_id);

        cpu_data->failed = true;
        for_each_cpu(cpu, cell->cpu_set)
                if (!per_cpu(cpu)->failed) {
                        cell_failed = false;
                        break;
                }
        if (cell_failed)
                cell->comm_page.comm_region.cell_state = JAILHOUSE_CELL_FAILED;

        arch_panic_halt(cpu_data);

        if (phys_processor_id() == panic_cpu)
                panic_in_progress = 0;
}