]> rtime.felk.cvut.cz Git - lisovros/qemu_apohw.git/blob - migration.c
target-i386: Call x86_cpu_load_def() earlier
[lisovros/qemu_apohw.git] / migration.c
1 /*
2  * QEMU live migration
3  *
4  * Copyright IBM, Corp. 2008
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  * Contributions after 2012-01-13 are licensed under the terms of the
13  * GNU GPL, version 2 or (at your option) any later version.
14  */
15
16 #include "qemu-common.h"
17 #include "qemu/main-loop.h"
18 #include "migration/migration.h"
19 #include "monitor/monitor.h"
20 #include "migration/qemu-file.h"
21 #include "sysemu/sysemu.h"
22 #include "block/block.h"
23 #include "qemu/sockets.h"
24 #include "migration/block.h"
25 #include "qemu/thread.h"
26 #include "qmp-commands.h"
27 #include "trace.h"
28
29 //#define DEBUG_MIGRATION
30
31 #ifdef DEBUG_MIGRATION
32 #define DPRINTF(fmt, ...) \
33     do { printf("migration: " fmt, ## __VA_ARGS__); } while (0)
34 #else
35 #define DPRINTF(fmt, ...) \
36     do { } while (0)
37 #endif
38
/* Migration state machine values stored in MigrationState.state. */
enum {
    MIG_STATE_ERROR      = -1,  /* reported as "failed" */
    MIG_STATE_NONE       = 0,   /* no migration has happened yet */
    MIG_STATE_SETUP      = 1,   /* reported as "setup" */
    MIG_STATE_CANCELLING = 2,   /* cancel requested; still reported as "active" */
    MIG_STATE_CANCELLED  = 3,   /* reported as "cancelled" */
    MIG_STATE_ACTIVE     = 4,   /* reported as "active" */
    MIG_STATE_COMPLETED  = 5,   /* reported as "completed" */
};
48
49 #define MAX_THROTTLE  (32 << 20)      /* Migration speed throttling */
50
51 /* Amount of time to allocate to each "chunk" of bandwidth-throttled
52  * data. */
53 #define BUFFER_DELAY     100
54 #define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY)
55
56 /* Migration XBZRLE default cache size */
57 #define DEFAULT_MIGRATE_CACHE_SIZE (64 * 1024 * 1024)
58
59 static NotifierList migration_state_notifiers =
60     NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
61
/* Once fault tolerance is added, several migrations could be in flight
   at once.  For now a single, statically allocated migration state is
   enough, so migration states are not created dynamically. */
65
66 MigrationState *migrate_get_current(void)
67 {
68     static MigrationState current_migration = {
69         .state = MIG_STATE_NONE,
70         .bandwidth_limit = MAX_THROTTLE,
71         .xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE,
72         .mbps = -1,
73     };
74
75     return &current_migration;
76 }
77
78 void qemu_start_incoming_migration(const char *uri, Error **errp)
79 {
80     const char *p;
81
82     if (strstart(uri, "tcp:", &p))
83         tcp_start_incoming_migration(p, errp);
84 #ifdef CONFIG_RDMA
85     else if (strstart(uri, "rdma:", &p))
86         rdma_start_incoming_migration(p, errp);
87 #endif
88 #if !defined(WIN32)
89     else if (strstart(uri, "exec:", &p))
90         exec_start_incoming_migration(p, errp);
91     else if (strstart(uri, "unix:", &p))
92         unix_start_incoming_migration(p, errp);
93     else if (strstart(uri, "fd:", &p))
94         fd_start_incoming_migration(p, errp);
95 #endif
96     else {
97         error_setg(errp, "unknown migration protocol: %s", uri);
98     }
99 }
100
101 static void process_incoming_migration_co(void *opaque)
102 {
103     QEMUFile *f = opaque;
104     int ret;
105
106     ret = qemu_loadvm_state(f);
107     qemu_fclose(f);
108     free_xbzrle_decoded_buf();
109     if (ret < 0) {
110         fprintf(stderr, "load of migration failed\n");
111         exit(EXIT_FAILURE);
112     }
113     qemu_announce_self();
114     DPRINTF("successfully loaded vm state\n");
115
116     bdrv_clear_incoming_migration_all();
117     /* Make sure all file formats flush their mutable metadata */
118     bdrv_invalidate_cache_all();
119
120     if (autostart) {
121         vm_start();
122     } else {
123         runstate_set(RUN_STATE_PAUSED);
124     }
125 }
126
127 void process_incoming_migration(QEMUFile *f)
128 {
129     Coroutine *co = qemu_coroutine_create(process_incoming_migration_co);
130     int fd = qemu_get_fd(f);
131
132     assert(fd != -1);
133     qemu_set_nonblock(fd);
134     qemu_coroutine_enter(co, f);
135 }
136
/* Number of nanoseconds of downtime we are willing to tolerate while
 * migration completes.  Nanoseconds are used because that is the finest
 * resolution get_clock() can achieve.  This is an internal unit only:
 * all user-visible values must be expressed in seconds. */
141 static uint64_t max_downtime = 30000000;
142
143 uint64_t migrate_max_downtime(void)
144 {
145     return max_downtime;
146 }
147
148 MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp)
149 {
150     MigrationCapabilityStatusList *head = NULL;
151     MigrationCapabilityStatusList *caps;
152     MigrationState *s = migrate_get_current();
153     int i;
154
155     caps = NULL; /* silence compiler warning */
156     for (i = 0; i < MIGRATION_CAPABILITY_MAX; i++) {
157         if (head == NULL) {
158             head = g_malloc0(sizeof(*caps));
159             caps = head;
160         } else {
161             caps->next = g_malloc0(sizeof(*caps));
162             caps = caps->next;
163         }
164         caps->value =
165             g_malloc(sizeof(*caps->value));
166         caps->value->capability = i;
167         caps->value->state = s->enabled_capabilities[i];
168     }
169
170     return head;
171 }
172
173 static void get_xbzrle_cache_stats(MigrationInfo *info)
174 {
175     if (migrate_use_xbzrle()) {
176         info->has_xbzrle_cache = true;
177         info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache));
178         info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size();
179         info->xbzrle_cache->bytes = xbzrle_mig_bytes_transferred();
180         info->xbzrle_cache->pages = xbzrle_mig_pages_transferred();
181         info->xbzrle_cache->cache_miss = xbzrle_mig_pages_cache_miss();
182         info->xbzrle_cache->overflow = xbzrle_mig_pages_overflow();
183     }
184 }
185
186 MigrationInfo *qmp_query_migrate(Error **errp)
187 {
188     MigrationInfo *info = g_malloc0(sizeof(*info));
189     MigrationState *s = migrate_get_current();
190
191     switch (s->state) {
192     case MIG_STATE_NONE:
193         /* no migration has happened ever */
194         break;
195     case MIG_STATE_SETUP:
196         info->has_status = true;
197         info->status = g_strdup("setup");
198         info->has_total_time = false;
199         break;
200     case MIG_STATE_ACTIVE:
201     case MIG_STATE_CANCELLING:
202         info->has_status = true;
203         info->status = g_strdup("active");
204         info->has_total_time = true;
205         info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
206             - s->total_time;
207         info->has_expected_downtime = true;
208         info->expected_downtime = s->expected_downtime;
209         info->has_setup_time = true;
210         info->setup_time = s->setup_time;
211
212         info->has_ram = true;
213         info->ram = g_malloc0(sizeof(*info->ram));
214         info->ram->transferred = ram_bytes_transferred();
215         info->ram->remaining = ram_bytes_remaining();
216         info->ram->total = ram_bytes_total();
217         info->ram->duplicate = dup_mig_pages_transferred();
218         info->ram->skipped = skipped_mig_pages_transferred();
219         info->ram->normal = norm_mig_pages_transferred();
220         info->ram->normal_bytes = norm_mig_bytes_transferred();
221         info->ram->dirty_pages_rate = s->dirty_pages_rate;
222         info->ram->mbps = s->mbps;
223
224         if (blk_mig_active()) {
225             info->has_disk = true;
226             info->disk = g_malloc0(sizeof(*info->disk));
227             info->disk->transferred = blk_mig_bytes_transferred();
228             info->disk->remaining = blk_mig_bytes_remaining();
229             info->disk->total = blk_mig_bytes_total();
230         }
231
232         get_xbzrle_cache_stats(info);
233         break;
234     case MIG_STATE_COMPLETED:
235         get_xbzrle_cache_stats(info);
236
237         info->has_status = true;
238         info->status = g_strdup("completed");
239         info->has_total_time = true;
240         info->total_time = s->total_time;
241         info->has_downtime = true;
242         info->downtime = s->downtime;
243         info->has_setup_time = true;
244         info->setup_time = s->setup_time;
245
246         info->has_ram = true;
247         info->ram = g_malloc0(sizeof(*info->ram));
248         info->ram->transferred = ram_bytes_transferred();
249         info->ram->remaining = 0;
250         info->ram->total = ram_bytes_total();
251         info->ram->duplicate = dup_mig_pages_transferred();
252         info->ram->skipped = skipped_mig_pages_transferred();
253         info->ram->normal = norm_mig_pages_transferred();
254         info->ram->normal_bytes = norm_mig_bytes_transferred();
255         info->ram->mbps = s->mbps;
256         break;
257     case MIG_STATE_ERROR:
258         info->has_status = true;
259         info->status = g_strdup("failed");
260         break;
261     case MIG_STATE_CANCELLED:
262         info->has_status = true;
263         info->status = g_strdup("cancelled");
264         break;
265     }
266
267     return info;
268 }
269
270 void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
271                                   Error **errp)
272 {
273     MigrationState *s = migrate_get_current();
274     MigrationCapabilityStatusList *cap;
275
276     if (s->state == MIG_STATE_ACTIVE || s->state == MIG_STATE_SETUP) {
277         error_set(errp, QERR_MIGRATION_ACTIVE);
278         return;
279     }
280
281     for (cap = params; cap; cap = cap->next) {
282         s->enabled_capabilities[cap->value->capability] = cap->value->state;
283     }
284 }
285
286 /* shared migration helpers */
287
288 static void migrate_set_state(MigrationState *s, int old_state, int new_state)
289 {
290     if (atomic_cmpxchg(&s->state, old_state, new_state) == new_state) {
291         trace_migrate_set_state(new_state);
292     }
293 }
294
295 static void migrate_fd_cleanup(void *opaque)
296 {
297     MigrationState *s = opaque;
298
299     qemu_bh_delete(s->cleanup_bh);
300     s->cleanup_bh = NULL;
301
302     if (s->file) {
303         DPRINTF("closing file\n");
304         qemu_mutex_unlock_iothread();
305         qemu_thread_join(&s->thread);
306         qemu_mutex_lock_iothread();
307
308         qemu_fclose(s->file);
309         s->file = NULL;
310     }
311
312     assert(s->state != MIG_STATE_ACTIVE);
313
314     if (s->state != MIG_STATE_COMPLETED) {
315         qemu_savevm_state_cancel();
316         if (s->state == MIG_STATE_CANCELLING) {
317             migrate_set_state(s, MIG_STATE_CANCELLING, MIG_STATE_CANCELLED);
318         }
319     }
320
321     notifier_list_notify(&migration_state_notifiers, s);
322 }
323
324 void migrate_fd_error(MigrationState *s)
325 {
326     DPRINTF("setting error state\n");
327     assert(s->file == NULL);
328     s->state = MIG_STATE_ERROR;
329     trace_migrate_set_state(MIG_STATE_ERROR);
330     notifier_list_notify(&migration_state_notifiers, s);
331 }
332
333 static void migrate_fd_cancel(MigrationState *s)
334 {
335     int old_state ;
336     DPRINTF("cancelling migration\n");
337
338     do {
339         old_state = s->state;
340         if (old_state != MIG_STATE_SETUP && old_state != MIG_STATE_ACTIVE) {
341             break;
342         }
343         migrate_set_state(s, old_state, MIG_STATE_CANCELLING);
344     } while (s->state != MIG_STATE_CANCELLING);
345 }
346
347 void add_migration_state_change_notifier(Notifier *notify)
348 {
349     notifier_list_add(&migration_state_notifiers, notify);
350 }
351
352 void remove_migration_state_change_notifier(Notifier *notify)
353 {
354     notifier_remove(notify);
355 }
356
357 bool migration_in_setup(MigrationState *s)
358 {
359     return s->state == MIG_STATE_SETUP;
360 }
361
362 bool migration_has_finished(MigrationState *s)
363 {
364     return s->state == MIG_STATE_COMPLETED;
365 }
366
367 bool migration_has_failed(MigrationState *s)
368 {
369     return (s->state == MIG_STATE_CANCELLED ||
370             s->state == MIG_STATE_ERROR);
371 }
372
373 static MigrationState *migrate_init(const MigrationParams *params)
374 {
375     MigrationState *s = migrate_get_current();
376     int64_t bandwidth_limit = s->bandwidth_limit;
377     bool enabled_capabilities[MIGRATION_CAPABILITY_MAX];
378     int64_t xbzrle_cache_size = s->xbzrle_cache_size;
379
380     memcpy(enabled_capabilities, s->enabled_capabilities,
381            sizeof(enabled_capabilities));
382
383     memset(s, 0, sizeof(*s));
384     s->params = *params;
385     memcpy(s->enabled_capabilities, enabled_capabilities,
386            sizeof(enabled_capabilities));
387     s->xbzrle_cache_size = xbzrle_cache_size;
388
389     s->bandwidth_limit = bandwidth_limit;
390     s->state = MIG_STATE_SETUP;
391     trace_migrate_set_state(MIG_STATE_SETUP);
392
393     s->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
394     return s;
395 }
396
397 static GSList *migration_blockers;
398
399 void migrate_add_blocker(Error *reason)
400 {
401     migration_blockers = g_slist_prepend(migration_blockers, reason);
402 }
403
404 void migrate_del_blocker(Error *reason)
405 {
406     migration_blockers = g_slist_remove(migration_blockers, reason);
407 }
408
409 void qmp_migrate(const char *uri, bool has_blk, bool blk,
410                  bool has_inc, bool inc, bool has_detach, bool detach,
411                  Error **errp)
412 {
413     Error *local_err = NULL;
414     MigrationState *s = migrate_get_current();
415     MigrationParams params;
416     const char *p;
417
418     params.blk = has_blk && blk;
419     params.shared = has_inc && inc;
420
421     if (s->state == MIG_STATE_ACTIVE || s->state == MIG_STATE_SETUP ||
422         s->state == MIG_STATE_CANCELLING) {
423         error_set(errp, QERR_MIGRATION_ACTIVE);
424         return;
425     }
426
427     if (qemu_savevm_state_blocked(errp)) {
428         return;
429     }
430
431     if (migration_blockers) {
432         *errp = error_copy(migration_blockers->data);
433         return;
434     }
435
436     s = migrate_init(&params);
437
438     if (strstart(uri, "tcp:", &p)) {
439         tcp_start_outgoing_migration(s, p, &local_err);
440 #ifdef CONFIG_RDMA
441     } else if (strstart(uri, "rdma:", &p)) {
442         rdma_start_outgoing_migration(s, p, &local_err);
443 #endif
444 #if !defined(WIN32)
445     } else if (strstart(uri, "exec:", &p)) {
446         exec_start_outgoing_migration(s, p, &local_err);
447     } else if (strstart(uri, "unix:", &p)) {
448         unix_start_outgoing_migration(s, p, &local_err);
449     } else if (strstart(uri, "fd:", &p)) {
450         fd_start_outgoing_migration(s, p, &local_err);
451 #endif
452     } else {
453         error_set(errp, QERR_INVALID_PARAMETER_VALUE, "uri", "a valid migration protocol");
454         s->state = MIG_STATE_ERROR;
455         return;
456     }
457
458     if (local_err) {
459         migrate_fd_error(s);
460         error_propagate(errp, local_err);
461         return;
462     }
463 }
464
465 void qmp_migrate_cancel(Error **errp)
466 {
467     migrate_fd_cancel(migrate_get_current());
468 }
469
470 void qmp_migrate_set_cache_size(int64_t value, Error **errp)
471 {
472     MigrationState *s = migrate_get_current();
473     int64_t new_size;
474
475     /* Check for truncation */
476     if (value != (size_t)value) {
477         error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
478                   "exceeding address space");
479         return;
480     }
481
482     /* Cache should not be larger than guest ram size */
483     if (value > ram_bytes_total()) {
484         error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
485                   "exceeds guest ram size ");
486         return;
487     }
488
489     new_size = xbzrle_cache_resize(value);
490     if (new_size < 0) {
491         error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
492                   "is smaller than page size");
493         return;
494     }
495
496     s->xbzrle_cache_size = new_size;
497 }
498
499 int64_t qmp_query_migrate_cache_size(Error **errp)
500 {
501     return migrate_xbzrle_cache_size();
502 }
503
504 void qmp_migrate_set_speed(int64_t value, Error **errp)
505 {
506     MigrationState *s;
507
508     if (value < 0) {
509         value = 0;
510     }
511     if (value > SIZE_MAX) {
512         value = SIZE_MAX;
513     }
514
515     s = migrate_get_current();
516     s->bandwidth_limit = value;
517     if (s->file) {
518         qemu_file_set_rate_limit(s->file, s->bandwidth_limit / XFER_LIMIT_RATIO);
519     }
520 }
521
522 void qmp_migrate_set_downtime(double value, Error **errp)
523 {
524     value *= 1e9;
525     value = MAX(0, MIN(UINT64_MAX, value));
526     max_downtime = (uint64_t)value;
527 }
528
529 bool migrate_rdma_pin_all(void)
530 {
531     MigrationState *s;
532
533     s = migrate_get_current();
534
535     return s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL];
536 }
537
538 bool migrate_auto_converge(void)
539 {
540     MigrationState *s;
541
542     s = migrate_get_current();
543
544     return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
545 }
546
547 bool migrate_zero_blocks(void)
548 {
549     MigrationState *s;
550
551     s = migrate_get_current();
552
553     return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS];
554 }
555
556 int migrate_use_xbzrle(void)
557 {
558     MigrationState *s;
559
560     s = migrate_get_current();
561
562     return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE];
563 }
564
565 int64_t migrate_xbzrle_cache_size(void)
566 {
567     MigrationState *s;
568
569     s = migrate_get_current();
570
571     return s->xbzrle_cache_size;
572 }
573
574 /* migration thread support */
575
576 static void *migration_thread(void *opaque)
577 {
578     MigrationState *s = opaque;
579     int64_t initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
580     int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
581     int64_t initial_bytes = 0;
582     int64_t max_size = 0;
583     int64_t start_time = initial_time;
584     bool old_vm_running = false;
585
586     DPRINTF("beginning savevm\n");
587     qemu_savevm_state_begin(s->file, &s->params);
588
589     s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
590     migrate_set_state(s, MIG_STATE_SETUP, MIG_STATE_ACTIVE);
591
592     DPRINTF("setup complete\n");
593
594     while (s->state == MIG_STATE_ACTIVE) {
595         int64_t current_time;
596         uint64_t pending_size;
597
598         if (!qemu_file_rate_limit(s->file)) {
599             DPRINTF("iterate\n");
600             pending_size = qemu_savevm_state_pending(s->file, max_size);
601             DPRINTF("pending size %" PRIu64 " max %" PRIu64 "\n",
602                     pending_size, max_size);
603             if (pending_size && pending_size >= max_size) {
604                 qemu_savevm_state_iterate(s->file);
605             } else {
606                 int ret;
607
608                 DPRINTF("done iterating\n");
609                 qemu_mutex_lock_iothread();
610                 start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
611                 qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
612                 old_vm_running = runstate_is_running();
613
614                 ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
615                 if (ret >= 0) {
616                     qemu_file_set_rate_limit(s->file, INT64_MAX);
617                     qemu_savevm_state_complete(s->file);
618                 }
619                 qemu_mutex_unlock_iothread();
620
621                 if (ret < 0) {
622                     migrate_set_state(s, MIG_STATE_ACTIVE, MIG_STATE_ERROR);
623                     break;
624                 }
625
626                 if (!qemu_file_get_error(s->file)) {
627                     migrate_set_state(s, MIG_STATE_ACTIVE, MIG_STATE_COMPLETED);
628                     break;
629                 }
630             }
631         }
632
633         if (qemu_file_get_error(s->file)) {
634             migrate_set_state(s, MIG_STATE_ACTIVE, MIG_STATE_ERROR);
635             break;
636         }
637         current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
638         if (current_time >= initial_time + BUFFER_DELAY) {
639             uint64_t transferred_bytes = qemu_ftell(s->file) - initial_bytes;
640             uint64_t time_spent = current_time - initial_time;
641             double bandwidth = transferred_bytes / time_spent;
642             max_size = bandwidth * migrate_max_downtime() / 1000000;
643
644             s->mbps = time_spent ? (((double) transferred_bytes * 8.0) /
645                     ((double) time_spent / 1000.0)) / 1000.0 / 1000.0 : -1;
646
647             DPRINTF("transferred %" PRIu64 " time_spent %" PRIu64
648                     " bandwidth %g max_size %" PRId64 "\n",
649                     transferred_bytes, time_spent, bandwidth, max_size);
650             /* if we haven't sent anything, we don't want to recalculate
651                10000 is a small enough number for our purposes */
652             if (s->dirty_bytes_rate && transferred_bytes > 10000) {
653                 s->expected_downtime = s->dirty_bytes_rate / bandwidth;
654             }
655
656             qemu_file_reset_rate_limit(s->file);
657             initial_time = current_time;
658             initial_bytes = qemu_ftell(s->file);
659         }
660         if (qemu_file_rate_limit(s->file)) {
661             /* usleep expects microseconds */
662             g_usleep((initial_time + BUFFER_DELAY - current_time)*1000);
663         }
664     }
665
666     qemu_mutex_lock_iothread();
667     if (s->state == MIG_STATE_COMPLETED) {
668         int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
669         s->total_time = end_time - s->total_time;
670         s->downtime = end_time - start_time;
671         runstate_set(RUN_STATE_POSTMIGRATE);
672     } else {
673         if (old_vm_running) {
674             vm_start();
675         }
676     }
677     qemu_bh_schedule(s->cleanup_bh);
678     qemu_mutex_unlock_iothread();
679
680     return NULL;
681 }
682
683 void migrate_fd_connect(MigrationState *s)
684 {
685     s->state = MIG_STATE_SETUP;
686     trace_migrate_set_state(MIG_STATE_SETUP);
687
688     /* This is a best 1st approximation. ns to ms */
689     s->expected_downtime = max_downtime/1000000;
690     s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup, s);
691
692     qemu_file_set_rate_limit(s->file,
693                              s->bandwidth_limit / XFER_LIMIT_RATIO);
694
695     /* Notify before starting migration thread */
696     notifier_list_notify(&migration_state_notifiers, s);
697
698     qemu_thread_create(&s->thread, "migration", migration_thread, s,
699                        QEMU_THREAD_JOINABLE);
700 }