]> rtime.felk.cvut.cz Git - lisovros/qemu_apohw.git/blob - block-migration.c
AioContext: do not rely on aio_poll(ctx, true) result to end a loop
[lisovros/qemu_apohw.git] / block-migration.c
1 /*
2  * QEMU live block migration
3  *
4  * Copyright IBM, Corp. 2009
5  *
6  * Authors:
7  *  Liran Schour   <lirans@il.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  * Contributions after 2012-01-13 are licensed under the terms of the
13  * GNU GPL, version 2 or (at your option) any later version.
14  */
15
#include "qemu-common.h"
#include "block/block_int.h"
#include "hw/hw.h"
#include "qemu/queue.h"
#include "qemu/timer.h"
#include "migration/block.h"
#include "migration/migration.h"
#include "sysemu/blockdev.h"
#include <assert.h>
#include <limits.h>
25
/* Migration unit: disk contents are read and sent in 1 MiB blocks. */
#define BLOCK_SIZE                       (1024 * 1024)
/* The same unit expressed in sectors. */
#define BDRV_SECTORS_PER_DIRTY_CHUNK     (BLOCK_SIZE >> BDRV_SECTOR_BITS)

/*
 * Flag bits carried in the low bits of every 64-bit record header
 * (the remaining bits hold a sector number or a progress percentage).
 */
#define BLK_MIG_FLAG_DEVICE_BLOCK       (1 << 0)  /* device name + block follow */
#define BLK_MIG_FLAG_EOS                (1 << 1)  /* end of this section */
#define BLK_MIG_FLAG_PROGRESS           (1 << 2)  /* header carries a percentage */
#define BLK_MIG_FLAG_ZERO_BLOCK         (1 << 3)  /* block is zero, no payload */

/* Sector count handed to each bdrv_is_allocated() probe. */
#define MAX_IS_ALLOCATED_SEARCH         (64 * 1024)
35
/* Uncomment to get block migration traces on stdout. */
//#define DEBUG_BLK_MIGRATION

/*
 * DPRINTF(fmt, ...): printf-style trace prefixed with "blk_migration: ".
 * Without DEBUG_BLK_MIGRATION it expands to an empty statement and its
 * arguments are not evaluated.
 */
#ifndef DEBUG_BLK_MIGRATION
#define DPRINTF(fmt, ...) \
    do { } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { printf("blk_migration: " fmt, ## __VA_ARGS__); } while (0)
#endif
45
46 typedef struct BlkMigDevState {
47     /* Written during setup phase.  Can be read without a lock.  */
48     BlockDriverState *bs;
49     int shared_base;
50     int64_t total_sectors;
51     QSIMPLEQ_ENTRY(BlkMigDevState) entry;
52
53     /* Only used by migration thread.  Does not need a lock.  */
54     int bulk_completed;
55     int64_t cur_sector;
56     int64_t cur_dirty;
57
58     /* Protected by block migration lock.  */
59     unsigned long *aio_bitmap;
60     int64_t completed_sectors;
61     BdrvDirtyBitmap *dirty_bitmap;
62     Error *blocker;
63 } BlkMigDevState;
64
65 typedef struct BlkMigBlock {
66     /* Only used by migration thread.  */
67     uint8_t *buf;
68     BlkMigDevState *bmds;
69     int64_t sector;
70     int nr_sectors;
71     struct iovec iov;
72     QEMUIOVector qiov;
73     BlockDriverAIOCB *aiocb;
74
75     /* Protected by block migration lock.  */
76     int ret;
77     QSIMPLEQ_ENTRY(BlkMigBlock) entry;
78 } BlkMigBlock;
79
80 typedef struct BlkMigState {
81     /* Written during setup phase.  Can be read without a lock.  */
82     int blk_enable;
83     int shared_base;
84     QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list;
85     int64_t total_sector_sum;
86     bool zero_blocks;
87
88     /* Protected by lock.  */
89     QSIMPLEQ_HEAD(blk_list, BlkMigBlock) blk_list;
90     int submitted;
91     int read_done;
92
93     /* Only used by migration thread.  Does not need a lock.  */
94     int transferred;
95     int prev_progress;
96     int bulk_completed;
97
98     /* Lock must be taken _inside_ the iothread lock.  */
99     QemuMutex lock;
100 } BlkMigState;
101
102 static BlkMigState block_mig_state;
103
104 static void blk_mig_lock(void)
105 {
106     qemu_mutex_lock(&block_mig_state.lock);
107 }
108
109 static void blk_mig_unlock(void)
110 {
111     qemu_mutex_unlock(&block_mig_state.lock);
112 }
113
114 /* Must run outside of the iothread lock during the bulk phase,
115  * or the VM will stall.
116  */
117
118 static void blk_send(QEMUFile *f, BlkMigBlock * blk)
119 {
120     int len;
121     uint64_t flags = BLK_MIG_FLAG_DEVICE_BLOCK;
122
123     if (block_mig_state.zero_blocks &&
124         buffer_is_zero(blk->buf, BLOCK_SIZE)) {
125         flags |= BLK_MIG_FLAG_ZERO_BLOCK;
126     }
127
128     /* sector number and flags */
129     qemu_put_be64(f, (blk->sector << BDRV_SECTOR_BITS)
130                      | flags);
131
132     /* device name */
133     len = strlen(blk->bmds->bs->device_name);
134     qemu_put_byte(f, len);
135     qemu_put_buffer(f, (uint8_t *)blk->bmds->bs->device_name, len);
136
137     /* if a block is zero we need to flush here since the network
138      * bandwidth is now a lot higher than the storage device bandwidth.
139      * thus if we queue zero blocks we slow down the migration */
140     if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
141         qemu_fflush(f);
142         return;
143     }
144
145     qemu_put_buffer(f, blk->buf, BLOCK_SIZE);
146 }
147
148 int blk_mig_active(void)
149 {
150     return !QSIMPLEQ_EMPTY(&block_mig_state.bmds_list);
151 }
152
153 uint64_t blk_mig_bytes_transferred(void)
154 {
155     BlkMigDevState *bmds;
156     uint64_t sum = 0;
157
158     blk_mig_lock();
159     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
160         sum += bmds->completed_sectors;
161     }
162     blk_mig_unlock();
163     return sum << BDRV_SECTOR_BITS;
164 }
165
/* Return the total byte count minus the bytes already accounted as completed. */
uint64_t blk_mig_bytes_remaining(void)
{
    return blk_mig_bytes_total() - blk_mig_bytes_transferred();
}
170
171 uint64_t blk_mig_bytes_total(void)
172 {
173     BlkMigDevState *bmds;
174     uint64_t sum = 0;
175
176     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
177         sum += bmds->total_sectors;
178     }
179     return sum << BDRV_SECTOR_BITS;
180 }
181
182
183 /* Called with migration lock held.  */
184
185 static int bmds_aio_inflight(BlkMigDevState *bmds, int64_t sector)
186 {
187     int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
188
189     if ((sector << BDRV_SECTOR_BITS) < bdrv_getlength(bmds->bs)) {
190         return !!(bmds->aio_bitmap[chunk / (sizeof(unsigned long) * 8)] &
191             (1UL << (chunk % (sizeof(unsigned long) * 8))));
192     } else {
193         return 0;
194     }
195 }
196
197 /* Called with migration lock held.  */
198
199 static void bmds_set_aio_inflight(BlkMigDevState *bmds, int64_t sector_num,
200                              int nb_sectors, int set)
201 {
202     int64_t start, end;
203     unsigned long val, idx, bit;
204
205     start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
206     end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
207
208     for (; start <= end; start++) {
209         idx = start / (sizeof(unsigned long) * 8);
210         bit = start % (sizeof(unsigned long) * 8);
211         val = bmds->aio_bitmap[idx];
212         if (set) {
213             val |= 1UL << bit;
214         } else {
215             val &= ~(1UL << bit);
216         }
217         bmds->aio_bitmap[idx] = val;
218     }
219 }
220
221 static void alloc_aio_bitmap(BlkMigDevState *bmds)
222 {
223     BlockDriverState *bs = bmds->bs;
224     int64_t bitmap_size;
225
226     bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
227             BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
228     bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
229
230     bmds->aio_bitmap = g_malloc0(bitmap_size);
231 }
232
233 /* Never hold migration lock when yielding to the main loop!  */
234
235 static void blk_mig_read_cb(void *opaque, int ret)
236 {
237     BlkMigBlock *blk = opaque;
238
239     blk_mig_lock();
240     blk->ret = ret;
241
242     QSIMPLEQ_INSERT_TAIL(&block_mig_state.blk_list, blk, entry);
243     bmds_set_aio_inflight(blk->bmds, blk->sector, blk->nr_sectors, 0);
244
245     block_mig_state.submitted--;
246     block_mig_state.read_done++;
247     assert(block_mig_state.submitted >= 0);
248     blk_mig_unlock();
249 }
250
251 /* Called with no lock taken.  */
252
253 static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
254 {
255     int64_t total_sectors = bmds->total_sectors;
256     int64_t cur_sector = bmds->cur_sector;
257     BlockDriverState *bs = bmds->bs;
258     BlkMigBlock *blk;
259     int nr_sectors;
260
261     if (bmds->shared_base) {
262         qemu_mutex_lock_iothread();
263         while (cur_sector < total_sectors &&
264                !bdrv_is_allocated(bs, cur_sector, MAX_IS_ALLOCATED_SEARCH,
265                                   &nr_sectors)) {
266             cur_sector += nr_sectors;
267         }
268         qemu_mutex_unlock_iothread();
269     }
270
271     if (cur_sector >= total_sectors) {
272         bmds->cur_sector = bmds->completed_sectors = total_sectors;
273         return 1;
274     }
275
276     bmds->completed_sectors = cur_sector;
277
278     cur_sector &= ~((int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK - 1);
279
280     /* we are going to transfer a full block even if it is not allocated */
281     nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
282
283     if (total_sectors - cur_sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
284         nr_sectors = total_sectors - cur_sector;
285     }
286
287     blk = g_malloc(sizeof(BlkMigBlock));
288     blk->buf = g_malloc(BLOCK_SIZE);
289     blk->bmds = bmds;
290     blk->sector = cur_sector;
291     blk->nr_sectors = nr_sectors;
292
293     blk->iov.iov_base = blk->buf;
294     blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
295     qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
296
297     blk_mig_lock();
298     block_mig_state.submitted++;
299     blk_mig_unlock();
300
301     qemu_mutex_lock_iothread();
302     blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
303                                 nr_sectors, blk_mig_read_cb, blk);
304
305     bdrv_reset_dirty(bs, cur_sector, nr_sectors);
306     qemu_mutex_unlock_iothread();
307
308     bmds->cur_sector = cur_sector + nr_sectors;
309     return (bmds->cur_sector >= total_sectors);
310 }
311
312 /* Called with iothread lock taken.  */
313
314 static int set_dirty_tracking(void)
315 {
316     BlkMigDevState *bmds;
317     int ret;
318
319     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
320         bmds->dirty_bitmap = bdrv_create_dirty_bitmap(bmds->bs, BLOCK_SIZE,
321                                                       NULL);
322         if (!bmds->dirty_bitmap) {
323             ret = -errno;
324             goto fail;
325         }
326     }
327     return 0;
328
329 fail:
330     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
331         if (bmds->dirty_bitmap) {
332             bdrv_release_dirty_bitmap(bmds->bs, bmds->dirty_bitmap);
333         }
334     }
335     return ret;
336 }
337
338 static void unset_dirty_tracking(void)
339 {
340     BlkMigDevState *bmds;
341
342     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
343         bdrv_release_dirty_bitmap(bmds->bs, bmds->dirty_bitmap);
344     }
345 }
346
347 static void init_blk_migration_it(void *opaque, BlockDriverState *bs)
348 {
349     BlkMigDevState *bmds;
350     int64_t sectors;
351
352     if (!bdrv_is_read_only(bs)) {
353         sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
354         if (sectors <= 0) {
355             return;
356         }
357
358         bmds = g_malloc0(sizeof(BlkMigDevState));
359         bmds->bs = bs;
360         bmds->bulk_completed = 0;
361         bmds->total_sectors = sectors;
362         bmds->completed_sectors = 0;
363         bmds->shared_base = block_mig_state.shared_base;
364         alloc_aio_bitmap(bmds);
365         error_setg(&bmds->blocker, "block device is in use by migration");
366         bdrv_op_block_all(bs, bmds->blocker);
367         bdrv_ref(bs);
368
369         block_mig_state.total_sector_sum += sectors;
370
371         if (bmds->shared_base) {
372             DPRINTF("Start migration for %s with shared base image\n",
373                     bs->device_name);
374         } else {
375             DPRINTF("Start full migration for %s\n", bs->device_name);
376         }
377
378         QSIMPLEQ_INSERT_TAIL(&block_mig_state.bmds_list, bmds, entry);
379     }
380 }
381
382 static void init_blk_migration(QEMUFile *f)
383 {
384     block_mig_state.submitted = 0;
385     block_mig_state.read_done = 0;
386     block_mig_state.transferred = 0;
387     block_mig_state.total_sector_sum = 0;
388     block_mig_state.prev_progress = -1;
389     block_mig_state.bulk_completed = 0;
390     block_mig_state.zero_blocks = migrate_zero_blocks();
391
392     bdrv_iterate(init_blk_migration_it, NULL);
393 }
394
395 /* Called with no lock taken.  */
396
397 static int blk_mig_save_bulked_block(QEMUFile *f)
398 {
399     int64_t completed_sector_sum = 0;
400     BlkMigDevState *bmds;
401     int progress;
402     int ret = 0;
403
404     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
405         if (bmds->bulk_completed == 0) {
406             if (mig_save_device_bulk(f, bmds) == 1) {
407                 /* completed bulk section for this device */
408                 bmds->bulk_completed = 1;
409             }
410             completed_sector_sum += bmds->completed_sectors;
411             ret = 1;
412             break;
413         } else {
414             completed_sector_sum += bmds->completed_sectors;
415         }
416     }
417
418     if (block_mig_state.total_sector_sum != 0) {
419         progress = completed_sector_sum * 100 /
420                    block_mig_state.total_sector_sum;
421     } else {
422         progress = 100;
423     }
424     if (progress != block_mig_state.prev_progress) {
425         block_mig_state.prev_progress = progress;
426         qemu_put_be64(f, (progress << BDRV_SECTOR_BITS)
427                          | BLK_MIG_FLAG_PROGRESS);
428         DPRINTF("Completed %d %%\r", progress);
429     }
430
431     return ret;
432 }
433
434 static void blk_mig_reset_dirty_cursor(void)
435 {
436     BlkMigDevState *bmds;
437
438     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
439         bmds->cur_dirty = 0;
440     }
441 }
442
443 /* Called with iothread lock taken.  */
444
445 static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
446                                  int is_async)
447 {
448     BlkMigBlock *blk;
449     int64_t total_sectors = bmds->total_sectors;
450     int64_t sector;
451     int nr_sectors;
452     int ret = -EIO;
453
454     for (sector = bmds->cur_dirty; sector < bmds->total_sectors;) {
455         blk_mig_lock();
456         if (bmds_aio_inflight(bmds, sector)) {
457             blk_mig_unlock();
458             bdrv_drain_all();
459         } else {
460             blk_mig_unlock();
461         }
462         if (bdrv_get_dirty(bmds->bs, bmds->dirty_bitmap, sector)) {
463
464             if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
465                 nr_sectors = total_sectors - sector;
466             } else {
467                 nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
468             }
469             blk = g_malloc(sizeof(BlkMigBlock));
470             blk->buf = g_malloc(BLOCK_SIZE);
471             blk->bmds = bmds;
472             blk->sector = sector;
473             blk->nr_sectors = nr_sectors;
474
475             if (is_async) {
476                 blk->iov.iov_base = blk->buf;
477                 blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
478                 qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
479
480                 blk->aiocb = bdrv_aio_readv(bmds->bs, sector, &blk->qiov,
481                                             nr_sectors, blk_mig_read_cb, blk);
482
483                 blk_mig_lock();
484                 block_mig_state.submitted++;
485                 bmds_set_aio_inflight(bmds, sector, nr_sectors, 1);
486                 blk_mig_unlock();
487             } else {
488                 ret = bdrv_read(bmds->bs, sector, blk->buf, nr_sectors);
489                 if (ret < 0) {
490                     goto error;
491                 }
492                 blk_send(f, blk);
493
494                 g_free(blk->buf);
495                 g_free(blk);
496             }
497
498             bdrv_reset_dirty(bmds->bs, sector, nr_sectors);
499             break;
500         }
501         sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
502         bmds->cur_dirty = sector;
503     }
504
505     return (bmds->cur_dirty >= bmds->total_sectors);
506
507 error:
508     DPRINTF("Error reading sector %" PRId64 "\n", sector);
509     g_free(blk->buf);
510     g_free(blk);
511     return ret;
512 }
513
514 /* Called with iothread lock taken.
515  *
516  * return value:
517  * 0: too much data for max_downtime
518  * 1: few enough data for max_downtime
519 */
520 static int blk_mig_save_dirty_block(QEMUFile *f, int is_async)
521 {
522     BlkMigDevState *bmds;
523     int ret = 1;
524
525     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
526         ret = mig_save_device_dirty(f, bmds, is_async);
527         if (ret <= 0) {
528             break;
529         }
530     }
531
532     return ret;
533 }
534
535 /* Called with no locks taken.  */
536
537 static int flush_blks(QEMUFile *f)
538 {
539     BlkMigBlock *blk;
540     int ret = 0;
541
542     DPRINTF("%s Enter submitted %d read_done %d transferred %d\n",
543             __FUNCTION__, block_mig_state.submitted, block_mig_state.read_done,
544             block_mig_state.transferred);
545
546     blk_mig_lock();
547     while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
548         if (qemu_file_rate_limit(f)) {
549             break;
550         }
551         if (blk->ret < 0) {
552             ret = blk->ret;
553             break;
554         }
555
556         QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
557         blk_mig_unlock();
558         blk_send(f, blk);
559         blk_mig_lock();
560
561         g_free(blk->buf);
562         g_free(blk);
563
564         block_mig_state.read_done--;
565         block_mig_state.transferred++;
566         assert(block_mig_state.read_done >= 0);
567     }
568     blk_mig_unlock();
569
570     DPRINTF("%s Exit submitted %d read_done %d transferred %d\n", __FUNCTION__,
571             block_mig_state.submitted, block_mig_state.read_done,
572             block_mig_state.transferred);
573     return ret;
574 }
575
576 /* Called with iothread lock taken.  */
577
578 static int64_t get_remaining_dirty(void)
579 {
580     BlkMigDevState *bmds;
581     int64_t dirty = 0;
582
583     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
584         dirty += bdrv_get_dirty_count(bmds->bs, bmds->dirty_bitmap);
585     }
586
587     return dirty << BDRV_SECTOR_BITS;
588 }
589
590 /* Called with iothread lock taken.  */
591
592 static void blk_mig_cleanup(void)
593 {
594     BlkMigDevState *bmds;
595     BlkMigBlock *blk;
596
597     bdrv_drain_all();
598
599     unset_dirty_tracking();
600
601     blk_mig_lock();
602     while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
603         QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
604         bdrv_op_unblock_all(bmds->bs, bmds->blocker);
605         error_free(bmds->blocker);
606         bdrv_unref(bmds->bs);
607         g_free(bmds->aio_bitmap);
608         g_free(bmds);
609     }
610
611     while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
612         QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
613         g_free(blk->buf);
614         g_free(blk);
615     }
616     blk_mig_unlock();
617 }
618
/* SaveVMHandlers .cancel hook: release all block migration state. */
static void block_migration_cancel(void *opaque)
{
    blk_mig_cleanup();
}
623
624 static int block_save_setup(QEMUFile *f, void *opaque)
625 {
626     int ret;
627
628     DPRINTF("Enter save live setup submitted %d transferred %d\n",
629             block_mig_state.submitted, block_mig_state.transferred);
630
631     qemu_mutex_lock_iothread();
632     init_blk_migration(f);
633
634     /* start track dirty blocks */
635     ret = set_dirty_tracking();
636
637     if (ret) {
638         qemu_mutex_unlock_iothread();
639         return ret;
640     }
641
642     qemu_mutex_unlock_iothread();
643
644     ret = flush_blks(f);
645     blk_mig_reset_dirty_cursor();
646     qemu_put_be64(f, BLK_MIG_FLAG_EOS);
647
648     return ret;
649 }
650
651 static int block_save_iterate(QEMUFile *f, void *opaque)
652 {
653     int ret;
654     int64_t last_ftell = qemu_ftell(f);
655
656     DPRINTF("Enter save live iterate submitted %d transferred %d\n",
657             block_mig_state.submitted, block_mig_state.transferred);
658
659     ret = flush_blks(f);
660     if (ret) {
661         return ret;
662     }
663
664     blk_mig_reset_dirty_cursor();
665
666     /* control the rate of transfer */
667     blk_mig_lock();
668     while ((block_mig_state.submitted +
669             block_mig_state.read_done) * BLOCK_SIZE <
670            qemu_file_get_rate_limit(f)) {
671         blk_mig_unlock();
672         if (block_mig_state.bulk_completed == 0) {
673             /* first finish the bulk phase */
674             if (blk_mig_save_bulked_block(f) == 0) {
675                 /* finished saving bulk on all devices */
676                 block_mig_state.bulk_completed = 1;
677             }
678             ret = 0;
679         } else {
680             /* Always called with iothread lock taken for
681              * simplicity, block_save_complete also calls it.
682              */
683             qemu_mutex_lock_iothread();
684             ret = blk_mig_save_dirty_block(f, 1);
685             qemu_mutex_unlock_iothread();
686         }
687         if (ret < 0) {
688             return ret;
689         }
690         blk_mig_lock();
691         if (ret != 0) {
692             /* no more dirty blocks */
693             break;
694         }
695     }
696     blk_mig_unlock();
697
698     ret = flush_blks(f);
699     if (ret) {
700         return ret;
701     }
702
703     qemu_put_be64(f, BLK_MIG_FLAG_EOS);
704     return qemu_ftell(f) - last_ftell;
705 }
706
707 /* Called with iothread lock taken.  */
708
709 static int block_save_complete(QEMUFile *f, void *opaque)
710 {
711     int ret;
712
713     DPRINTF("Enter save live complete submitted %d transferred %d\n",
714             block_mig_state.submitted, block_mig_state.transferred);
715
716     ret = flush_blks(f);
717     if (ret) {
718         return ret;
719     }
720
721     blk_mig_reset_dirty_cursor();
722
723     /* we know for sure that save bulk is completed and
724        all async read completed */
725     blk_mig_lock();
726     assert(block_mig_state.submitted == 0);
727     blk_mig_unlock();
728
729     do {
730         ret = blk_mig_save_dirty_block(f, 0);
731         if (ret < 0) {
732             return ret;
733         }
734     } while (ret == 0);
735
736     /* report completion */
737     qemu_put_be64(f, (100 << BDRV_SECTOR_BITS) | BLK_MIG_FLAG_PROGRESS);
738
739     DPRINTF("Block migration completed\n");
740
741     qemu_put_be64(f, BLK_MIG_FLAG_EOS);
742
743     blk_mig_cleanup();
744     return 0;
745 }
746
747 static uint64_t block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
748 {
749     /* Estimate pending number of bytes to send */
750     uint64_t pending;
751
752     qemu_mutex_lock_iothread();
753     blk_mig_lock();
754     pending = get_remaining_dirty() +
755                        block_mig_state.submitted * BLOCK_SIZE +
756                        block_mig_state.read_done * BLOCK_SIZE;
757
758     /* Report at least one block pending during bulk phase */
759     if (pending == 0 && !block_mig_state.bulk_completed) {
760         pending = BLOCK_SIZE;
761     }
762     blk_mig_unlock();
763     qemu_mutex_unlock_iothread();
764
765     DPRINTF("Enter save live pending  %" PRIu64 "\n", pending);
766     return pending;
767 }
768
769 static int block_load(QEMUFile *f, void *opaque, int version_id)
770 {
771     static int banner_printed;
772     int len, flags;
773     char device_name[256];
774     int64_t addr;
775     BlockDriverState *bs, *bs_prev = NULL;
776     uint8_t *buf;
777     int64_t total_sectors = 0;
778     int nr_sectors;
779     int ret;
780
781     do {
782         addr = qemu_get_be64(f);
783
784         flags = addr & ~BDRV_SECTOR_MASK;
785         addr >>= BDRV_SECTOR_BITS;
786
787         if (flags & BLK_MIG_FLAG_DEVICE_BLOCK) {
788             /* get device name */
789             len = qemu_get_byte(f);
790             qemu_get_buffer(f, (uint8_t *)device_name, len);
791             device_name[len] = '\0';
792
793             bs = bdrv_find(device_name);
794             if (!bs) {
795                 fprintf(stderr, "Error unknown block device %s\n",
796                         device_name);
797                 return -EINVAL;
798             }
799
800             if (bs != bs_prev) {
801                 bs_prev = bs;
802                 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
803                 if (total_sectors <= 0) {
804                     error_report("Error getting length of block device %s",
805                                  device_name);
806                     return -EINVAL;
807                 }
808             }
809
810             if (total_sectors - addr < BDRV_SECTORS_PER_DIRTY_CHUNK) {
811                 nr_sectors = total_sectors - addr;
812             } else {
813                 nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
814             }
815
816             if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
817                 ret = bdrv_write_zeroes(bs, addr, nr_sectors,
818                                         BDRV_REQ_MAY_UNMAP);
819             } else {
820                 buf = g_malloc(BLOCK_SIZE);
821                 qemu_get_buffer(f, buf, BLOCK_SIZE);
822                 ret = bdrv_write(bs, addr, buf, nr_sectors);
823                 g_free(buf);
824             }
825
826             if (ret < 0) {
827                 return ret;
828             }
829         } else if (flags & BLK_MIG_FLAG_PROGRESS) {
830             if (!banner_printed) {
831                 printf("Receiving block device images\n");
832                 banner_printed = 1;
833             }
834             printf("Completed %d %%%c", (int)addr,
835                    (addr == 100) ? '\n' : '\r');
836             fflush(stdout);
837         } else if (!(flags & BLK_MIG_FLAG_EOS)) {
838             fprintf(stderr, "Unknown block migration flags: %#x\n", flags);
839             return -EINVAL;
840         }
841         ret = qemu_file_get_error(f);
842         if (ret != 0) {
843             return ret;
844         }
845     } while (!(flags & BLK_MIG_FLAG_EOS));
846
847     return 0;
848 }
849
850 static void block_set_params(const MigrationParams *params, void *opaque)
851 {
852     block_mig_state.blk_enable = params->blk;
853     block_mig_state.shared_base = params->shared;
854
855     /* shared base means that blk_enable = 1 */
856     block_mig_state.blk_enable |= params->shared;
857 }
858
859 static bool block_is_active(void *opaque)
860 {
861     return block_mig_state.blk_enable == 1;
862 }
863
864 SaveVMHandlers savevm_block_handlers = {
865     .set_params = block_set_params,
866     .save_live_setup = block_save_setup,
867     .save_live_iterate = block_save_iterate,
868     .save_live_complete = block_save_complete,
869     .save_live_pending = block_save_pending,
870     .load_state = block_load,
871     .cancel = block_migration_cancel,
872     .is_active = block_is_active,
873 };
874
875 void blk_mig_init(void)
876 {
877     QSIMPLEQ_INIT(&block_mig_state.bmds_list);
878     QSIMPLEQ_INIT(&block_mig_state.blk_list);
879     qemu_mutex_init(&block_mig_state.lock);
880
881     register_savevm_live(NULL, "block", 0, 1, &savevm_block_handlers,
882                          &block_mig_state);
883 }