2 * QEMU System Emulator block driver
4 * Copyright (c) 2003 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 #include "config-host.h"
25 #include "qemu-common.h"
27 #include "monitor/monitor.h"
28 #include "block/block_int.h"
29 #include "block/blockjob.h"
30 #include "qemu/module.h"
31 #include "qapi/qmp/qjson.h"
32 #include "sysemu/sysemu.h"
33 #include "qemu/notify.h"
34 #include "block/coroutine.h"
35 #include "block/qapi.h"
36 #include "qmp-commands.h"
37 #include "qemu/timer.h"
40 #include <sys/types.h>
42 #include <sys/ioctl.h>
43 #include <sys/queue.h>
/* Per-BDS dirty-bitmap record; entries are linked on bs->dirty_bitmaps.
 * NOTE(review): the extraction dropped lines here (the bitmap payload field
 * and the closing brace are missing) -- confirm against upstream QEMU. */
53 struct BdrvDirtyBitmap {
55     QLIST_ENTRY(BdrvDirtyBitmap) list;
/* Sentinel coroutine return value used while an emulated sync op is in flight. */
58 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
/* Forward declarations: media-change notification, the AIO/coroutine
 * emulation shims installed by bdrv_register(), and the core read/write
 * request paths that are defined later in this file. */
60 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
61 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
62 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
63 BlockDriverCompletionFunc *cb, void *opaque);
64 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
65 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
66 BlockDriverCompletionFunc *cb, void *opaque);
67 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
68 int64_t sector_num, int nb_sectors,
70 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
71 int64_t sector_num, int nb_sectors,
/* Byte-granularity request entry points (offset/bytes rather than sectors). */
73 static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
74 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
75 BdrvRequestFlags flags);
76 static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
77 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
78 BdrvRequestFlags flags);
79 static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
83 BdrvRequestFlags flags,
84 BlockDriverCompletionFunc *cb,
87 static void coroutine_fn bdrv_co_do_rw(void *opaque);
88 static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
89 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
/* All BDSes that carry a device name, in creation order (see bdrv_new()). */
91 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
92 QTAILQ_HEAD_INITIALIZER(bdrv_states);
/* BDSes with a node-name, kept on a separate graph list
 * (see bdrv_assign_node_name()). */
94 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
95 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
/* Registry of every driver passed to bdrv_register(). */
97 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
98 QLIST_HEAD_INITIALIZER(bdrv_drivers);
100 /* If non-zero, use only whitelisted block drivers */
101 static int use_bdrv_whitelist;
/* Return non-zero if filename starts with a Windows drive letter followed by
 * a colon, e.g. "c:" or "Z:\foo". (Restored from mangled extraction.) */
static int is_windows_drive_prefix(const char *filename)
{
    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
            filename[1] == ':');
}
111 int is_windows_drive(const char *filename)
113 if (is_windows_drive_prefix(filename) &&
116 if (strstart(filename, "\\\\.\\", NULL) ||
117 strstart(filename, "//./", NULL))
123 /* throttling disk I/O limits */
124 void bdrv_set_io_limits(BlockDriverState *bs,
129 throttle_config(&bs->throttle_state, cfg);
131 for (i = 0; i < 2; i++) {
132 qemu_co_enter_next(&bs->throttled_reqs[i]);
136 /* this function drain all the throttled IOs */
137 static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
139 bool drained = false;
140 bool enabled = bs->io_limits_enabled;
143 bs->io_limits_enabled = false;
145 for (i = 0; i < 2; i++) {
146 while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
151 bs->io_limits_enabled = enabled;
156 void bdrv_io_limits_disable(BlockDriverState *bs)
158 bs->io_limits_enabled = false;
160 bdrv_start_throttled_reqs(bs);
162 throttle_destroy(&bs->throttle_state);
165 static void bdrv_throttle_read_timer_cb(void *opaque)
167 BlockDriverState *bs = opaque;
168 qemu_co_enter_next(&bs->throttled_reqs[0]);
171 static void bdrv_throttle_write_timer_cb(void *opaque)
173 BlockDriverState *bs = opaque;
174 qemu_co_enter_next(&bs->throttled_reqs[1]);
177 /* should be called before bdrv_set_io_limits if a limit is set */
178 void bdrv_io_limits_enable(BlockDriverState *bs)
180 assert(!bs->io_limits_enabled);
181 throttle_init(&bs->throttle_state,
183 bdrv_throttle_read_timer_cb,
184 bdrv_throttle_write_timer_cb,
186 bs->io_limits_enabled = true;
189 /* This function makes an IO wait if needed
191 * @nb_sectors: the number of sectors of the IO
192 * @is_write: is the IO a write
194 static void bdrv_io_limits_intercept(BlockDriverState *bs,
198 /* does this io must wait */
199 bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);
201 /* if must wait or any request of this type throttled queue the IO */
203 !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
204 qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
207 /* the IO will be executed, do the accounting */
208 throttle_account(&bs->throttle_state, is_write, bytes);
211 /* if the next request must wait -> do nothing */
212 if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
216 /* else queue next request for execution */
217 qemu_co_queue_next(&bs->throttled_reqs[is_write]);
220 size_t bdrv_opt_mem_align(BlockDriverState *bs)
222 if (!bs || !bs->drv) {
223 /* 4k should be on the safe side */
227 return bs->bl.opt_mem_alignment;
/* check if the path starts with "<protocol>:" */
/* Returns non-zero iff a ':' appears before any path separator; Windows
 * drive specifiers are explicitly not protocols. */
static int path_has_protocol(const char *path)
{
    const char *p;

#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    p = path + strcspn(path, ":/\\");
#else
    p = path + strcspn(path, ":/");
#endif

    return *p == ':';
}
/* Return non-zero if path is absolute; on Windows this also covers drive
 * specifiers and backslash-rooted paths. (Restored from mangled extraction.) */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return (*path == '/' || *path == '\\');
#else
    return (*path == '/');
#endif
}
/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it is relative to base_path. URL are
   supported.
   NOTE(review): restored from a mangled extraction -- verify the Windows
   backslash handling against upstream. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0)
        return;
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        /* skip a possible "<protocol>:" prefix of base_path */
        p = strchr(base_path, ':');
        if (p)
            p++;
        else
            p = base_path;
        /* keep everything up to (and including) the last separator */
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1)
                p1 = p2;
        }
#endif
        if (p1)
            p1++;
        else
            p1 = base_path;
        if (p1 > p)
            p = p1;
        len = p - base_path;
        if (len > dest_size - 1)
            len = dest_size - 1;
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}
305 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
307 if (bs->backing_file[0] == '\0' || path_has_protocol(bs->backing_file)) {
308 pstrcpy(dest, sz, bs->backing_file);
310 path_combine(dest, sz, bs->filename, bs->backing_file);
314 void bdrv_register(BlockDriver *bdrv)
316 /* Block drivers without coroutine functions need emulation */
317 if (!bdrv->bdrv_co_readv) {
318 bdrv->bdrv_co_readv = bdrv_co_readv_em;
319 bdrv->bdrv_co_writev = bdrv_co_writev_em;
321 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
322 * the block driver lacks aio we need to emulate that too.
324 if (!bdrv->bdrv_aio_readv) {
325 /* add AIO emulation layer */
326 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
327 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
331 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
334 /* create a new block device (by default it is empty) */
335 BlockDriverState *bdrv_new(const char *device_name)
337 BlockDriverState *bs;
339 bs = g_malloc0(sizeof(BlockDriverState));
340 QLIST_INIT(&bs->dirty_bitmaps);
341 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
342 if (device_name[0] != '\0') {
343 QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
345 bdrv_iostatus_disable(bs);
346 notifier_list_init(&bs->close_notifiers);
347 notifier_with_return_list_init(&bs->before_write_notifiers);
348 qemu_co_queue_init(&bs->throttled_reqs[0]);
349 qemu_co_queue_init(&bs->throttled_reqs[1]);
355 void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
357 notifier_list_add(&bs->close_notifiers, notify);
360 BlockDriver *bdrv_find_format(const char *format_name)
363 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
364 if (!strcmp(drv1->format_name, format_name)) {
371 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
373 static const char *whitelist_rw[] = {
374 CONFIG_BDRV_RW_WHITELIST
376 static const char *whitelist_ro[] = {
377 CONFIG_BDRV_RO_WHITELIST
381 if (!whitelist_rw[0] && !whitelist_ro[0]) {
382 return 1; /* no whitelist, anything goes */
385 for (p = whitelist_rw; *p; p++) {
386 if (!strcmp(drv->format_name, *p)) {
391 for (p = whitelist_ro; *p; p++) {
392 if (!strcmp(drv->format_name, *p)) {
400 BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
403 BlockDriver *drv = bdrv_find_format(format_name);
404 return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
407 typedef struct CreateCo {
410 QEMUOptionParameter *options;
415 static void coroutine_fn bdrv_create_co_entry(void *opaque)
417 Error *local_err = NULL;
420 CreateCo *cco = opaque;
423 ret = cco->drv->bdrv_create(cco->filename, cco->options, &local_err);
425 error_propagate(&cco->err, local_err);
430 int bdrv_create(BlockDriver *drv, const char* filename,
431 QEMUOptionParameter *options, Error **errp)
438 .filename = g_strdup(filename),
444 if (!drv->bdrv_create) {
445 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
450 if (qemu_in_coroutine()) {
451 /* Fast-path if already in coroutine context */
452 bdrv_create_co_entry(&cco);
454 co = qemu_coroutine_create(bdrv_create_co_entry);
455 qemu_coroutine_enter(co, &cco);
456 while (cco.ret == NOT_DONE) {
464 error_propagate(errp, cco.err);
466 error_setg_errno(errp, -ret, "Could not create image");
471 g_free(cco.filename);
475 int bdrv_create_file(const char* filename, QEMUOptionParameter *options,
479 Error *local_err = NULL;
482 drv = bdrv_find_protocol(filename, true);
484 error_setg(errp, "Could not find protocol for file '%s'", filename);
488 ret = bdrv_create(drv, filename, options, &local_err);
490 error_propagate(errp, local_err);
495 int bdrv_refresh_limits(BlockDriverState *bs)
497 BlockDriver *drv = bs->drv;
499 memset(&bs->bl, 0, sizeof(bs->bl));
505 /* Take some limits from the children as a default */
507 bdrv_refresh_limits(bs->file);
508 bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
509 bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
511 bs->bl.opt_mem_alignment = 512;
514 if (bs->backing_hd) {
515 bdrv_refresh_limits(bs->backing_hd);
516 bs->bl.opt_transfer_length =
517 MAX(bs->bl.opt_transfer_length,
518 bs->backing_hd->bl.opt_transfer_length);
519 bs->bl.opt_mem_alignment =
520 MAX(bs->bl.opt_mem_alignment,
521 bs->backing_hd->bl.opt_mem_alignment);
524 /* Then let the driver override it */
525 if (drv->bdrv_refresh_limits) {
526 return drv->bdrv_refresh_limits(bs);
/*
 * Create a uniquely-named empty temporary file.
 * Return 0 upon success, otherwise a negative errno value.
 * (Restored from mangled extraction.)
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater. */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;
    tmpdir = getenv("TMPDIR");
    if (!tmpdir)
        tmpdir = "/tmp";
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        unlink(filename);
        return -errno;
    }
    return 0;
#endif
}
568 * Detect host devices. By convention, /dev/cdrom[N] is always
569 * recognized as a host CDROM.
571 static BlockDriver *find_hdev_driver(const char *filename)
573 int score_max = 0, score;
574 BlockDriver *drv = NULL, *d;
576 QLIST_FOREACH(d, &bdrv_drivers, list) {
577 if (d->bdrv_probe_device) {
578 score = d->bdrv_probe_device(filename);
579 if (score > score_max) {
589 BlockDriver *bdrv_find_protocol(const char *filename,
590 bool allow_protocol_prefix)
597 /* TODO Drivers without bdrv_file_open must be specified explicitly */
600 * XXX(hch): we really should not let host device detection
601 * override an explicit protocol specification, but moving this
602 * later breaks access to device names with colons in them.
603 * Thanks to the brain-dead persistent naming schemes on udev-
604 * based Linux systems those actually are quite common.
606 drv1 = find_hdev_driver(filename);
611 if (!path_has_protocol(filename) || !allow_protocol_prefix) {
612 return bdrv_find_format("file");
615 p = strchr(filename, ':');
618 if (len > sizeof(protocol) - 1)
619 len = sizeof(protocol) - 1;
620 memcpy(protocol, filename, len);
621 protocol[len] = '\0';
622 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
623 if (drv1->protocol_name &&
624 !strcmp(drv1->protocol_name, protocol)) {
631 static int find_image_format(BlockDriverState *bs, const char *filename,
632 BlockDriver **pdrv, Error **errp)
634 int score, score_max;
635 BlockDriver *drv1, *drv;
639 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
640 if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
641 drv = bdrv_find_format("raw");
643 error_setg(errp, "Could not find raw image format");
650 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
652 error_setg_errno(errp, -ret, "Could not read image for determining its "
660 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
661 if (drv1->bdrv_probe) {
662 score = drv1->bdrv_probe(buf, ret, filename);
663 if (score > score_max) {
670 error_setg(errp, "Could not determine image format: No compatible "
679 * Set the current 'total_sectors' value
681 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
683 BlockDriver *drv = bs->drv;
685 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
689 /* query actual device if possible, otherwise just trust the hint */
690 if (drv->bdrv_getlength) {
691 int64_t length = drv->bdrv_getlength(bs);
695 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
698 bs->total_sectors = hint;
703 * Set open flags for a given discard mode
705 * Return 0 on success, -1 if the discard mode was invalid.
707 int bdrv_parse_discard_flags(const char *mode, int *flags)
709 *flags &= ~BDRV_O_UNMAP;
711 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
713 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
714 *flags |= BDRV_O_UNMAP;
723 * Set open flags for a given cache mode
725 * Return 0 on success, -1 if the cache mode was invalid.
727 int bdrv_parse_cache_flags(const char *mode, int *flags)
729 *flags &= ~BDRV_O_CACHE_MASK;
731 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
732 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
733 } else if (!strcmp(mode, "directsync")) {
734 *flags |= BDRV_O_NOCACHE;
735 } else if (!strcmp(mode, "writeback")) {
736 *flags |= BDRV_O_CACHE_WB;
737 } else if (!strcmp(mode, "unsafe")) {
738 *flags |= BDRV_O_CACHE_WB;
739 *flags |= BDRV_O_NO_FLUSH;
740 } else if (!strcmp(mode, "writethrough")) {
741 /* this is the default */
750 * The copy-on-read flag is actually a reference count so multiple users may
751 * use the feature without worrying about clobbering its previous state.
752 * Copy-on-read stays enabled until all users have called to disable it.
754 void bdrv_enable_copy_on_read(BlockDriverState *bs)
759 void bdrv_disable_copy_on_read(BlockDriverState *bs)
761 assert(bs->copy_on_read > 0);
765 static int bdrv_open_flags(BlockDriverState *bs, int flags)
767 int open_flags = flags | BDRV_O_CACHE_WB;
770 * Clear flags that are internal to the block layer before opening the
773 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
776 * Snapshots should be writable.
778 if (bs->is_temporary) {
779 open_flags |= BDRV_O_RDWR;
785 static int bdrv_assign_node_name(BlockDriverState *bs,
786 const char *node_name,
793 /* empty string node name is invalid */
794 if (node_name[0] == '\0') {
795 error_setg(errp, "Empty node name");
799 /* takes care of avoiding namespaces collisions */
800 if (bdrv_find(node_name)) {
801 error_setg(errp, "node-name=%s is conflicting with a device id",
806 /* takes care of avoiding duplicates node names */
807 if (bdrv_find_node(node_name)) {
808 error_setg(errp, "Duplicate node name");
812 /* copy node name into the bs and insert it into the graph list */
813 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
814 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
/* NOTE(review): this function was damaged in extraction -- many original
 * lines (braces, returns, error labels) are missing and the leading numbers
 * are artifacts. Comments below are hedged; confirm against upstream QEMU. */
820 * Common part for opening disk images and files
822 * Removes all processed options from *options.
824 static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
825 QDict *options, int flags, BlockDriver *drv, Error **errp)
828 const char *filename;
829 const char *node_name = NULL;
830 Error *local_err = NULL;
/* Caller must hand in a fresh bs (no protocol child yet) and a private
 * options dict distinct from bs->options. */
833 assert(bs->file == NULL);
834 assert(options != NULL && bs->options != options);
837 filename = file->filename;
839 filename = qdict_get_try_str(options, "filename");
842 if (drv->bdrv_needs_filename && !filename) {
843 error_setg(errp, "The '%s' block driver requires a file name",
848 trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
/* "node-name" is consumed here so the unknown-option check later does not
 * trip over it. */
850 node_name = qdict_get_try_str(options, "node-name");
851 ret = bdrv_assign_node_name(bs, node_name, errp);
855 qdict_del(options, "node-name");
857 /* bdrv_open() with directly using a protocol as drv. This layer is already
858 * opened, so assign it to bs (while file becomes a closed BlockDriverState)
859 * and return immediately. */
860 if (file != NULL && drv->bdrv_file_open) {
865 bs->open_flags = flags;
866 bs->guest_block_size = 512;
867 bs->request_alignment = 512;
868 bs->zero_beyond_eof = true;
869 open_flags = bdrv_open_flags(bs, flags);
870 bs->read_only = !(open_flags & BDRV_O_RDWR);
872 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
874 !bs->read_only && bdrv_is_whitelisted(drv, true)
875 ? "Driver '%s' can only be used for read-only devices"
876 : "Driver '%s' is not whitelisted",
881 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
882 if (flags & BDRV_O_COPY_ON_READ) {
883 if (!bs->read_only) {
884 bdrv_enable_copy_on_read(bs);
886 error_setg(errp, "Can't use copy-on-read on read-only device");
891 if (filename != NULL) {
892 pstrcpy(bs->filename, sizeof(bs->filename), filename);
894 bs->filename[0] = '\0';
/* Driver-private state; instance_size is declared by the driver. */
898 bs->opaque = g_malloc0(drv->instance_size);
900 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
902 /* Open the image, either directly or using a protocol */
903 if (drv->bdrv_file_open) {
904 assert(file == NULL);
905 assert(!drv->bdrv_needs_filename || filename != NULL);
906 ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
909 error_setg(errp, "Can't use '%s' as a block driver for the "
910 "protocol level", drv->format_name);
915 ret = drv->bdrv_open(bs, options, open_flags, &local_err);
920 error_propagate(errp, local_err);
921 } else if (bs->filename[0]) {
922 error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
924 error_setg_errno(errp, -ret, "Could not open image");
929 ret = refresh_total_sectors(bs, bs->total_sectors);
931 error_setg_errno(errp, -ret, "Could not refresh total sector count");
935 bdrv_refresh_limits(bs);
936 assert(bdrv_opt_mem_align(bs) != 0);
937 assert(bs->request_alignment != 0);
/* snapshot=on temporaries are unlinked immediately; the open fd keeps the
 * file alive until close. */
940 if (bs->is_temporary) {
941 assert(bs->filename[0] != '\0');
942 unlink(bs->filename);
/* NOTE(review): damaged in extraction -- error-return lines and closing
 * braces are missing; leading numbers are artifacts. Confirm upstream. */
956 * Opens a file using a protocol (file, host_device, nbd, ...)
958 * options is a QDict of options to pass to the block drivers, or NULL for an
959 * empty set of options. The reference to the QDict belongs to the block layer
960 * after the call (even on failure), so if the caller intends to reuse the
961 * dictionary, it needs to use QINCREF() before calling bdrv_file_open.
963 static int bdrv_file_open(BlockDriverState *bs, const char *filename,
964 QDict *options, int flags, Error **errp)
968 bool allow_protocol_prefix = false;
969 Error *local_err = NULL;
972 /* Fetch the file name from the options QDict if necessary */
974 filename = qdict_get_try_str(options, "filename");
975 } else if (filename && !qdict_haskey(options, "filename")) {
976 qdict_put(options, "filename", qstring_from_str(filename));
/* Only a caller-supplied filename may carry a "<protocol>:" prefix. */
977 allow_protocol_prefix = true;
979 error_setg(errp, "Can't specify 'file' and 'filename' options at the "
985 /* Find the right block driver */
986 drvname = qdict_get_try_str(options, "driver");
988 drv = bdrv_find_format(drvname);
990 error_setg(errp, "Unknown driver '%s'", drvname);
992 qdict_del(options, "driver");
993 } else if (filename) {
994 drv = bdrv_find_protocol(filename, allow_protocol_prefix);
996 error_setg(errp, "Unknown protocol");
999 error_setg(errp, "Must specify either driver or file");
1004 /* errp has been set already */
1009 /* Parse the filename and open it */
1010 if (drv->bdrv_parse_filename && filename) {
1011 drv->bdrv_parse_filename(filename, options, &local_err);
1013 error_propagate(errp, local_err);
1017 qdict_del(options, "filename");
/* Drivers without bdrv_file_open are format drivers; recurse via
 * bdrv_open() instead of opening at the protocol level. */
1020 if (!drv->bdrv_file_open) {
1021 ret = bdrv_open(&bs, filename, NULL, options, flags, drv, &local_err);
1024 ret = bdrv_open_common(bs, NULL, options, flags, drv, &local_err);
1027 error_propagate(errp, local_err);
1031 /* Check if any unknown options were used */
1032 if (options && (qdict_size(options) != 0)) {
1033 const QDictEntry *entry = qdict_first(options);
1034 error_setg(errp, "Block protocol '%s' doesn't support the option '%s'",
1035 drv->format_name, entry->key);
/* NOTE(review): damaged in extraction -- early-return lines and braces are
 * missing; leading numbers are artifacts. Confirm against upstream. */
1050 * Opens the backing file for a BlockDriverState if not yet open
1052 * options is a QDict of options to pass to the block drivers, or NULL for an
1053 * empty set of options. The reference to the QDict is transferred to this
1054 * function (even on failure), so if the caller intends to reuse the dictionary,
1055 * it needs to use QINCREF() before calling bdrv_file_open.
1057 int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
1059 char backing_filename[PATH_MAX];
1060 int back_flags, ret;
1061 BlockDriver *back_drv = NULL;
1062 Error *local_err = NULL;
/* Already open: nothing to do (early return lost in extraction). */
1064 if (bs->backing_hd != NULL) {
1069 /* NULL means an empty set of options */
1070 if (options == NULL) {
1071 options = qdict_new();
1074 bs->open_flags &= ~BDRV_O_NO_BACKING;
/* Explicit file.filename in the options overrides the stored backing_file. */
1075 if (qdict_haskey(options, "file.filename")) {
1076 backing_filename[0] = '\0';
1077 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1081 bdrv_get_full_backing_filename(bs, backing_filename,
1082 sizeof(backing_filename));
1085 if (bs->backing_format[0] != '\0') {
1086 back_drv = bdrv_find_format(bs->backing_format);
1089 /* backing files always opened read-only */
1090 back_flags = bs->open_flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT |
1091 BDRV_O_COPY_ON_READ);
1093 assert(bs->backing_hd == NULL);
1094 ret = bdrv_open(&bs->backing_hd,
1095 *backing_filename ? backing_filename : NULL, NULL, options,
1096 back_flags, back_drv, &local_err);
1098 bs->backing_hd = NULL;
1099 bs->open_flags |= BDRV_O_NO_BACKING;
1100 error_setg(errp, "Could not open backing file: %s",
1101 error_get_pretty(local_err));
1102 error_free(local_err);
1106 if (bs->backing_hd->file) {
1107 pstrcpy(bs->backing_file, sizeof(bs->backing_file),
1108 bs->backing_hd->file->filename);
1111 /* Recalculate the BlockLimits with the backing file */
1112 bdrv_refresh_limits(bs);
/* NOTE(review): damaged in extraction -- allow_none early return and error
 * paths are missing; leading numbers are artifacts. Confirm upstream. */
1118 * Opens a disk image whose options are given as BlockdevRef in another block
1121 * If force_raw is true, bdrv_file_open() will be used, thereby preventing any
1122 * image format auto-detection. If it is false and a filename is given,
1123 * bdrv_open() will be used for auto-detection.
1125 * If allow_none is true, no image will be opened if filename is false and no
1126 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1128 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1129 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1130 * itself, all options starting with "${bdref_key}." are considered part of the
1133 * The BlockdevRef will be removed from the options QDict.
1135 * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
1137 int bdrv_open_image(BlockDriverState **pbs, const char *filename,
1138 QDict *options, const char *bdref_key, int flags,
1139 bool force_raw, bool allow_none, Error **errp)
1141 QDict *image_options;
1143 char *bdref_key_dot;
1144 const char *reference;
1147 assert(*pbs == NULL);
/* Pull all "<bdref_key>.*" sub-options into their own dict. */
1149 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1150 qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1151 g_free(bdref_key_dot);
1153 reference = qdict_get_try_str(options, bdref_key);
1154 if (!filename && !reference && !qdict_size(image_options)) {
1158 error_setg(errp, "A block device must be specified for \"%s\"",
1165 if (filename && !force_raw) {
1166 /* If a filename is given and the block driver should be detected
1167 automatically (instead of using none), use bdrv_open() in order to do
1168 that auto-detection. */
1170 error_setg(errp, "Cannot reference an existing block device while "
1171 "giving a filename");
1176 ret = bdrv_open(pbs, filename, NULL, image_options, flags, NULL, errp);
1178 ret = bdrv_open(pbs, filename, reference, image_options,
1179 flags | BDRV_O_PROTOCOL, NULL, errp);
1183 qdict_del(options, bdref_key);
/* NOTE(review): heavily damaged in extraction -- success/fail labels,
 * returns and many braces are missing; leading numbers are artifacts.
 * Comments below are hedged; confirm against upstream QEMU. */
1188 * Opens a disk image (raw, qcow2, vmdk, ...)
1190 * options is a QDict of options to pass to the block drivers, or NULL for an
1191 * empty set of options. The reference to the QDict belongs to the block layer
1192 * after the call (even on failure), so if the caller intends to reuse the
1193 * dictionary, it needs to use QINCREF() before calling bdrv_open.
1195 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1196 * If it is not NULL, the referenced BDS will be reused.
1198 * The reference parameter may be used to specify an existing block device which
1199 * should be opened. If specified, neither options nor a filename may be given,
1200 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1202 int bdrv_open(BlockDriverState **pbs, const char *filename,
1203 const char *reference, QDict *options, int flags,
1204 BlockDriver *drv, Error **errp)
1207 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1208 char tmp_filename[PATH_MAX + 1];
1209 BlockDriverState *file = NULL, *bs;
1210 const char *drvname;
1211 Error *local_err = NULL;
/* reference mode: looks up an existing BDS instead of opening anything */
1216 bool options_non_empty = options ? qdict_size(options) : false;
1220 error_setg(errp, "Cannot reuse an existing BDS when referencing "
1221 "another block device");
1225 if (filename || options_non_empty) {
1226 error_setg(errp, "Cannot reference an existing block device with "
1227 "additional options or a new filename");
1231 bs = bdrv_lookup_bs(reference, reference, errp);
1246 /* NULL means an empty set of options */
1247 if (options == NULL) {
1248 options = qdict_new();
/* bs->options keeps the original dict; a shallow clone is consumed below */
1251 bs->options = options;
1252 options = qdict_clone_shallow(options);
1254 if (flags & BDRV_O_PROTOCOL) {
1256 ret = bdrv_file_open(bs, filename, options, flags & ~BDRV_O_PROTOCOL,
1262 } else if (bs->drv) {
1263 goto close_and_fail;
1269 /* For snapshot=on, create a temporary qcow2 overlay */
1270 if (flags & BDRV_O_SNAPSHOT) {
1271 BlockDriverState *bs1;
1273 BlockDriver *bdrv_qcow2;
1274 QEMUOptionParameter *create_options;
1275 QDict *snapshot_options;
1277 /* if snapshot, we create a temporary backing file and open it
1278 instead of opening 'filename' directly */
1280 /* Get the required size from the image */
1283 ret = bdrv_open(&bs1, filename, NULL, options, BDRV_O_NO_BACKING,
1288 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
1292 /* Create the temporary image */
1293 ret = get_tmp_filename(tmp_filename, sizeof(tmp_filename));
1295 error_setg_errno(errp, -ret, "Could not get temporary filename");
1299 bdrv_qcow2 = bdrv_find_format("qcow2");
1300 create_options = parse_option_parameters("", bdrv_qcow2->create_options,
1303 set_option_parameter_int(create_options, BLOCK_OPT_SIZE, total_size);
1305 ret = bdrv_create(bdrv_qcow2, tmp_filename, create_options, &local_err);
1306 free_option_parameters(create_options);
1308 error_setg_errno(errp, -ret, "Could not create temporary overlay "
1309 "'%s': %s", tmp_filename,
1310 error_get_pretty(local_err));
1311 error_free(local_err);
1316 /* Prepare a new options QDict for the temporary file, where user
1317 * options refer to the backing file */
1319 qdict_put(options, "file.filename", qstring_from_str(filename));
1322 qdict_put(options, "driver", qstring_from_str(drv->format_name));
1325 snapshot_options = qdict_new();
1326 qdict_put(snapshot_options, "backing", options);
1327 qdict_flatten(snapshot_options);
1329 bs->options = snapshot_options;
1330 options = qdict_clone_shallow(bs->options);
/* From here on, open the temporary overlay instead of the user's image. */
1332 filename = tmp_filename;
1334 bs->is_temporary = 1;
1337 /* Open image file without format layer */
1338 if (flags & BDRV_O_RDWR) {
1339 flags |= BDRV_O_ALLOW_RDWR;
1342 assert(file == NULL);
1343 ret = bdrv_open_image(&file, filename, options, "file",
1344 bdrv_open_flags(bs, flags | BDRV_O_UNMAP), true, true,
1350 /* Find the right image format driver */
1351 drvname = qdict_get_try_str(options, "driver");
1353 drv = bdrv_find_format(drvname);
1354 qdict_del(options, "driver");
1356 error_setg(errp, "Invalid driver: '%s'", drvname);
1358 goto unlink_and_fail;
/* No explicit driver: probe the protocol layer's content. */
1364 ret = find_image_format(file, filename, &drv, &local_err);
1366 error_setg(errp, "Must specify either driver or file");
1368 goto unlink_and_fail;
1373 goto unlink_and_fail;
1376 /* Open the image */
1377 ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
1379 goto unlink_and_fail;
1382 if (file && (bs->file != file)) {
1387 /* If there is a backing file, use it */
1388 if ((flags & BDRV_O_NO_BACKING) == 0) {
1389 QDict *backing_options;
1391 qdict_extract_subqdict(options, &backing_options, "backing.");
1392 ret = bdrv_open_backing_file(bs, backing_options, &local_err);
1394 goto close_and_fail;
1398 /* Check if any unknown options were used */
1399 if (qdict_size(options) != 0) {
1400 const QDictEntry *entry = qdict_first(options);
1401 error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1402 "support the option '%s'", drv->format_name, bs->device_name,
1406 goto close_and_fail;
1410 if (!bdrv_key_required(bs)) {
1411 bdrv_dev_change_media_cb(bs, true);
1421 if (bs->is_temporary) {
1425 QDECREF(bs->options);
1429 /* If *pbs is NULL, a new BDS has been created in this function and
1430 needs to be freed now. Otherwise, it does not need to be closed,
1431 since it has not really been opened yet. */
1435 error_propagate(errp, local_err);
1440 /* See fail path, but now the BDS has to be always closed */
1448 error_propagate(errp, local_err);
1453 typedef struct BlockReopenQueueEntry {
1455 BDRVReopenState state;
1456 QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1457 } BlockReopenQueueEntry;
1460 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1461 * reopen of multiple devices.
1463 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1464 * already performed, or alternatively may be NULL a new BlockReopenQueue will
1465 * be created and initialized. This newly created BlockReopenQueue should be
1466 * passed back in for subsequent calls that are intended to be of the same
1469 * bs is the BlockDriverState to add to the reopen queue.
1471 * flags contains the open flags for the associated bs
1473 * returns a pointer to bs_queue, which is either the newly allocated
1474 * bs_queue, or the existing bs_queue being used.
/* Append bs (and, recursively, its underlying bs->file) to a reopen queue,
 * allocating and initializing the queue itself when bs_queue is NULL.
 * NOTE(review): interior source lines are elided in this listing. */
1477 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1478 BlockDriverState *bs, int flags)
1482 BlockReopenQueueEntry *bs_entry;
1483 if (bs_queue == NULL) {
1484 bs_queue = g_new0(BlockReopenQueue, 1);
1485 QSIMPLEQ_INIT(bs_queue);
/* queue the protocol-level BDS first so it is reopened before bs itself */
1489 bdrv_reopen_queue(bs_queue, bs->file, flags);
1492 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1493 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1495 bs_entry->state.bs = bs;
1496 bs_entry->state.flags = flags;
1502 * Reopen multiple BlockDriverStates atomically & transactionally.
1504  * The queue passed in (bs_queue) must have been built up previously
1505 * via bdrv_reopen_queue().
1507 * Reopens all BDS specified in the queue, with the appropriate
1508 * flags. All devices are prepared for reopen, and failure of any
1509  * device will cause all device changes to be abandoned, and intermediate
1512 * If all devices prepare successfully, then the changes are committed
/* Two-phase (prepare/commit, abort-on-failure) reopen of every BDS in
 * bs_queue. On any prepare failure, all already-prepared entries are
 * aborted; otherwise all entries are committed. */
1516 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1519 BlockReopenQueueEntry *bs_entry, *next;
1520 Error *local_err = NULL;
1522 assert(bs_queue != NULL);
/* Phase 1: prepare every entry; bail out to cleanup on first failure */
1526 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1527 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1528 error_propagate(errp, local_err);
1531 bs_entry->prepared = true;
1534 /* If we reach this point, we have success and just need to apply the
1537 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1538 bdrv_reopen_commit(&bs_entry->state);
/* cleanup: abort only entries that completed prepare (when ret != 0) */
1544 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1545 if (ret && bs_entry->prepared) {
1546 bdrv_reopen_abort(&bs_entry->state);
1555 /* Reopen a single BlockDriverState with the specified flags. */
/* Convenience wrapper: builds a one-element reopen queue and runs the
 * transactional multi-reopen on it. */
1556 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1559 Error *local_err = NULL;
1560 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1562 ret = bdrv_reopen_multiple(queue, &local_err);
1563 if (local_err != NULL) {
1564 error_propagate(errp, local_err);
1571 * Prepares a BlockDriverState for reopen. All changes are staged in the
1572 * 'opaque' field of the BDRVReopenState, which is used and allocated by
1573 * the block driver layer .bdrv_reopen_prepare()
1575 * bs is the BlockDriverState to reopen
1576 * flags are the new open flags
1577 * queue is the reopen queue
1579 * Returns 0 on success, non-zero on error. On error errp will be set
1582 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1583 * It is the responsibility of the caller to then call the abort() or
1584 * commit() for any other BDS that have been left in a prepare() state
/* Stage a reopen: validate the permission change, flush pending writes,
 * then delegate to the driver's .bdrv_reopen_prepare() (which is
 * mandatory — drivers without it fail with FEATURE_NOT_SUPPORTED). */
1587 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1591 Error *local_err = NULL;
1594 assert(reopen_state != NULL);
1595 assert(reopen_state->bs->drv != NULL);
1596 drv = reopen_state->bs->drv;
1598 /* if we are to stay read-only, do not allow permission change
1600 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1601 reopen_state->flags & BDRV_O_RDWR) {
1602 error_set(errp, QERR_DEVICE_IS_READ_ONLY,
1603 reopen_state->bs->device_name);
/* flush outstanding data before the driver swaps its state */
1608 ret = bdrv_flush(reopen_state->bs);
1610 error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1615 if (drv->bdrv_reopen_prepare) {
1616 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1618 if (local_err != NULL) {
1619 error_propagate(errp, local_err);
/* driver failed without setting an error: supply a generic one */
1621 error_setg(errp, "failed while preparing to reopen image '%s'",
1622 reopen_state->bs->filename);
1627 /* It is currently mandatory to have a bdrv_reopen_prepare()
1628 * handler for each supported drv. */
1629 error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
1630 drv->format_name, reopen_state->bs->device_name,
1631 "reopening of file");
1643 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1644 * makes them final by swapping the staging BlockDriverState contents into
1645 * the active BlockDriverState contents.
/* Finalize a prepared reopen: let the driver commit its staged state,
 * then apply the new flags to the BDS (open_flags, write cache,
 * read_only) and refresh the request limits. */
1647 void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1651 assert(reopen_state != NULL);
1652 drv = reopen_state->bs->drv;
1653 assert(drv != NULL);
1655 /* If there are any driver level actions to take */
1656 if (drv->bdrv_reopen_commit) {
1657 drv->bdrv_reopen_commit(reopen_state);
1660 /* set BDS specific flags now */
1661 reopen_state->bs->open_flags = reopen_state->flags;
1662 reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1664 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
1666 bdrv_refresh_limits(reopen_state->bs);
1670 * Abort the reopen, and delete and free the staged changes in
/* Roll back a prepared reopen by invoking the driver's optional
 * .bdrv_reopen_abort() hook; the BDS itself is left untouched. */
1673 void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1677 assert(reopen_state != NULL);
1678 drv = reopen_state->bs->drv;
1679 assert(drv != NULL);
1681 if (drv->bdrv_reopen_abort) {
1682 drv->bdrv_reopen_abort(reopen_state);
/* Tear down a BDS: cancel its block job, drain all I/O, notify close
 * listeners, release backing/protocol references, reset cached state,
 * and disable any I/O throttling that was active. */
1687 void bdrv_close(BlockDriverState *bs)
1690 block_job_cancel_sync(bs->job);
1692 bdrv_drain_all(); /* complete I/O */
1694 bdrv_drain_all(); /* in case flush left pending I/O */
1695 notifier_list_notify(&bs->close_notifiers, bs);
1698 if (bs->backing_hd) {
1699 bdrv_unref(bs->backing_hd);
1700 bs->backing_hd = NULL;
1702 bs->drv->bdrv_close(bs);
/* temporary images are deleted from disk on close */
1705 if (bs->is_temporary) {
1706 unlink(bs->filename);
1711 bs->copy_on_read = 0;
1712 bs->backing_file[0] = '\0';
1713 bs->backing_format[0] = '\0';
1714 bs->total_sectors = 0;
1719 bs->zero_beyond_eof = false;
1720 QDECREF(bs->options);
1723 if (bs->file != NULL) {
1724 bdrv_unref(bs->file);
/* tell the attached device model the medium is gone */
1729 bdrv_dev_change_media_cb(bs, false);
1731 /*throttling disk I/O limits*/
1732 if (bs->io_limits_enabled) {
1733 bdrv_io_limits_disable(bs);
/* Close every BDS registered in the global bdrv_states list. */
1737 void bdrv_close_all(void)
1739 BlockDriverState *bs;
1741 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
1746 /* Check if any requests are in-flight (including throttled requests) */
/* Recurses into bs->file and bs->backing_hd so the whole chain counts. */
1747 static bool bdrv_requests_pending(BlockDriverState *bs)
1749 if (!QLIST_EMPTY(&bs->tracked_requests)) {
1752 if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
1755 if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
1758 if (bs->file && bdrv_requests_pending(bs->file)) {
1761 if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
/* True if any registered BDS (or its chain) still has in-flight
 * or throttled requests. */
1767 static bool bdrv_requests_pending_all(void)
1769 BlockDriverState *bs;
1770 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
1771 if (bdrv_requests_pending(bs)) {
1779 * Wait for pending requests to complete across all BlockDriverStates
1781 * This function does not flush data to disk, use bdrv_flush_all() for that
1782 * after calling this function.
1784 * Note that completion of an asynchronous I/O operation can trigger any
1785 * number of other I/O operations on other devices---for example a coroutine
1786 * can be arbitrarily complex and a constant flow of I/O can come until the
1787 * coroutine is complete. Because of this, it is not possible to have a
1788 * function to drain a single device's I/O queue.
/* Poll the main AioContext until no BDS has pending requests; throttled
 * requests are restarted first so they can complete too. */
1790 void bdrv_drain_all(void)
1792 /* Always run first iteration so any pending completion BHs run */
1794 BlockDriverState *bs;
1797 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
1798 bdrv_start_throttled_reqs(bs);
1801 busy = bdrv_requests_pending_all();
/* blocking poll only while requests remain; one non-blocking pass otherwise */
1802 busy |= aio_poll(qemu_get_aio_context(), busy);
1806 /* make a BlockDriverState anonymous by removing from bdrv_state and
1807 * graph_bdrv_state list.
1808 Also, NULL terminate the device_name to prevent double remove */
/* Remove bs from the global device and node-name lists and clear both
 * names; the emptied names guard against a double removal. */
1809 void bdrv_make_anon(BlockDriverState *bs)
1811 if (bs->device_name[0] != '\0') {
1812 QTAILQ_REMOVE(&bdrv_states, bs, device_list);
1814 bs->device_name[0] = '\0';
1815 if (bs->node_name[0] != '\0') {
1816 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
1818 bs->node_name[0] = '\0';
/* Let the driver re-associate its internal state with bs after a swap,
 * via the optional .bdrv_rebind() hook. */
1821 static void bdrv_rebind(BlockDriverState *bs)
1823 if (bs->drv && bs->drv->bdrv_rebind) {
1824 bs->drv->bdrv_rebind(bs);
/* Copy the fields that must stay attached to the guest device (rather
 * than to the image) from bs_src into bs_dest; used by bdrv_swap() to
 * move them back after the wholesale struct swap. */
1828 static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
1829 BlockDriverState *bs_src)
1831 /* move some fields that need to stay attached to the device */
1832 bs_dest->open_flags = bs_src->open_flags;
1835 bs_dest->dev_ops = bs_src->dev_ops;
1836 bs_dest->dev_opaque = bs_src->dev_opaque;
1837 bs_dest->dev = bs_src->dev;
1838 bs_dest->guest_block_size = bs_src->guest_block_size;
1839 bs_dest->copy_on_read = bs_src->copy_on_read;
1841 bs_dest->enable_write_cache = bs_src->enable_write_cache;
1843 /* i/o throttled req */
1844 memcpy(&bs_dest->throttle_state,
1845 &bs_src->throttle_state,
1846 sizeof(ThrottleState));
1847 bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0];
1848 bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1];
1849 bs_dest->io_limits_enabled = bs_src->io_limits_enabled;
/* error handling policy */
1852 bs_dest->on_read_error = bs_src->on_read_error;
1853 bs_dest->on_write_error = bs_src->on_write_error;
/* I/O status reporting */
1856 bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
1857 bs_dest->iostatus = bs_src->iostatus;
1860 bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;
1862 /* reference count */
1863 bs_dest->refcnt = bs_src->refcnt;
1866 bs_dest->in_use = bs_src->in_use;
1867 bs_dest->job = bs_src->job;
1869 /* keep the same entry in bdrv_states */
1870 pstrcpy(bs_dest->device_name, sizeof(bs_dest->device_name),
1871 bs_src->device_name);
1872 bs_dest->device_list = bs_src->device_list;
1874 /* keep the same entry in graph_bdrv_states
1875 * We do want to swap name but don't want to swap linked list entries
1877 bs_dest->node_list = bs_src->node_list;
1881 * Swap bs contents for two image chains while they are live,
1882 * while keeping required fields on the BlockDriverState that is
1883 * actually attached to a device.
1885 * This will modify the BlockDriverState fields, and swap contents
1886 * between bs_new and bs_old. Both bs_new and bs_old are modified.
1888 * bs_new is required to be anonymous.
1890 * This function does not create any image files.
/* Swap the contents of two live BDSes, then move the device-attached
 * fields back so bs_old remains the BDS the device sees; bs_new must be
 * anonymous and have no job/device/throttling attached (asserted both
 * before and after the swap). */
1892 void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
1894 BlockDriverState tmp;
1896 /* bs_new must be anonymous and shouldn't have anything fancy enabled */
1897 assert(bs_new->device_name[0] == '\0');
1898 assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
1899 assert(bs_new->job == NULL);
1900 assert(bs_new->dev == NULL);
1901 assert(bs_new->in_use == 0);
1902 assert(bs_new->io_limits_enabled == false);
1903 assert(!throttle_have_timer(&bs_new->throttle_state));
1909 /* there are some fields that should not be swapped, move them back */
1910 bdrv_move_feature_fields(&tmp, bs_old);
1911 bdrv_move_feature_fields(bs_old, bs_new);
1912 bdrv_move_feature_fields(bs_new, &tmp);
1914 /* bs_new shouldn't be in bdrv_states even after the swap! */
1915 assert(bs_new->device_name[0] == '\0');
1917 /* Check a few fields that should remain attached to the device */
1918 assert(bs_new->dev == NULL);
1919 assert(bs_new->job == NULL);
1920 assert(bs_new->in_use == 0);
1921 assert(bs_new->io_limits_enabled == false);
1922 assert(!throttle_have_timer(&bs_new->throttle_state));
/* give both drivers a chance to fix up their back-pointers */
1924 bdrv_rebind(bs_new);
1925 bdrv_rebind(bs_old);
1929 * Add new bs contents at the top of an image chain while the chain is
1930 * live, while keeping required fields on the top layer.
1932 * This will modify the BlockDriverState fields, and swap contents
1933 * between bs_new and bs_top. Both bs_new and bs_top are modified.
1935 * bs_new is required to be anonymous.
1937 * This function does not create any image files.
/* Insert bs_new on top of bs_top's chain via bdrv_swap(); after the swap
 * bs_top holds the new top's contents with bs_new as its backing file,
 * and the backing file name/format strings are updated to match. */
1939 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
1941 bdrv_swap(bs_new, bs_top);
1943 /* The contents of 'tmp' will become bs_top, as we are
1944 * swapping bs_new and bs_top contents. */
1945 bs_top->backing_hd = bs_new;
1946 bs_top->open_flags &= ~BDRV_O_NO_BACKING;
1947 pstrcpy(bs_top->backing_file, sizeof(bs_top->backing_file),
1949 pstrcpy(bs_top->backing_format, sizeof(bs_top->backing_format),
1950 bs_new->drv ? bs_new->drv->format_name : "");
/* Free a BDS; callers must have dropped every reference and detached
 * all users first (asserted). */
1953 static void bdrv_delete(BlockDriverState *bs)
1957 assert(!bs->in_use);
1958 assert(!bs->refcnt);
1959 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
1963 /* remove from list, if necessary */
/* Attach a guest device to bs and reset its I/O status. */
1969 int bdrv_attach_dev(BlockDriverState *bs, void *dev)
1970 /* TODO change to DeviceState *dev when all users are qdevified */
1976 bdrv_iostatus_reset(bs);
1980 /* TODO qdevified devices don't use this, remove when devices are qdevified */
/* Variant of bdrv_attach_dev() that treats failure as fatal. */
1981 void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
1983 if (bdrv_attach_dev(bs, dev) < 0) {
/* Detach the given device from bs and restore device-related defaults
 * (guest block size back to 512 bytes). */
1988 void bdrv_detach_dev(BlockDriverState *bs, void *dev)
1989 /* TODO change to DeviceState *dev when all users are qdevified */
1991 assert(bs->dev == dev);
1994 bs->dev_opaque = NULL;
1995 bs->guest_block_size = 512;
1998 /* TODO change to return DeviceState * when all users are qdevified */
/* Accessor for the device currently attached to bs. */
1999 void *bdrv_get_attached_dev(BlockDriverState *bs)
/* Register the device-model callback table and its opaque pointer. */
2004 void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
2008 bs->dev_opaque = opaque;
/* Emit a QMP I/O error event for bdrv, mapping the BlockErrorAction to
 * its string form ("report"/"ignore"/"stop") and tagging the operation
 * as "read" or "write". */
2011 void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
2012 enum MonitorEvent ev,
2013 BlockErrorAction action, bool is_read)
2016 const char *action_str;
2019 case BDRV_ACTION_REPORT:
2020 action_str = "report";
2022 case BDRV_ACTION_IGNORE:
2023 action_str = "ignore";
2025 case BDRV_ACTION_STOP:
2026 action_str = "stop";
2032 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
2035 is_read ? "read" : "write");
2036 monitor_protocol_event(ev, data);
/* monitor_protocol_event takes its own reference */
2038 qobject_decref(data);
/* Emit a QMP DEVICE_TRAY_MOVED event reporting whether the tray is
 * now open (ejected) or closed. */
2041 static void bdrv_emit_qmp_eject_event(BlockDriverState *bs, bool ejected)
2045 data = qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }",
2046 bdrv_get_device_name(bs), ejected);
2047 monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED, data);
2049 qobject_decref(data);
/* Notify the attached device model of a medium change (load/eject) and
 * emit tray-moved events when the callback toggled the tray state. */
2052 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
2054 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
2055 bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
2056 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
2057 if (tray_was_closed) {
/* tray open */
2059 bdrv_emit_qmp_eject_event(bs, true);
/* tray close */
2063 bdrv_emit_qmp_eject_event(bs, false);
/* Removable if no device is attached, or the device handles media change. */
2068 bool bdrv_dev_has_removable_media(BlockDriverState *bs)
2070 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
/* Forward an eject request to the device model, if it registered a hook. */
2073 void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
2075 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
2076 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
/* Query tray state from the device model; the default for devices
 * without the hook is established outside this excerpt. */
2080 bool bdrv_dev_is_tray_open(BlockDriverState *bs)
2082 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
2083 return bs->dev_ops->is_tray_open(bs->dev_opaque);
/* Notify the device model that the image was resized, if it cares. */
2088 static void bdrv_dev_resize_cb(BlockDriverState *bs)
2090 if (bs->dev_ops && bs->dev_ops->resize_cb) {
2091 bs->dev_ops->resize_cb(bs->dev_opaque);
/* Query the device model's medium-lock state via its optional hook. */
2095 bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
2097 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
2098 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
2104 * Run consistency checks on an image
2106 * Returns 0 if the check could be completed (it doesn't mean that the image is
2107 * free of errors) or -errno when an internal error occurred. The results of the
2108 * check are stored in res.
/* Delegate the consistency check to the driver; res is zeroed first so
 * drivers only fill in what they support. */
2110 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
2112 if (bs->drv->bdrv_check == NULL) {
2116 memset(res, 0, sizeof(*res));
2117 return bs->drv->bdrv_check(bs, res, fix);
2120 #define COMMIT_BUF_SECTORS 2048
2122 /* commit COW file into the raw image */
/* Copies every allocated COMMIT_BUF_SECTORS-sized chunk of bs down into
 * bs->backing_hd, temporarily reopening the backing file read-write if
 * needed (and restoring its original flags afterwards). */
2123 int bdrv_commit(BlockDriverState *bs)
2125 BlockDriver *drv = bs->drv;
2126 int64_t sector, total_sectors, length, backing_length;
2127 int n, ro, open_flags;
2129 uint8_t *buf = NULL;
2130 char filename[PATH_MAX];
2135 if (!bs->backing_hd) {
/* cannot commit while either image is busy with a job */
2139 if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
2143 ro = bs->backing_hd->read_only;
2144 /* Use pstrcpy (not strncpy): filename must be NUL-terminated. */
2145 pstrcpy(filename, sizeof(filename), bs->backing_hd->filename);
2146 open_flags = bs->backing_hd->open_flags;
/* reopen backing file read-write when it was opened read-only */
2149 if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
2154 length = bdrv_getlength(bs);
2160 backing_length = bdrv_getlength(bs->backing_hd);
2161 if (backing_length < 0) {
2162 ret = backing_length;
2166 /* If our top snapshot is larger than the backing file image,
2167 * grow the backing file image if possible. If not possible,
2168 * we must return an error */
2169 if (length > backing_length) {
2170 ret = bdrv_truncate(bs->backing_hd, length);
2176 total_sectors = length >> BDRV_SECTOR_BITS;
2177 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
/* copy only chunks allocated in the top image */
2179 for (sector = 0; sector < total_sectors; sector += n) {
2180 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2185 ret = bdrv_read(bs, sector, buf, n);
2190 ret = bdrv_write(bs->backing_hd, sector, buf, n);
/* drop the now-redundant COW data from the top image if supported */
2197 if (drv->bdrv_make_empty) {
2198 ret = drv->bdrv_make_empty(bs);
2206 * Make sure all data we wrote to the backing device is actually
2209 if (bs->backing_hd) {
2210 bdrv_flush(bs->backing_hd);
2218 /* ignoring error return here */
2219 bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
/* Run bdrv_commit() on every registered BDS that has a backing file. */
2225 int bdrv_commit_all(void)
2227 BlockDriverState *bs;
2229 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2230 if (bs->drv && bs->backing_hd) {
2231 int ret = bdrv_commit(bs);
2241 * Remove an active request from the tracked requests list
2243 * This function should be called when a tracked request is completing.
/* Remove a completed request from its BDS's tracked list, drop the
 * serialising count if it held one, and wake any waiters. */
2245 static void tracked_request_end(BdrvTrackedRequest *req)
2247 if (req->serialising) {
2248 req->bs->serialising_in_flight--;
2251 QLIST_REMOVE(req, list);
2252 qemu_co_queue_restart_all(&req->wait_queue);
2256 * Add an active request to the tracked requests list
/* Initialize a tracked request (non-serialising; overlap range starts
 * equal to the request range) and link it into bs's tracked list. */
2258 static void tracked_request_begin(BdrvTrackedRequest *req,
2259 BlockDriverState *bs,
2261 unsigned int bytes, bool is_write)
2263 *req = (BdrvTrackedRequest){
2267 .is_write = is_write,
2268 .co = qemu_coroutine_self(),
2269 .serialising = false,
2270 .overlap_offset = offset,
2271 .overlap_bytes = bytes,
2274 qemu_co_queue_init(&req->wait_queue);
2276 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
/* Mark req as serialising and widen its overlap window to 'align'
 * boundaries (align must be a power of two for the mask on 2281). */
2279 static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
2281 int64_t overlap_offset = req->offset & ~(align - 1);
2282 unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
2285 if (!req->serialising) {
2286 req->bs->serialising_in_flight++;
2287 req->serialising = true;
/* only ever grow the overlap window, never shrink it */
2290 req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
2291 req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
2295 * Round a region to cluster boundaries
/* Expand [sector_num, +nb_sectors) to cluster boundaries; when the
 * driver reports no cluster size, the region is returned unchanged. */
2297 void bdrv_round_to_clusters(BlockDriverState *bs,
2298 int64_t sector_num, int nb_sectors,
2299 int64_t *cluster_sector_num,
2300 int *cluster_nb_sectors)
2302 BlockDriverInfo bdi;
2304 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
2305 *cluster_sector_num = sector_num;
2306 *cluster_nb_sectors = nb_sectors;
2308 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
2309 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
2310 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
/* Cluster size in bytes, falling back to the request alignment when
 * the driver does not report one. */
2315 static int bdrv_get_cluster_size(BlockDriverState *bs)
2317 BlockDriverInfo bdi;
2320 ret = bdrv_get_info(bs, &bdi);
2321 if (ret < 0 || bdi.cluster_size == 0) {
2322 return bs->request_alignment;
2324 return bdi.cluster_size;
/* True unless [offset, +bytes) lies entirely before or entirely after
 * req's overlap window. */
2328 static bool tracked_request_overlaps(BdrvTrackedRequest *req,
2329 int64_t offset, unsigned int bytes)
2332 if (offset >= req->overlap_offset + req->overlap_bytes) {
2336 if (req->overlap_offset >= offset + bytes) {
/* Block the calling coroutine until no overlapping serialising request
 * remains on self's BDS; returns whether it had to wait. The
 * waiting_for link breaks wait cycles between requests. */
2342 static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
2344 BlockDriverState *bs = self->bs;
2345 BdrvTrackedRequest *req;
2347 bool waited = false;
/* fast path: nothing serialising on this BDS at all */
2349 if (!bs->serialising_in_flight) {
2355 QLIST_FOREACH(req, &bs->tracked_requests, list) {
2356 if (req == self || (!req->serialising && !self->serialising)) {
2359 if (tracked_request_overlaps(req, self->overlap_offset,
2360 self->overlap_bytes))
2362 /* Hitting this means there was a reentrant request, for
2363 * example, a block driver issuing nested requests. This must
2364 * never happen since it means deadlock.
2366 assert(qemu_coroutine_self() != req->co);
2368 /* If the request is already (indirectly) waiting for us, or
2369 * will wait for us as soon as it wakes up, then just go on
2370 * (instead of producing a deadlock in the former case). */
2371 if (!req->waiting_for) {
2372 self->waiting_for = req;
2373 qemu_co_queue_wait(&req->wait_queue);
2374 self->waiting_for = NULL;
2389 * -EINVAL - backing format specified, but no file
2390 * -ENOSPC - can't update the backing file because no space is left in the
2392 * -ENOTSUP - format driver doesn't support changing the backing file
/* Ask the driver to rewrite the backing-file reference in the image,
 * then mirror the new values into the cached BDS strings (NULL args
 * become empty strings via ?:). */
2394 int bdrv_change_backing_file(BlockDriverState *bs,
2395 const char *backing_file, const char *backing_fmt)
2397 BlockDriver *drv = bs->drv;
2400 /* Backing file format doesn't make sense without a backing file */
2401 if (backing_fmt && !backing_file) {
2405 if (drv->bdrv_change_backing_file != NULL) {
2406 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2412 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2413 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2419 * Finds the image layer in the chain that has 'bs' as its backing file.
2421 * active is the current topmost image.
2423 * Returns NULL if bs is not found in active's image chain,
2424 * or if active == bs.
/* Walk active's backing chain looking for the image whose backing_hd
 * is bs; NULL when bs is not in the chain (or bs == active). */
2426 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2427 BlockDriverState *bs)
2429 BlockDriverState *overlay = NULL;
2430 BlockDriverState *intermediate;
2432 assert(active != NULL);
2435 /* if bs is the same as active, then by definition it has no overlay
2441 intermediate = active;
2442 while (intermediate->backing_hd) {
2443 if (intermediate->backing_hd == bs) {
2444 overlay = intermediate;
2447 intermediate = intermediate->backing_hd;
/* Queue node holding one intermediate BDS slated for deletion by
 * bdrv_drop_intermediate(). */
2453 typedef struct BlkIntermediateStates {
2454 BlockDriverState *bs;
2455 QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
2456 } BlkIntermediateStates;
2460 * Drops images above 'base' up to and including 'top', and sets the image
2461 * above 'top' to have base as its backing file.
2463 * Requires that the overlay to 'top' is opened r/w, so that the backing file
2464 * information in 'bs' can be properly updated.
2466 * E.g., this will convert the following chain:
2467 * bottom <- base <- intermediate <- top <- active
2471 * bottom <- base <- active
2473 * It is allowed for bottom==base, in which case it converts:
2475 * base <- intermediate <- top <- active
2482 * if active == top, that is considered an error
/* Collapse the chain between 'top' and 'base': collect every BDS from
 * top down to (but excluding) base, repoint top's overlay onto base,
 * then unref the collected intermediates. Nothing is deleted until the
 * backing-file update has succeeded. */
2485 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2486 BlockDriverState *base)
2488 BlockDriverState *intermediate;
2489 BlockDriverState *base_bs = NULL;
2490 BlockDriverState *new_top_bs = NULL;
2491 BlkIntermediateStates *intermediate_state, *next;
2494 QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
2495 QSIMPLEQ_INIT(&states_to_delete);
2497 if (!top->drv || !base->drv) {
2501 new_top_bs = bdrv_find_overlay(active, top);
2503 if (new_top_bs == NULL) {
2504 /* we could not find the image above 'top', this is an error */
2508 /* special case of new_top_bs->backing_hd already pointing to base - nothing
2509 * to do, no intermediate images */
2510 if (new_top_bs->backing_hd == base) {
2517 /* now we will go down through the list, and add each BDS we find
2518 * into our deletion queue, until we hit the 'base'
2520 while (intermediate) {
2521 intermediate_state = g_malloc0(sizeof(BlkIntermediateStates));
2522 intermediate_state->bs = intermediate;
2523 QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
2525 if (intermediate->backing_hd == base) {
2526 base_bs = intermediate->backing_hd;
2529 intermediate = intermediate->backing_hd;
2531 if (base_bs == NULL) {
2532 /* something went wrong, we did not end at the base. safely
2533 * unravel everything, and exit with error */
2537 /* success - we can delete the intermediate states, and link top->base */
2538 ret = bdrv_change_backing_file(new_top_bs, base_bs->filename,
2539 base_bs->drv ? base_bs->drv->format_name : "");
2543 new_top_bs->backing_hd = base_bs;
2545 bdrv_refresh_limits(new_top_bs);
2547 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2548 /* so that bdrv_close() does not recursively close the chain */
2549 intermediate_state->bs->backing_hd = NULL;
2550 bdrv_unref(intermediate_state->bs);
/* free the queue nodes themselves on both success and error paths */
2555 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2556 g_free(intermediate_state);
/* Validate that a byte-granularity request targets an inserted medium
 * and lies within the device length (len - offset guards overflow). */
2562 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
2567 if (!bdrv_is_inserted(bs))
2573 len = bdrv_getlength(bs);
2578 if ((offset > len) || (len - offset < size))
/* Sector-granularity wrapper around bdrv_check_byte_request(). */
2584 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
2587 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
2588 nb_sectors * BDRV_SECTOR_SIZE);
/* Parameter/result bundle for a synchronous read/write emulated via a
 * coroutine (see bdrv_rw_co_entry / bdrv_prwv_co). */
2591 typedef struct RwCo {
2592 BlockDriverState *bs;
2597 BdrvRequestFlags flags;
/* Coroutine entry point: dispatch the RwCo request to the byte-based
 * preadv/pwritev path and store the result in rwco->ret. */
2600 static void coroutine_fn bdrv_rw_co_entry(void *opaque)
2602 RwCo *rwco = opaque;
2604 if (!rwco->is_write) {
2605 rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
2606 rwco->qiov->size, rwco->qiov,
2609 rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
2610 rwco->qiov->size, rwco->qiov,
2616 * Process a vectored synchronous request using coroutines
/* Run a vectored request synchronously: execute bdrv_rw_co_entry
 * directly when already in coroutine context, otherwise spawn a
 * coroutine and spin the event loop until rwco.ret leaves NOT_DONE.
 * I/O throttling is force-disabled first, since its timer cannot fire
 * while the vcpu is blocked in this synchronous path. */
2618 static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
2619 QEMUIOVector *qiov, bool is_write,
2620 BdrvRequestFlags flags)
2627 .is_write = is_write,
2633 * In sync call context, when the vcpu is blocked, this throttling timer
2634 * will not fire; so the I/O throttling function has to be disabled here
2635 * if it has been enabled.
2637 if (bs->io_limits_enabled) {
2638 fprintf(stderr, "Disabling I/O throttling on '%s' due "
2639 "to synchronous I/O.\n", bdrv_get_device_name(bs));
2640 bdrv_io_limits_disable(bs);
2643 if (qemu_in_coroutine()) {
2644 /* Fast-path if already in coroutine context */
2645 bdrv_rw_co_entry(&rwco);
2647 co = qemu_coroutine_create(bdrv_rw_co_entry);
2648 qemu_coroutine_enter(co, &rwco);
2649 while (rwco.ret == NOT_DONE) {
2657 * Process a synchronous request using coroutines
/* Sector-based synchronous helper: wrap buf in a one-element iovec and
 * forward to the byte-based bdrv_prwv_co(). */
2659 static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
2660 int nb_sectors, bool is_write, BdrvRequestFlags flags)
2663 struct iovec iov = {
2664 .iov_base = (void *)buf,
2665 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
2668 qemu_iovec_init_external(&qiov, &iov, 1);
2669 return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
2670 &qiov, is_write, flags);
2673 /* return < 0 if error. See bdrv_write() for the return codes */
/* Synchronous sector read; see bdrv_write() for error codes. */
2674 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
2675 uint8_t *buf, int nb_sectors)
2677 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
2680 /* Just like bdrv_read(), but with I/O throttling temporarily disabled */
/* bdrv_read() with io_limits_enabled saved, cleared for the duration
 * of the read, and restored afterwards. */
2681 int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
2682 uint8_t *buf, int nb_sectors)
2687 enabled = bs->io_limits_enabled;
2688 bs->io_limits_enabled = false;
2689 ret = bdrv_read(bs, sector_num, buf, nb_sectors);
2690 bs->io_limits_enabled = enabled;
2694 /* Return < 0 if error. Important errors are:
2695 -EIO generic I/O error (may happen for all errors)
2696 -ENOMEDIUM No media inserted.
2697 -EINVAL Invalid sector number or nb_sectors
2698 -EACCES Trying to write a read-only device
/* Synchronous sector write (const buf cast away for the shared helper). */
2700 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
2701 const uint8_t *buf, int nb_sectors)
2703 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
/* Write zeroes to a sector range; a NULL buffer plus
 * BDRV_REQ_ZERO_WRITE lets drivers use efficient zeroing. */
2706 int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
2707 int nb_sectors, BdrvRequestFlags flags)
2709 return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
2710 BDRV_REQ_ZERO_WRITE | flags);
2714 * Completely zero out a block device with the help of bdrv_write_zeroes.
2715 * The operation is sped up by checking the block status and only writing
2716 * zeroes to the device if they currently do not return zeroes. Optional
2717 * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
2719 * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
/* Zero the whole device, skipping ranges the block-status query
 * already reports as zero; chunk size capped at INT_MAX sectors. */
2721 int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
2723 int64_t target_size = bdrv_getlength(bs) / BDRV_SECTOR_SIZE;
2724 int64_t ret, nb_sectors, sector_num = 0;
2728 nb_sectors = target_size - sector_num;
2729 if (nb_sectors <= 0) {
2732 if (nb_sectors > INT_MAX) {
2733 nb_sectors = INT_MAX;
2735 ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
2737 error_report("error getting block status at sector %" PRId64 ": %s",
2738 sector_num, strerror(-ret));
/* already reads as zero: nothing to write for this chunk */
2741 if (ret & BDRV_BLOCK_ZERO) {
2745 ret = bdrv_write_zeroes(bs, sector_num, n, flags);
2747 error_report("error writing zeroes at sector %" PRId64 ": %s",
2748 sector_num, strerror(-ret));
/* Byte-granularity synchronous read into buf via a one-element iovec. */
2755 int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
2758 struct iovec iov = {
2759 .iov_base = (void *)buf,
2768 qemu_iovec_init_external(&qiov, &iov, 1);
2769 ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
/* Byte-granularity synchronous vectored write. */
2777 int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
2781 ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
/* Byte-granularity synchronous write of buf via a one-element iovec,
 * forwarded to bdrv_pwritev(). */
2789 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
2790 const void *buf, int bytes)
2793 struct iovec iov = {
2794 .iov_base = (void *) buf,
2802 qemu_iovec_init_external(&qiov, &iov, 1);
2803 return bdrv_pwritev(bs, offset, &qiov);
2807 * Writes to the file and ensures that no writes are reordered across this
2808 * request (acts as a barrier)
2810 * Returns 0 on success, -errno in error cases.
/* bdrv_pwrite() followed by a flush (acts as a write barrier); the
 * flush is skipped when the cache mode already writes through. */
2812 int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
2813 const void *buf, int count)
2817 ret = bdrv_pwrite(bs, offset, buf, count);
2822 /* No flush needed for cache modes that already do it */
2823 if (bs->enable_write_cache) {
/* Copy-on-read: read the whole enclosing cluster through a private
 * bounce buffer, write it back into the top image (zero-optimized when
 * possible), then copy the requested slice into the caller's qiov. */
2830 static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
2831 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
2833 /* Perform I/O through a temporary buffer so that users who scribble over
2834 * their read buffer while the operation is in progress do not end up
2835 * modifying the image file. This is critical for zero-copy guest I/O
2836 * where anything might happen inside guest memory.
2838 void *bounce_buffer;
2840 BlockDriver *drv = bs->drv;
2842 QEMUIOVector bounce_qiov;
2843 int64_t cluster_sector_num;
2844 int cluster_nb_sectors;
2848 /* Cover entire cluster so no additional backing file I/O is required when
2849 * allocating cluster in the image file.
2851 bdrv_round_to_clusters(bs, sector_num, nb_sectors,
2852 &cluster_sector_num, &cluster_nb_sectors);
2854 trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
2855 cluster_sector_num, cluster_nb_sectors);
2857 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
2858 iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
2859 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
/* fetch the cluster (from the backing chain as needed) */
2861 ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
/* all-zero data: use the driver's zero-write fast path when available */
2867 if (drv->bdrv_co_write_zeroes &&
2868 buffer_is_zero(bounce_buffer, iov.iov_len)) {
2869 ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
2870 cluster_nb_sectors, 0);
2872 /* This does not change the data on the disk, it is not necessary
2873 * to flush even in cache=writethrough mode.
2875 ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
2880 /* It might be okay to ignore write errors for guest requests. If this
2881 * is a deliberate copy-on-read then we don't want to ignore the error.
2882 * Simply report it in all cases.
2887 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
2888 qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
2889 nb_sectors * BDRV_SECTOR_SIZE);
2892 qemu_vfree(bounce_buffer);
2897  * Forwards an already correctly aligned request to the BlockDriver. This
2898  * handles copy on read and zeroing after EOF; any other features must be
2899  * implemented by the caller.
/* NOTE(review): several physical lines (braces, early returns, 'goto out'
 * paths) are missing from this excerpt; comments below describe only what
 * is visible. */
2901 static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
2902     BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
2903     int64_t align, QEMUIOVector *qiov, int flags)
2905     BlockDriver *drv = bs->drv;
/* Caller guarantees sector alignment; translate bytes to sectors for the
 * legacy sector-based driver interface. */
2908     int64_t sector_num = offset >> BDRV_SECTOR_BITS;
2909     unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
2911     assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
2912     assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
2914     /* Handle Copy on Read and associated serialisation */
2915     if (flags & BDRV_REQ_COPY_ON_READ) {
2916         /* If we touch the same cluster it counts as an overlap.  This
2917          * guarantees that allocating writes will be serialized and not race
2918          * with each other for the same cluster.  For example, in copy-on-read
2919          * it ensures that the CoR read and write operations are atomic and
2920          * guest writes cannot interleave between them. */
2921         mark_request_serialising(req, bdrv_get_cluster_size(bs));
2924     wait_serialising_requests(req);
2926     if (flags & BDRV_REQ_COPY_ON_READ) {
/* Only perform copy-on-read when part of the range is unallocated in this
 * layer (would otherwise be read from a backing file). */
2929         ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
2934         if (!ret || pnum != nb_sectors) {
2935             ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
2940     /* Forward the request to the BlockDriver */
2941     if (!(bs->zero_beyond_eof && bs->growable)) {
2942         ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
2944         /* Read zeros after EOF of growable BDSes */
2945         int64_t len, total_sectors, max_nb_sectors;
2947         len = bdrv_getlength(bs);
2953         total_sectors = DIV_ROUND_UP(len, BDRV_SECTOR_SIZE);
/* Clamp the driver read to the in-image sectors, rounded up to the request
 * alignment; the remainder is synthesized as zeroes below. */
2954         max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
2955                                   align >> BDRV_SECTOR_BITS);
2956         if (max_nb_sectors > 0) {
2957             ret = drv->bdrv_co_readv(bs, sector_num,
2958                                      MIN(nb_sectors, max_nb_sectors), qiov);
2963         /* Reading beyond end of file is supposed to produce zeroes */
2964         if (ret == 0 && total_sectors < sector_num + nb_sectors) {
/* 'offset' here is a sector index into the request, not a byte offset. */
2965             uint64_t offset = MAX(0, total_sectors - sector_num);
2966             uint64_t bytes = (sector_num + nb_sectors - offset) *
2968             qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
2977  * Handle a read request in coroutine context
/* Byte-granularity read entry point: validates the request, applies I/O
 * throttling, pads the request to 'align' with bounce buffers (head/tail)
 * when needed, then forwards to bdrv_aligned_preadv under request tracking.
 * NOTE(review): some lines (error returns, 'fail' label, final return) are
 * elided in this excerpt. */
2979 static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
2980     int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
2981     BdrvRequestFlags flags)
2983     BlockDriver *drv = bs->drv;
2984     BdrvTrackedRequest req;
2986     /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
2987     uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
2988     uint8_t *head_buf = NULL;
2989     uint8_t *tail_buf = NULL;
2990     QEMUIOVector local_qiov;
2991     bool use_local_qiov = false;
2997     if (bdrv_check_byte_request(bs, offset, bytes)) {
3001     if (bs->copy_on_read) {
3002         flags |= BDRV_REQ_COPY_ON_READ;
3005     /* throttling disk I/O */
3006     if (bs->io_limits_enabled) {
3007         bdrv_io_limits_intercept(bs, bytes, false);
3010     /* Align read if necessary by padding qiov */
3011     if (offset & (align - 1)) {
/* Unaligned head: prepend a bounce buffer covering offset down to the
 * previous alignment boundary. */
3012         head_buf = qemu_blockalign(bs, align);
3013         qemu_iovec_init(&local_qiov, qiov->niov + 2);
3014         qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
3015         qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3016         use_local_qiov = true;
3018         bytes += offset & (align - 1);
3019         offset = offset & ~(align - 1);
3022     if ((offset + bytes) & (align - 1)) {
3023         if (!use_local_qiov) {
3024             qemu_iovec_init(&local_qiov, qiov->niov + 1);
3025             qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3026             use_local_qiov = true;
/* Unaligned tail: append a bounce buffer up to the next alignment boundary. */
3028         tail_buf = qemu_blockalign(bs, align);
3029         qemu_iovec_add(&local_qiov, tail_buf,
3030                        align - ((offset + bytes) & (align - 1)));
3032         bytes = ROUND_UP(bytes, align);
3035     tracked_request_begin(&req, bs, offset, bytes, false);
3036     ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
3037                               use_local_qiov ? &local_qiov : qiov,
3039     tracked_request_end(&req);
3041     if (use_local_qiov) {
3042         qemu_iovec_destroy(&local_qiov);
3043         qemu_vfree(head_buf);
3044         qemu_vfree(tail_buf);
3050 static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
3051 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3052 BdrvRequestFlags flags)
3054 if (nb_sectors < 0 || nb_sectors > (UINT_MAX >> BDRV_SECTOR_BITS)) {
3058 return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
3059 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
/* Public coroutine read entry point: trace and forward with no extra flags. */
3062 int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
3063     int nb_sectors, QEMUIOVector *qiov)
3065     trace_bdrv_co_readv(bs, sector_num, nb_sectors);
3067     return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
/* Read that additionally populates this layer from the backing file
 * (explicit copy-on-read request). */
3070 int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
3071     int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
3073     trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
3075     return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
3076                             BDRV_REQ_COPY_ON_READ);
3079 /* if no limit is specified in the BlockLimits use a default
3080  * of 32768 512-byte sectors (16 MiB) per request.
3082 #define MAX_WRITE_ZEROES_DEFAULT 32768
/* Zero out a sector range, chunked to the driver's limits.  Prefers the
 * driver's efficient write_zeroes op; falls back to writing an explicit
 * zeroed bounce buffer when the driver returns -ENOTSUP.
 * NOTE(review): some lines (loop bookkeeping, 'fail' handling) are elided
 * in this excerpt. */
3084 static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
3085     int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
3087     BlockDriver *drv = bs->drv;
/* iov doubles as the lazily-allocated fallback bounce buffer; iov_base==NULL
 * means "not allocated yet". */
3089     struct iovec iov = {0};
3092     int max_write_zeroes = bs->bl.max_write_zeroes ?
3093                            bs->bl.max_write_zeroes : MAX_WRITE_ZEROES_DEFAULT;
3095     while (nb_sectors > 0 && !ret) {
3096         int num = nb_sectors;
3098         /* Align request.  Block drivers can expect the "bulk" of the request
3101         if (bs->bl.write_zeroes_alignment
3102             && num > bs->bl.write_zeroes_alignment) {
3103             if (sector_num % bs->bl.write_zeroes_alignment != 0) {
3104                 /* Make a small request up to the first aligned sector. */
3105                 num = bs->bl.write_zeroes_alignment;
3106                 num -= sector_num % bs->bl.write_zeroes_alignment;
3107             } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
3108                 /* Shorten the request to the last aligned sector.  num cannot
3109                  * underflow because num > bs->bl.write_zeroes_alignment.
3111                 num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
3115         /* limit request size */
3116         if (num > max_write_zeroes) {
3117             num = max_write_zeroes;
3121         /* First try the efficient write zeroes operation */
3122         if (drv->bdrv_co_write_zeroes) {
3123             ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
3126         if (ret == -ENOTSUP) {
3127             /* Fall back to bounce buffer if write zeroes is unsupported */
3128             iov.iov_len = num * BDRV_SECTOR_SIZE;
3129             if (iov.iov_base == NULL) {
3130                 iov.iov_base = qemu_blockalign(bs, num * BDRV_SECTOR_SIZE);
3131                 memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
3133             qemu_iovec_init_external(&qiov, &iov, 1);
3135             ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);
3137             /* Keep bounce buffer around if it is big enough for all
3138              * future requests.
3140             if (num < max_write_zeroes) {
3141                 qemu_vfree(iov.iov_base);
3142                 iov.iov_base = NULL;
3150     qemu_vfree(iov.iov_base);
3155  * Forwards an already correctly aligned write request to the BlockDriver.
/* Runs before-write notifiers, dispatches either a zero-write or a normal
 * vectored write, optionally flushes (cache=writethrough), and updates
 * dirty bitmaps / high-water mark / total_sectors.
 * NOTE(review): some lines are elided in this excerpt. */
3157 static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
3158     BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
3159     QEMUIOVector *qiov, int flags)
3161     BlockDriver *drv = bs->drv;
3165     int64_t sector_num = offset >> BDRV_SECTOR_BITS;
3166     unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
3168     assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
3169     assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
/* If we had to wait, the request cannot have been marked serialising by
 * ourselves earlier, and the tracked request must fully cover this write. */
3171     waited = wait_serialising_requests(req);
3172     assert(!waited || !req->serialising);
3173     assert(req->overlap_offset <= offset);
3174     assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
3176     ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
3179         /* Do nothing, write notifier decided to fail this request */
3180     } else if (flags & BDRV_REQ_ZERO_WRITE) {
3181         BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
3182         ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
3184         BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
3185         ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
3187     BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);
/* cache=writethrough: flush after every successful write. */
3189     if (ret == 0 && !bs->enable_write_cache) {
3190         ret = bdrv_co_flush(bs);
3193     bdrv_set_dirty(bs, sector_num, nb_sectors);
3195     if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
3196         bs->wr_highest_sector = sector_num + nb_sectors - 1;
3198     if (bs->growable && ret >= 0) {
3199         bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
3206  * Handle a write request in coroutine context
/* Byte-granularity write entry point.  Unaligned head/tail portions are
 * handled with a read-modify-write cycle: the missing bytes are read into
 * bounce buffers under the same tracked (serialising) request, then the
 * padded request is forwarded to bdrv_aligned_pwritev.
 * NOTE(review): error paths ('fail' label, some returns) are elided in this
 * excerpt. */
3208 static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
3209     int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
3210     BdrvRequestFlags flags)
3212     BdrvTrackedRequest req;
3213     /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
3214     uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
3215     uint8_t *head_buf = NULL;
3216     uint8_t *tail_buf = NULL;
3217     QEMUIOVector local_qiov;
3218     bool use_local_qiov = false;
3224     if (bs->read_only) {
3227     if (bdrv_check_byte_request(bs, offset, bytes)) {
3231     /* throttling disk I/O */
3232     if (bs->io_limits_enabled) {
3233         bdrv_io_limits_intercept(bs, bytes, true);
3237      * Align write if necessary by performing a read-modify-write cycle.
3238      * Pad qiov with the read parts and be sure to have a tracked request not
3239      * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
3241     tracked_request_begin(&req, bs, offset, bytes, true);
3243     if (offset & (align - 1)) {
3244         QEMUIOVector head_qiov;
3245         struct iovec head_iov;
/* Serialise so no concurrent write can change the head sector between our
 * read and the final write. */
3247         mark_request_serialising(&req, align);
3248         wait_serialising_requests(&req);
3250         head_buf = qemu_blockalign(bs, align);
3251         head_iov = (struct iovec) {
3252             .iov_base   = head_buf,
3255         qemu_iovec_init_external(&head_qiov, &head_iov, 1);
3257         BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
3258         ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
3259                                   align, &head_qiov, 0);
3263         BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
3265         qemu_iovec_init(&local_qiov, qiov->niov + 2);
3266         qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
3267         qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3268         use_local_qiov = true;
3270         bytes += offset & (align - 1);
3271         offset = offset & ~(align - 1);
3274     if ((offset + bytes) & (align - 1)) {
3275         QEMUIOVector tail_qiov;
3276         struct iovec tail_iov;
3280         mark_request_serialising(&req, align);
/* If head padding already serialised and waited, waiting again here would
 * indicate another request slipped in between — assert it cannot happen. */
3281         waited = wait_serialising_requests(&req);
3282         assert(!waited || !use_local_qiov);
3284         tail_buf = qemu_blockalign(bs, align);
3285         tail_iov = (struct iovec) {
3286             .iov_base   = tail_buf,
3289         qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
3291         BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
3292         ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
3293                                   align, &tail_qiov, 0);
3297         BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
3299         if (!use_local_qiov) {
3300             qemu_iovec_init(&local_qiov, qiov->niov + 1);
3301             qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3302             use_local_qiov = true;
/* Keep only the part of the tail buffer past the caller's data. */
3305         tail_bytes = (offset + bytes) & (align - 1);
3306         qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
3308         bytes = ROUND_UP(bytes, align);
3311     ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
3312                                use_local_qiov ? &local_qiov : qiov,
3316     tracked_request_end(&req);
3318     if (use_local_qiov) {
3319         qemu_iovec_destroy(&local_qiov);
3321     qemu_vfree(head_buf);
3322     qemu_vfree(tail_buf);
/* Sector-based write wrapper: validate nb_sectors (INT_MAX bound keeps the
 * left shift below in defined range) and forward to bdrv_co_do_pwritev. */
3327 static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
3328     int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3329     BdrvRequestFlags flags)
3331     if (nb_sectors < 0 || nb_sectors > (INT_MAX >> BDRV_SECTOR_BITS)) {
3335     return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
3336                               nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
/* Public coroutine write entry point: trace and forward with no extra flags. */
3339 int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
3340     int nb_sectors, QEMUIOVector *qiov)
3342     trace_bdrv_co_writev(bs, sector_num, nb_sectors);
3344     return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
/* Public zero-write entry point.  Passes a NULL qiov with the ZERO_WRITE
 * flag; the actual zeroing happens in bdrv_co_do_write_zeroes. */
3347 int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
3348                                       int64_t sector_num, int nb_sectors,
3349                                       BdrvRequestFlags flags)
3351     trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
/* Discarding is only allowed when the image was opened with BDRV_O_UNMAP. */
3353     if (!(bs->open_flags & BDRV_O_UNMAP)) {
3354         flags &= ~BDRV_REQ_MAY_UNMAP;
3357     return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
3358                              BDRV_REQ_ZERO_WRITE | flags);
3362  * Truncate file to 'offset' bytes (needed only for file protocols)
/* On success, refreshes total_sectors and notifies the attached device of
 * the resize.  Fails when the driver lacks truncate support or the device
 * is in use.  NOTE(review): error-return lines are elided in this excerpt. */
3364 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
3366     BlockDriver *drv = bs->drv;
3370     if (!drv->bdrv_truncate)
3374     if (bdrv_in_use(bs))
3376     ret = drv->bdrv_truncate(bs, offset);
3378         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
3379         bdrv_dev_resize_cb(bs);
3385  * Length of an allocated file in bytes. Sparse files are counted by actual
3386  * allocated space. Return < 0 if error or unknown.
/* Falls back to the underlying protocol layer (bs->file) when the format
 * driver does not implement the query itself. */
3388 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
3390     BlockDriver *drv = bs->drv;
3394     if (drv->bdrv_get_allocated_file_size) {
3395         return drv->bdrv_get_allocated_file_size(bs);
3398         return bdrv_get_allocated_file_size(bs->file);
3404  * Length of a file in bytes. Return < 0 if error or unknown.
/* Drivers with variable length (e.g. host devices) have their size
 * re-queried before total_sectors is used. */
3406 int64_t bdrv_getlength(BlockDriverState *bs)
3408     BlockDriver *drv = bs->drv;
3412     if (drv->has_variable_length) {
3413         int ret = refresh_total_sectors(bs, bs->total_sectors);
3418     return bs->total_sectors * BDRV_SECTOR_SIZE;
3421 /* return 0 as number of sectors if no device present or error */
3422 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
3425     length = bdrv_getlength(bs);
/* Byte length -> 512-byte sector count. */
3429     length = length >> BDRV_SECTOR_BITS;
3430     *nb_sectors_ptr = length;
/* Configure the rerror/werror policies for this block device. */
3433 void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
3434                        BlockdevOnError on_write_error)
3436     bs->on_read_error = on_read_error;
3437     bs->on_write_error = on_write_error;
/* Return the configured error policy for the given direction. */
3440 BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
3442     return is_read ? bs->on_read_error : bs->on_write_error;
/* Map the device's error policy and a concrete errno value to the action
 * (report/ignore/stop) that the device model should take. */
3445 BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
3447     BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
/* 'enospc' stops the VM only for out-of-space errors; others are reported. */
3450     case BLOCKDEV_ON_ERROR_ENOSPC:
3451         return (error == ENOSPC) ? BDRV_ACTION_STOP : BDRV_ACTION_REPORT;
3452     case BLOCKDEV_ON_ERROR_STOP:
3453         return BDRV_ACTION_STOP;
3454     case BLOCKDEV_ON_ERROR_REPORT:
3455         return BDRV_ACTION_REPORT;
3456     case BLOCKDEV_ON_ERROR_IGNORE:
3457         return BDRV_ACTION_IGNORE;
3463 /* This is done by device models because, while the block layer knows
3464  * about the error, it does not know whether an operation comes from
3465  * the device or the block layer (from a job, for example).
/* Emits the QMP BLOCK_IO_ERROR event and, for the STOP action, pauses the
 * VM and records the error in the device's iostatus. */
3467 void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
3468                        bool is_read, int error)
3471     bdrv_emit_qmp_error_event(bs, QEVENT_BLOCK_IO_ERROR, action, is_read);
3472     if (action == BDRV_ACTION_STOP) {
3473         vm_stop(RUN_STATE_IO_ERROR);
3474         bdrv_iostatus_set_err(bs, error);
/* Whether the device was opened read-only. */
3478 int bdrv_is_read_only(BlockDriverState *bs)
3480     return bs->read_only;
3483 int bdrv_is_sg(BlockDriverState *bs)
/* Whether the guest-visible write cache is enabled (writeback mode). */
3488 int bdrv_enable_write_cache(BlockDriverState *bs)
3490     return bs->enable_write_cache;
/* Toggle the guest-visible write cache and mirror the setting into
 * open_flags so it survives a bdrv_reopen(). */
3493 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
3495     bs->enable_write_cache = wce;
3497     /* so a reopen() will preserve wce */
3499         bs->open_flags |= BDRV_O_CACHE_WB;
3501         bs->open_flags &= ~BDRV_O_CACHE_WB;
/* True if this image or its backing file is encrypted. */
3505 int bdrv_is_encrypted(BlockDriverState *bs)
3507     if (bs->backing_hd && bs->backing_hd->encrypted)
3509     return bs->encrypted;
/* True if an encryption key must still be supplied before I/O can proceed
 * (checks this image and its immediate backing file). */
3512 int bdrv_key_required(BlockDriverState *bs)
3514     BlockDriverState *backing_hd = bs->backing_hd;
3516     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
3518     return (bs->encrypted && !bs->valid_key);
/* Supply the encryption key, recursing into the backing file first.  On the
 * first successful key set, fires the deferred media-change callback that
 * was skipped at open time.
 * NOTE(review): several return statements are elided in this excerpt. */
3521 int bdrv_set_key(BlockDriverState *bs, const char *key)
3524     if (bs->backing_hd && bs->backing_hd->encrypted) {
3525         ret = bdrv_set_key(bs->backing_hd, key);
3531     if (!bs->encrypted) {
3533     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
3536     ret = bs->drv->bdrv_set_key(bs, key);
3539     } else if (!bs->valid_key) {
3541         /* call the change callback now, we skipped it on open */
3542         bdrv_dev_change_media_cb(bs, true);
/* Format name of the attached driver, or NULL if no medium. */
3547 const char *bdrv_get_format_name(BlockDriverState *bs)
3549     return bs->drv ? bs->drv->format_name : NULL;
/* Invoke 'it' once per registered block driver format name. */
3552 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
3557     QLIST_FOREACH(drv, &bdrv_drivers, list) {
3558         it(opaque, drv->format_name);
3562 /* This function is to find block backend bs */
/* Linear search of the global device list by device name; returns the
 * match (return statements elided in this excerpt). */
3563 BlockDriverState *bdrv_find(const char *name)
3565     BlockDriverState *bs;
3567     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3568         if (!strcmp(name, bs->device_name)) {
3575 /* This function is to find a node in the bs graph */
/* Linear search of the named-node list by node name. */
3576 BlockDriverState *bdrv_find_node(const char *node_name)
3578     BlockDriverState *bs;
3582     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3583         if (!strcmp(node_name, bs->node_name)) {
3590 /* Put this QMP function here so it can access the static graph_bdrv_states. */
/* Build a BlockDeviceInfoList describing every named node; the caller owns
 * and must free the returned list. */
3591 BlockDeviceInfoList *bdrv_named_nodes_list(void)
3593     BlockDeviceInfoList *list, *entry;
3594     BlockDriverState *bs;
3597     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3598         entry = g_malloc0(sizeof(*entry));
3599         entry->value = bdrv_block_device_info(bs);
/* Resolve a BDS by device name and/or node name; sets errp and (presumably)
 * returns NULL when neither lookup succeeds.
 * NOTE(review): the returns between the lookups are elided in this excerpt. */
3607 BlockDriverState *bdrv_lookup_bs(const char *device,
3608                                  const char *node_name,
3611     BlockDriverState *bs = NULL;
3614         bs = bdrv_find(device);
3622         bs = bdrv_find_node(node_name);
3629     error_setg(errp, "Cannot find device=%s nor node_name=%s",
3630                      device ? device : "",
3631                      node_name ? node_name : "");
/* Iterate the global device list: NULL starts iteration, otherwise return
 * the successor of 'bs' (NULL at the end). */
3635 BlockDriverState *bdrv_next(BlockDriverState *bs)
3638         return QTAILQ_FIRST(&bdrv_states);
3640     return QTAILQ_NEXT(bs, device_list);
/* Invoke 'it' for every block device in the global list. */
3643 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
3645     BlockDriverState *bs;
3647     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
/* The backend device name ("" for anonymous nodes). */
3652 const char *bdrv_get_device_name(BlockDriverState *bs)
3654     return bs->device_name;
/* The BDRV_O_* flags the device was opened with. */
3657 int bdrv_get_flags(BlockDriverState *bs)
3659     return bs->open_flags;
/* Flush every block device; returns the first error encountered while
 * still attempting to flush the remaining devices. */
3662 int bdrv_flush_all(void)
3664     BlockDriverState *bs;
3667     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3668         int ret = bdrv_flush(bs);
3669         if (ret < 0 && !result) {
3677 int bdrv_has_zero_init_1(BlockDriverState *bs)
/* Whether a freshly created area of the image reads as zeroes. */
3682 int bdrv_has_zero_init(BlockDriverState *bs)
3686     /* If BS is a copy on write image, it is initialized to
3687        the contents of the base image, which may not be zeroes.  */
3688     if (bs->backing_hd) {
3691     if (bs->drv->bdrv_has_zero_init) {
3692         return bs->drv->bdrv_has_zero_init(bs);
/* Whether unallocated blocks read as zeroes.  Never true with a backing
 * file, since unallocated data comes from the backing image instead. */
3699 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
3701     BlockDriverInfo bdi;
3703     if (bs->backing_hd) {
3707     if (bdrv_get_info(bs, &bdi) == 0) {
3708         return bdi.unallocated_blocks_are_zero;
/* Whether zeroes may be written by unmapping (requires BDRV_O_UNMAP and no
 * backing file, plus driver support reported via BlockDriverInfo). */
3714 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
3716     BlockDriverInfo bdi;
3718     if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
3722     if (bdrv_get_info(bs, &bdi) == 0) {
3723         return bdi.can_write_zeroes_with_unmap;
/* Parameter/result bundle passed to the bdrv_get_block_status coroutine
 * entry point (remaining fields elided in this excerpt). */
3729 typedef struct BdrvCoGetBlockStatusData {
3730     BlockDriverState *bs;
3731     BlockDriverState *base;
3737 } BdrvCoGetBlockStatusData;
3740  * Returns true iff the specified sector is present in the disk image. Drivers
3741  * not implementing the functionality are assumed to not support backing files,
3742  * hence all their sectors are reported as allocated.
3744  * If 'sector_num' is beyond the end of the disk image the return value is 0
3745  * and 'pnum' is set to 0.
3747  * 'pnum' is set to the number of sectors (including and immediately following
3748  * the specified sector) that are known to be in the same
3749  * allocated/unallocated state.
3751  * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
3752  * beyond the end of the disk image it will be clamped.
/* NOTE(review): some lines (early returns, switch/if framing) are elided in
 * this excerpt. */
3754 static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
3756                                                      int nb_sectors, int *pnum)
3762     length = bdrv_getlength(bs);
3767     if (sector_num >= (length >> BDRV_SECTOR_BITS)) {
/* Clamp nb_sectors to the end of the image. */
3772     n = bs->total_sectors - sector_num;
3773     if (n < nb_sectors) {
3777     if (!bs->drv->bdrv_co_get_block_status) {
/* Driver cannot report status: everything counts as allocated data, and
 * protocol drivers can also report a valid host offset (identity map). */
3779         ret = BDRV_BLOCK_DATA;
3780         if (bs->drv->protocol_name) {
3781             ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
3786     ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
/* RAW means "ask the protocol layer at the reported offset instead". */
3792     if (ret & BDRV_BLOCK_RAW) {
3793         assert(ret & BDRV_BLOCK_OFFSET_VALID);
3794         return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
3798     if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
3799         if (bdrv_unallocated_blocks_are_zero(bs)) {
3800             ret |= BDRV_BLOCK_ZERO;
3801         } else if (bs->backing_hd) {
/* Unallocated range entirely past the backing file's EOF reads as zero. */
3802             BlockDriverState *bs2 = bs->backing_hd;
3803             int64_t length2 = bdrv_getlength(bs2);
3804             if (length2 >= 0 && sector_num >= (length2 >> BDRV_SECTOR_BITS)) {
3805                 ret |= BDRV_BLOCK_ZERO;
3811         (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
3812         (ret & BDRV_BLOCK_OFFSET_VALID)) {
3813         ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
3816             /* Ignore errors.  This is just providing extra information, it
3817              * is useful but not necessary.
3819             ret |= (ret2 & BDRV_BLOCK_ZERO);
3826 /* Coroutine wrapper for bdrv_get_block_status() */
/* Unpacks the data struct, runs the coroutine version, and stores the
 * result (completion flag assignment elided in this excerpt). */
3827 static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
3829     BdrvCoGetBlockStatusData *data = opaque;
3830     BlockDriverState *bs = data->bs;
3832     data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
3838  * Synchronous wrapper around bdrv_co_get_block_status().
3840  * See bdrv_co_get_block_status() for details.
/* If already in a coroutine, call directly; otherwise spawn a coroutine and
 * poll the main loop until it completes. */
3842 int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
3843                               int nb_sectors, int *pnum)
3846     BdrvCoGetBlockStatusData data = {
3848         .sector_num = sector_num,
3849         .nb_sectors = nb_sectors,
3854     if (qemu_in_coroutine()) {
3855         /* Fast-path if already in coroutine context */
3856         bdrv_get_block_status_co_entry(&data);
3858         co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
3859         qemu_coroutine_enter(co, &data);
3860         while (!data.done) {
/* Boolean view of bdrv_get_block_status(): allocated in THIS layer means it
 * carries data, or reads as zero on a layer without zero-init guarantee. */
3867 int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
3868                                    int nb_sectors, int *pnum)
3870     int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
3875         (ret & BDRV_BLOCK_DATA) ||
3876         ((ret & BDRV_BLOCK_ZERO) && !bdrv_has_zero_init(bs));
3880  * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
3882  * Return true if the given sector is allocated in any image between
3883  * BASE and TOP (inclusive).  BASE can be NULL to check if the given
3884  * sector is allocated in any image of the chain.  Return false otherwise.
3886  * 'pnum' is set to the number of sectors (including and immediately following
3887  *  the specified sector) that are known to be in the same
3888  *  allocated/unallocated state.
/* NOTE(review): the early-return and pnum-update lines inside the loop are
 * elided in this excerpt. */
3891 int bdrv_is_allocated_above(BlockDriverState *top,
3892                             BlockDriverState *base,
3894                             int nb_sectors, int *pnum)
3896     BlockDriverState *intermediate;
3897     int ret, n = nb_sectors;
/* Walk the backing chain from top toward base. */
3900     while (intermediate && intermediate != base) {
3902         ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
3912          * [sector_num, nb_sectors] is unallocated on top but intermediate
3915          * [sector_num+x, nr_sectors] allocated.
3917         if (n > pnum_inter &&
3918             (intermediate == top ||
3919              sector_num + pnum_inter < intermediate->total_sectors)) {
3923         intermediate = intermediate->backing_hd;
/* Filename of the (first) encrypted image in this two-level view: the
 * backing file if it is encrypted, else this image if encrypted. */
3930 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
3932     if (bs->backing_hd && bs->backing_hd->encrypted)
3933         return bs->backing_file;
3934     else if (bs->encrypted)
3935         return bs->filename;
/* Copy the backing filename into the caller's buffer (truncating safely). */
3940 void bdrv_get_backing_filename(BlockDriverState *bs,
3941                                char *filename, int filename_size)
3943     pstrcpy(filename, filename_size, bs->backing_file);
/* Compressed write pass-through.  Requires driver support and a valid
 * range; incompatible with active dirty bitmaps (asserted). */
3946 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
3947                           const uint8_t *buf, int nb_sectors)
3949     BlockDriver *drv = bs->drv;
3952     if (!drv->bdrv_write_compressed)
3954     if (bdrv_check_request(bs, sector_num, nb_sectors))
3957     assert(QLIST_EMPTY(&bs->dirty_bitmaps));
3959     return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
/* Fill *bdi from the driver; zeroed first so unset fields read as 0/false. */
3962 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
3964     BlockDriver *drv = bs->drv;
3967     if (!drv->bdrv_get_info)
3969     memset(bdi, 0, sizeof(*bdi));
3970     return drv->bdrv_get_info(bs, bdi);
/* Driver-specific image info, or NULL if the driver provides none. */
3973 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
3975     BlockDriver *drv = bs->drv;
3976     if (drv && drv->bdrv_get_specific_info) {
3977         return drv->bdrv_get_specific_info(bs);
/* Buffer-based convenience wrapper: wrap (buf, size) in a one-element
 * QEMUIOVector and delegate to bdrv_writev_vmstate(). */
3982 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
3983                       int64_t pos, int size)
3986     struct iovec iov = {
3987         .iov_base   = (void *) buf,
3991     qemu_iovec_init_external(&qiov, &iov, 1);
3992     return bdrv_writev_vmstate(bs, &qiov, pos);
/* Write VM state at 'pos': handled by the driver if it supports vmstate,
 * otherwise forwarded down to the protocol layer. */
3995 int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
3997     BlockDriver *drv = bs->drv;
4001     } else if (drv->bdrv_save_vmstate) {
4002         return drv->bdrv_save_vmstate(bs, qiov, pos);
4003     } else if (bs->file) {
4004         return bdrv_writev_vmstate(bs->file, qiov, pos);
/* Read VM state from 'pos': driver hook first, else recurse into bs->file. */
4010 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
4011                       int64_t pos, int size)
4013     BlockDriver *drv = bs->drv;
4016     if (drv->bdrv_load_vmstate)
4017         return drv->bdrv_load_vmstate(bs, buf, pos, size);
4019         return bdrv_load_vmstate(bs->file, buf, pos, size);
/* Forward a blkdebug event to the driver; silently ignored if unsupported. */
4023 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
4025     if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
4029     bs->drv->bdrv_debug_event(bs, event);
/* Walk down the chain to the first node supporting debug breakpoints
 * (the loop's descend step is elided in this excerpt) and set one there. */
4032 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
4035     while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
4039     if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
4040         return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
/* Companion to bdrv_debug_breakpoint(): find the supporting node and remove
 * the breakpoint identified by 'tag'. */
4046 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
4048     while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
4052     if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
4053         return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
/* Resume a request suspended at a blkdebug breakpoint tagged 'tag'. */
4059 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
4061     while (bs && bs->drv && !bs->drv->bdrv_debug_resume) {
4065     if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
4066         return bs->drv->bdrv_debug_resume(bs, tag);
/* Query whether a request tagged 'tag' is currently suspended at a
 * blkdebug breakpoint somewhere down the chain. */
4072 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
4074     while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
4078     if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
4079         return bs->drv->bdrv_debug_is_suspended(bs, tag);
/* Whether the device was opened in snapshot (temporary overlay) mode. */
4085 int bdrv_is_snapshot(BlockDriverState *bs)
4087     return !!(bs->open_flags & BDRV_O_SNAPSHOT);
4090 /* backing_file can either be relative, or absolute, or a protocol.  If it is
4091  * relative, it must be relative to the chain.  So, passing in bs->filename
4092  * from a BDS as backing_file should not be done, as that may be relative to
4093  * the CWD rather than the chain. */
/* Walks the backing chain of 'bs' looking for the node whose filename
 * matches 'backing_file', comparing canonicalized absolute paths unless a
 * protocol prefix forces literal comparison.  Returns the match or NULL.
 * NOTE(review): loop 'break'/'continue' lines are elided in this excerpt. */
4094 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
4095                                           const char *backing_file)
4097     char *filename_full = NULL;
4098     char *backing_file_full = NULL;
4099     char *filename_tmp = NULL;
4100     int is_protocol = 0;
4101     BlockDriverState *curr_bs = NULL;
4102     BlockDriverState *retval = NULL;
4104     if (!bs || !bs->drv || !backing_file) {
4108     filename_full     = g_malloc(PATH_MAX);
4109     backing_file_full = g_malloc(PATH_MAX);
4110     filename_tmp      = g_malloc(PATH_MAX);
4112     is_protocol = path_has_protocol(backing_file);
4114     for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
4116         /* If either of the filename paths is actually a protocol, then
4117          * compare unmodified paths; otherwise make paths relative */
4118         if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
4119             if (strcmp(backing_file, curr_bs->backing_file) == 0) {
4120                 retval = curr_bs->backing_hd;
4124             /* If not an absolute filename path, make it relative to the current
4125              * image's filename path */
4126             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4129             /* We are going to compare absolute pathnames */
4130             if (!realpath(filename_tmp, filename_full)) {
4134             /* We need to make sure the backing filename we are comparing against
4135              * is relative to the current image filename (or absolute) */
4136             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4137                          curr_bs->backing_file);
4139             if (!realpath(filename_tmp, backing_file_full)) {
4143             if (strcmp(backing_file_full, filename_full) == 0) {
4144                 retval = curr_bs->backing_hd;
4150     g_free(filename_full);
4151     g_free(backing_file_full);
4152     g_free(filename_tmp);
/* Recursively count backing files below 'bs' (0 for a standalone image). */
4156 int bdrv_get_backing_file_depth(BlockDriverState *bs)
4162     if (!bs->backing_hd) {
4166     return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
/* Follow the backing chain to its bottom-most (base) image. */
4169 BlockDriverState *bdrv_find_base(BlockDriverState *bs)
4171     BlockDriverState *curr_bs = NULL;
4179     while (curr_bs->backing_hd) {
4180         curr_bs = curr_bs->backing_hd;
4185 /**************************************************************/
/* Async read: trace, then submit via the coroutine-based AIO emulation
 * (trailing arguments elided in this excerpt). */
4188 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
4189                                  QEMUIOVector *qiov, int nb_sectors,
4190                                  BlockDriverCompletionFunc *cb, void *opaque)
4192     trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
4194     return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
/* Async write counterpart of bdrv_aio_readv() (trailing arguments,
 * including the is_write indicator, elided in this excerpt). */
4198 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
4199                                   QEMUIOVector *qiov, int nb_sectors,
4200                                   BlockDriverCompletionFunc *cb, void *opaque)
4202     trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
4204     return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
/* Async zero-write: NULL qiov plus BDRV_REQ_ZERO_WRITE, mirroring the
 * synchronous bdrv_co_write_zeroes(). */
4208 BlockDriverAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
4209         int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
4210         BlockDriverCompletionFunc *cb, void *opaque)
4212     trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);
4214     return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
4215                                  BDRV_REQ_ZERO_WRITE | flags,
/* Completion bookkeeping for bdrv_aio_multiwrite(): one per-request
 * callback entry, with an optional merged qiov to free afterwards
 * (other fields elided in this excerpt). */
4220 typedef struct MultiwriteCB {
4225         BlockDriverCompletionFunc *cb;
4227         QEMUIOVector *free_qiov;
/* Invoke every caller-supplied completion callback with the aggregate error
 * and release any qiovs that were allocated for merged requests. */
4231 static void multiwrite_user_cb(MultiwriteCB *mcb)
4235     for (i = 0; i < mcb->num_callbacks; i++) {
4236         mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
4237         if (mcb->callbacks[i].free_qiov) {
4238             qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
4240         g_free(mcb->callbacks[i].free_qiov);
/* Per-submitted-request completion: record the first error seen; when the
 * last outstanding request finishes, fire the user callbacks. */
4244 static void multiwrite_cb(void *opaque, int ret)
4246     MultiwriteCB *mcb = opaque;
4248     trace_multiwrite_cb(mcb, ret);
4250     if (ret < 0 && !mcb->error) {
4254     mcb->num_requests--;
4255     if (mcb->num_requests == 0) {
4256         multiwrite_user_cb(mcb);
/* qsort comparator ordering BlockRequests by start sector. */
4261 static int multiwrite_req_compare(const void *a, const void *b)
4263     const BlockRequest *req1 = a, *req2 = b;
4266      * Note that we can't simply subtract req2->sector from req1->sector
4267      * here as that could overflow the return value.
4269     if (req1->sector > req2->sector) {
4271     } else if (req1->sector < req2->sector) {
4279  * Takes a bunch of requests and tries to merge them. Returns the number of
4280  * requests that remain after merging.
/* NOTE(review): loop framing, 'merge' flag handling, and the final return
 * are elided in this excerpt. */
4282 static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
4283                             int num_reqs, MultiwriteCB *mcb)
4287     // Sort requests by start sector
4288     qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
4290     // Check if adjacent requests touch the same clusters. If so, combine them,
4291     // filling up gaps with zero sectors.
4293     for (i = 1; i < num_reqs; i++) {
4295         int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
4297         // Handle exactly sequential writes and overlapping writes.
4298         if (reqs[i].sector <= oldreq_last) {
// Don't merge if the combined iovec would exceed the per-request limit.
4302         if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
4308             QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
4309             qemu_iovec_init(qiov,
4310                 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
4312             // Add the first request to the merged one. If the requests are
4313             // overlapping, drop the last sectors of the first request.
4314             size = (reqs[i].sector - reqs[outidx].sector) << 9;
4315             qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
4317             // We should not need to add any zeros between the two requests
4318             assert (reqs[i].sector <= oldreq_last);
4320             // Add the second request
4321             qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
4323             reqs[outidx].nb_sectors = qiov->size >> 9;
4324             reqs[outidx].qiov = qiov;
// Record the merged qiov so multiwrite_user_cb() frees it on completion.
4326             mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
4329             reqs[outidx].sector     = reqs[i].sector;
4330             reqs[outidx].nb_sectors = reqs[i].nb_sectors;
4331             reqs[outidx].qiov       = reqs[i].qiov;
4339 * Submit multiple AIO write requests at once.
4341 * On success, the function returns 0 and all requests in the reqs array have
4342 * been submitted. In error case this function returns -1, and any of the
4343 * requests may or may not be submitted yet. In particular, this means that the
4344 * callback will be called for some of the requests, for others it won't. The
4345 * caller must check the error field of the BlockRequest to wait for the right
4346 * callbacks (if error != 0, no callback will be called).
4348 * The implementation may modify the contents of the reqs array, e.g. to merge
4349 * requests. However, the fields opaque and error are left unmodified as they
4350 * are used to signal failure for a single request to the caller.
4352 int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
4357 /* don't submit writes if we don't have a medium */
4358 if (bs->drv == NULL) {
4359 for (i = 0; i < num_reqs; i++) {
4360 reqs[i].error = -ENOMEDIUM;
4365 if (num_reqs == 0) {
4369 // Create MultiwriteCB structure
4370 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
4371 mcb->num_requests = 0;
4372 mcb->num_callbacks = num_reqs;
/* Record the caller's completion callbacks before merging rearranges reqs. */
4374 for (i = 0; i < num_reqs; i++) {
4375 mcb->callbacks[i].cb = reqs[i].cb;
4376 mcb->callbacks[i].opaque = reqs[i].opaque;
4379 // Check for mergable requests
4380 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
4382 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
4384 /* Run the aio requests. */
4385 mcb->num_requests = num_reqs;
4386 for (i = 0; i < num_reqs; i++) {
4387 bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
4388 reqs[i].nb_sectors, reqs[i].flags,
/* Cancel an in-flight AIO request by dispatching to its AIOCBInfo hook. */
4396 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
4398 acb->aiocb_info->cancel(acb);
4401 /**************************************************************/
4402 /* async block device emulation */
/* AIOCB used to emulate AIO on top of a driver's synchronous read/write:
 * the result is delivered from a bottom half after the sync call. */
4404 typedef struct BlockDriverAIOCBSync {
4405 BlockDriverAIOCB common;
4408 /* vector translation state */
4412 } BlockDriverAIOCBSync;
/* Cancel path for the sync-emulation AIOCB: drop the pending bottom half
 * so the completion callback is never delivered, then release the AIOCB. */
4414 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
4416 BlockDriverAIOCBSync *acb =
4417 container_of(blockacb, BlockDriverAIOCBSync, common);
4418 qemu_bh_delete(acb->bh);
4420 qemu_aio_release(acb);
4423 static const AIOCBInfo bdrv_em_aiocb_info = {
4424 .aiocb_size = sizeof(BlockDriverAIOCBSync),
4425 .cancel = bdrv_aio_cancel_em,
/* Bottom half that completes a sync-emulated AIO request: copy the bounce
 * buffer back for reads, free it, fire the user callback, then tear down
 * the bottom half and the AIOCB itself. */
4428 static void bdrv_aio_bh_cb(void *opaque)
4430 BlockDriverAIOCBSync *acb = opaque;
4433 qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
4434 qemu_vfree(acb->bounce);
4435 acb->common.cb(acb->common.opaque, acb->ret);
4436 qemu_bh_delete(acb->bh);
4438 qemu_aio_release(acb);
/* Emulate an asynchronous vectored read/write using the driver's synchronous
 * bdrv_read/bdrv_write: the I/O happens immediately into/out of a bounce
 * buffer, and completion is deferred to a bottom half (bdrv_aio_bh_cb). */
4441 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
4445 BlockDriverCompletionFunc *cb,
4450 BlockDriverAIOCBSync *acb;
4452 acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
4453 acb->is_write = is_write;
/* bounce buffer gives the sync driver a single contiguous region */
4455 acb->bounce = qemu_blockalign(bs, qiov->size);
4456 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
4459 qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
4460 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
4462 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
/* completion always runs from the BH, never from this call */
4465 qemu_bh_schedule(acb->bh);
4467 return &acb->common;
/* AIO read emulated via the synchronous driver path (is_write = 0). */
4470 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
4471 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
4472 BlockDriverCompletionFunc *cb, void *opaque)
4474 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
/* AIO write emulated via the synchronous driver path (is_write = 1). */
4477 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
4478 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
4479 BlockDriverCompletionFunc *cb, void *opaque)
4481 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
/* AIOCB backing the coroutine-based AIO emulation: the request runs in a
 * coroutine and completion is posted through a bottom half. */
4485 typedef struct BlockDriverAIOCBCoroutine {
4486 BlockDriverAIOCB common;
4491 } BlockDriverAIOCBCoroutine;
/* Cancel hook for the coroutine AIOCB. */
4493 static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
4495 BlockDriverAIOCBCoroutine *acb =
4496 container_of(blockacb, BlockDriverAIOCBCoroutine, common);
4505 static const AIOCBInfo bdrv_em_co_aiocb_info = {
4506 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
4507 .cancel = bdrv_aio_co_cancel_em,
/* Bottom half shared by the coroutine AIO paths: deliver the stored error
 * code to the user callback, then destroy the BH and release the AIOCB. */
4510 static void bdrv_co_em_bh(void *opaque)
4512 BlockDriverAIOCBCoroutine *acb = opaque;
4514 acb->common.cb(acb->common.opaque, acb->req.error);
4520 qemu_bh_delete(acb->bh);
4521 qemu_aio_release(acb);
4524 /* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
/* Coroutine entry point for bdrv_co_aio_rw_vector(): performs the actual
 * read or write, stashes the result in acb->req.error and schedules the
 * completion bottom half. */
4525 static void coroutine_fn bdrv_co_do_rw(void *opaque)
4527 BlockDriverAIOCBCoroutine *acb = opaque;
4528 BlockDriverState *bs = acb->common.bs;
4530 if (!acb->is_write) {
4531 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
4532 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
4534 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
4535 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
/* report completion from BH context, not from inside the coroutine */
4538 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
4539 qemu_bh_schedule(acb->bh);
/* Start an AIO read/write implemented on top of the coroutine request path.
 * Fills in the request descriptor and enters bdrv_co_do_rw in a fresh
 * coroutine; completion is reported asynchronously via bdrv_co_em_bh. */
4542 static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
4546 BdrvRequestFlags flags,
4547 BlockDriverCompletionFunc *cb,
4552 BlockDriverAIOCBCoroutine *acb;
4554 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
4555 acb->req.sector = sector_num;
4556 acb->req.nb_sectors = nb_sectors;
4557 acb->req.qiov = qiov;
4558 acb->req.flags = flags;
4559 acb->is_write = is_write;
4562 co = qemu_coroutine_create(bdrv_co_do_rw);
4563 qemu_coroutine_enter(co, acb);
4565 return &acb->common;
/* Coroutine entry for bdrv_aio_flush(): run the synchronous-style coroutine
 * flush and schedule completion through the shared bottom half. */
4568 static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
4570 BlockDriverAIOCBCoroutine *acb = opaque;
4571 BlockDriverState *bs = acb->common.bs;
4573 acb->req.error = bdrv_co_flush(bs);
4574 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
4575 qemu_bh_schedule(acb->bh);
/* Asynchronous flush: wraps bdrv_co_flush() in a coroutine-backed AIOCB. */
4578 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
4579 BlockDriverCompletionFunc *cb, void *opaque)
4581 trace_bdrv_aio_flush(bs, opaque);
4584 BlockDriverAIOCBCoroutine *acb;
4586 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
4589 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
4590 qemu_coroutine_enter(co, acb);
4592 return &acb->common;
/* Coroutine entry for bdrv_aio_discard(): perform the discard and post the
 * result via the shared completion bottom half. */
4595 static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
4597 BlockDriverAIOCBCoroutine *acb = opaque;
4598 BlockDriverState *bs = acb->common.bs;
4600 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
4601 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
4602 qemu_bh_schedule(acb->bh);
/* Asynchronous discard: wraps bdrv_co_discard() in a coroutine-backed AIOCB. */
4605 BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
4606 int64_t sector_num, int nb_sectors,
4607 BlockDriverCompletionFunc *cb, void *opaque)
4610 BlockDriverAIOCBCoroutine *acb;
4612 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
4614 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
4615 acb->req.sector = sector_num;
4616 acb->req.nb_sectors = nb_sectors;
4618 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
4619 qemu_coroutine_enter(co, acb);
4621 return &acb->common;
/* Register all built-in block drivers (module init hook). */
4624 void bdrv_init(void)
4626 module_call_init(MODULE_INIT_BLOCK);
/* Same as bdrv_init(), but restricts format probing to the whitelist. */
4629 void bdrv_init_with_whitelist(void)
4631 use_bdrv_whitelist = 1;
/* Allocate an AIOCB of the size requested by aiocb_info and initialize the
 * common fields.  Paired with qemu_aio_release(). */
4635 void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
4636 BlockDriverCompletionFunc *cb, void *opaque)
4638 BlockDriverAIOCB *acb;
4640 acb = g_slice_alloc(aiocb_info->aiocb_size);
4641 acb->aiocb_info = aiocb_info;
4644 acb->opaque = opaque;
/* Free an AIOCB previously obtained from qemu_aio_get(). */
4648 void qemu_aio_release(void *p)
4650 BlockDriverAIOCB *acb = p;
4651 g_slice_free1(acb->aiocb_info->aiocb_size, acb);
4654 /**************************************************************/
4655 /* Coroutine block device emulation */
/* Couples a driver AIO completion back to a waiting coroutine. */
4657 typedef struct CoroutineIOCompletion {
4658 Coroutine *coroutine;
4660 } CoroutineIOCompletion;
/* AIO completion callback: store the result and re-enter the coroutine that
 * is parked in qemu_coroutine_yield() below. */
4662 static void bdrv_co_io_em_complete(void *opaque, int ret)
4664 CoroutineIOCompletion *co = opaque;
4667 qemu_coroutine_enter(co->coroutine, NULL);
/* Emulate a coroutine read/write on top of a driver's AIO interface: submit
 * the AIO request, yield until bdrv_co_io_em_complete() wakes us. */
4670 static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
4671 int nb_sectors, QEMUIOVector *iov,
4674 CoroutineIOCompletion co = {
4675 .coroutine = qemu_coroutine_self(),
4677 BlockDriverAIOCB *acb;
4680 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
4681 bdrv_co_io_em_complete, &co);
4683 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
4684 bdrv_co_io_em_complete, &co);
4687 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
4691 qemu_coroutine_yield();
/* Coroutine read emulated via the driver's AIO interface. */
4696 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
4697 int64_t sector_num, int nb_sectors,
4700 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
/* Coroutine write emulated via the driver's AIO interface. */
4703 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
4704 int64_t sector_num, int nb_sectors,
4707 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
/* Coroutine trampoline used by the synchronous bdrv_flush() wrapper. */
4710 static void coroutine_fn bdrv_flush_co_entry(void *opaque)
4712 RwCo *rwco = opaque;
4714 rwco->ret = bdrv_co_flush(rwco->bs);
/* Flush this BlockDriverState, coroutine version.  Flushes to the OS first,
 * then (unless cache=unsafe) to the disk using whichever flush interface the
 * driver implements, and finally recurses into bs->file for the protocol. */
4717 int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
/* nothing to do without a medium, or when read-only (no dirty data) */
4721 if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
4725 /* Write back cached data to the OS even with cache=unsafe */
4726 BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
4727 if (bs->drv->bdrv_co_flush_to_os) {
4728 ret = bs->drv->bdrv_co_flush_to_os(bs);
4734 /* But don't actually force it to the disk with cache=unsafe */
4735 if (bs->open_flags & BDRV_O_NO_FLUSH) {
4739 BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
4740 if (bs->drv->bdrv_co_flush_to_disk) {
4741 ret = bs->drv->bdrv_co_flush_to_disk(bs);
4742 } else if (bs->drv->bdrv_aio_flush) {
/* driver only has the AIO interface: submit and yield for completion */
4743 BlockDriverAIOCB *acb;
4744 CoroutineIOCompletion co = {
4745 .coroutine = qemu_coroutine_self(),
4748 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
4752 qemu_coroutine_yield();
4757 * Some block drivers always operate in either writethrough or unsafe
4758 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
4759 * know how the server works (because the behaviour is hardcoded or
4760 * depends on server-side configuration), so we can't ensure that
4761 * everything is safe on disk. Returning an error doesn't work because
4762 * that would break guests even if the server operates in writethrough
4765 * Let's hope the user knows what he's doing.
4773 /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
4774 * in the case of cache=unsafe, so there are no useless flushes.
4777 return bdrv_co_flush(bs->file);
/* Drop any cached metadata/state so it is re-read from the image; used
 * e.g. after incoming migration. */
4780 void bdrv_invalidate_cache(BlockDriverState *bs)
4782 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
4783 bs->drv->bdrv_invalidate_cache(bs);
/* Invalidate caches on every registered BlockDriverState. */
4787 void bdrv_invalidate_cache_all(void)
4789 BlockDriverState *bs;
4791 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
4792 bdrv_invalidate_cache(bs);
/* Clear the BDRV_O_INCOMING flag on all devices once migration completes. */
4796 void bdrv_clear_incoming_migration_all(void)
4798 BlockDriverState *bs;
4800 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
4801 bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
/* Synchronous flush wrapper: runs bdrv_flush_co_entry() either inline (when
 * already in coroutine context) or in a new coroutine, polling until done. */
4805 int bdrv_flush(BlockDriverState *bs)
4813 if (qemu_in_coroutine()) {
4814 /* Fast-path if already in coroutine context */
4815 bdrv_flush_co_entry(&rwco);
4817 co = qemu_coroutine_create(bdrv_flush_co_entry);
4818 qemu_coroutine_enter(co, &rwco);
/* NOT_DONE sentinel is replaced by the coroutine when it finishes */
4819 while (rwco.ret == NOT_DONE) {
/* Parameter bundle for the synchronous bdrv_discard() wrapper. */
4827 typedef struct DiscardCo {
4828 BlockDriverState *bs;
/* Coroutine trampoline used by bdrv_discard(). */
4833 static void coroutine_fn bdrv_discard_co_entry(void *opaque)
4835 DiscardCo *rwco = opaque;
4837 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
4840 /* if no limit is specified in the BlockLimits use a default
4841 * of 32768 512-byte sectors (16 MiB) per request.
4843 #define MAX_DISCARD_DEFAULT 32768
4845 int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
4852 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
4854 } else if (bs->read_only) {
4858 bdrv_reset_dirty(bs, sector_num, nb_sectors);
4860 /* Do nothing if disabled. */
4861 if (!(bs->open_flags & BDRV_O_UNMAP)) {
4865 if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
4869 max_discard = bs->bl.max_discard ? bs->bl.max_discard : MAX_DISCARD_DEFAULT;
4870 while (nb_sectors > 0) {
4872 int num = nb_sectors;
4875 if (bs->bl.discard_alignment &&
4876 num >= bs->bl.discard_alignment &&
4877 sector_num % bs->bl.discard_alignment) {
4878 if (num > bs->bl.discard_alignment) {
4879 num = bs->bl.discard_alignment;
4881 num -= sector_num % bs->bl.discard_alignment;
4884 /* limit request size */
4885 if (num > max_discard) {
4889 if (bs->drv->bdrv_co_discard) {
4890 ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
4892 BlockDriverAIOCB *acb;
4893 CoroutineIOCompletion co = {
4894 .coroutine = qemu_coroutine_self(),
4897 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
4898 bdrv_co_io_em_complete, &co);
4902 qemu_coroutine_yield();
4906 if (ret && ret != -ENOTSUP) {
/* Synchronous discard wrapper around bdrv_co_discard(): runs the coroutine
 * entry inline when already in coroutine context, otherwise spawns a
 * coroutine and polls until the NOT_DONE sentinel is overwritten. */
4916 int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
4921 .sector_num = sector_num,
4922 .nb_sectors = nb_sectors,
4926 if (qemu_in_coroutine()) {
4927 /* Fast-path if already in coroutine context */
4928 bdrv_discard_co_entry(&rwco);
4930 co = qemu_coroutine_create(bdrv_discard_co_entry);
4931 qemu_coroutine_enter(co, &rwco);
4932 while (rwco.ret == NOT_DONE) {
4940 /**************************************************************/
4941 /* removable device support */
4944 * Return TRUE if the media is present
4946 int bdrv_is_inserted(BlockDriverState *bs)
4948 BlockDriver *drv = bs->drv;
4952 if (!drv->bdrv_is_inserted)
4954 return drv->bdrv_is_inserted(bs);
4958 * Return whether the media changed since the last call to this
4959 * function, or -ENOTSUP if we don't know. Most drivers don't know.
4961 int bdrv_media_changed(BlockDriverState *bs)
4963 BlockDriver *drv = bs->drv;
4965 if (drv && drv->bdrv_media_changed) {
4966 return drv->bdrv_media_changed(bs);
4972 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
4974 void bdrv_eject(BlockDriverState *bs, bool eject_flag)
4976 BlockDriver *drv = bs->drv;
4978 if (drv && drv->bdrv_eject) {
4979 drv->bdrv_eject(bs, eject_flag);
4982 if (bs->device_name[0] != '\0') {
4983 bdrv_emit_qmp_eject_event(bs, eject_flag);
4988 * Lock or unlock the media (if it is locked, the user won't be able
4989 * to eject it manually).
4991 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
4993 BlockDriver *drv = bs->drv;
4995 trace_bdrv_lock_medium(bs, locked);
4997 if (drv && drv->bdrv_lock_medium) {
4998 drv->bdrv_lock_medium(bs, locked);
5002 /* needed for generic scsi interface */
/* Synchronous ioctl pass-through to the driver (e.g. scsi-generic). */
5004 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
5006 BlockDriver *drv = bs->drv;
5008 if (drv && drv->bdrv_ioctl)
5009 return drv->bdrv_ioctl(bs, req, buf);
/* Asynchronous ioctl pass-through; returns NULL-equivalent fallthrough when
 * the driver does not implement bdrv_aio_ioctl. */
5013 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
5014 unsigned long int req, void *buf,
5015 BlockDriverCompletionFunc *cb, void *opaque)
5017 BlockDriver *drv = bs->drv;
5019 if (drv && drv->bdrv_aio_ioctl)
5020 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
/* Record the block size the guest device exposes (used for alignment). */
5024 void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
5026 bs->guest_block_size = align;
/* Allocate a buffer aligned for O_DIRECT-style access on this device. */
5029 void *qemu_blockalign(BlockDriverState *bs, size_t size)
5031 return qemu_memalign(bdrv_opt_mem_align(bs), size);
5035 * Check if all memory in this vector is sector aligned.
5037 bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
5040 size_t alignment = bdrv_opt_mem_align(bs);
5042 for (i = 0; i < qiov->niov; i++) {
/* both the base address and the length of each element must be aligned */
5043 if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
5046 if (qiov->iov[i].iov_len % alignment) {
/* Create a dirty bitmap tracking writes to bs at the given byte granularity
 * (must be a power of two); the bitmap is linked into bs->dirty_bitmaps. */
5054 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity)
5056 int64_t bitmap_size;
5057 BdrvDirtyBitmap *bitmap;
5059 assert((granularity & (granularity - 1)) == 0);
/* convert byte granularity to sectors; must stay non-zero */
5061 granularity >>= BDRV_SECTOR_BITS;
5062 assert(granularity);
5063 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS);
5064 bitmap = g_malloc0(sizeof(BdrvDirtyBitmap));
/* ffs(granularity)-1 == log2(granularity) since granularity is a power of 2 */
5065 bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
5066 QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
/* Unlink and free a dirty bitmap previously created on bs; no-op when the
 * bitmap is not found in the list. */
5070 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
5072 BdrvDirtyBitmap *bm, *next;
5073 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
5075 QLIST_REMOVE(bitmap, list);
5076 hbitmap_free(bitmap->bitmap);
/* Build a QAPI BlockDirtyInfoList describing every dirty bitmap on bs;
 * caller owns (and must free) the returned list. */
5083 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
5085 BdrvDirtyBitmap *bm;
5086 BlockDirtyInfoList *list = NULL;
5087 BlockDirtyInfoList **plist = &list;
5089 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
5090 BlockDirtyInfo *info = g_malloc0(sizeof(BlockDirtyInfo));
5091 BlockDirtyInfoList *entry = g_malloc0(sizeof(BlockDirtyInfoList));
5092 info->count = bdrv_get_dirty_count(bs, bm);
5094 ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bm->bitmap));
5095 entry->value = info;
5097 plist = &entry->next;
/* Query whether a single sector is marked dirty in the given bitmap. */
5103 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
5106 return hbitmap_get(bitmap->bitmap, sector);
/* Initialize an iterator over the dirty bits of a bitmap, from sector 0. */
5112 void bdrv_dirty_iter_init(BlockDriverState *bs,
5113 BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
5115 hbitmap_iter_init(hbi, bitmap->bitmap, 0);
/* Mark a sector range dirty in every bitmap attached to bs. */
5118 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
5121 BdrvDirtyBitmap *bitmap;
5122 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5123 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
/* Clear a sector range in every bitmap attached to bs. */
5127 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors)
5129 BdrvDirtyBitmap *bitmap;
5130 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5131 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
/* Number of dirty sectors recorded in the bitmap. */
5135 int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
5137 return hbitmap_count(bitmap->bitmap);
5140 /* Get a reference to bs */
5141 void bdrv_ref(BlockDriverState *bs)
5146 /* Release a previously grabbed reference to bs.
5147 * If after releasing, reference count is zero, the BlockDriverState is
5149 void bdrv_unref(BlockDriverState *bs)
5151 assert(bs->refcnt > 0);
5152 if (--bs->refcnt == 0) {
/* Mark/unmark bs as owned by a block job; transitions must alternate. */
5157 void bdrv_set_in_use(BlockDriverState *bs, int in_use)
5159 assert(bs->in_use != in_use);
5160 bs->in_use = in_use;
/* Query the in-use flag set by bdrv_set_in_use(). */
5163 int bdrv_in_use(BlockDriverState *bs)
/* Enable I/O status tracking for this device, starting in the OK state. */
5168 void bdrv_iostatus_enable(BlockDriverState *bs)
5170 bs->iostatus_enabled = true;
5171 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
5174 /* The I/O status is only enabled if the drive explicitly
5175 * enables it _and_ the VM is configured to stop on errors */
5176 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
5178 return (bs->iostatus_enabled &&
5179 (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
5180 bs->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
5181 bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
/* Turn off I/O status tracking. */
5184 void bdrv_iostatus_disable(BlockDriverState *bs)
5186 bs->iostatus_enabled = false;
/* Reset the status to OK (e.g. after the user resumes the VM) and let any
 * attached block job reset its own iostatus as well. */
5189 void bdrv_iostatus_reset(BlockDriverState *bs)
5191 if (bdrv_iostatus_is_enabled(bs)) {
5192 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
5194 block_job_iostatus_reset(bs->job);
/* Record a failure: ENOSPC maps to NOSPACE, anything else to FAILED.
 * Only the first error is kept while the status is OK. */
5199 void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
5201 assert(bdrv_iostatus_is_enabled(bs));
5202 if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
5203 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
5204 BLOCK_DEVICE_IO_STATUS_FAILED;
/* Begin accounting one I/O operation: snapshot byte count, start time and
 * operation type into the caller-provided cookie. */
5209 bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
5210 enum BlockAcctType type)
5212 assert(type < BDRV_MAX_IOTYPE);
5214 cookie->bytes = bytes;
5215 cookie->start_time_ns = get_clock();
5216 cookie->type = type;
/* Finish accounting: fold the cookie's byte count, op count and elapsed
 * time into the per-type statistics on bs. */
5220 bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
5222 assert(cookie->type < BDRV_MAX_IOTYPE);
5224 bs->nr_bytes[cookie->type] += cookie->bytes;
5225 bs->nr_ops[cookie->type]++;
5226 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
/* Create a new disk image (qemu-img create / drive-mirror target, etc.).
 * Resolves the format and protocol drivers, merges their create options
 * with the user's -o string, validates backing-file settings, infers the
 * size from the backing file when not given, and finally calls
 * bdrv_create().  Errors are reported through errp. */
5229 void bdrv_img_create(const char *filename, const char *fmt,
5230 const char *base_filename, const char *base_fmt,
5231 char *options, uint64_t img_size, int flags,
5232 Error **errp, bool quiet)
5234 QEMUOptionParameter *param = NULL, *create_options = NULL;
5235 QEMUOptionParameter *backing_fmt, *backing_file, *size;
5236 BlockDriver *drv, *proto_drv;
5237 BlockDriver *backing_drv = NULL;
5238 Error *local_err = NULL;
5241 /* Find driver and parse its options */
5242 drv = bdrv_find_format(fmt);
5244 error_setg(errp, "Unknown file format '%s'", fmt);
5248 proto_drv = bdrv_find_protocol(filename, true);
5250 error_setg(errp, "Unknown protocol '%s'", filename);
/* combined option list: format driver options + protocol driver options */
5254 create_options = append_option_parameters(create_options,
5255 drv->create_options);
5256 create_options = append_option_parameters(create_options,
5257 proto_drv->create_options);
5259 /* Create parameter list with default values */
5260 param = parse_option_parameters("", create_options, param);
5262 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
5264 /* Parse -o options */
5266 param = parse_option_parameters(options, create_options, param);
5267 if (param == NULL) {
5268 error_setg(errp, "Invalid options for file format '%s'.", fmt);
5273 if (base_filename) {
5274 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
5276 error_setg(errp, "Backing file not supported for file format '%s'",
5283 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
5284 error_setg(errp, "Backing file format not supported for file "
5285 "format '%s'", fmt);
/* an image must never use itself as its own backing file */
5290 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
5291 if (backing_file && backing_file->value.s) {
5292 if (!strcmp(filename, backing_file->value.s)) {
5293 error_setg(errp, "Error: Trying to create an image with the "
5294 "same filename as the backing file");
5299 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
5300 if (backing_fmt && backing_fmt->value.s) {
5301 backing_drv = bdrv_find_format(backing_fmt->value.s);
5303 error_setg(errp, "Unknown backing file format '%s'",
5304 backing_fmt->value.s);
5309 // The size for the image must always be specified, with one exception:
5310 // If we are using a backing file, we can obtain the size from there
5311 size = get_option_parameter(param, BLOCK_OPT_SIZE);
5312 if (size && size->value.n == -1) {
5313 if (backing_file && backing_file->value.s) {
5314 BlockDriverState *bs;
5319 /* backing files always opened read-only */
5321 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
5324 ret = bdrv_open(&bs, backing_file->value.s, NULL, NULL, back_flags,
5325 backing_drv, &local_err);
5327 error_setg_errno(errp, -ret, "Could not open '%s': %s",
5328 backing_file->value.s,
5329 error_get_pretty(local_err));
5330 error_free(local_err);
/* inherit the backing file's size */
5334 bdrv_get_geometry(bs, &size);
5337 snprintf(buf, sizeof(buf), "%" PRId64, size);
5338 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
5342 error_setg(errp, "Image creation needs a size parameter");
5348 printf("Formatting '%s', fmt=%s ", filename, fmt);
5349 print_option_parameters(param);
5352 ret = bdrv_create(drv, filename, param, &local_err);
5353 if (ret == -EFBIG) {
5354 /* This is generally a better message than whatever the driver would
5355 * deliver (especially because of the cluster_size_hint), since that
5356 * is most probably not much different from "image too large". */
5357 const char *cluster_size_hint = "";
5358 if (get_option_parameter(create_options, BLOCK_OPT_CLUSTER_SIZE)) {
5359 cluster_size_hint = " (try using a larger cluster size)";
5361 error_setg(errp, "The image size is too large for file format '%s'"
5362 "%s", fmt, cluster_size_hint);
5363 error_free(local_err);
/* common cleanup path */
5368 free_option_parameters(create_options);
5369 free_option_parameters(param);
5372 error_propagate(errp, local_err);
/* Return the AioContext this BDS runs in. */
5376 AioContext *bdrv_get_aio_context(BlockDriverState *bs)
5378 /* Currently BlockDriverState always uses the main loop AioContext */
5379 return qemu_get_aio_context();
/* Register a notifier invoked before every write to bs (used by backup
 * and similar copy-before-write machinery). */
5382 void bdrv_add_before_write_notifier(BlockDriverState *bs,
5383 NotifierWithReturn *notifier)
5385 notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
/* Change image creation options of an existing image (qemu-img amend);
 * fails when the driver does not implement bdrv_amend_options. */
5388 int bdrv_amend_options(BlockDriverState *bs, QEMUOptionParameter *options)
5390 if (bs->drv->bdrv_amend_options == NULL) {
5393 return bs->drv->bdrv_amend_options(bs, options);
5396 /* Used to recurse on single child block filters.
5397 * Single child block filter will store their child in bs->file.
5399 bool bdrv_generic_is_first_non_filter(BlockDriverState *bs,
5400 BlockDriverState *candidate)
/* a non-filter node matches only if it is the candidate itself */
5406 if (!bs->drv->authorizations[BS_IS_A_FILTER]) {
5407 if (bs == candidate) {
/* filters that don't authorize pass-down stop the recursion here */
5414 if (!bs->drv->authorizations[BS_FILTER_PASS_DOWN]) {
5422 return bdrv_recurse_is_first_non_filter(bs->file, candidate);
/* Dispatch to a driver-specific recursion hook when available, otherwise
 * fall back to the generic single-child recursion above. */
5425 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
5426 BlockDriverState *candidate)
5428 if (bs->drv && bs->drv->bdrv_recurse_is_first_non_filter) {
5429 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
5432 return bdrv_generic_is_first_non_filter(bs, candidate);
5435 /* This function checks if the candidate is the first non filter bs down it's
5436 * bs chain. Since we don't have pointers to parents it explore all bs chains
5437 * from the top. Some filters can choose not to pass down the recursion.
5439 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
5441 BlockDriverState *bs;
5443 /* walk down the bs forest recursively */
5444 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
5447 perm = bdrv_recurse_is_first_non_filter(bs, candidate);
5449 /* candidate is the first non filter */