2 * QEMU System Emulator block driver
4 * Copyright (c) 2003 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 #include "config-host.h"
25 #include "qemu-common.h"
27 #include "monitor/monitor.h"
28 #include "block/block_int.h"
29 #include "block/blockjob.h"
30 #include "qemu/module.h"
31 #include "qapi/qmp/qjson.h"
32 #include "sysemu/sysemu.h"
33 #include "qemu/notify.h"
34 #include "block/coroutine.h"
35 #include "block/qapi.h"
36 #include "qmp-commands.h"
37 #include "qemu/timer.h"
40 #include <sys/types.h>
42 #include <sys/ioctl.h>
43 #include <sys/queue.h>
/* Per-BDS dirty bitmap, linked into the BlockDriverState's dirty_bitmaps
 * list (see bdrv_new(), which initializes that list).
 * NOTE(review): interior lines (bitmap storage field, closing brace) are
 * elided in this extract -- confirm against the full file before editing. */
53 struct BdrvDirtyBitmap {
55 QLIST_ENTRY(BdrvDirtyBitmap) list;
58 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
/* Forward declarations for static helpers defined later in this file:
 * AIO/coroutine emulation shims and the common read/write/zero paths.
 * NOTE(review): some parameter lines are elided in this extract. */
60 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
61 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
62 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
63 BlockDriverCompletionFunc *cb, void *opaque);
64 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
65 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
66 BlockDriverCompletionFunc *cb, void *opaque);
67 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
68 int64_t sector_num, int nb_sectors,
70 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
71 int64_t sector_num, int nb_sectors,
73 static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
74 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
75 BdrvRequestFlags flags);
76 static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
77 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
78 BdrvRequestFlags flags);
79 static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
83 BdrvRequestFlags flags,
84 BlockDriverCompletionFunc *cb,
87 static void coroutine_fn bdrv_co_do_rw(void *opaque);
88 static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
89 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
/* All named BlockDriverStates, in creation order (see bdrv_new()). */
91 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
92 QTAILQ_HEAD_INITIALIZER(bdrv_states);
/* BDSes addressable by node-name (see bdrv_assign_node_name()). */
94 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
95 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
/* All registered block drivers (see bdrv_register()). */
97 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
98 QLIST_HEAD_INITIALIZER(bdrv_drivers);
100 /* If non-zero, use only whitelisted block drivers */
101 static int use_bdrv_whitelist;
/* Return non-zero if filename begins with an ASCII drive letter, i.e. a
 * DOS-style "c:..." prefix.
 * NOTE(review): the tail of the expression is elided in this extract. */
104 static int is_windows_drive_prefix(const char *filename)
106 return (((filename[0] >= 'a' && filename[0] <= 'z') ||
107 (filename[0] >= 'A' && filename[0] <= 'Z')) &&
/* Return non-zero if filename names a whole Windows drive or device:
 * a bare drive-letter prefix, or a "\\.\..." / "//./..." device path. */
111 int is_windows_drive(const char *filename)
113 if (is_windows_drive_prefix(filename) &&
116 if (strstart(filename, "\\\\.\\", NULL) ||
117 strstart(filename, "//./", NULL))
123 /* throttling disk I/O limits */
/* Apply a new throttle configuration to bs, then kick both throttled
 * request queues (index 0 = reads, 1 = writes; cf. the timer callbacks
 * below) so queued requests re-evaluate against the new limits. */
124 void bdrv_set_io_limits(BlockDriverState *bs,
129 throttle_config(&bs->throttle_state, cfg);
131 for (i = 0; i < 2; i++) {
132 qemu_co_enter_next(&bs->throttled_reqs[i]);
136 /* this function drain all the throttled IOs */
/* Temporarily clears io_limits_enabled so the queued coroutines run
 * unthrottled, drains both queues, then restores the original flag.
 * Returns whether any request was actually drained (per the name;
 * the return statement itself is elided in this extract). */
137 static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
139 bool drained = false;
140 bool enabled = bs->io_limits_enabled;
143 bs->io_limits_enabled = false;
145 for (i = 0; i < 2; i++) {
146 while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
151 bs->io_limits_enabled = enabled;
/* Disable throttling on bs: drop the enabled flag, flush any queued
 * requests, and tear down the throttle state. */
156 void bdrv_io_limits_disable(BlockDriverState *bs)
158 bs->io_limits_enabled = false;
160 bdrv_start_throttled_reqs(bs);
162 throttle_destroy(&bs->throttle_state);
/* Throttle timer callback: release the next queued READ request. */
165 static void bdrv_throttle_read_timer_cb(void *opaque)
167 BlockDriverState *bs = opaque;
168 qemu_co_enter_next(&bs->throttled_reqs[0]);
/* Throttle timer callback: release the next queued WRITE request. */
171 static void bdrv_throttle_write_timer_cb(void *opaque)
173 BlockDriverState *bs = opaque;
174 qemu_co_enter_next(&bs->throttled_reqs[1]);
177 /* should be called before bdrv_set_io_limits if a limit is set */
/* Initialize the throttle state with the read/write timer callbacks and
 * mark throttling enabled.  Must not already be enabled (asserted). */
178 void bdrv_io_limits_enable(BlockDriverState *bs)
180 assert(!bs->io_limits_enabled);
181 throttle_init(&bs->throttle_state,
183 bdrv_throttle_read_timer_cb,
184 bdrv_throttle_write_timer_cb,
186 bs->io_limits_enabled = true;
189 /* This function makes an IO wait if needed
191  * @nb_sectors: the number of sectors of the IO
192  * @is_write: is the IO a write
/* Throttling gate on the I/O path: blocks the calling coroutine while
 * the request must wait (or while earlier requests of the same kind are
 * queued), accounts the bytes once admitted, then wakes the next waiter
 * unless it too must be delayed by the timer. */
194 static void bdrv_io_limits_intercept(BlockDriverState *bs,
198 /* does this io must wait */
199 bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);
201 /* if must wait or any request of this type throttled queue the IO */
203 !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
204 qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
207 /* the IO will be executed, do the accounting */
208 throttle_account(&bs->throttle_state, is_write, bytes);
211 /* if the next request must wait -> do nothing */
212 if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
216 /* else queue next request for execution */
217 qemu_co_queue_next(&bs->throttled_reqs[is_write]);
/* Return the optimal memory alignment for buffers used with bs; falls
 * back to a safe default when bs or its driver is missing. */
220 size_t bdrv_opt_mem_align(BlockDriverState *bs)
222 if (!bs || !bs->drv) {
223 /* 4k should be on the safe side */
227 return bs->bl.opt_mem_alignment;
230 /* check if the path starts with "<protocol>:" */
/* Windows drive paths ("c:\...") are NOT protocols; on Windows the scan
 * also treats '\\' as a path separator when looking for the ':'. */
231 static int path_has_protocol(const char *path)
236 if (is_windows_drive(path) ||
237 is_windows_drive_prefix(path)) {
240 p = path + strcspn(path, ":/\\");
242 p = path + strcspn(path, ":/");
/* Return non-zero if path is absolute.  On Windows this additionally
 * accepts drive-letter paths and '\\' as the leading separator. */
248 int path_is_absolute(const char *path)
251 /* specific case for names like: "\\.\d:" */
252 if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
255 return (*path == '/' || *path == '\\');
257 return (*path == '/');
261 /* if filename is absolute, just copy it to dest. Otherwise, build a
262 path to it by considering it is relative to base_path. URL are
/* (comment truncated in this extract -- presumably "URLs are supported";
 * confirm against the full file.)  Writes at most dest_size bytes. */
264 void path_combine(char *dest, int dest_size,
265 const char *base_path,
266 const char *filename)
273 if (path_is_absolute(filename)) {
274 pstrcpy(dest, dest_size, filename);
/* Relative case: find the end of base_path's directory part -- after a
 * protocol ':' and/or the last '/' (or '\\') -- then append filename. */
276 p = strchr(base_path, ':');
281 p1 = strrchr(base_path, '/');
285 p2 = strrchr(base_path, '\\');
297 if (len > dest_size - 1)
299 memcpy(dest, base_path, len);
301 pstrcat(dest, dest_size, filename);
/* Resolve bs->backing_file into dest: copied verbatim when empty or
 * already protocol-qualified, otherwise combined relative to
 * bs->filename via path_combine(). */
305 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
307 if (bs->backing_file[0] == '\0' || path_has_protocol(bs->backing_file)) {
308 pstrcpy(dest, sz, bs->backing_file);
310 path_combine(dest, sz, bs->filename, bs->backing_file);
/* Register a block driver, filling in coroutine and AIO emulation
 * callbacks for drivers that only implement one of the two interfaces,
 * then add it to the global bdrv_drivers list. */
314 void bdrv_register(BlockDriver *bdrv)
316 /* Block drivers without coroutine functions need emulation */
317 if (!bdrv->bdrv_co_readv) {
318 bdrv->bdrv_co_readv = bdrv_co_readv_em;
319 bdrv->bdrv_co_writev = bdrv_co_writev_em;
321 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
322  * the block driver lacks aio we need to emulate that too.
324 if (!bdrv->bdrv_aio_readv) {
325 /* add AIO emulation layer */
326 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
327 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
331 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
334 /* create a new block device (by default it is empty) */
/* Allocate and zero a BlockDriverState, name it, and -- when a non-empty
 * device name is given -- add it to the global bdrv_states list.  Also
 * initializes notifier lists and both throttled-request queues.
 * NOTE(review): the trailing return is elided in this extract. */
335 BlockDriverState *bdrv_new(const char *device_name)
337 BlockDriverState *bs;
339 bs = g_malloc0(sizeof(BlockDriverState));
340 QLIST_INIT(&bs->dirty_bitmaps);
341 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
342 if (device_name[0] != '\0') {
343 QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
345 bdrv_iostatus_disable(bs);
346 notifier_list_init(&bs->close_notifiers);
347 notifier_with_return_list_init(&bs->before_write_notifiers);
348 qemu_co_queue_init(&bs->throttled_reqs[0]);
349 qemu_co_queue_init(&bs->throttled_reqs[1]);
/* Register a notifier to be invoked when bs is closed. */
355 void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
357 notifier_list_add(&bs->close_notifiers, notify);
/* Look up a registered block driver by its format name (e.g. "qcow2");
 * the not-found return path is elided in this extract. */
360 BlockDriver *bdrv_find_format(const char *format_name)
363 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
364 if (!strcmp(drv1->format_name, format_name)) {
/* Return non-zero if drv may be used: always when both build-time
 * whitelists are empty, otherwise only when drv appears in the
 * read-write list or (for read_only use) the read-only list. */
371 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
373 static const char *whitelist_rw[] = {
374 CONFIG_BDRV_RW_WHITELIST
376 static const char *whitelist_ro[] = {
377 CONFIG_BDRV_RO_WHITELIST
381 if (!whitelist_rw[0] && !whitelist_ro[0]) {
382 return 1; /* no whitelist, anything goes */
385 for (p = whitelist_rw; *p; p++) {
386 if (!strcmp(drv->format_name, *p)) {
391 for (p = whitelist_ro; *p; p++) {
392 if (!strcmp(drv->format_name, *p)) {
/* As bdrv_find_format(), but returns NULL when the driver exists yet is
 * not whitelisted for the requested access mode. */
400 BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
403 BlockDriver *drv = bdrv_find_format(format_name);
404 return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
/* Parameter/result bundle passed to bdrv_create_co_entry() when image
 * creation runs inside a coroutine.
 * NOTE(review): most members (drv, filename, ret, err) are elided in
 * this extract; see their uses in bdrv_create(). */
407 typedef struct CreateCo {
410 QEMUOptionParameter *options;
/* Coroutine entry point for image creation: calls the driver's
 * bdrv_create callback and propagates any error into the CreateCo. */
415 static void coroutine_fn bdrv_create_co_entry(void *opaque)
417 Error *local_err = NULL;
420 CreateCo *cco = opaque;
423 ret = cco->drv->bdrv_create(cco->filename, cco->options, &local_err);
425 error_propagate(&cco->err, local_err);
/* Create an image with the given driver.  Runs bdrv_create_co_entry()
 * directly when already in coroutine context, otherwise spawns a
 * coroutine and polls until cco.ret leaves the NOT_DONE sentinel.
 * Returns the driver's result; on failure sets *errp (from the driver's
 * error when available, otherwise from -ret). */
430 int bdrv_create(BlockDriver *drv, const char* filename,
431 QEMUOptionParameter *options, Error **errp)
438 .filename = g_strdup(filename),
444 if (!drv->bdrv_create) {
445 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
450 if (qemu_in_coroutine()) {
451 /* Fast-path if already in coroutine context */
452 bdrv_create_co_entry(&cco);
454 co = qemu_coroutine_create(bdrv_create_co_entry);
455 qemu_coroutine_enter(co, &cco);
456 while (cco.ret == NOT_DONE) {
464 error_propagate(errp, cco.err);
466 error_setg_errno(errp, -ret, "Could not create image");
471 g_free(cco.filename);
/* Create an image at the protocol level: resolve the protocol driver
 * from the filename, then delegate to bdrv_create(). */
475 int bdrv_create_file(const char* filename, QEMUOptionParameter *options,
479 Error *local_err = NULL;
482 drv = bdrv_find_protocol(filename, true);
484 error_setg(errp, "Could not find protocol for file '%s'", filename);
488 ret = bdrv_create(drv, filename, options, &local_err);
490 error_propagate(errp, local_err);
/* Recompute bs->bl (BlockLimits): start from zero, inherit defaults
 * from bs->file and merge (via MAX) the backing file's limits, then let
 * the driver's own bdrv_refresh_limits callback override the result. */
495 int bdrv_refresh_limits(BlockDriverState *bs)
497 BlockDriver *drv = bs->drv;
499 memset(&bs->bl, 0, sizeof(bs->bl));
505 /* Take some limits from the children as a default */
507 bdrv_refresh_limits(bs->file);
508 bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
509 bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
511 bs->bl.opt_mem_alignment = 512;
514 if (bs->backing_hd) {
515 bdrv_refresh_limits(bs->backing_hd);
516 bs->bl.opt_transfer_length =
517 MAX(bs->bl.opt_transfer_length,
518 bs->backing_hd->bl.opt_transfer_length);
519 bs->bl.opt_mem_alignment =
520 MAX(bs->bl.opt_mem_alignment,
521 bs->backing_hd->bl.opt_mem_alignment);
524 /* Then let the driver override it */
525 if (drv->bdrv_refresh_limits) {
526 return drv->bdrv_refresh_limits(bs);
533  * Create a uniquely-named empty temporary file.
534  * Return 0 upon success, otherwise a negative errno value.
/* Windows branch uses GetTempPath/GetTempFileName; POSIX branch builds
 * "$TMPDIR/vl.XXXXXX" and calls mkstemp().  The #ifdef lines separating
 * the two branches are elided in this extract. */
536 int get_tmp_filename(char *filename, int size)
539 char temp_dir[MAX_PATH];
540 /* GetTempFileName requires that its output buffer (4th param)
541 have length MAX_PATH or greater. */
542 assert(size >= MAX_PATH);
543 return (GetTempPath(MAX_PATH, temp_dir)
544 && GetTempFileName(temp_dir, "qem", 0, filename)
545 ? 0 : -GetLastError());
549 tmpdir = getenv("TMPDIR");
552 if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
555 fd = mkstemp(filename);
559 if (close(fd) != 0) {
568  * Detect host devices. By convention, /dev/cdrom[N] is always
569  * recognized as a host CDROM.
/* Probe all registered drivers that implement bdrv_probe_device and
 * return the highest-scoring one (NULL when none match; the return is
 * elided in this extract). */
571 static BlockDriver *find_hdev_driver(const char *filename)
573 int score_max = 0, score;
574 BlockDriver *drv = NULL, *d;
576 QLIST_FOREACH(d, &bdrv_drivers, list) {
577 if (d->bdrv_probe_device) {
578 score = d->bdrv_probe_device(filename);
579 if (score > score_max) {
/* Resolve the protocol driver for filename: host-device probing first,
 * then the "file" driver when no "<protocol>:" prefix is present (or
 * prefixes are disallowed), otherwise a driver whose protocol_name
 * matches the prefix before the ':'. */
589 BlockDriver *bdrv_find_protocol(const char *filename,
590 bool allow_protocol_prefix)
597 /* TODO Drivers without bdrv_file_open must be specified explicitly */
600  * XXX(hch): we really should not let host device detection
601  * override an explicit protocol specification, but moving this
602  * later breaks access to device names with colons in them.
603  * Thanks to the brain-dead persistent naming schemes on udev-
604  * based Linux systems those actually are quite common.
606 drv1 = find_hdev_driver(filename);
611 if (!path_has_protocol(filename) || !allow_protocol_prefix) {
612 return bdrv_find_format("file");
615 p = strchr(filename, ':');
618 if (len > sizeof(protocol) - 1)
619 len = sizeof(protocol) - 1;
620 memcpy(protocol, filename, len);
621 protocol[len] = '\0';
622 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
623 if (drv1->protocol_name &&
624 !strcmp(drv1->protocol_name, protocol)) {
/* Auto-detect the image format of bs: "raw" for scsi-generic or empty
 * devices, otherwise read the image header and pick the driver whose
 * bdrv_probe scores highest.  Result is returned through *pdrv. */
631 static int find_image_format(BlockDriverState *bs, const char *filename,
632 BlockDriver **pdrv, Error **errp)
634 int score, score_max;
635 BlockDriver *drv1, *drv;
639 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
640 if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
641 drv = bdrv_find_format("raw");
643 error_setg(errp, "Could not find raw image format");
650 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
652 error_setg_errno(errp, -ret, "Could not read image for determining its "
660 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
661 if (drv1->bdrv_probe) {
662 score = drv1->bdrv_probe(buf, ret, filename);
663 if (score > score_max) {
670 error_setg(errp, "Could not determine image format: No compatible "
679  * Set the current 'total_sectors' value
/* Query the driver for the device length when possible and convert it
 * to sectors (rounding up); otherwise keep the caller-supplied hint. */
681 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
683 BlockDriver *drv = bs->drv;
685 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
689 /* query actual device if possible, otherwise just trust the hint */
690 if (drv->bdrv_getlength) {
691 int64_t length = drv->bdrv_getlength(bs);
695 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
698 bs->total_sectors = hint;
703  * Set open flags for a given discard mode
705  * Return 0 on success, -1 if the discard mode was invalid.
/* "off"/"ignore" leave BDRV_O_UNMAP cleared; "on"/"unmap" set it. */
707 int bdrv_parse_discard_flags(const char *mode, int *flags)
709 *flags &= ~BDRV_O_UNMAP;
711 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
713 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
714 *flags |= BDRV_O_UNMAP;
723  * Set open flags for a given cache mode
725  * Return 0 on success, -1 if the cache mode was invalid.
/* Maps the user-visible cache mode names onto BDRV_O_NOCACHE /
 * BDRV_O_CACHE_WB / BDRV_O_NO_FLUSH combinations. */
727 int bdrv_parse_cache_flags(const char *mode, int *flags)
729 *flags &= ~BDRV_O_CACHE_MASK;
731 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
732 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
733 } else if (!strcmp(mode, "directsync")) {
734 *flags |= BDRV_O_NOCACHE;
735 } else if (!strcmp(mode, "writeback")) {
736 *flags |= BDRV_O_CACHE_WB;
737 } else if (!strcmp(mode, "unsafe")) {
738 *flags |= BDRV_O_CACHE_WB;
739 *flags |= BDRV_O_NO_FLUSH;
740 } else if (!strcmp(mode, "writethrough")) {
741 /* this is the default */
750  * The copy-on-read flag is actually a reference count so multiple users may
751  * use the feature without worrying about clobbering its previous state.
752  * Copy-on-read stays enabled until all users have called to disable it.
/* NOTE(review): the increment/decrement statements are elided in this
 * extract; only the disable-side underflow assert is visible. */
754 void bdrv_enable_copy_on_read(BlockDriverState *bs)
759 void bdrv_disable_copy_on_read(BlockDriverState *bs)
761 assert(bs->copy_on_read > 0);
/* Compute the flags actually passed to a driver's open callback:
 * force writeback cache, strip block-layer-internal flags, and make
 * temporary (snapshot) images writable. */
765 static int bdrv_open_flags(BlockDriverState *bs, int flags)
767 int open_flags = flags | BDRV_O_CACHE_WB;
770  * Clear flags that are internal to the block layer before opening the
773 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
776  * Snapshots should be writable.
778 if (bs->is_temporary) {
779 open_flags |= BDRV_O_RDWR;
/* Validate and install a graph node name on bs: rejects the empty
 * string, names clashing with a device id, and duplicates; on success
 * copies the name into bs and links bs into graph_bdrv_states. */
785 static int bdrv_assign_node_name(BlockDriverState *bs,
786 const char *node_name,
793 /* empty string node name is invalid */
794 if (node_name[0] == '\0') {
795 error_setg(errp, "Empty node name");
799 /* takes care of avoiding namespaces collisions */
800 if (bdrv_find(node_name)) {
801 error_setg(errp, "node-name=%s is conflicting with a device id",
806 /* takes care of avoiding duplicates node names */
807 if (bdrv_find_node(node_name)) {
808 error_setg(errp, "Duplicate node name");
812 /* copy node name into the bs and insert it into the graph list */
813 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
814 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
820  * Common part for opening disk images and files
822  * Removes all processed options from *options.
/* Shared open path for both protocol and format layers: resolves the
 * filename, assigns a node-name, checks the whitelist and copy-on-read
 * constraints, allocates driver-private state, calls either
 * drv->bdrv_file_open or drv->bdrv_open, then refreshes size/limits.
 * NOTE(review): many error-path and goto lines are elided in this
 * extract; keep edits to comments only. */
824 static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
825 QDict *options, int flags, BlockDriver *drv, Error **errp)
828 const char *filename;
829 const char *node_name = NULL;
830 Error *local_err = NULL;
833 assert(bs->file == NULL);
834 assert(options != NULL && bs->options != options);
837 filename = file->filename;
839 filename = qdict_get_try_str(options, "filename");
842 if (drv->bdrv_needs_filename && !filename) {
843 error_setg(errp, "The '%s' block driver requires a file name",
848 trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
850 node_name = qdict_get_try_str(options, "node-name");
851 ret = bdrv_assign_node_name(bs, node_name, errp);
855 qdict_del(options, "node-name");
857 /* bdrv_open() with directly using a protocol as drv. This layer is already
858  * opened, so assign it to bs (while file becomes a closed BlockDriverState)
859  * and return immediately. */
860 if (file != NULL && drv->bdrv_file_open) {
865 bs->open_flags = flags;
866 bs->guest_block_size = 512;
867 bs->request_alignment = 512;
868 bs->zero_beyond_eof = true;
869 open_flags = bdrv_open_flags(bs, flags);
870 bs->read_only = !(open_flags & BDRV_O_RDWR);
872 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
874 !bs->read_only && bdrv_is_whitelisted(drv, true)
875 ? "Driver '%s' can only be used for read-only devices"
876 : "Driver '%s' is not whitelisted",
881 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
882 if (flags & BDRV_O_COPY_ON_READ) {
883 if (!bs->read_only) {
884 bdrv_enable_copy_on_read(bs);
886 error_setg(errp, "Can't use copy-on-read on read-only device");
891 if (filename != NULL) {
892 pstrcpy(bs->filename, sizeof(bs->filename), filename);
894 bs->filename[0] = '\0';
898 bs->opaque = g_malloc0(drv->instance_size);
900 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
902 /* Open the image, either directly or using a protocol */
903 if (drv->bdrv_file_open) {
904 assert(file == NULL);
905 assert(!drv->bdrv_needs_filename || filename != NULL);
906 ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
909 error_setg(errp, "Can't use '%s' as a block driver for the "
910 "protocol level", drv->format_name);
915 ret = drv->bdrv_open(bs, options, open_flags, &local_err);
920 error_propagate(errp, local_err);
921 } else if (bs->filename[0]) {
922 error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
924 error_setg_errno(errp, -ret, "Could not open image");
929 ret = refresh_total_sectors(bs, bs->total_sectors);
931 error_setg_errno(errp, -ret, "Could not refresh total sector count");
935 bdrv_refresh_limits(bs);
936 assert(bdrv_opt_mem_align(bs) != 0);
937 assert(bs->request_alignment != 0);
/* Temporary (snapshot) files are unlinked immediately after open so
 * they disappear when the last reference is closed. */
940 if (bs->is_temporary) {
941 assert(bs->filename[0] != '\0');
942 unlink(bs->filename);
956  * Opens a file using a protocol (file, host_device, nbd, ...)
958  * options is a QDict of options to pass to the block drivers, or NULL for an
959  * empty set of options. The reference to the QDict belongs to the block layer
960  * after the call (even on failure), so if the caller intends to reuse the
961  * dictionary, it needs to use QINCREF() before calling bdrv_file_open.
/* Either reuses an existing BDS named by 'reference' (no options or
 * filename allowed then), or creates one, resolves the protocol driver
 * from "driver"/filename, lets the driver parse the filename, and opens
 * via bdrv_open_common() (or recurses into bdrv_open() for drivers
 * without a protocol-level open).  Leftover options are an error.
 * NOTE(review): error-path and cleanup lines are elided in this extract. */
963 int bdrv_file_open(BlockDriverState **pbs, const char *filename,
964 const char *reference, QDict *options, int flags,
967 BlockDriverState *bs = NULL;
970 bool allow_protocol_prefix = false;
971 Error *local_err = NULL;
974 /* NULL means an empty set of options */
975 if (options == NULL) {
976 options = qdict_new();
980 if (filename || qdict_size(options)) {
981 error_setg(errp, "Cannot reference an existing block device with "
982 "additional options or a new filename");
987 bs = bdrv_lookup_bs(reference, reference, errp);
997 bs->options = options;
998 options = qdict_clone_shallow(options);
1000 /* Fetch the file name from the options QDict if necessary */
1002 filename = qdict_get_try_str(options, "filename");
1003 } else if (filename && !qdict_haskey(options, "filename")) {
1004 qdict_put(options, "filename", qstring_from_str(filename));
1005 allow_protocol_prefix = true;
1007 error_setg(errp, "Can't specify 'file' and 'filename' options at the "
1013 /* Find the right block driver */
1014 drvname = qdict_get_try_str(options, "driver");
1016 drv = bdrv_find_format(drvname);
1018 error_setg(errp, "Unknown driver '%s'", drvname);
1020 qdict_del(options, "driver");
1021 } else if (filename) {
1022 drv = bdrv_find_protocol(filename, allow_protocol_prefix);
1024 error_setg(errp, "Unknown protocol");
1027 error_setg(errp, "Must specify either driver or file");
1032 /* errp has been set already */
1037 /* Parse the filename and open it */
1038 if (drv->bdrv_parse_filename && filename) {
1039 drv->bdrv_parse_filename(filename, options, &local_err);
1041 error_propagate(errp, local_err);
1045 qdict_del(options, "filename");
1048 if (!drv->bdrv_file_open) {
1049 ret = bdrv_open(&bs, filename, NULL, options, flags, drv, &local_err);
1052 ret = bdrv_open_common(bs, NULL, options, flags, drv, &local_err);
1055 error_propagate(errp, local_err);
1059 /* Check if any unknown options were used */
1060 if (options && (qdict_size(options) != 0)) {
1061 const QDictEntry *entry = qdict_first(options);
1062 error_setg(errp, "Block protocol '%s' doesn't support the option '%s'",
1063 drv->format_name, entry->key);
1076 QDECREF(bs->options);
1083  * Opens the backing file for a BlockDriverState if not yet open
1085  * options is a QDict of options to pass to the block drivers, or NULL for an
1086  * empty set of options. The reference to the QDict is transferred to this
1087  * function (even on failure), so if the caller intends to reuse the dictionary,
1088  * it needs to use QINCREF() before calling bdrv_file_open.
/* No-op when a backing file is already attached, or when neither
 * bs->backing_file nor any options name one.  Opens the backing BDS
 * read-only (BDRV_O_RDWR/SNAPSHOT/COPY_ON_READ stripped), restores
 * BDRV_O_NO_BACKING on failure, and refreshes limits on success. */
1090 int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
1092 char backing_filename[PATH_MAX];
1093 int back_flags, ret;
1094 BlockDriver *back_drv = NULL;
1095 Error *local_err = NULL;
1097 if (bs->backing_hd != NULL) {
1102 /* NULL means an empty set of options */
1103 if (options == NULL) {
1104 options = qdict_new();
1107 bs->open_flags &= ~BDRV_O_NO_BACKING;
1108 if (qdict_haskey(options, "file.filename")) {
1109 backing_filename[0] = '\0';
1110 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1114 bdrv_get_full_backing_filename(bs, backing_filename,
1115 sizeof(backing_filename));
1118 if (bs->backing_format[0] != '\0') {
1119 back_drv = bdrv_find_format(bs->backing_format);
1122 /* backing files always opened read-only */
1123 back_flags = bs->open_flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT |
1124 BDRV_O_COPY_ON_READ);
1126 assert(bs->backing_hd == NULL);
1127 ret = bdrv_open(&bs->backing_hd,
1128 *backing_filename ? backing_filename : NULL, NULL, options,
1129 back_flags, back_drv, &local_err);
1131 bs->backing_hd = NULL;
1132 bs->open_flags |= BDRV_O_NO_BACKING;
1133 error_setg(errp, "Could not open backing file: %s",
1134 error_get_pretty(local_err));
1135 error_free(local_err);
1139 if (bs->backing_hd->file) {
1140 pstrcpy(bs->backing_file, sizeof(bs->backing_file),
1141 bs->backing_hd->file->filename);
1144 /* Recalculate the BlockLimits with the backing file */
1145 bdrv_refresh_limits(bs);
1151  * Opens a disk image whose options are given as BlockdevRef in another block
1154  * If force_raw is true, bdrv_file_open() will be used, thereby preventing any
1155  * image format auto-detection. If it is false and a filename is given,
1156  * bdrv_open() will be used for auto-detection.
1158  * If allow_none is true, no image will be opened if filename is false and no
1159  * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1161  * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1162  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1163  * itself, all options starting with "${bdref_key}." are considered part of the
1166  * The BlockdevRef will be removed from the options QDict.
1168  * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
1170 int bdrv_open_image(BlockDriverState **pbs, const char *filename,
1171 QDict *options, const char *bdref_key, int flags,
1172 bool force_raw, bool allow_none, Error **errp)
1174 QDict *image_options;
1176 char *bdref_key_dot;
1177 const char *reference;
1180 assert(*pbs == NULL);
/* Pull all "<bdref_key>.*" options into their own sub-dict. */
1182 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1183 qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1184 g_free(bdref_key_dot);
1186 reference = qdict_get_try_str(options, bdref_key);
1187 if (!filename && !reference && !qdict_size(image_options)) {
1191 error_setg(errp, "A block device must be specified for \"%s\"",
1198 if (filename && !force_raw) {
1199 /* If a filename is given and the block driver should be detected
1200 automatically (instead of using none), use bdrv_open() in order to do
1201 that auto-detection. */
1203 error_setg(errp, "Cannot reference an existing block device while "
1204 "giving a filename");
1209 ret = bdrv_open(pbs, filename, NULL, image_options, flags, NULL, errp);
1211 ret = bdrv_file_open(pbs, filename, reference, image_options, flags,
1216 qdict_del(options, bdref_key);
1221  * Opens a disk image (raw, qcow2, vmdk, ...)
1223  * options is a QDict of options to pass to the block drivers, or NULL for an
1224  * empty set of options. The reference to the QDict belongs to the block layer
1225  * after the call (even on failure), so if the caller intends to reuse the
1226  * dictionary, it needs to use QINCREF() before calling bdrv_open.
1228  * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1229  * If it is not NULL, the referenced BDS will be reused.
1231  * The reference parameter may be used to specify an existing block device which
1232  * should be opened. If specified, neither options nor a filename may be given,
1233  * nor can an existing BDS be reused (that is, *pbs has to be NULL).
/* Top-level open path.  Sequence: resolve 'reference' shortcut; for
 * snapshot=on build a temporary qcow2 overlay backed by the real image;
 * open the protocol layer ("file"), pick the format driver (explicit
 * "driver" option or probing via find_image_format()), run
 * bdrv_open_common(), attach the backing file, and reject leftover
 * options.  NOTE(review): many error/cleanup lines (unlink_and_fail,
 * close_and_fail bodies) are elided in this extract. */
1235 int bdrv_open(BlockDriverState **pbs, const char *filename,
1236 const char *reference, QDict *options, int flags,
1237 BlockDriver *drv, Error **errp)
1240 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1241 char tmp_filename[PATH_MAX + 1];
1242 BlockDriverState *file = NULL, *bs;
1243 const char *drvname;
1244 Error *local_err = NULL;
1249 bool options_non_empty = options ? qdict_size(options) : false;
1253 error_setg(errp, "Cannot reuse an existing BDS when referencing "
1254 "another block device");
1258 if (filename || options_non_empty) {
1259 error_setg(errp, "Cannot reference an existing block device with "
1260 "additional options or a new filename");
1264 bs = bdrv_lookup_bs(reference, reference, errp);
1279 /* NULL means an empty set of options */
1280 if (options == NULL) {
1281 options = qdict_new();
1284 bs->options = options;
1285 options = qdict_clone_shallow(options);
1287 /* For snapshot=on, create a temporary qcow2 overlay */
1288 if (flags & BDRV_O_SNAPSHOT) {
1289 BlockDriverState *bs1;
1291 BlockDriver *bdrv_qcow2;
1292 QEMUOptionParameter *create_options;
1293 QDict *snapshot_options;
1295 /* if snapshot, we create a temporary backing file and open it
1296 instead of opening 'filename' directly */
1298 /* Get the required size from the image */
1301 ret = bdrv_open(&bs1, filename, NULL, options, BDRV_O_NO_BACKING,
1306 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
1310 /* Create the temporary image */
1311 ret = get_tmp_filename(tmp_filename, sizeof(tmp_filename));
1313 error_setg_errno(errp, -ret, "Could not get temporary filename");
1317 bdrv_qcow2 = bdrv_find_format("qcow2");
1318 create_options = parse_option_parameters("", bdrv_qcow2->create_options,
1321 set_option_parameter_int(create_options, BLOCK_OPT_SIZE, total_size);
1323 ret = bdrv_create(bdrv_qcow2, tmp_filename, create_options, &local_err);
1324 free_option_parameters(create_options);
1326 error_setg_errno(errp, -ret, "Could not create temporary overlay "
1327 "'%s': %s", tmp_filename,
1328 error_get_pretty(local_err));
1329 error_free(local_err);
1334 /* Prepare a new options QDict for the temporary file, where user
1335  * options refer to the backing file */
1337 qdict_put(options, "file.filename", qstring_from_str(filename));
1340 qdict_put(options, "driver", qstring_from_str(drv->format_name));
1343 snapshot_options = qdict_new();
1344 qdict_put(snapshot_options, "backing", options);
1345 qdict_flatten(snapshot_options);
1347 bs->options = snapshot_options;
1348 options = qdict_clone_shallow(bs->options);
1350 filename = tmp_filename;
1352 bs->is_temporary = 1;
1355 /* Open image file without format layer */
1356 if (flags & BDRV_O_RDWR) {
1357 flags |= BDRV_O_ALLOW_RDWR;
1360 assert(file == NULL);
1361 ret = bdrv_open_image(&file, filename, options, "file",
1362 bdrv_open_flags(bs, flags | BDRV_O_UNMAP), true, true,
1368 /* Find the right image format driver */
1369 drvname = qdict_get_try_str(options, "driver");
1371 drv = bdrv_find_format(drvname);
1372 qdict_del(options, "driver");
1374 error_setg(errp, "Invalid driver: '%s'", drvname);
1376 goto unlink_and_fail;
1382 ret = find_image_format(file, filename, &drv, &local_err);
1384 error_setg(errp, "Must specify either driver or file");
1386 goto unlink_and_fail;
1391 goto unlink_and_fail;
1394 /* Open the image */
1395 ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
1397 goto unlink_and_fail;
1400 if (file && (bs->file != file)) {
1405 /* If there is a backing file, use it */
1406 if ((flags & BDRV_O_NO_BACKING) == 0) {
1407 QDict *backing_options;
1409 qdict_extract_subqdict(options, &backing_options, "backing.");
1410 ret = bdrv_open_backing_file(bs, backing_options, &local_err);
1412 goto close_and_fail;
1416 /* Check if any unknown options were used */
1417 if (qdict_size(options) != 0) {
1418 const QDictEntry *entry = qdict_first(options);
1419 error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1420 "support the option '%s'", drv->format_name, bs->device_name,
1424 goto close_and_fail;
1428 if (!bdrv_key_required(bs)) {
1429 bdrv_dev_change_media_cb(bs, true);
1439 if (bs->is_temporary) {
1443 QDECREF(bs->options);
1447 /* If *pbs is NULL, a new BDS has been created in this function and
1448 needs to be freed now. Otherwise, it does not need to be closed,
1449 since it has not really been opened yet. */
1453 error_propagate(errp, local_err);
1458 /* See fail path, but now the BDS has to be always closed */
1466 error_propagate(errp, local_err);
/* One pending reopen: the per-device BDRVReopenState plus its link in
 * the BlockReopenQueue (a 'prepared' flag is used by
 * bdrv_reopen_multiple() but its declaration is elided here). */
1471 typedef struct BlockReopenQueueEntry {
1473 BDRVReopenState state;
1474 QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1475 } BlockReopenQueueEntry;
1478 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1479 * reopen of multiple devices.
1481 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1482 * already performed, or alternatively may be NULL a new BlockReopenQueue will
1483 * be created and initialized. This newly created BlockReopenQueue should be
1484 * passed back in for subsequent calls that are intended to be of the same
1487 * bs is the BlockDriverState to add to the reopen queue.
1489 * flags contains the open flags for the associated bs
1491 * returns a pointer to bs_queue, which is either the newly allocated
1492 * bs_queue, or the existing bs_queue being used.
/* Stage 'bs' for an atomic reopen with the given flags, allocating and
 * initializing a fresh queue when bs_queue is NULL.  Returns the queue
 * in use (newly created or the one passed in). */
1495 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1496 BlockDriverState *bs, int flags)
1500 BlockReopenQueueEntry *bs_entry;
1501 if (bs_queue == NULL) {
1502 bs_queue = g_new0(BlockReopenQueue, 1);
1503 QSIMPLEQ_INIT(bs_queue);
/* Recursively queue the protocol/file layer below us first, so the
 * lower layer is prepared before our own entry is appended. */
1507 bdrv_reopen_queue(bs_queue, bs->file, flags);
1510 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1511 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1513 bs_entry->state.bs = bs;
1514 bs_entry->state.flags = flags;
1520 * Reopen multiple BlockDriverStates atomically & transactionally.
1522 * The queue passed in (bs_queue) must have been built up previously
1523 * via bdrv_reopen_queue().
1525 * Reopens all BDS specified in the queue, with the appropriate
1526 * flags. All devices are prepared for reopen, and failure of any
1527 * device will cause all device changes to be abandoned, and intermediate
1530 * If all devices prepare successfully, then the changes are committed
/* Two-phase reopen: first prepare every queued BDS; if all succeed,
 * commit every entry, otherwise abort only the entries whose prepare
 * succeeded (tracked via bs_entry->prepared). */
1534 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1537 BlockReopenQueueEntry *bs_entry, *next;
1538 Error *local_err = NULL;
1540 assert(bs_queue != NULL);
/* Phase 1: prepare all entries; stop at the first failure. */
1544 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1545 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1546 error_propagate(errp, local_err);
1549 bs_entry->prepared = true;
1552 /* If we reach this point, we have success and just need to apply the
/* Phase 2: commit all staged changes. */
1555 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1556 bdrv_reopen_commit(&bs_entry->state);
/* Cleanup: on error, abort only entries that had been prepared. */
1562 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1563 if (ret && bs_entry->prepared) {
1564 bdrv_reopen_abort(&bs_entry->state);
1573 /* Reopen a single BlockDriverState with the specified flags. */
/* Convenience wrapper: build a single-BDS reopen queue and run the
 * transactional reopen on it, propagating any error to the caller. */
1574 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1577 Error *local_err = NULL;
1578 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1580 ret = bdrv_reopen_multiple(queue, &local_err);
1581 if (local_err != NULL) {
1582 error_propagate(errp, local_err);
1589 * Prepares a BlockDriverState for reopen. All changes are staged in the
1590 * 'opaque' field of the BDRVReopenState, which is used and allocated by
1591 * the block driver layer .bdrv_reopen_prepare()
1593 * bs is the BlockDriverState to reopen
1594 * flags are the new open flags
1595 * queue is the reopen queue
1597 * Returns 0 on success, non-zero on error. On error errp will be set
1600 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1601 * It is the responsibility of the caller to then call the abort() or
1602 * commit() for any other BDS that have been left in a prepare() state
/* Stage (but do not apply) a reopen of reopen_state->bs.  Refuses a
 * read-only -> read-write transition unless the BDS was opened with
 * BDRV_O_ALLOW_RDWR, flushes pending writes before staging, then
 * delegates driver-specific staging to drv->bdrv_reopen_prepare(). */
1605 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1609 Error *local_err = NULL;
1612 assert(reopen_state != NULL);
1613 assert(reopen_state->bs->drv != NULL);
1614 drv = reopen_state->bs->drv;
1616 /* if we are to stay read-only, do not allow permission change
1618 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1619 reopen_state->flags & BDRV_O_RDWR) {
1620 error_set(errp, QERR_DEVICE_IS_READ_ONLY,
1621 reopen_state->bs->device_name);
/* Flush outstanding data before the reopen is staged. */
1626 ret = bdrv_flush(reopen_state->bs);
1628 error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1633 if (drv->bdrv_reopen_prepare) {
1634 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
/* Prefer the driver's specific error; fall back to a generic one. */
1636 if (local_err != NULL) {
1637 error_propagate(errp, local_err);
1639 error_setg(errp, "failed while preparing to reopen image '%s'",
1640 reopen_state->bs->filename);
1645 /* It is currently mandatory to have a bdrv_reopen_prepare()
1646 * handler for each supported drv. */
1647 error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
1648 drv->format_name, reopen_state->bs->device_name,
1649 "reopening of file");
1661 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1662 * makes them final by swapping the staging BlockDriverState contents into
1663 * the active BlockDriverState contents.
/* Apply the staged reopen: run the driver's commit hook (if any), then
 * update the BDS-level flags derived from reopen_state->flags. */
1665 void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1669 assert(reopen_state != NULL);
1670 drv = reopen_state->bs->drv;
1671 assert(drv != NULL);
1673 /* If there are any driver level actions to take */
1674 if (drv->bdrv_reopen_commit) {
1675 drv->bdrv_reopen_commit(reopen_state);
1678 /* set BDS specific flags now */
1679 reopen_state->bs->open_flags = reopen_state->flags;
1680 reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1682 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
/* Flags may change request alignment/limits; recompute them. */
1684 bdrv_refresh_limits(reopen_state->bs);
1688 * Abort the reopen, and delete and free the staged changes in
/* Discard a staged reopen by invoking the driver's abort hook, leaving
 * the BDS in its pre-prepare state. */
1691 void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1695 assert(reopen_state != NULL);
1696 drv = reopen_state->bs->drv;
1697 assert(drv != NULL);
1699 if (drv->bdrv_reopen_abort) {
1700 drv->bdrv_reopen_abort(reopen_state);
/* Tear down an open BDS: cancel its block job, drain all I/O, notify
 * close listeners, drop the backing file, call the driver close hook,
 * reset per-image state, release bs->file, and disable throttling. */
1705 void bdrv_close(BlockDriverState *bs)
1708 block_job_cancel_sync(bs->job);
1710 bdrv_drain_all(); /* complete I/O */
1712 bdrv_drain_all(); /* in case flush left pending I/O */
1713 notifier_list_notify(&bs->close_notifiers, bs);
1716 if (bs->backing_hd) {
1717 bdrv_unref(bs->backing_hd);
1718 bs->backing_hd = NULL;
1720 bs->drv->bdrv_close(bs);
/* Temporary images are removed from disk once closed. */
1723 if (bs->is_temporary) {
1724 unlink(bs->filename);
/* Reset cached per-image state so the BDS can be reused. */
1729 bs->copy_on_read = 0;
1730 bs->backing_file[0] = '\0';
1731 bs->backing_format[0] = '\0';
1732 bs->total_sectors = 0;
1737 bs->zero_beyond_eof = false;
1738 QDECREF(bs->options);
1741 if (bs->file != NULL) {
1742 bdrv_unref(bs->file);
/* Tell the attached device model the medium went away. */
1747 bdrv_dev_change_media_cb(bs, false);
1749 /* throttling disk I/O limits */
1750 if (bs->io_limits_enabled) {
1751 bdrv_io_limits_disable(bs);
/* Close every BlockDriverState registered in the global bdrv_states
 * list (used at shutdown). */
1755 void bdrv_close_all(void)
1757 BlockDriverState *bs;
1759 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
/* Return true if 'bs' (or, recursively, its file/backing BDS) has any
 * tracked or throttled request outstanding. */
1765 static bool bdrv_requests_pending(BlockDriverState *bs)
1767 if (!QLIST_EMPTY(&bs->tracked_requests)) {
1770 if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
1773 if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
1776 if (bs->file && bdrv_requests_pending(bs->file)) {
1779 if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
/* Return true if any registered BDS still has requests in flight. */
1785 static bool bdrv_requests_pending_all(void)
1787 BlockDriverState *bs;
1788 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
1789 if (bdrv_requests_pending(bs)) {
1797 * Wait for pending requests to complete across all BlockDriverStates
1799 * This function does not flush data to disk, use bdrv_flush_all() for that
1800 * after calling this function.
1802 * Note that completion of an asynchronous I/O operation can trigger any
1803 * number of other I/O operations on other devices---for example a coroutine
1804 * can be arbitrarily complex and a constant flow of I/O can come until the
1805 * coroutine is complete. Because of this, it is not possible to have a
1806 * function to drain a single device's I/O queue.
/* Poll the main AioContext until no BDS has pending requests.  Restarts
 * throttled requests each iteration so they can complete too. */
1808 void bdrv_drain_all(void)
1810 /* Always run first iteration so any pending completion BHs run */
1812 BlockDriverState *bs;
1815 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
1816 bdrv_start_throttled_reqs(bs);
/* aio_poll() blocks while 'busy'; loop until everything drained. */
1819 busy = bdrv_requests_pending_all();
1820 busy |= aio_poll(qemu_get_aio_context(), busy);
1824 /* make a BlockDriverState anonymous by removing from bdrv_states and
1825 * graph_bdrv_state list.
1826 Also, NULL terminate the device_name to prevent double remove */
/* Detach 'bs' from the global device and node lists and clear both
 * names; the empty-name checks make this safe to call twice. */
1827 void bdrv_make_anon(BlockDriverState *bs)
1829 if (bs->device_name[0] != '\0') {
1830 QTAILQ_REMOVE(&bdrv_states, bs, device_list);
1832 bs->device_name[0] = '\0';
1833 if (bs->node_name[0] != '\0') {
1834 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
1836 bs->node_name[0] = '\0';
/* Give the driver a chance to re-associate internal state with 'bs'
 * after a swap, via its optional bdrv_rebind hook. */
1839 static void bdrv_rebind(BlockDriverState *bs)
1841 if (bs->drv && bs->drv->bdrv_rebind) {
1842 bs->drv->bdrv_rebind(bs);
/* Copy from bs_src to bs_dest the fields that must stay attached to the
 * guest device (dev ops, throttling, error policy, iostatus, refcount,
 * names and list linkage) rather than follow the image contents during
 * bdrv_swap(). */
1846 static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
1847 BlockDriverState *bs_src)
1849 /* move some fields that need to stay attached to the device */
1850 bs_dest->open_flags = bs_src->open_flags;
1853 bs_dest->dev_ops = bs_src->dev_ops;
1854 bs_dest->dev_opaque = bs_src->dev_opaque;
1855 bs_dest->dev = bs_src->dev;
1856 bs_dest->guest_block_size = bs_src->guest_block_size;
1857 bs_dest->copy_on_read = bs_src->copy_on_read;
1859 bs_dest->enable_write_cache = bs_src->enable_write_cache;
1861 /* i/o throttled req */
1862 memcpy(&bs_dest->throttle_state,
1863 &bs_src->throttle_state,
1864 sizeof(ThrottleState));
1865 bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0];
1866 bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1];
1867 bs_dest->io_limits_enabled = bs_src->io_limits_enabled;
/* error handling policy */
1870 bs_dest->on_read_error = bs_src->on_read_error;
1871 bs_dest->on_write_error = bs_src->on_write_error;
/* i/o status */
1874 bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
1875 bs_dest->iostatus = bs_src->iostatus;
/* dirty bitmap list */
1878 bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;
1880 /* reference count */
1881 bs_dest->refcnt = bs_src->refcnt;
/* job and in-use marker stay with the device */
1884 bs_dest->in_use = bs_src->in_use;
1885 bs_dest->job = bs_src->job;
1887 /* keep the same entry in bdrv_states */
1888 pstrcpy(bs_dest->device_name, sizeof(bs_dest->device_name),
1889 bs_src->device_name);
1890 bs_dest->device_list = bs_src->device_list;
1892 /* keep the same entry in graph_bdrv_states
1893 * We do want to swap name but don't want to swap linked list entries
1895 bs_dest->node_list = bs_src->node_list;
1899 * Swap bs contents for two image chains while they are live,
1900 * while keeping required fields on the BlockDriverState that is
1901 * actually attached to a device.
1903 * This will modify the BlockDriverState fields, and swap contents
1904 * between bs_new and bs_old. Both bs_new and bs_old are modified.
1906 * bs_new is required to be anonymous.
1908 * This function does not create any image files.
/* Swap the contents of two live BDSes while keeping device-attached
 * fields (see bdrv_move_feature_fields) on the BDS that is actually
 * plugged into a device.  bs_new must be anonymous and feature-free,
 * both before and after the swap — the asserts enforce this. */
1910 void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
1912 BlockDriverState tmp;
1914 /* bs_new must be anonymous and shouldn't have anything fancy enabled */
1915 assert(bs_new->device_name[0] == '\0');
1916 assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
1917 assert(bs_new->job == NULL);
1918 assert(bs_new->dev == NULL);
1919 assert(bs_new->in_use == 0);
1920 assert(bs_new->io_limits_enabled == false);
1921 assert(!throttle_have_timer(&bs_new->throttle_state));
1927 /* there are some fields that should not be swapped, move them back */
1928 bdrv_move_feature_fields(&tmp, bs_old);
1929 bdrv_move_feature_fields(bs_old, bs_new);
1930 bdrv_move_feature_fields(bs_new, &tmp);
1932 /* bs_new shouldn't be in bdrv_states even after the swap! */
1933 assert(bs_new->device_name[0] == '\0');
1935 /* Check a few fields that should remain attached to the device */
1936 assert(bs_new->dev == NULL);
1937 assert(bs_new->job == NULL);
1938 assert(bs_new->in_use == 0);
1939 assert(bs_new->io_limits_enabled == false);
1940 assert(!throttle_have_timer(&bs_new->throttle_state));
/* Let both drivers fix up internal pointers after the swap. */
1942 bdrv_rebind(bs_new);
1943 bdrv_rebind(bs_old);
1947 * Add new bs contents at the top of an image chain while the chain is
1948 * live, while keeping required fields on the top layer.
1950 * This will modify the BlockDriverState fields, and swap contents
1951 * between bs_new and bs_top. Both bs_new and bs_top are modified.
1953 * bs_new is required to be anonymous.
1955 * This function does not create any image files.
/* Insert bs_new on top of a live chain: swap contents with bs_top, then
 * link the old top (now in bs_new) as bs_top's backing file and record
 * its filename/format. */
1957 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
1959 bdrv_swap(bs_new, bs_top);
1961 /* The contents of 'tmp' will become bs_top, as we are
1962 * swapping bs_new and bs_top contents. */
1963 bs_top->backing_hd = bs_new;
1964 bs_top->open_flags &= ~BDRV_O_NO_BACKING;
1965 pstrcpy(bs_top->backing_file, sizeof(bs_top->backing_file),
1967 pstrcpy(bs_top->backing_format, sizeof(bs_top->backing_format),
1968 bs_new->drv ? bs_new->drv->format_name : "");
/* Free a BDS.  Must only be called once it is unused: no block job, no
 * outstanding references, no dirty bitmaps. */
1971 static void bdrv_delete(BlockDriverState *bs)
1975 assert(!bs->in_use);
1976 assert(!bs->refcnt);
1977 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
1981 /* remove from list, if necessary */
/* Attach a guest device to 'bs' and reset its I/O status. */
1987 int bdrv_attach_dev(BlockDriverState *bs, void *dev)
1988 /* TODO change to DeviceState *dev when all users are qdevified */
1994 bdrv_iostatus_reset(bs);
1998 /* TODO qdevified devices don't use this, remove when devices are qdevified */
/* Like bdrv_attach_dev() but treats failure as a fatal program error. */
1999 void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
2001 if (bdrv_attach_dev(bs, dev) < 0) {
/* Detach 'dev' from 'bs' and reset the device-op state; the caller must
 * pass the device currently attached (checked by the assert). */
2006 void bdrv_detach_dev(BlockDriverState *bs, void *dev)
2007 /* TODO change to DeviceState *dev when all users are qdevified */
2009 assert(bs->dev == dev);
2012 bs->dev_opaque = NULL;
2013 bs->guest_block_size = 512;
2016 /* TODO change to return DeviceState * when all users are qdevified */
/* Return the guest device currently attached to 'bs' (may be NULL). */
2017 void *bdrv_get_attached_dev(BlockDriverState *bs)
/* Install the device callback table and its opaque for 'bs'. */
2022 void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
2026 bs->dev_opaque = opaque;
/* Emit a QMP I/O-error monitor event for 'bdrv', describing the action
 * taken (report/ignore/stop) and whether it was a read or a write. */
2029 void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
2030 enum MonitorEvent ev,
2031 BlockErrorAction action, bool is_read)
2034 const char *action_str;
2037 case BDRV_ACTION_REPORT:
2038 action_str = "report";
2040 case BDRV_ACTION_IGNORE:
2041 action_str = "ignore";
2043 case BDRV_ACTION_STOP:
2044 action_str = "stop";
2050 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
2053 is_read ? "read" : "write");
2054 monitor_protocol_event(ev, data);
/* monitor_protocol_event() does not take ownership; drop our ref. */
2056 qobject_decref(data);
/* Emit a DEVICE_TRAY_MOVED monitor event with the new tray state. */
2059 static void bdrv_emit_qmp_eject_event(BlockDriverState *bs, bool ejected)
2063 data = qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }",
2064 bdrv_get_device_name(bs), ejected);
2065 monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED, data);
2067 qobject_decref(data);
/* Notify the attached device that a medium was loaded/unloaded, and
 * emit a tray-moved event when the tray state changed as a result. */
2070 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
2072 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
2073 bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
2074 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
2075 if (tray_was_closed) {
/* tray open */
2077 bdrv_emit_qmp_eject_event(bs, true);
/* tray close */
2081 bdrv_emit_qmp_eject_event(bs, false);
/* A BDS has removable media if no device is attached, or the attached
 * device registered a change_media_cb. */
2086 bool bdrv_dev_has_removable_media(BlockDriverState *bs)
2088 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
/* Forward an eject request to the attached device, if it handles one. */
2091 void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
2093 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
2094 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
/* Ask the attached device whether its tray is open; devices without an
 * is_tray_open callback fall through to the default return below. */
2098 bool bdrv_dev_is_tray_open(BlockDriverState *bs)
2100 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
2101 return bs->dev_ops->is_tray_open(bs->dev_opaque);
/* Notify the attached device that the image was resized. */
2106 static void bdrv_dev_resize_cb(BlockDriverState *bs)
2108 if (bs->dev_ops && bs->dev_ops->resize_cb) {
2109 bs->dev_ops->resize_cb(bs->dev_opaque);
/* Ask the attached device whether the medium is locked in the drive. */
2113 bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
2115 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
2116 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
2122 * Run consistency checks on an image
2124 * Returns 0 if the check could be completed (it doesn't mean that the image is
2125 * free of errors) or -errno when an internal error occurred. The results of the
2126 * check are stored in res.
/* Run the driver's consistency check into *res (zeroed first); drivers
 * without a bdrv_check hook are rejected before the call. */
2128 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
2130 if (bs->drv->bdrv_check == NULL) {
2134 memset(res, 0, sizeof(*res));
2135 return bs->drv->bdrv_check(bs, res, fix);
2138 #define COMMIT_BUF_SECTORS 2048
2140 /* commit COW file into the raw image */
/* Copy all allocated sectors of the COW overlay 'bs' down into its
 * backing file.  Temporarily reopens a read-only backing file r/w,
 * grows it if the overlay is larger, copies COMMIT_BUF_SECTORS at a
 * time, empties the overlay if the driver supports it, flushes, and
 * finally restores the backing file's original read-only mode. */
2141 int bdrv_commit(BlockDriverState *bs)
2143 BlockDriver *drv = bs->drv;
2144 int64_t sector, total_sectors, length, backing_length;
2145 int n, ro, open_flags;
2147 uint8_t *buf = NULL;
2148 char filename[PATH_MAX];
2153 if (!bs->backing_hd) {
/* Refuse while either image is busy (e.g. owned by a block job). */
2157 if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
2161 ro = bs->backing_hd->read_only;
2162 /* Use pstrcpy (not strncpy): filename must be NUL-terminated. */
2163 pstrcpy(filename, sizeof(filename), bs->backing_hd->filename);
2164 open_flags = bs->backing_hd->open_flags;
/* Reopen the backing file read-write for the duration of the commit. */
2167 if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
2172 length = bdrv_getlength(bs);
2178 backing_length = bdrv_getlength(bs->backing_hd);
2179 if (backing_length < 0) {
2180 ret = backing_length;
2184 /* If our top snapshot is larger than the backing file image,
2185 * grow the backing file image if possible. If not possible,
2186 * we must return an error */
2187 if (length > backing_length) {
2188 ret = bdrv_truncate(bs->backing_hd, length);
2194 total_sectors = length >> BDRV_SECTOR_BITS;
2195 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
/* Only sectors allocated in the overlay need to be copied down. */
2197 for (sector = 0; sector < total_sectors; sector += n) {
2198 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2203 ret = bdrv_read(bs, sector, buf, n);
2208 ret = bdrv_write(bs->backing_hd, sector, buf, n);
/* Drop the now-redundant data from the overlay, if supported. */
2215 if (drv->bdrv_make_empty) {
2216 ret = drv->bdrv_make_empty(bs);
2224 * Make sure all data we wrote to the backing device is actually
2227 if (bs->backing_hd) {
2228 bdrv_flush(bs->backing_hd);
/* Restore the backing file's original (read-only) open mode. */
2236 /* ignoring error return here */
2237 bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
/* Commit every open BDS that has both a driver and a backing file. */
2243 int bdrv_commit_all(void)
2245 BlockDriverState *bs;
2247 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2248 if (bs->drv && bs->backing_hd) {
2249 int ret = bdrv_commit(bs);
2259 * Remove an active request from the tracked requests list
2261 * This function should be called when a tracked request is completing.
/* Finish a tracked request: drop the serialising count if it held one,
 * unlink it, and wake every request waiting on it. */
2263 static void tracked_request_end(BdrvTrackedRequest *req)
2265 if (req->serialising) {
2266 req->bs->serialising_in_flight--;
2269 QLIST_REMOVE(req, list);
2270 qemu_co_queue_restart_all(&req->wait_queue);
2274 * Add an active request to the tracked requests list
/* Initialize *req for the given byte range and link it into the BDS's
 * tracked-request list; the overlap range starts equal to the request
 * range and may be widened later by mark_request_serialising(). */
2276 static void tracked_request_begin(BdrvTrackedRequest *req,
2277 BlockDriverState *bs,
2279 unsigned int bytes, bool is_write)
2281 *req = (BdrvTrackedRequest){
2285 .is_write = is_write,
2286 .co = qemu_coroutine_self(),
2287 .serialising = false,
2288 .overlap_offset = offset,
2289 .overlap_bytes = bytes,
2292 qemu_co_queue_init(&req->wait_queue);
2294 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
/* Mark a tracked request as serialising and widen its overlap window to
 * 'align'-aligned boundaries; 'align' must be a power of two (the
 * & ~(align - 1) rounding relies on it). */
2297 static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
2299 int64_t overlap_offset = req->offset & ~(align - 1);
2300 unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
2303 if (!req->serialising) {
2304 req->bs->serialising_in_flight++;
2305 req->serialising = true;
/* Only ever widen the window, never shrink it. */
2308 req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
2309 req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
2313 * Round a region to cluster boundaries
/* Round a sector range outward to the image's cluster boundaries.  If
 * the driver reports no cluster size, the range is returned unchanged. */
2315 void bdrv_round_to_clusters(BlockDriverState *bs,
2316 int64_t sector_num, int nb_sectors,
2317 int64_t *cluster_sector_num,
2318 int *cluster_nb_sectors)
2320 BlockDriverInfo bdi;
2322 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
2323 *cluster_sector_num = sector_num;
2324 *cluster_nb_sectors = nb_sectors;
2326 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
2327 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
2328 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
/* Return the image's cluster size in bytes, falling back to the request
 * alignment when the driver does not report one. */
2333 static int bdrv_get_cluster_size(BlockDriverState *bs)
2335 BlockDriverInfo bdi;
2338 ret = bdrv_get_info(bs, &bdi);
2339 if (ret < 0 || bdi.cluster_size == 0) {
2340 return bs->request_alignment;
2342 return bdi.cluster_size;
/* Return whether [offset, offset+bytes) intersects the request's
 * (possibly widened) overlap window; the two early-outs handle the
 * disjoint cases on either side. */
2346 static bool tracked_request_overlaps(BdrvTrackedRequest *req,
2347 int64_t offset, unsigned int bytes)
2350 if (offset >= req->overlap_offset + req->overlap_bytes) {
2354 if (req->overlap_offset >= offset + bytes) {
/* Block the calling coroutine until no overlapping serialising request
 * remains in flight on self's BDS.  Fast path: nothing serialising.
 * Returns whether we had to wait at all. */
2360 static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
2362 BlockDriverState *bs = self->bs;
2363 BdrvTrackedRequest *req;
2365 bool waited = false;
2367 if (!bs->serialising_in_flight) {
2373 QLIST_FOREACH(req, &bs->tracked_requests, list) {
/* Only pairs where at least one side is serialising matter. */
2374 if (req == self || (!req->serialising && !self->serialising)) {
2377 if (tracked_request_overlaps(req, self->overlap_offset,
2378 self->overlap_bytes))
2380 /* Hitting this means there was a reentrant request, for
2381 * example, a block driver issuing nested requests. This must
2382 * never happen since it means deadlock.
2384 assert(qemu_coroutine_self() != req->co);
2386 /* If the request is already (indirectly) waiting for us, or
2387 * will wait for us as soon as it wakes up, then just go on
2388 * (instead of producing a deadlock in the former case). */
2389 if (!req->waiting_for) {
2390 self->waiting_for = req;
2391 qemu_co_queue_wait(&req->wait_queue);
2392 self->waiting_for = NULL;
2407 * -EINVAL - backing format specified, but no file
2408 * -ENOSPC - can't update the backing file because no space is left in the
2410 * -ENOTSUP - format driver doesn't support changing the backing file
/* Update the image's backing-file reference via the driver, then mirror
 * the new values into the in-memory BDS fields.  A format without a
 * file is rejected up front (see error list in the comment above). */
2412 int bdrv_change_backing_file(BlockDriverState *bs,
2413 const char *backing_file, const char *backing_fmt)
2415 BlockDriver *drv = bs->drv;
2418 /* Backing file format doesn't make sense without a backing file */
2419 if (backing_fmt && !backing_file) {
2423 if (drv->bdrv_change_backing_file != NULL) {
2424 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
/* Keep the cached copies in sync; NULL becomes the empty string. */
2430 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2431 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2437 * Finds the image layer in the chain that has 'bs' as its backing file.
2439 * active is the current topmost image.
2441 * Returns NULL if bs is not found in active's image chain,
2442 * or if active == bs.
/* Walk down from 'active' along the backing chain and return the image
 * whose backing file is 'bs' (its overlay), or NULL if bs is not in the
 * chain or equals active. */
2444 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2445 BlockDriverState *bs)
2447 BlockDriverState *overlay = NULL;
2448 BlockDriverState *intermediate;
2450 assert(active != NULL);
2453 /* if bs is the same as active, then by definition it has no overlay
2459 intermediate = active;
2460 while (intermediate->backing_hd) {
2461 if (intermediate->backing_hd == bs) {
2462 overlay = intermediate;
2465 intermediate = intermediate->backing_hd;
/* Node of the temporary deletion list built by bdrv_drop_intermediate():
 * one entry per BDS between 'top' and 'base'. */
2471 typedef struct BlkIntermediateStates {
2472 BlockDriverState *bs;
2473 QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
2474 } BlkIntermediateStates;
2478 * Drops images above 'base' up to and including 'top', and sets the image
2479 * above 'top' to have base as its backing file.
2481 * Requires that the overlay to 'top' is opened r/w, so that the backing file
2482 * information in 'bs' can be properly updated.
2484 * E.g., this will convert the following chain:
2485 * bottom <- base <- intermediate <- top <- active
2489 * bottom <- base <- active
2491 * It is allowed for bottom==base, in which case it converts:
2493 * base <- intermediate <- top <- active
2500 * if active == top, that is considered an error
/* Remove the images between 'base' and 'top' (inclusive of top) from a
 * live chain: collect them into a deletion list, repoint the overlay of
 * 'top' at 'base' (updating its backing-file metadata on disk), then
 * unref each dropped BDS.  See the diagram in the comment above. */
2503 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2504 BlockDriverState *base)
2506 BlockDriverState *intermediate;
2507 BlockDriverState *base_bs = NULL;
2508 BlockDriverState *new_top_bs = NULL;
2509 BlkIntermediateStates *intermediate_state, *next;
2512 QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
2513 QSIMPLEQ_INIT(&states_to_delete);
2515 if (!top->drv || !base->drv) {
2519 new_top_bs = bdrv_find_overlay(active, top);
2521 if (new_top_bs == NULL) {
2522 /* we could not find the image above 'top', this is an error */
2526 /* special case of new_top_bs->backing_hd already pointing to base - nothing
2527 * to do, no intermediate images */
2528 if (new_top_bs->backing_hd == base) {
2535 /* now we will go down through the list, and add each BDS we find
2536 * into our deletion queue, until we hit the 'base'
2538 while (intermediate) {
2539 intermediate_state = g_malloc0(sizeof(BlkIntermediateStates));
2540 intermediate_state->bs = intermediate;
2541 QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
2543 if (intermediate->backing_hd == base) {
2544 base_bs = intermediate->backing_hd;
2547 intermediate = intermediate->backing_hd;
2549 if (base_bs == NULL) {
2550 /* something went wrong, we did not end at the base. safely
2551 * unravel everything, and exit with error */
2555 /* success - we can delete the intermediate states, and link top->base */
2556 ret = bdrv_change_backing_file(new_top_bs, base_bs->filename,
2557 base_bs->drv ? base_bs->drv->format_name : "");
2561 new_top_bs->backing_hd = base_bs;
2563 bdrv_refresh_limits(new_top_bs);
2565 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2566 /* so that bdrv_close() does not recursively close the chain */
2567 intermediate_state->bs->backing_hd = NULL;
2568 bdrv_unref(intermediate_state->bs);
/* Free the bookkeeping nodes themselves (exit path). */
2573 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2574 g_free(intermediate_state);
/* Validate a byte-granularity request: medium must be inserted and
 * [offset, offset+size) must lie within the device length.  The
 * 'len - offset < size' form avoids overflow in offset + size. */
2580 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
2585 if (!bdrv_is_inserted(bs))
2591 len = bdrv_getlength(bs);
2596 if ((offset > len) || (len - offset < size))
/* Sector-granularity wrapper around bdrv_check_byte_request(). */
2602 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
2605 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
2606 nb_sectors * BDRV_SECTOR_SIZE);
/* Argument/result bundle passed to the coroutine that performs an
 * emulated synchronous read or write (see bdrv_rw_co_entry). */
2609 typedef struct RwCo {
2610 BlockDriverState *bs;
2615 BdrvRequestFlags flags;
/* Coroutine entry point for emulated synchronous I/O: dispatch the RwCo
 * request to the aligned read or write path and record the result. */
2618 static void coroutine_fn bdrv_rw_co_entry(void *opaque)
2620 RwCo *rwco = opaque;
2622 if (!rwco->is_write) {
2623 rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
2624 rwco->qiov->size, rwco->qiov,
2627 rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
2628 rwco->qiov->size, rwco->qiov,
2634 * Process a vectored synchronous request using coroutines
/* Run a vectored request synchronously: either call the coroutine body
 * directly (already in coroutine context) or spawn a coroutine and poll
 * until rwco.ret leaves the NOT_DONE sentinel. */
2636 static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
2637 QEMUIOVector *qiov, bool is_write,
2638 BdrvRequestFlags flags)
2645 .is_write = is_write,
2651 * In sync call context, when the vcpu is blocked, this throttling timer
2652 * will not fire; so the I/O throttling function has to be disabled here
2653 * if it has been enabled.
2655 if (bs->io_limits_enabled) {
2656 fprintf(stderr, "Disabling I/O throttling on '%s' due "
2657 "to synchronous I/O.\n", bdrv_get_device_name(bs));
2658 bdrv_io_limits_disable(bs);
2661 if (qemu_in_coroutine()) {
2662 /* Fast-path if already in coroutine context */
2663 bdrv_rw_co_entry(&rwco);
2665 co = qemu_coroutine_create(bdrv_rw_co_entry);
2666 qemu_coroutine_enter(co, &rwco);
2667 while (rwco.ret == NOT_DONE) {
2675 * Process a synchronous request using coroutines
/* Sector-based synchronous I/O helper: wrap 'buf' in a single-element
 * QEMUIOVector and forward to the byte-based bdrv_prwv_co(). */
2677 static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
2678 int nb_sectors, bool is_write, BdrvRequestFlags flags)
2681 struct iovec iov = {
2682 .iov_base = (void *)buf,
2683 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
2686 qemu_iovec_init_external(&qiov, &iov, 1);
2687 return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
2688 &qiov, is_write, flags);
2691 /* return < 0 if error. See bdrv_write() for the return codes */
/* Synchronous sector read; see bdrv_write() for return codes. */
2692 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
2693 uint8_t *buf, int nb_sectors)
2695 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
2698 /* Just like bdrv_read(), but with I/O throttling temporarily disabled */
/* bdrv_read() with I/O throttling switched off for the duration of the
 * call; the previous enabled state is saved and restored afterwards. */
2699 int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
2700 uint8_t *buf, int nb_sectors)
2705 enabled = bs->io_limits_enabled;
2706 bs->io_limits_enabled = false;
2707 ret = bdrv_read(bs, sector_num, buf, nb_sectors);
2708 bs->io_limits_enabled = enabled;
2712 /* Return < 0 if error. Important errors are:
2713 -EIO generic I/O error (may happen for all errors)
2714 -ENOMEDIUM No media inserted.
2715 -EINVAL Invalid sector number or nb_sectors
2716 -EACCES Trying to write a read-only device
/* Synchronous sector write; error codes are listed in the comment
 * above this function. */
2718 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
2719 const uint8_t *buf, int nb_sectors)
2721 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
/* Synchronously write zeroes to a sector range: a write with a NULL
 * buffer and BDRV_REQ_ZERO_WRITE set (plus caller-supplied flags). */
2724 int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
2725 int nb_sectors, BdrvRequestFlags flags)
2727 return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
2728 BDRV_REQ_ZERO_WRITE | flags);
2732 * Completely zero out a block device with the help of bdrv_write_zeroes.
2733 * The operation is sped up by checking the block status and only writing
2734 * zeroes to the device if they currently do not return zeroes. Optional
2735 * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
2737 * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
/* Zero out the whole device, skipping ranges the block-status query
 * already reports as zero; see the contract in the comment above. */
2739 int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
2741 int64_t target_size = bdrv_getlength(bs) / BDRV_SECTOR_SIZE;
2742 int64_t ret, nb_sectors, sector_num = 0;
2746 nb_sectors = target_size - sector_num;
2747 if (nb_sectors <= 0) {
/* Clamp the query size to what an int nb_sectors can express. */
2750 if (nb_sectors > INT_MAX) {
2751 nb_sectors = INT_MAX;
2753 ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
2755 error_report("error getting block status at sector %" PRId64 ": %s",
2756 sector_num, strerror(-ret));
/* Already reads as zero — nothing to write for this range. */
2759 if (ret & BDRV_BLOCK_ZERO) {
2763 ret = bdrv_write_zeroes(bs, sector_num, n, flags);
2765 error_report("error writing zeroes at sector %" PRId64 ": %s",
2766 sector_num, strerror(-ret));
/* Byte-granularity synchronous read into 'buf' via a one-element
 * QEMUIOVector and bdrv_prwv_co(). */
2773 int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
2776 struct iovec iov = {
2777 .iov_base = (void *)buf,
2786 qemu_iovec_init_external(&qiov, &iov, 1);
2787 ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
/* Byte-granularity synchronous vectored write. */
2795 int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
2799 ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
/* Byte-granularity synchronous write of 'buf' via a one-element
 * QEMUIOVector and bdrv_pwritev(). */
2807 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
2808 const void *buf, int bytes)
2811 struct iovec iov = {
2812 .iov_base = (void *) buf,
2820 qemu_iovec_init_external(&qiov, &iov, 1);
2821 return bdrv_pwritev(bs, offset, &qiov);
2825 * Writes to the file and ensures that no writes are reordered across this
2826 * request (acts as a barrier)
2828 * Returns 0 on success, -errno in error cases.
/* Write then flush so the data acts as a barrier against reordering
 * (contract described in the comment above).  The flush is skipped for
 * cache modes that already write through. */
2830 int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
2831 const void *buf, int count)
2835 ret = bdrv_pwrite(bs, offset, buf, count);
2840 /* No flush needed for cache modes that already do it */
2841 if (bs->enable_write_cache) {
/* Copy-on-read slow path: read the surrounding cluster into a bounce
 * buffer, write it back into the image (as zeroes when the driver
 * supports it and the buffer is all-zero), then copy the requested
 * sub-range out to the caller's qiov. */
2848 static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
2849 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
2851 /* Perform I/O through a temporary buffer so that users who scribble over
2852 * their read buffer while the operation is in progress do not end up
2853 * modifying the image file. This is critical for zero-copy guest I/O
2854 * where anything might happen inside guest memory.
2856 void *bounce_buffer;
2858 BlockDriver *drv = bs->drv;
2860 QEMUIOVector bounce_qiov;
2861 int64_t cluster_sector_num;
2862 int cluster_nb_sectors;
2866 /* Cover entire cluster so no additional backing file I/O is required when
2867 * allocating cluster in the image file.
2869 bdrv_round_to_clusters(bs, sector_num, nb_sectors,
2870 &cluster_sector_num, &cluster_nb_sectors);
2872 trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
2873 cluster_sector_num, cluster_nb_sectors);
2875 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
2876 iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
2877 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
/* Read the whole cluster from the current layer/backing chain. */
2879 ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
2885 if (drv->bdrv_co_write_zeroes &&
2886 buffer_is_zero(bounce_buffer, iov.iov_len)) {
2887 ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
2888 cluster_nb_sectors, 0);
2890 /* This does not change the data on the disk, it is not necessary
2891 * to flush even in cache=writethrough mode.
2893 ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
2898 /* It might be okay to ignore write errors for guest requests. If this
2899 * is a deliberate copy-on-read then we don't want to ignore the error.
2900 * Simply report it in all cases.
/* Copy just the caller's requested sub-range out of the cluster. */
2905 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
2906 qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
2907 nb_sectors * BDRV_SECTOR_SIZE);
2910 qemu_vfree(bounce_buffer);
/*
 * Forwards an already correctly aligned request to the BlockDriver. This
 * handles copy on read and zeroing after EOF; any other features must be
 * implemented by the caller.
 *
 * @offset/@bytes must be sector-aligned (asserted); @align is the request
 * alignment used to round the EOF boundary for short reads.
 */
static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
    int64_t align, QEMUIOVector *qiov, int flags)
    BlockDriver *drv = bs->drv;
    int64_t sector_num = offset >> BDRV_SECTOR_BITS;
    unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
    /* Handle Copy on Read and associated serialisation */
    if (flags & BDRV_REQ_COPY_ON_READ) {
        /* If we touch the same cluster it counts as an overlap.  This
         * guarantees that allocating writes will be serialized and not race
         * with each other for the same cluster.  For example, in copy-on-read
         * it ensures that the CoR read and write operations are atomic and
         * guest writes cannot interleave between them. */
        mark_request_serialising(req, bdrv_get_cluster_size(bs));
    wait_serialising_requests(req);
    if (flags & BDRV_REQ_COPY_ON_READ) {
        /* Only go through the CoR path if part of the range is unallocated */
        ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
        if (!ret || pnum != nb_sectors) {
            ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
    /* Forward the request to the BlockDriver */
    if (!(bs->zero_beyond_eof && bs->growable)) {
        ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
        /* Read zeros after EOF of growable BDSes */
        int64_t len, total_sectors, max_nb_sectors;
        len = bdrv_getlength(bs);
        total_sectors = DIV_ROUND_UP(len, BDRV_SECTOR_SIZE);
        /* Clamp the driver read to the (align-rounded) end of the image */
        max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
                                  align >> BDRV_SECTOR_BITS);
        if (max_nb_sectors > 0) {
            ret = drv->bdrv_co_readv(bs, sector_num,
                                     MIN(nb_sectors, max_nb_sectors), qiov);
        /* Reading beyond end of file is supposed to produce zeroes */
        if (ret == 0 && total_sectors < sector_num + nb_sectors) {
            /* NOTE: 'offset'/'bytes' here are sector/byte counts relative to
             * the start of this request, shadowing the parameters above. */
            uint64_t offset = MAX(0, total_sectors - sector_num);
            uint64_t bytes = (sector_num + nb_sectors - offset) *
            qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
/*
 * Handle a read request in coroutine context
 *
 * Applies throttling and copy-on-read policy, pads the request to
 * bs->request_alignment with head/tail bounce buffers if needed, then
 * forwards it as a tracked request to bdrv_aligned_preadv().
 */
static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;
    /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
    uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
    uint8_t *head_buf = NULL;
    uint8_t *tail_buf = NULL;
    QEMUIOVector local_qiov;
    bool use_local_qiov = false;
    if (bdrv_check_byte_request(bs, offset, bytes)) {
    if (bs->copy_on_read) {
        flags |= BDRV_REQ_COPY_ON_READ;
    /* throttling disk I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, bytes, false);
    /* Align read if necessary by padding qiov */
    if (offset & (align - 1)) {
        /* Unaligned head: prepend a bounce buffer covering the padding */
        head_buf = qemu_blockalign(bs, align);
        qemu_iovec_init(&local_qiov, qiov->niov + 2);
        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
        use_local_qiov = true;
        bytes += offset & (align - 1);
        offset = offset & ~(align - 1);
    if ((offset + bytes) & (align - 1)) {
        /* Unaligned tail: append a bounce buffer up to the next boundary */
        if (!use_local_qiov) {
            qemu_iovec_init(&local_qiov, qiov->niov + 1);
            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
            use_local_qiov = true;
        tail_buf = qemu_blockalign(bs, align);
        qemu_iovec_add(&local_qiov, tail_buf,
                       align - ((offset + bytes) & (align - 1)));
        bytes = ROUND_UP(bytes, align);
    tracked_request_begin(&req, bs, offset, bytes, false);
    ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
                              use_local_qiov ? &local_qiov : qiov,
    tracked_request_end(&req);
    if (use_local_qiov) {
        qemu_iovec_destroy(&local_qiov);
        qemu_vfree(head_buf);
        qemu_vfree(tail_buf);
3068 static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
3069 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3070 BdrvRequestFlags flags)
3072 if (nb_sectors < 0 || nb_sectors > (UINT_MAX >> BDRV_SECTOR_BITS)) {
3076 return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
3077 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3080 int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
3081 int nb_sectors, QEMUIOVector *qiov)
3083 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
3085 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
3088 int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
3089 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
3091 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
3093 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
3094 BDRV_REQ_COPY_ON_READ);
/* if no limit is specified in the BlockLimits use a default
 * of 32768 512-byte sectors (16 MiB) per request.
 */
#define MAX_WRITE_ZEROES_DEFAULT 32768

/*
 * Write zeroes over a sector range, preferring the driver's efficient
 * write-zeroes callback and falling back to writing a zeroed bounce
 * buffer (reused across loop iterations) when unsupported.
 */
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
    BlockDriver *drv = bs->drv;
    /* Zero-initialised so iov_base == NULL means "no bounce buffer yet" */
    struct iovec iov = {0};
    int max_write_zeroes = bs->bl.max_write_zeroes ?
                           bs->bl.max_write_zeroes : MAX_WRITE_ZEROES_DEFAULT;
    while (nb_sectors > 0 && !ret) {
        int num = nb_sectors;
        /* Align request.  Block drivers can expect the "bulk" of the request
         */
        if (bs->bl.write_zeroes_alignment
            && num > bs->bl.write_zeroes_alignment) {
            if (sector_num % bs->bl.write_zeroes_alignment != 0) {
                /* Make a small request up to the first aligned sector.  */
                num = bs->bl.write_zeroes_alignment;
                num -= sector_num % bs->bl.write_zeroes_alignment;
            } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
                /* Shorten the request to the last aligned sector.  num cannot
                 * underflow because num > bs->bl.write_zeroes_alignment.
                 */
                num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
        /* limit request size */
        if (num > max_write_zeroes) {
            num = max_write_zeroes;
        /* First try the efficient write zeroes operation */
        if (drv->bdrv_co_write_zeroes) {
            ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
        if (ret == -ENOTSUP) {
            /* Fall back to bounce buffer if write zeroes is unsupported */
            iov.iov_len = num * BDRV_SECTOR_SIZE;
            if (iov.iov_base == NULL) {
                /* Lazily allocate the zeroed bounce buffer on first use */
                iov.iov_base = qemu_blockalign(bs, num * BDRV_SECTOR_SIZE);
                memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
            qemu_iovec_init_external(&qiov, &iov, 1);
            ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);
            /* Keep bounce buffer around if it is big enough for all
             * all future requests.
             */
            if (num < max_write_zeroes) {
                qemu_vfree(iov.iov_base);
                iov.iov_base = NULL;
    qemu_vfree(iov.iov_base);
/*
 * Forwards an already correctly aligned write request to the BlockDriver.
 *
 * Runs before-write notifiers, dispatches to the zero-write or vectored
 * write callback, flushes in writethrough mode, and updates dirty bitmaps
 * and size bookkeeping.
 */
static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
    QEMUIOVector *qiov, int flags)
    BlockDriver *drv = bs->drv;
    int64_t sector_num = offset >> BDRV_SECTOR_BITS;
    unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
    waited = wait_serialising_requests(req);
    /* A serialising request must not have had to wait again afterwards */
    assert(!waited || !req->serialising);
    assert(req->overlap_offset <= offset);
    assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
    ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
        /* Do nothing, write notifier decided to fail this request */
    } else if (flags & BDRV_REQ_ZERO_WRITE) {
        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
        ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
        BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
        ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
    BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);
    /* cache=writethrough: flush after every successful write */
    if (ret == 0 && !bs->enable_write_cache) {
        ret = bdrv_co_flush(bs);
    bdrv_set_dirty(bs, sector_num, nb_sectors);
    /* Track the highest sector ever written (wr_highest_sector stat) */
    if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
        bs->wr_highest_sector = sector_num + nb_sectors - 1;
    if (bs->growable && ret >= 0) {
        bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
/*
 * Handle a write request in coroutine context
 *
 * Unaligned head/tail parts are handled with a read-modify-write cycle:
 * the missing bytes are read into bounce buffers which are spliced into a
 * local qiov around the caller's data before the aligned write.
 */
static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
    BdrvTrackedRequest req;
    /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
    uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
    uint8_t *head_buf = NULL;
    uint8_t *tail_buf = NULL;
    QEMUIOVector local_qiov;
    bool use_local_qiov = false;
    if (bs->read_only) {
    if (bdrv_check_byte_request(bs, offset, bytes)) {
    /* throttling disk I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, bytes, true);
    /*
     * Align write if necessary by performing a read-modify-write cycle.
     * Pad qiov with the read parts and be sure to have a tracked request not
     * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
     */
    tracked_request_begin(&req, bs, offset, bytes, true);
    if (offset & (align - 1)) {
        QEMUIOVector head_qiov;
        struct iovec head_iov;
        /* Serialise so no other write can race with our RMW of the head */
        mark_request_serialising(&req, align);
        wait_serialising_requests(&req);
        head_buf = qemu_blockalign(bs, align);
        head_iov = (struct iovec) {
            .iov_base   = head_buf,
        qemu_iovec_init_external(&head_qiov, &head_iov, 1);
        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
        ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
                                  align, &head_qiov, 0);
        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
        qemu_iovec_init(&local_qiov, qiov->niov + 2);
        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
        use_local_qiov = true;
        bytes += offset & (align - 1);
        offset = offset & ~(align - 1);
    if ((offset + bytes) & (align - 1)) {
        QEMUIOVector tail_qiov;
        struct iovec tail_iov;
        mark_request_serialising(&req, align);
        waited = wait_serialising_requests(&req);
        /* If the head already serialised us, nobody should have made us
         * wait again here */
        assert(!waited || !use_local_qiov);
        tail_buf = qemu_blockalign(bs, align);
        tail_iov = (struct iovec) {
            .iov_base   = tail_buf,
        qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
        ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
                                  align, &tail_qiov, 0);
        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
        if (!use_local_qiov) {
            qemu_iovec_init(&local_qiov, qiov->niov + 1);
            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
            use_local_qiov = true;
        /* Append only the part of the tail sector beyond the caller's data */
        tail_bytes = (offset + bytes) & (align - 1);
        qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
        bytes = ROUND_UP(bytes, align);
    ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
                               use_local_qiov ? &local_qiov : qiov,
    tracked_request_end(&req);
    if (use_local_qiov) {
        qemu_iovec_destroy(&local_qiov);
    qemu_vfree(head_buf);
    qemu_vfree(tail_buf);
3345 static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
3346 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3347 BdrvRequestFlags flags)
3349 if (nb_sectors < 0 || nb_sectors > (INT_MAX >> BDRV_SECTOR_BITS)) {
3353 return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
3354 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3357 int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
3358 int nb_sectors, QEMUIOVector *qiov)
3360 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
3362 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
3365 int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
3366 int64_t sector_num, int nb_sectors,
3367 BdrvRequestFlags flags)
3369 trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
3371 if (!(bs->open_flags & BDRV_O_UNMAP)) {
3372 flags &= ~BDRV_REQ_MAY_UNMAP;
3375 return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
3376 BDRV_REQ_ZERO_WRITE | flags);
3380 * Truncate file to 'offset' bytes (needed only for file protocols)
3382 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
3384 BlockDriver *drv = bs->drv;
3388 if (!drv->bdrv_truncate)
3392 if (bdrv_in_use(bs))
3394 ret = drv->bdrv_truncate(bs, offset);
3396 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
3397 bdrv_dev_resize_cb(bs);
3403 * Length of a allocated file in bytes. Sparse files are counted by actual
3404 * allocated space. Return < 0 if error or unknown.
3406 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
3408 BlockDriver *drv = bs->drv;
3412 if (drv->bdrv_get_allocated_file_size) {
3413 return drv->bdrv_get_allocated_file_size(bs);
3416 return bdrv_get_allocated_file_size(bs->file);
3422 * Length of a file in bytes. Return < 0 if error or unknown.
3424 int64_t bdrv_getlength(BlockDriverState *bs)
3426 BlockDriver *drv = bs->drv;
3430 if (drv->has_variable_length) {
3431 int ret = refresh_total_sectors(bs, bs->total_sectors);
3436 return bs->total_sectors * BDRV_SECTOR_SIZE;
3439 /* return 0 as number of sectors if no device present or error */
3440 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
3443 length = bdrv_getlength(bs);
3447 length = length >> BDRV_SECTOR_BITS;
3448 *nb_sectors_ptr = length;
3451 void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
3452 BlockdevOnError on_write_error)
3454 bs->on_read_error = on_read_error;
3455 bs->on_write_error = on_write_error;
3458 BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
3460 return is_read ? bs->on_read_error : bs->on_write_error;
3463 BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
3465 BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
3468 case BLOCKDEV_ON_ERROR_ENOSPC:
3469 return (error == ENOSPC) ? BDRV_ACTION_STOP : BDRV_ACTION_REPORT;
3470 case BLOCKDEV_ON_ERROR_STOP:
3471 return BDRV_ACTION_STOP;
3472 case BLOCKDEV_ON_ERROR_REPORT:
3473 return BDRV_ACTION_REPORT;
3474 case BLOCKDEV_ON_ERROR_IGNORE:
3475 return BDRV_ACTION_IGNORE;
3481 /* This is done by device models because, while the block layer knows
3482 * about the error, it does not know whether an operation comes from
3483 * the device or the block layer (from a job, for example).
3485 void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
3486 bool is_read, int error)
3489 bdrv_emit_qmp_error_event(bs, QEVENT_BLOCK_IO_ERROR, action, is_read);
3490 if (action == BDRV_ACTION_STOP) {
3491 vm_stop(RUN_STATE_IO_ERROR);
3492 bdrv_iostatus_set_err(bs, error);
3496 int bdrv_is_read_only(BlockDriverState *bs)
3498 return bs->read_only;
3501 int bdrv_is_sg(BlockDriverState *bs)
3506 int bdrv_enable_write_cache(BlockDriverState *bs)
3508 return bs->enable_write_cache;
3511 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
3513 bs->enable_write_cache = wce;
3515 /* so a reopen() will preserve wce */
3517 bs->open_flags |= BDRV_O_CACHE_WB;
3519 bs->open_flags &= ~BDRV_O_CACHE_WB;
3523 int bdrv_is_encrypted(BlockDriverState *bs)
3525 if (bs->backing_hd && bs->backing_hd->encrypted)
3527 return bs->encrypted;
3530 int bdrv_key_required(BlockDriverState *bs)
3532 BlockDriverState *backing_hd = bs->backing_hd;
3534 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
3536 return (bs->encrypted && !bs->valid_key);
3539 int bdrv_set_key(BlockDriverState *bs, const char *key)
3542 if (bs->backing_hd && bs->backing_hd->encrypted) {
3543 ret = bdrv_set_key(bs->backing_hd, key);
3549 if (!bs->encrypted) {
3551 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
3554 ret = bs->drv->bdrv_set_key(bs, key);
3557 } else if (!bs->valid_key) {
3559 /* call the change callback now, we skipped it on open */
3560 bdrv_dev_change_media_cb(bs, true);
3565 const char *bdrv_get_format_name(BlockDriverState *bs)
3567 return bs->drv ? bs->drv->format_name : NULL;
3570 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
3575 QLIST_FOREACH(drv, &bdrv_drivers, list) {
3576 it(opaque, drv->format_name);
3580 /* This function is to find block backend bs */
3581 BlockDriverState *bdrv_find(const char *name)
3583 BlockDriverState *bs;
3585 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3586 if (!strcmp(name, bs->device_name)) {
3593 /* This function is to find a node in the bs graph */
3594 BlockDriverState *bdrv_find_node(const char *node_name)
3596 BlockDriverState *bs;
3600 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3601 if (!strcmp(node_name, bs->node_name)) {
3608 /* Put this QMP function here so it can access the static graph_bdrv_states. */
3609 BlockDeviceInfoList *bdrv_named_nodes_list(void)
3611 BlockDeviceInfoList *list, *entry;
3612 BlockDriverState *bs;
3615 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3616 entry = g_malloc0(sizeof(*entry));
3617 entry->value = bdrv_block_device_info(bs);
3625 BlockDriverState *bdrv_lookup_bs(const char *device,
3626 const char *node_name,
3629 BlockDriverState *bs = NULL;
3632 bs = bdrv_find(device);
3640 bs = bdrv_find_node(node_name);
3647 error_setg(errp, "Cannot find device=%s nor node_name=%s",
3648 device ? device : "",
3649 node_name ? node_name : "");
3653 BlockDriverState *bdrv_next(BlockDriverState *bs)
3656 return QTAILQ_FIRST(&bdrv_states);
3658 return QTAILQ_NEXT(bs, device_list);
3661 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
3663 BlockDriverState *bs;
3665 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3670 const char *bdrv_get_device_name(BlockDriverState *bs)
3672 return bs->device_name;
3675 int bdrv_get_flags(BlockDriverState *bs)
3677 return bs->open_flags;
3680 int bdrv_flush_all(void)
3682 BlockDriverState *bs;
3685 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3686 int ret = bdrv_flush(bs);
3687 if (ret < 0 && !result) {
3695 int bdrv_has_zero_init_1(BlockDriverState *bs)
3700 int bdrv_has_zero_init(BlockDriverState *bs)
3704 /* If BS is a copy on write image, it is initialized to
3705 the contents of the base image, which may not be zeroes. */
3706 if (bs->backing_hd) {
3709 if (bs->drv->bdrv_has_zero_init) {
3710 return bs->drv->bdrv_has_zero_init(bs);
3717 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
3719 BlockDriverInfo bdi;
3721 if (bs->backing_hd) {
3725 if (bdrv_get_info(bs, &bdi) == 0) {
3726 return bdi.unallocated_blocks_are_zero;
3732 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
3734 BlockDriverInfo bdi;
3736 if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
3740 if (bdrv_get_info(bs, &bdi) == 0) {
3741 return bdi.can_write_zeroes_with_unmap;
3747 typedef struct BdrvCoGetBlockStatusData {
3748 BlockDriverState *bs;
3749 BlockDriverState *base;
3755 } BdrvCoGetBlockStatusData;
/*
 * Returns true iff the specified sector is present in the disk image. Drivers
 * not implementing the functionality are assumed to not support backing files,
 * hence all their sectors are reported as allocated.
 *
 * If 'sector_num' is beyond the end of the disk image the return value is 0
 * and 'pnum' is set to 0.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 * the specified sector) that are known to be in the same
 * allocated/unallocated state.
 *
 * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
 * beyond the end of the disk image it will be clamped.
 */
static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
                                                     int nb_sectors, int *pnum)
    length = bdrv_getlength(bs);
    if (sector_num >= (length >> BDRV_SECTOR_BITS)) {
    /* Clamp nb_sectors to the end of the image */
    n = bs->total_sectors - sector_num;
    if (n < nb_sectors) {
    if (!bs->drv->bdrv_co_get_block_status) {
        /* Driver cannot report holes: report everything as allocated data */
        ret = BDRV_BLOCK_DATA;
        if (bs->drv->protocol_name) {
            /* Protocol drivers map 1:1, so the offset is trivially valid */
            ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
    ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
    if (ret & BDRV_BLOCK_RAW) {
        /* RAW means "ask bs->file at the returned offset" */
        assert(ret & BDRV_BLOCK_OFFSET_VALID);
        return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
    if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
        if (bdrv_unallocated_blocks_are_zero(bs)) {
            ret |= BDRV_BLOCK_ZERO;
        } else if (bs->backing_hd) {
            /* Beyond the end of a shorter backing file reads as zeroes */
            BlockDriverState *bs2 = bs->backing_hd;
            int64_t length2 = bdrv_getlength(bs2);
            if (length2 >= 0 && sector_num >= (length2 >> BDRV_SECTOR_BITS)) {
                ret |= BDRV_BLOCK_ZERO;
        (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
        (ret & BDRV_BLOCK_OFFSET_VALID)) {
        /* Refine with the protocol layer's view of the mapped range */
        ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
        /* Ignore errors.  This is just providing extra information, it
         * is useful but not necessary.
         */
        ret |= (ret2 & BDRV_BLOCK_ZERO);
3844 /* Coroutine wrapper for bdrv_get_block_status() */
3845 static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
3847 BdrvCoGetBlockStatusData *data = opaque;
3848 BlockDriverState *bs = data->bs;
3850 data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
3856 * Synchronous wrapper around bdrv_co_get_block_status().
3858 * See bdrv_co_get_block_status() for details.
3860 int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
3861 int nb_sectors, int *pnum)
3864 BdrvCoGetBlockStatusData data = {
3866 .sector_num = sector_num,
3867 .nb_sectors = nb_sectors,
3872 if (qemu_in_coroutine()) {
3873 /* Fast-path if already in coroutine context */
3874 bdrv_get_block_status_co_entry(&data);
3876 co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
3877 qemu_coroutine_enter(co, &data);
3878 while (!data.done) {
3885 int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
3886 int nb_sectors, int *pnum)
3888 int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
3893 (ret & BDRV_BLOCK_DATA) ||
3894 ((ret & BDRV_BLOCK_ZERO) && !bdrv_has_zero_init(bs));
/*
 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
 *
 * Return true if the given sector is allocated in any image between
 * BASE and TOP (inclusive).  BASE can be NULL to check if the given
 * sector is allocated in any image of the chain.  Return false otherwise.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 * the specified sector) that are known to be in the same
 * allocated/unallocated state.
 */
int bdrv_is_allocated_above(BlockDriverState *top,
                            BlockDriverState *base,
                            int nb_sectors, int *pnum)
    BlockDriverState *intermediate;
    int ret, n = nb_sectors;

    /* Walk down the chain from top towards (but excluding) base */
    while (intermediate && intermediate != base) {
        ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
        /*
         * [sector_num, nb_sectors] is unallocated on top but intermediate
         * [sector_num+x, nr_sectors] allocated.
         */
        /* Shrink n to the shortest unallocated run seen so far, capped at
         * the intermediate image's own length */
        if (n > pnum_inter &&
            (intermediate == top ||
             sector_num + pnum_inter < intermediate->total_sectors)) {
        intermediate = intermediate->backing_hd;
3948 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
3950 if (bs->backing_hd && bs->backing_hd->encrypted)
3951 return bs->backing_file;
3952 else if (bs->encrypted)
3953 return bs->filename;
3958 void bdrv_get_backing_filename(BlockDriverState *bs,
3959 char *filename, int filename_size)
3961 pstrcpy(filename, filename_size, bs->backing_file);
3964 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
3965 const uint8_t *buf, int nb_sectors)
3967 BlockDriver *drv = bs->drv;
3970 if (!drv->bdrv_write_compressed)
3972 if (bdrv_check_request(bs, sector_num, nb_sectors))
3975 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
3977 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
3980 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
3982 BlockDriver *drv = bs->drv;
3985 if (!drv->bdrv_get_info)
3987 memset(bdi, 0, sizeof(*bdi));
3988 return drv->bdrv_get_info(bs, bdi);
3991 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
3993 BlockDriver *drv = bs->drv;
3994 if (drv && drv->bdrv_get_specific_info) {
3995 return drv->bdrv_get_specific_info(bs);
4000 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
4001 int64_t pos, int size)
4004 struct iovec iov = {
4005 .iov_base = (void *) buf,
4009 qemu_iovec_init_external(&qiov, &iov, 1);
4010 return bdrv_writev_vmstate(bs, &qiov, pos);
4013 int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
4015 BlockDriver *drv = bs->drv;
4019 } else if (drv->bdrv_save_vmstate) {
4020 return drv->bdrv_save_vmstate(bs, qiov, pos);
4021 } else if (bs->file) {
4022 return bdrv_writev_vmstate(bs->file, qiov, pos);
4028 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
4029 int64_t pos, int size)
4031 BlockDriver *drv = bs->drv;
4034 if (drv->bdrv_load_vmstate)
4035 return drv->bdrv_load_vmstate(bs, buf, pos, size);
4037 return bdrv_load_vmstate(bs->file, buf, pos, size);
4041 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
4043 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
4047 bs->drv->bdrv_debug_event(bs, event);
4050 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
4053 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
4057 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
4058 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
4064 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
4066 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
4070 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
4071 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
4077 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
4079 while (bs && bs->drv && !bs->drv->bdrv_debug_resume) {
4083 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
4084 return bs->drv->bdrv_debug_resume(bs, tag);
4090 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
4092 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
4096 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
4097 return bs->drv->bdrv_debug_is_suspended(bs, tag);
4103 int bdrv_is_snapshot(BlockDriverState *bs)
4105 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
/* backing_file can either be relative, or absolute, or a protocol.  If it is
 * relative, it must be relative to the chain.  So, passing in bs->filename
 * from a BDS as backing_file should not be done, as that may be relative to
 * the CWD rather than the chain. */
BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
                                          const char *backing_file)
    char *filename_full = NULL;
    char *backing_file_full = NULL;
    char *filename_tmp = NULL;
    int is_protocol = 0;
    BlockDriverState *curr_bs = NULL;
    BlockDriverState *retval = NULL;

    if (!bs || !bs->drv || !backing_file) {
    /* Scratch buffers for building and canonicalising candidate paths */
    filename_full     = g_malloc(PATH_MAX);
    backing_file_full = g_malloc(PATH_MAX);
    filename_tmp      = g_malloc(PATH_MAX);

    is_protocol = path_has_protocol(backing_file);

    for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {

        /* If either of the filename paths is actually a protocol, then
         * compare unmodified paths; otherwise make paths relative */
        if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
            if (strcmp(backing_file, curr_bs->backing_file) == 0) {
                retval = curr_bs->backing_hd;
            /* If not an absolute filename path, make it relative to the current
             * image's filename path */
            path_combine(filename_tmp, PATH_MAX, curr_bs->filename,

            /* We are going to compare absolute pathnames */
            if (!realpath(filename_tmp, filename_full)) {

            /* We need to make sure the backing filename we are comparing against
             * is relative to the current image filename (or absolute) */
            path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
                         curr_bs->backing_file);

            if (!realpath(filename_tmp, backing_file_full)) {

            if (strcmp(backing_file_full, filename_full) == 0) {
                retval = curr_bs->backing_hd;

    g_free(filename_full);
    g_free(backing_file_full);
    g_free(filename_tmp);
4174 int bdrv_get_backing_file_depth(BlockDriverState *bs)
4180 if (!bs->backing_hd) {
4184 return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
4187 BlockDriverState *bdrv_find_base(BlockDriverState *bs)
4189 BlockDriverState *curr_bs = NULL;
4197 while (curr_bs->backing_hd) {
4198 curr_bs = curr_bs->backing_hd;
4203 /**************************************************************/
4206 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
4207 QEMUIOVector *qiov, int nb_sectors,
4208 BlockDriverCompletionFunc *cb, void *opaque)
4210 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
4212 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
4216 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
4217 QEMUIOVector *qiov, int nb_sectors,
4218 BlockDriverCompletionFunc *cb, void *opaque)
4220 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
4222 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
4226 BlockDriverAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
4227 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
4228 BlockDriverCompletionFunc *cb, void *opaque)
4230 trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);
4232 return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
4233 BDRV_REQ_ZERO_WRITE | flags,
4238 typedef struct MultiwriteCB {
4243 BlockDriverCompletionFunc *cb;
4245 QEMUIOVector *free_qiov;
4249 static void multiwrite_user_cb(MultiwriteCB *mcb)
4253 for (i = 0; i < mcb->num_callbacks; i++) {
4254 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
4255 if (mcb->callbacks[i].free_qiov) {
4256 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
4258 g_free(mcb->callbacks[i].free_qiov);
4262 static void multiwrite_cb(void *opaque, int ret)
4264 MultiwriteCB *mcb = opaque;
4266 trace_multiwrite_cb(mcb, ret);
4268 if (ret < 0 && !mcb->error) {
4272 mcb->num_requests--;
4273 if (mcb->num_requests == 0) {
4274 multiwrite_user_cb(mcb);
4279 static int multiwrite_req_compare(const void *a, const void *b)
4281 const BlockRequest *req1 = a, *req2 = b;
4284 * Note that we can't simply subtract req2->sector from req1->sector
4285 * here as that could overflow the return value.
4287 if (req1->sector > req2->sector) {
4289 } else if (req1->sector < req2->sector) {
/*
 * Takes a bunch of requests and tries to merge them. Returns the number of
 * requests that remain after merging.
 */
static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
    int num_reqs, MultiwriteCB *mcb)
    // Sort requests by start sector
    qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);

    // Check if adjacent requests touch the same clusters. If so, combine them,
    // filling up gaps with zero sectors.
    for (i = 1; i < num_reqs; i++) {
        int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;

        // Handle exactly sequential writes and overlapping writes.
        if (reqs[i].sector <= oldreq_last) {
            // Don't merge if the combined iovec would exceed IOV_MAX entries
            if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
            QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
            qemu_iovec_init(qiov,
                reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);

            // Add the first request to the merged one. If the requests are
            // overlapping, drop the last sectors of the first request.
            size = (reqs[i].sector - reqs[outidx].sector) << 9;
            qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);

            // We shouldn't need to add any zeros between the two requests
            assert (reqs[i].sector <= oldreq_last);

            // Add the second request
            qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);

            reqs[outidx].nb_sectors = qiov->size >> 9;
            reqs[outidx].qiov = qiov;

            // Remember the merged qiov so multiwrite_user_cb() can free it
            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
            // Not mergeable: move request i up to slot outidx
            reqs[outidx].sector     = reqs[i].sector;
            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
            reqs[outidx].qiov       = reqs[i].qiov;
/*
 * Submit multiple AIO write requests at once.
 *
 * On success, the function returns 0 and all requests in the reqs array have
 * been submitted. In error case this function returns -1, and any of the
 * requests may or may not be submitted yet. In particular, this means that the
 * callback will be called for some of the requests, for others it won't. The
 * caller must check the error field of the BlockRequest to wait for the right
 * callbacks (if error != 0, no callback will be called).
 *
 * The implementation may modify the contents of the reqs array, e.g. to merge
 * requests. However, the fields opaque and error are left unmodified as they
 * are used to signal failure for a single request to the caller.
 */
int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
    /* don't submit writes if we don't have a medium */
    if (bs->drv == NULL) {
        for (i = 0; i < num_reqs; i++) {
            reqs[i].error = -ENOMEDIUM;

    /* empty batch: nothing to do */
    if (num_reqs == 0) {

    // Create MultiwriteCB structure
    mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
    mcb->num_requests = 0;
    mcb->num_callbacks = num_reqs;

    /* save per-request callbacks before merging overwrites reqs[] */
    for (i = 0; i < num_reqs; i++) {
        mcb->callbacks[i].cb = reqs[i].cb;
        mcb->callbacks[i].opaque = reqs[i].opaque;

    // Check for mergable requests
    num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);

    trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);

    /* Run the aio requests. */
    mcb->num_requests = num_reqs;
    for (i = 0; i < num_reqs; i++) {
        bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
                              reqs[i].nb_sectors, reqs[i].flags,
4414 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
4416 acb->aiocb_info->cancel(acb);
4419 /**************************************************************/
4420 /* async block device emulation */
/*
 * AIOCB for the synchronous-driver emulation path (bdrv_aio_rw_vector):
 * the request runs synchronously through a bounce buffer and completion is
 * delivered from a bottom half.
 * NOTE(review): bh/ret/qiov/bounce/is_write members are elided in this excerpt.
 */
typedef struct BlockDriverAIOCBSync {
    BlockDriverAIOCB common;
    /* vector translation state */
} BlockDriverAIOCBSync;
/*
 * Cancel path for a sync-emulated AIOCB: drop the pending completion
 * bottom half and release the AIOCB.
 */
static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
    BlockDriverAIOCBSync *acb =
        container_of(blockacb, BlockDriverAIOCBSync, common);
    qemu_bh_delete(acb->bh);
    qemu_aio_release(acb);
/* AIOCB implementation descriptor for the synchronous emulation path. */
static const AIOCBInfo bdrv_em_aiocb_info = {
    .aiocb_size = sizeof(BlockDriverAIOCBSync),
    .cancel = bdrv_aio_cancel_em,
/*
 * Bottom half completing a sync-emulated request: copy data out of the
 * bounce buffer into the caller's qiov, invoke the guest completion
 * callback, then tear down the BH and the AIOCB.
 * NOTE(review): the copy-out is presumably guarded by !is_write in the
 * full source; the branch line is elided here — confirm.
 */
static void bdrv_aio_bh_cb(void *opaque)
    BlockDriverAIOCBSync *acb = opaque;

    qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
    qemu_vfree(acb->bounce);
    acb->common.cb(acb->common.opaque, acb->ret);
    qemu_bh_delete(acb->bh);
    qemu_aio_release(acb);
/*
 * Emulate AIO on top of a driver's synchronous bdrv_read/bdrv_write:
 * run the I/O immediately through a bounce buffer, then schedule a bottom
 * half so the completion callback is still delivered asynchronously.
 * NOTE(review): the if/else selecting the write vs read branch is elided
 * in this excerpt — both calls below belong to opposite branches.
 */
static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
                                            BlockDriverCompletionFunc *cb,
    BlockDriverAIOCBSync *acb;

    acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
    acb->is_write = is_write;
    acb->bounce = qemu_blockalign(bs, qiov->size);
    acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);

    /* write path: gather the caller's vector into the bounce buffer first */
    qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
    acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
    acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);

    qemu_bh_schedule(acb->bh);

    return &acb->common;
4488 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
4489 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
4490 BlockDriverCompletionFunc *cb, void *opaque)
4492 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
4495 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
4496 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
4497 BlockDriverCompletionFunc *cb, void *opaque)
4499 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
/*
 * AIOCB for the coroutine-based AIO emulation; completion is signalled
 * from a bottom half after the request coroutine terminates.
 * NOTE(review): the request-state members are elided in this excerpt.
 */
typedef struct BlockDriverAIOCBCoroutine {
    BlockDriverAIOCB common;
} BlockDriverAIOCBCoroutine;
/*
 * Cancel path for coroutine AIOCBs.
 * NOTE(review): the wait-until-done logic is elided in this excerpt —
 * only the container_of recovery of the AIOCB is visible.
 */
static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
    BlockDriverAIOCBCoroutine *acb =
        container_of(blockacb, BlockDriverAIOCBCoroutine, common);
/* AIOCB implementation descriptor for the coroutine emulation path. */
static const AIOCBInfo bdrv_em_co_aiocb_info = {
    .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
    .cancel = bdrv_aio_co_cancel_em,
/*
 * Bottom half run once the request coroutine has finished: deliver the
 * guest completion callback with the stored error code, then dispose of
 * the BH and the AIOCB.
 */
static void bdrv_co_em_bh(void *opaque)
    BlockDriverAIOCBCoroutine *acb = opaque;

    acb->common.cb(acb->common.opaque, acb->req.error);

    qemu_bh_delete(acb->bh);
    qemu_aio_release(acb);
/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
/*
 * Coroutine entry point for emulated AIO read/write: perform the stored
 * request through the coroutine I/O path, then schedule a bottom half to
 * report completion.
 */
static void coroutine_fn bdrv_co_do_rw(void *opaque)
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    if (!acb->is_write) {
        acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
            acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
        /* else branch (elided here): the write path */
        acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
            acb->req.nb_sectors, acb->req.qiov, acb->req.flags);

    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
/*
 * Build a coroutine-backed AIOCB for a read or write: record the request
 * parameters in the AIOCB and kick off the bdrv_co_do_rw coroutine.
 * Returns the AIOCB; completion is reported through cb/opaque via a BH.
 */
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               BdrvRequestFlags flags,
                                               BlockDriverCompletionFunc *cb,
    BlockDriverAIOCBCoroutine *acb;

    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
    acb->req.sector = sector_num;
    acb->req.nb_sectors = nb_sectors;
    acb->req.qiov = qiov;
    acb->req.flags = flags;
    acb->is_write = is_write;

    co = qemu_coroutine_create(bdrv_co_do_rw);
    qemu_coroutine_enter(co, acb);

    return &acb->common;
/*
 * Coroutine entry point backing bdrv_aio_flush(): run the flush, then
 * schedule a bottom half to complete the AIOCB.
 */
static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    acb->req.error = bdrv_co_flush(bs);
    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
/*
 * Asynchronous flush: wrap bdrv_co_flush() in a coroutine-backed AIOCB.
 * cb/opaque are invoked from a bottom half once the flush completes.
 */
BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
        BlockDriverCompletionFunc *cb, void *opaque)
    trace_bdrv_aio_flush(bs, opaque);

    BlockDriverAIOCBCoroutine *acb;

    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);

    co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
    qemu_coroutine_enter(co, acb);

    return &acb->common;
/*
 * Coroutine entry point backing bdrv_aio_discard(): run the discard over
 * the range stored in the AIOCB, then schedule the completion BH.
 */
static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
/*
 * Asynchronous discard of [sector_num, sector_num + nb_sectors):
 * record the range in a coroutine-backed AIOCB and start the coroutine.
 * cb/opaque are invoked from a bottom half on completion.
 */
BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
    BlockDriverAIOCBCoroutine *acb;

    trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);

    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
    acb->req.sector = sector_num;
    acb->req.nb_sectors = nb_sectors;

    co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
    qemu_coroutine_enter(co, acb);

    return &acb->common;
/* Register all built-in block drivers (runs their module init hooks). */
void bdrv_init(void)
    module_call_init(MODULE_INIT_BLOCK);
/* Like bdrv_init(), but enable the configured block-driver whitelist so
 * only whitelisted formats may be used. */
void bdrv_init_with_whitelist(void)
    use_bdrv_whitelist = 1;
/*
 * Allocate and minimally initialise an AIOCB of the size requested by the
 * implementation descriptor.  The caller's completion cb/opaque are stored
 * for later delivery.
 */
void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
        BlockDriverCompletionFunc *cb, void *opaque)
    BlockDriverAIOCB *acb;

    /* sized per implementation so the embedded 'common' header fits in front */
    acb = g_slice_alloc(aiocb_info->aiocb_size);
    acb->aiocb_info = aiocb_info;
    acb->opaque = opaque;
/* Free an AIOCB previously allocated with qemu_aio_get(); the size comes
 * from its implementation descriptor. */
void qemu_aio_release(void *p)
    BlockDriverAIOCB *acb = p;
    g_slice_free1(acb->aiocb_info->aiocb_size, acb);
4672 /**************************************************************/
4673 /* Coroutine block device emulation */
/*
 * Bridge between the callback-based AIO API and coroutines: records which
 * coroutine to re-enter when the AIO request completes.
 * NOTE(review): the 'ret' member is elided in this excerpt.
 */
typedef struct CoroutineIOCompletion {
    Coroutine *coroutine;   /* coroutine yielded in bdrv_co_io_em()/flush/discard */
} CoroutineIOCompletion;
/*
 * AIO completion callback for the coroutine bridge: store the result
 * (elided here) and wake the coroutine waiting on this request.
 */
static void bdrv_co_io_em_complete(void *opaque, int ret)
    CoroutineIOCompletion *co = opaque;

    qemu_coroutine_enter(co->coroutine, NULL);
/*
 * Run a driver's callback-style bdrv_aio_readv/bdrv_aio_writev from a
 * coroutine: submit the request with a completion that re-enters us, then
 * yield until it finishes.
 */
static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, QEMUIOVector *iov,
    CoroutineIOCompletion co = {
        .coroutine = qemu_coroutine_self(),
    BlockDriverAIOCB *acb;

    /* write branch */
    acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
                                   bdrv_co_io_em_complete, &co);
    /* read branch */
    acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
                                  bdrv_co_io_em_complete, &co);

    trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);

    /* wait here until bdrv_co_io_em_complete() re-enters the coroutine */
    qemu_coroutine_yield();
/* Coroutine read implemented on top of the driver's AIO interface. */
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors,
    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
/* Coroutine write implemented on top of the driver's AIO interface. */
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors,
    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
/* Coroutine trampoline used by bdrv_flush(): run the flush and store the
 * result in the shared RwCo so the synchronous caller can pick it up. */
static void coroutine_fn bdrv_flush_co_entry(void *opaque)
    RwCo *rwco = opaque;

    rwco->ret = bdrv_co_flush(rwco->bs);
/*
 * Flush cached data for bs: first to the OS (always), then to the disk
 * unless BDRV_O_NO_FLUSH (cache=unsafe) is set, finally recursing into
 * the underlying protocol (bs->file).  Returns 0 or negative errno.
 */
int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
    /* nothing to do for missing/ejected/read-only media */
    if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {

    /* Write back cached data to the OS even with cache=unsafe */
    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
    if (bs->drv->bdrv_co_flush_to_os) {
        ret = bs->drv->bdrv_co_flush_to_os(bs);

    /* But don't actually force it to the disk with cache=unsafe */
    if (bs->open_flags & BDRV_O_NO_FLUSH) {

    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
    if (bs->drv->bdrv_co_flush_to_disk) {
        ret = bs->drv->bdrv_co_flush_to_disk(bs);
    } else if (bs->drv->bdrv_aio_flush) {
        /* only an AIO flush is available: bridge it to this coroutine */
        BlockDriverAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),

        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
        qemu_coroutine_yield();

        /*
         * Some block drivers always operate in either writethrough or unsafe
         * mode and don't support bdrv_flush therefore. Usually qemu doesn't
         * know how the server works (because the behaviour is hardcoded or
         * depends on server-side configuration), so we can't ensure that
         * everything is safe on disk. Returning an error doesn't work because
         * that would break guests even if the server operates in writethrough
         * mode.
         *
         * Let's hope the user knows what he's doing.
         */

    /* Now flush the underlying protocol.  It will also have BDRV_O_NO_FLUSH
     * in the case of cache=unsafe, so there are no useless flushes.
     */
    return bdrv_co_flush(bs->file);
/* Drop any metadata/data the driver has cached, if it supports doing so
 * (used e.g. after another process may have modified the image). */
void bdrv_invalidate_cache(BlockDriverState *bs)
    if (bs->drv && bs->drv->bdrv_invalidate_cache) {
        bs->drv->bdrv_invalidate_cache(bs);
/* Invalidate the caches of every registered BlockDriverState. */
void bdrv_invalidate_cache_all(void)
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
        bdrv_invalidate_cache(bs);
/* Clear the BDRV_O_INCOMING flag on all devices once incoming migration
 * has finished. */
void bdrv_clear_incoming_migration_all(void)
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
        bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
/*
 * Synchronous flush wrapper around bdrv_co_flush(): runs the coroutine
 * entry directly when already in coroutine context, otherwise spawns a
 * coroutine and polls until the NOT_DONE sentinel is replaced by a result.
 */
int bdrv_flush(BlockDriverState *bs)
    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_flush_co_entry(&rwco);
        co = qemu_coroutine_create(bdrv_flush_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
/*
 * Parameter/result capsule passed from bdrv_discard() into the discard
 * coroutine.  NOTE(review): sector_num/nb_sectors/ret members are elided
 * in this excerpt.
 */
typedef struct DiscardCo {
    BlockDriverState *bs;   /* device to discard from */
/* Coroutine trampoline used by bdrv_discard(): run the discard and store
 * the result in the shared DiscardCo. */
static void coroutine_fn bdrv_discard_co_entry(void *opaque)
    DiscardCo *rwco = opaque;

    rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
4858 /* if no limit is specified in the BlockLimits use a default
4859 * of 32768 512-byte sectors (16 MiB) per request.
4861 #define MAX_DISCARD_DEFAULT 32768
4863 int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
4870 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
4872 } else if (bs->read_only) {
4876 bdrv_reset_dirty(bs, sector_num, nb_sectors);
4878 /* Do nothing if disabled. */
4879 if (!(bs->open_flags & BDRV_O_UNMAP)) {
4883 if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
4887 max_discard = bs->bl.max_discard ? bs->bl.max_discard : MAX_DISCARD_DEFAULT;
4888 while (nb_sectors > 0) {
4890 int num = nb_sectors;
4893 if (bs->bl.discard_alignment &&
4894 num >= bs->bl.discard_alignment &&
4895 sector_num % bs->bl.discard_alignment) {
4896 if (num > bs->bl.discard_alignment) {
4897 num = bs->bl.discard_alignment;
4899 num -= sector_num % bs->bl.discard_alignment;
4902 /* limit request size */
4903 if (num > max_discard) {
4907 if (bs->drv->bdrv_co_discard) {
4908 ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
4910 BlockDriverAIOCB *acb;
4911 CoroutineIOCompletion co = {
4912 .coroutine = qemu_coroutine_self(),
4915 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
4916 bdrv_co_io_em_complete, &co);
4920 qemu_coroutine_yield();
4924 if (ret && ret != -ENOTSUP) {
/*
 * Synchronous discard wrapper around bdrv_co_discard(): runs inline when
 * already in coroutine context, otherwise spawns a coroutine and polls
 * until the NOT_DONE sentinel is replaced by the result.
 */
int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_discard_co_entry(&rwco);
        co = qemu_coroutine_create(bdrv_discard_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
4958 /**************************************************************/
4959 /* removable device support */
/**
 * Return TRUE if the media is present.
 * Drivers without an is_inserted hook are treated as always inserted
 * (the early-return lines are elided in this excerpt).
 */
int bdrv_is_inserted(BlockDriverState *bs)
    BlockDriver *drv = bs->drv;

    if (!drv->bdrv_is_inserted)
    return drv->bdrv_is_inserted(bs);
/**
 * Return whether the media changed since the last call to this
 * function, or -ENOTSUP if we don't know. Most drivers don't know.
 */
int bdrv_media_changed(BlockDriverState *bs)
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_media_changed) {
        return drv->bdrv_media_changed(bs);
/**
 * If eject_flag is TRUE, eject the media. Otherwise, close the tray.
 * A QMP eject event is emitted for named (guest-visible) devices.
 */
void bdrv_eject(BlockDriverState *bs, bool eject_flag)
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_eject) {
        drv->bdrv_eject(bs, eject_flag);

    /* anonymous (internal) BDSes have no device name and emit no event */
    if (bs->device_name[0] != '\0') {
        bdrv_emit_qmp_eject_event(bs, eject_flag);
/**
 * Lock or unlock the media (if it is locked, the user won't be able
 * to eject it manually).
 */
void bdrv_lock_medium(BlockDriverState *bs, bool locked)
    BlockDriver *drv = bs->drv;

    trace_bdrv_lock_medium(bs, locked);

    if (drv && drv->bdrv_lock_medium) {
        drv->bdrv_lock_medium(bs, locked);
5020 /* needed for generic scsi interface */
5022 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
5024 BlockDriver *drv = bs->drv;
5026 if (drv && drv->bdrv_ioctl)
5027 return drv->bdrv_ioctl(bs, req, buf);
5031 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
5032 unsigned long int req, void *buf,
5033 BlockDriverCompletionFunc *cb, void *opaque)
5035 BlockDriver *drv = bs->drv;
5037 if (drv && drv->bdrv_aio_ioctl)
5038 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
/* Record the block size the guest uses, for alignment decisions. */
void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
    bs->guest_block_size = align;
5047 void *qemu_blockalign(BlockDriverState *bs, size_t size)
5049 return qemu_memalign(bdrv_opt_mem_align(bs), size);
/*
 * Check if all memory in this vector is sector aligned.
 * Both the base address and the length of every iovec element must be a
 * multiple of the device's optimal memory alignment.
 */
bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
    size_t alignment = bdrv_opt_mem_align(bs);

    for (i = 0; i < qiov->niov; i++) {
        if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
        if (qiov->iov[i].iov_len % alignment) {
/*
 * Create a dirty bitmap covering the whole device with the given
 * granularity (bytes, must be a power of two and >= one sector) and link
 * it into the device's bitmap list.
 */
BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity)
    int64_t bitmap_size;
    BdrvDirtyBitmap *bitmap;

    /* granularity must be a power of two */
    assert((granularity & (granularity - 1)) == 0);

    /* convert byte granularity to sectors */
    granularity >>= BDRV_SECTOR_BITS;
    assert(granularity);
    bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS);
    bitmap = g_malloc0(sizeof(BdrvDirtyBitmap));
    /* ffs(granularity) - 1 == log2(granularity): bits per hbitmap level-0 entry */
    bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
    QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
/*
 * Unlink the given bitmap from the device's list and free it.
 * Safe iteration is used because the entry is removed while walking.
 */
void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
    BdrvDirtyBitmap *bm, *next;
    QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
            QLIST_REMOVE(bitmap, list);
            hbitmap_free(bitmap->bitmap);
/*
 * Build a QAPI BlockDirtyInfoList describing every dirty bitmap of bs
 * (dirty count and byte granularity).  Caller owns the returned list.
 */
BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
    BdrvDirtyBitmap *bm;
    BlockDirtyInfoList *list = NULL;
    /* tail pointer so entries are appended in list order */
    BlockDirtyInfoList **plist = &list;

    QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
        BlockDirtyInfo *info = g_malloc0(sizeof(BlockDirtyInfo));
        BlockDirtyInfoList *entry = g_malloc0(sizeof(BlockDirtyInfoList));
        info->count = bdrv_get_dirty_count(bs, bm);
            ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bm->bitmap));
        entry->value = info;
        plist = &entry->next;
/* Return nonzero when 'sector' is marked dirty in the given bitmap. */
int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
        return hbitmap_get(bitmap->bitmap, sector);
/* Initialise an iterator over the dirty bitmap, starting at sector 0. */
void bdrv_dirty_iter_init(BlockDriverState *bs,
                          BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
    hbitmap_iter_init(hbi, bitmap->bitmap, 0);
/* Mark [cur_sector, cur_sector + nr_sectors) dirty in every bitmap of bs. */
void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
    BdrvDirtyBitmap *bitmap;
    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
        hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
/* Clear [cur_sector, cur_sector + nr_sectors) in every bitmap of bs. */
void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors)
    BdrvDirtyBitmap *bitmap;
    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
        hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
/* Return the number of dirty sectors recorded in the bitmap. */
int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
    return hbitmap_count(bitmap->bitmap);
/* Get a reference to bs */
/* NOTE(review): the refcount increment is elided in this excerpt. */
void bdrv_ref(BlockDriverState *bs)
/* Release a previously grabbed reference to bs.
 * If after releasing, reference count is zero, the BlockDriverState is
 * deleted.
 */
void bdrv_unref(BlockDriverState *bs)
    assert(bs->refcnt > 0);
    if (--bs->refcnt == 0) {
/* Mark the device as in use (or not); asserts the flag actually changes. */
void bdrv_set_in_use(BlockDriverState *bs, int in_use)
    assert(bs->in_use != in_use);
    bs->in_use = in_use;
/* Query the in-use flag set by bdrv_set_in_use().
 * NOTE(review): body is elided here — presumably returns bs->in_use. */
int bdrv_in_use(BlockDriverState *bs)
/* Enable I/O status tracking for the device and reset it to OK. */
void bdrv_iostatus_enable(BlockDriverState *bs)
    bs->iostatus_enabled = true;
    bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
/* The I/O status is only enabled if the drive explicitly
 * enables it _and_ the VM is configured to stop on errors */
bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
    return (bs->iostatus_enabled &&
           (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
            bs->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
            bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
/* Turn off I/O status tracking for the device. */
void bdrv_iostatus_disable(BlockDriverState *bs)
    bs->iostatus_enabled = false;
/* Reset the I/O status to OK (if tracking is enabled) and also reset the
 * iostatus of an attached block job. */
void bdrv_iostatus_reset(BlockDriverState *bs)
    if (bdrv_iostatus_is_enabled(bs)) {
        bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
            block_job_iostatus_reset(bs->job);
/* Record an I/O error in the device status: ENOSPC maps to NOSPACE,
 * anything else to FAILED.  Only the first error is kept. */
void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
    assert(bdrv_iostatus_is_enabled(bs));
    if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
        bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
                                         BLOCK_DEVICE_IO_STATUS_FAILED;
/* Begin accounting one I/O operation: stamp the cookie with the byte
 * count, a start timestamp and the operation type. */
bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
                enum BlockAcctType type)
    assert(type < BDRV_MAX_IOTYPE);

    cookie->bytes = bytes;
    cookie->start_time_ns = get_clock();
    cookie->type = type;
/* Finish accounting one I/O operation: fold the cookie's byte count,
 * operation count and elapsed time into the per-type device totals. */
bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
    assert(cookie->type < BDRV_MAX_IOTYPE);

    bs->nr_bytes[cookie->type] += cookie->bytes;
    bs->nr_ops[cookie->type]++;
    bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
/*
 * Create a disk image.
 *
 * @filename/@fmt name the image and its format; @base_filename/@base_fmt
 * optionally select a backing file; @options is a driver "-o" option
 * string; @img_size may be -1 when the size comes from the backing file.
 * Errors are reported through @errp; @quiet suppresses the progress line.
 */
void bdrv_img_create(const char *filename, const char *fmt,
                     const char *base_filename, const char *base_fmt,
                     char *options, uint64_t img_size, int flags,
                     Error **errp, bool quiet)
    QEMUOptionParameter *param = NULL, *create_options = NULL;
    QEMUOptionParameter *backing_fmt, *backing_file, *size;
    BlockDriver *drv, *proto_drv;
    BlockDriver *backing_drv = NULL;
    Error *local_err = NULL;

    /* Find driver and parse its options */
    drv = bdrv_find_format(fmt);
        error_setg(errp, "Unknown file format '%s'", fmt);

    proto_drv = bdrv_find_protocol(filename, true);
        error_setg(errp, "Unknown protocol '%s'", filename);

    /* merge format-level and protocol-level creation options */
    create_options = append_option_parameters(create_options,
                                              drv->create_options);
    create_options = append_option_parameters(create_options,
                                              proto_drv->create_options);

    /* Create parameter list with default values */
    param = parse_option_parameters("", create_options, param);

    set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);

    /* Parse -o options */
        param = parse_option_parameters(options, create_options, param);
        if (param == NULL) {
            error_setg(errp, "Invalid options for file format '%s'.", fmt);

    if (base_filename) {
        if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
            error_setg(errp, "Backing file not supported for file format '%s'",

        if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
            error_setg(errp, "Backing file format not supported for file "
                             "format '%s'", fmt);

    backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
    if (backing_file && backing_file->value.s) {
        /* an image backed by itself would recurse forever */
        if (!strcmp(filename, backing_file->value.s)) {
            error_setg(errp, "Error: Trying to create an image with the "
                             "same filename as the backing file");

    backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
    if (backing_fmt && backing_fmt->value.s) {
        backing_drv = bdrv_find_format(backing_fmt->value.s);
            error_setg(errp, "Unknown backing file format '%s'",
                       backing_fmt->value.s);

    // The size for the image must always be specified, with one exception:
    // If we are using a backing file, we can obtain the size from there
    size = get_option_parameter(param, BLOCK_OPT_SIZE);
    if (size && size->value.n == -1) {
        if (backing_file && backing_file->value.s) {
            BlockDriverState *bs;

            /* backing files always opened read-only */
                flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

            ret = bdrv_open(&bs, backing_file->value.s, NULL, NULL, back_flags,
                            backing_drv, &local_err);
                error_setg_errno(errp, -ret, "Could not open '%s': %s",
                                 backing_file->value.s,
                                 error_get_pretty(local_err));
                error_free(local_err);
            /* inherit the size from the backing file */
            bdrv_get_geometry(bs, &size);

            snprintf(buf, sizeof(buf), "%" PRId64, size);
            set_option_parameter(param, BLOCK_OPT_SIZE, buf);
            error_setg(errp, "Image creation needs a size parameter");

        printf("Formatting '%s', fmt=%s ", filename, fmt);
        print_option_parameters(param);

    ret = bdrv_create(drv, filename, param, &local_err);
    if (ret == -EFBIG) {
        /* This is generally a better message than whatever the driver would
         * deliver (especially because of the cluster_size_hint), since that
         * is most probably not much different from "image too large". */
        const char *cluster_size_hint = "";
        if (get_option_parameter(create_options, BLOCK_OPT_CLUSTER_SIZE)) {
            cluster_size_hint = " (try using a larger cluster size)";
        error_setg(errp, "The image size is too large for file format '%s'"
                         "%s", fmt, cluster_size_hint);
        error_free(local_err);

    free_option_parameters(create_options);
    free_option_parameters(param);

        error_propagate(errp, local_err);
/* Return the AioContext this device runs in. */
AioContext *bdrv_get_aio_context(BlockDriverState *bs)
    /* Currently BlockDriverState always uses the main loop AioContext */
    return qemu_get_aio_context();
/* Register a notifier that is invoked before every write to bs. */
void bdrv_add_before_write_notifier(BlockDriverState *bs,
                                    NotifierWithReturn *notifier)
    notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
/* Change creation-time options of an existing image, if the driver
 * supports amending; returns the driver's result. */
int bdrv_amend_options(BlockDriverState *bs, QEMUOptionParameter *options)
    if (bs->drv->bdrv_amend_options == NULL) {
    return bs->drv->bdrv_amend_options(bs, options);
/* Used to recurse on single child block filters.
 * Single child block filter will store their child in bs->file.
 */
bool bdrv_generic_is_first_non_filter(BlockDriverState *bs,
                                      BlockDriverState *candidate)
    /* bs is not a filter: it is a match only if it IS the candidate */
    if (!bs->drv->authorizations[BS_IS_A_FILTER]) {
        if (bs == candidate) {

    /* filters that block recursion stop the search here */
    if (!bs->drv->authorizations[BS_FILTER_PASS_DOWN]) {

    /* otherwise keep looking down the chain through the filter's child */
    return bdrv_recurse_is_first_non_filter(bs->file, candidate);
/* Recursion step: let the driver override the first-non-filter check,
 * falling back to the generic single-child walk. */
bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
                                      BlockDriverState *candidate)
    if (bs->drv && bs->drv->bdrv_recurse_is_first_non_filter) {
        return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);

    return bdrv_generic_is_first_non_filter(bs, candidate);
5453 /* This function checks if the candidate is the first non filter bs down it's
5454 * bs chain. Since we don't have pointers to parents it explore all bs chains
5455 * from the top. Some filters can choose not to pass down the recursion.
5457 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
5459 BlockDriverState *bs;
5461 /* walk down the bs forest recursively */
5462 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
5465 perm = bdrv_recurse_is_first_non_filter(bs, candidate);
5467 /* candidate is the first non filter */