in block/qcow2.c [1297:1862]
static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
int flags, Error **errp)
{
ERRP_GUARD();
BDRVQcow2State *s = bs->opaque;
unsigned int len, i;
int ret = 0;
QCowHeader header;
uint64_t ext_end;
uint64_t l1_vm_state_index;
bool update_header = false;
ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not read qcow2 header");
goto fail;
}
header.magic = be32_to_cpu(header.magic);
header.version = be32_to_cpu(header.version);
header.backing_file_offset = be64_to_cpu(header.backing_file_offset);
header.backing_file_size = be32_to_cpu(header.backing_file_size);
header.size = be64_to_cpu(header.size);
header.cluster_bits = be32_to_cpu(header.cluster_bits);
header.crypt_method = be32_to_cpu(header.crypt_method);
header.l1_table_offset = be64_to_cpu(header.l1_table_offset);
header.l1_size = be32_to_cpu(header.l1_size);
header.refcount_table_offset = be64_to_cpu(header.refcount_table_offset);
header.refcount_table_clusters =
be32_to_cpu(header.refcount_table_clusters);
header.snapshots_offset = be64_to_cpu(header.snapshots_offset);
header.nb_snapshots = be32_to_cpu(header.nb_snapshots);
if (header.magic != QCOW_MAGIC) {
error_setg(errp, "Image is not in qcow2 format");
ret = -EINVAL;
goto fail;
}
if (header.version < 2 || header.version > 3) {
error_setg(errp, "Unsupported qcow2 version %" PRIu32, header.version);
ret = -ENOTSUP;
goto fail;
}
s->qcow_version = header.version;
/* Initialise cluster size */
if (header.cluster_bits < MIN_CLUSTER_BITS ||
header.cluster_bits > MAX_CLUSTER_BITS) {
error_setg(errp, "Unsupported cluster size: 2^%" PRIu32,
header.cluster_bits);
ret = -EINVAL;
goto fail;
}
s->cluster_bits = header.cluster_bits;
s->cluster_size = 1 << s->cluster_bits;
/* Initialise version 3 header fields */
if (header.version == 2) {
header.incompatible_features = 0;
header.compatible_features = 0;
header.autoclear_features = 0;
header.refcount_order = 4;
header.header_length = 72;
} else {
header.incompatible_features =
be64_to_cpu(header.incompatible_features);
header.compatible_features = be64_to_cpu(header.compatible_features);
header.autoclear_features = be64_to_cpu(header.autoclear_features);
header.refcount_order = be32_to_cpu(header.refcount_order);
header.header_length = be32_to_cpu(header.header_length);
if (header.header_length < 104) {
error_setg(errp, "qcow2 header too short");
ret = -EINVAL;
goto fail;
}
}
if (header.header_length > s->cluster_size) {
error_setg(errp, "qcow2 header exceeds cluster size");
ret = -EINVAL;
goto fail;
}
if (header.header_length > sizeof(header)) {
s->unknown_header_fields_size = header.header_length - sizeof(header);
s->unknown_header_fields = g_malloc(s->unknown_header_fields_size);
ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields,
s->unknown_header_fields_size);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not read unknown qcow2 header "
"fields");
goto fail;
}
}
if (header.backing_file_offset > s->cluster_size) {
error_setg(errp, "Invalid backing file offset");
ret = -EINVAL;
goto fail;
}
if (header.backing_file_offset) {
ext_end = header.backing_file_offset;
} else {
ext_end = 1 << header.cluster_bits;
}
/* Handle feature bits */
s->incompatible_features = header.incompatible_features;
s->compatible_features = header.compatible_features;
s->autoclear_features = header.autoclear_features;
/*
* Handle compression type
* Older qcow2 images don't contain the compression type header.
* Distinguish them by the header length and use
* the only valid (default) compression type in that case
*/
if (header.header_length > offsetof(QCowHeader, compression_type)) {
s->compression_type = header.compression_type;
} else {
s->compression_type = QCOW2_COMPRESSION_TYPE_ZLIB;
}
ret = validate_compression_type(s, errp);
if (ret) {
goto fail;
}
if (s->incompatible_features & ~QCOW2_INCOMPAT_MASK) {
void *feature_table = NULL;
qcow2_read_extensions(bs, header.header_length, ext_end,
&feature_table, flags, NULL, NULL);
report_unsupported_feature(errp, feature_table,
s->incompatible_features &
~QCOW2_INCOMPAT_MASK);
ret = -ENOTSUP;
g_free(feature_table);
goto fail;
}
if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
/* Corrupt images may not be written to unless they are being repaired
*/
if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) {
error_setg(errp, "qcow2: Image is corrupt; cannot be opened "
"read/write");
ret = -EACCES;
goto fail;
}
}
s->subclusters_per_cluster =
has_subclusters(s) ? QCOW_EXTL2_SUBCLUSTERS_PER_CLUSTER : 1;
s->subcluster_size = s->cluster_size / s->subclusters_per_cluster;
s->subcluster_bits = ctz32(s->subcluster_size);
if (s->subcluster_size < (1 << MIN_CLUSTER_BITS)) {
error_setg(errp, "Unsupported subcluster size: %d", s->subcluster_size);
ret = -EINVAL;
goto fail;
}
/* Check support for various header values */
if (header.refcount_order > 6) {
error_setg(errp, "Reference count entry width too large; may not "
"exceed 64 bits");
ret = -EINVAL;
goto fail;
}
s->refcount_order = header.refcount_order;
s->refcount_bits = 1 << s->refcount_order;
s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1);
s->refcount_max += s->refcount_max - 1;
s->crypt_method_header = header.crypt_method;
if (s->crypt_method_header) {
if (bdrv_uses_whitelist() &&
s->crypt_method_header == QCOW_CRYPT_AES) {
error_setg(errp,
"Use of AES-CBC encrypted qcow2 images is no longer "
"supported in system emulators");
error_append_hint(errp,
"You can use 'qemu-img convert' to convert your "
"image to an alternative supported format, such "
"as unencrypted qcow2, or raw with the LUKS "
"format instead.\n");
ret = -ENOSYS;
goto fail;
}
if (s->crypt_method_header == QCOW_CRYPT_AES) {
s->crypt_physical_offset = false;
} else {
/* Assuming LUKS and any future crypt methods we
* add will all use physical offsets, due to the
* fact that the alternative is insecure... */
s->crypt_physical_offset = true;
}
bs->encrypted = true;
}
s->l2_bits = s->cluster_bits - ctz32(l2_entry_size(s));
s->l2_size = 1 << s->l2_bits;
/* 2^(s->refcount_order - 3) is the refcount width in bytes */
s->refcount_block_bits = s->cluster_bits - (s->refcount_order - 3);
s->refcount_block_size = 1 << s->refcount_block_bits;
bs->total_sectors = header.size / BDRV_SECTOR_SIZE;
s->csize_shift = (62 - (s->cluster_bits - 8));
s->csize_mask = (1 << (s->cluster_bits - 8)) - 1;
s->cluster_offset_mask = (1LL << s->csize_shift) - 1;
s->refcount_table_offset = header.refcount_table_offset;
s->refcount_table_size =
header.refcount_table_clusters << (s->cluster_bits - 3);
if (header.refcount_table_clusters == 0 && !(flags & BDRV_O_CHECK)) {
error_setg(errp, "Image does not contain a reference count table");
ret = -EINVAL;
goto fail;
}
ret = qcow2_validate_table(bs, s->refcount_table_offset,
header.refcount_table_clusters,
s->cluster_size, QCOW_MAX_REFTABLE_SIZE,
"Reference count table", errp);
if (ret < 0) {
goto fail;
}
if (!(flags & BDRV_O_CHECK)) {
/*
* The total size in bytes of the snapshot table is checked in
* qcow2_read_snapshots() because the size of each snapshot is
* variable and we don't know it yet.
* Here we only check the offset and number of snapshots.
*/
ret = qcow2_validate_table(bs, header.snapshots_offset,
header.nb_snapshots,
sizeof(QCowSnapshotHeader),
sizeof(QCowSnapshotHeader) *
QCOW_MAX_SNAPSHOTS,
"Snapshot table", errp);
if (ret < 0) {
goto fail;
}
}
/* read the level 1 table */
ret = qcow2_validate_table(bs, header.l1_table_offset,
header.l1_size, L1E_SIZE,
QCOW_MAX_L1_SIZE, "Active L1 table", errp);
if (ret < 0) {
goto fail;
}
s->l1_size = header.l1_size;
s->l1_table_offset = header.l1_table_offset;
l1_vm_state_index = size_to_l1(s, header.size);
if (l1_vm_state_index > INT_MAX) {
error_setg(errp, "Image is too big");
ret = -EFBIG;
goto fail;
}
s->l1_vm_state_index = l1_vm_state_index;
/* the L1 table must contain at least enough entries to put
header.size bytes */
if (s->l1_size < s->l1_vm_state_index) {
error_setg(errp, "L1 table is too small");
ret = -EINVAL;
goto fail;
}
if (s->l1_size > 0) {
s->l1_table = qemu_try_blockalign(bs->file->bs, s->l1_size * L1E_SIZE);
if (s->l1_table == NULL) {
error_setg(errp, "Could not allocate L1 table");
ret = -ENOMEM;
goto fail;
}
ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
s->l1_size * L1E_SIZE);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not read L1 table");
goto fail;
}
for(i = 0;i < s->l1_size; i++) {
s->l1_table[i] = be64_to_cpu(s->l1_table[i]);
}
}
/* Parse driver-specific options */
ret = qcow2_update_options(bs, options, flags, errp);
if (ret < 0) {
goto fail;
}
s->flags = flags;
ret = qcow2_refcount_init(bs);
if (ret != 0) {
error_setg_errno(errp, -ret, "Could not initialize refcount handling");
goto fail;
}
QLIST_INIT(&s->cluster_allocs);
QTAILQ_INIT(&s->discards);
/* read qcow2 extensions */
if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL,
flags, &update_header, errp)) {
ret = -EINVAL;
goto fail;
}
/* Open external data file */
s->data_file = bdrv_open_child(NULL, options, "data-file", bs,
&child_of_bds, BDRV_CHILD_DATA,
true, errp);
if (*errp) {
ret = -EINVAL;
goto fail;
}
if (s->incompatible_features & QCOW2_INCOMPAT_DATA_FILE) {
if (!s->data_file && s->image_data_file) {
s->data_file = bdrv_open_child(s->image_data_file, options,
"data-file", bs, &child_of_bds,
BDRV_CHILD_DATA, false, errp);
if (!s->data_file) {
ret = -EINVAL;
goto fail;
}
}
if (!s->data_file) {
error_setg(errp, "'data-file' is required for this image");
ret = -EINVAL;
goto fail;
}
/* No data here */
bs->file->role &= ~BDRV_CHILD_DATA;
/* Must succeed because we have given up permissions if anything */
bdrv_child_refresh_perms(bs, bs->file, &error_abort);
} else {
if (s->data_file) {
error_setg(errp, "'data-file' can only be set for images with an "
"external data file");
ret = -EINVAL;
goto fail;
}
s->data_file = bs->file;
if (data_file_is_raw(bs)) {
error_setg(errp, "data-file-raw requires a data file");
ret = -EINVAL;
goto fail;
}
}
/* qcow2_read_extension may have set up the crypto context
* if the crypt method needs a header region, some methods
* don't need header extensions, so must check here
*/
if (s->crypt_method_header && !s->crypto) {
if (s->crypt_method_header == QCOW_CRYPT_AES) {
unsigned int cflags = 0;
if (flags & BDRV_O_NO_IO) {
cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
}
s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.",
NULL, NULL, cflags,
QCOW2_MAX_THREADS, errp);
if (!s->crypto) {
ret = -EINVAL;
goto fail;
}
} else if (!(flags & BDRV_O_NO_IO)) {
error_setg(errp, "Missing CRYPTO header for crypt method %d",
s->crypt_method_header);
ret = -EINVAL;
goto fail;
}
}
/* read the backing file name */
if (header.backing_file_offset != 0) {
len = header.backing_file_size;
if (len > MIN(1023, s->cluster_size - header.backing_file_offset) ||
len >= sizeof(bs->backing_file)) {
error_setg(errp, "Backing file name too long");
ret = -EINVAL;
goto fail;
}
ret = bdrv_pread(bs->file, header.backing_file_offset,
bs->auto_backing_file, len);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not read backing file name");
goto fail;
}
bs->auto_backing_file[len] = '\0';
pstrcpy(bs->backing_file, sizeof(bs->backing_file),
bs->auto_backing_file);
s->image_backing_file = g_strdup(bs->auto_backing_file);
}
/*
* Internal snapshots; skip reading them in check mode, because
* we do not need them then, and we do not want to abort because
* of a broken table.
*/
if (!(flags & BDRV_O_CHECK)) {
s->snapshots_offset = header.snapshots_offset;
s->nb_snapshots = header.nb_snapshots;
ret = qcow2_read_snapshots(bs, errp);
if (ret < 0) {
goto fail;
}
}
/* Clear unknown autoclear feature bits */
update_header |= s->autoclear_features & ~QCOW2_AUTOCLEAR_MASK;
update_header = update_header && bdrv_is_writable(bs);
if (update_header) {
s->autoclear_features &= QCOW2_AUTOCLEAR_MASK;
}
/* == Handle persistent dirty bitmaps ==
*
* We want load dirty bitmaps in three cases:
*
* 1. Normal open of the disk in active mode, not related to invalidation
* after migration.
*
* 2. Invalidation of the target vm after pre-copy phase of migration, if
* bitmaps are _not_ migrating through migration channel, i.e.
* 'dirty-bitmaps' capability is disabled.
*
* 3. Invalidation of source vm after failed or canceled migration.
* This is a very interesting case. There are two possible types of
* bitmaps:
*
* A. Stored on inactivation and removed. They should be loaded from the
* image.
*
* B. Not stored: not-persistent bitmaps and bitmaps, migrated through
* the migration channel (with dirty-bitmaps capability).
*
* On the other hand, there are two possible sub-cases:
*
* 3.1 disk was changed by somebody else while were inactive. In this
* case all in-RAM dirty bitmaps (both persistent and not) are
* definitely invalid. And we don't have any method to determine
* this.
*
* Simple and safe thing is to just drop all the bitmaps of type B on
* inactivation. But in this case we lose bitmaps in valid 4.2 case.
*
* On the other hand, resuming source vm, if disk was already changed
* is a bad thing anyway: not only bitmaps, the whole vm state is
* out of sync with disk.
*
* This means, that user or management tool, who for some reason
* decided to resume source vm, after disk was already changed by
* target vm, should at least drop all dirty bitmaps by hand.
*
* So, we can ignore this case for now, but TODO: "generation"
* extension for qcow2, to determine, that image was changed after
* last inactivation. And if it is changed, we will drop (or at least
* mark as 'invalid' all the bitmaps of type B, both persistent
* and not).
*
* 3.2 disk was _not_ changed while were inactive. Bitmaps may be saved
* to disk ('dirty-bitmaps' capability disabled), or not saved
* ('dirty-bitmaps' capability enabled), but we don't need to care
* of: let's load bitmaps as always: stored bitmaps will be loaded,
* and not stored has flag IN_USE=1 in the image and will be skipped
* on loading.
*
* One remaining possible case when we don't want load bitmaps:
*
* 4. Open disk in inactive mode in target vm (bitmaps are migrating or
* will be loaded on invalidation, no needs try loading them before)
*/
if (!(bdrv_get_flags(bs) & BDRV_O_INACTIVE)) {
/* It's case 1, 2 or 3.2. Or 3.1 which is BUG in management layer. */
bool header_updated;
if (!qcow2_load_dirty_bitmaps(bs, &header_updated, errp)) {
ret = -EINVAL;
goto fail;
}
update_header = update_header && !header_updated;
}
if (update_header) {
ret = qcow2_update_header(bs);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not update qcow2 header");
goto fail;
}
}
bs->supported_zero_flags = header.version >= 3 ?
BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK : 0;
bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
/* Repair image if dirty */
if (!(flags & BDRV_O_CHECK) && bdrv_is_writable(bs) &&
(s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) {
BdrvCheckResult result = {0};
ret = qcow2_co_check_locked(bs, &result,
BDRV_FIX_ERRORS | BDRV_FIX_LEAKS);
if (ret < 0 || result.check_errors) {
if (ret >= 0) {
ret = -EIO;
}
error_setg_errno(errp, -ret, "Could not repair dirty image");
goto fail;
}
}
#ifdef DEBUG_ALLOC
{
BdrvCheckResult result = {0};
qcow2_check_refcounts(bs, &result, 0);
}
#endif
qemu_co_queue_init(&s->thread_task_queue);
return ret;
fail:
g_free(s->image_data_file);
if (has_data_file(bs)) {
bdrv_unref_child(bs, s->data_file);
s->data_file = NULL;
}
g_free(s->unknown_header_fields);
cleanup_unknown_header_ext(bs);
qcow2_free_snapshots(bs);
qcow2_refcount_close(bs);
qemu_vfree(s->l1_table);
/* else pre-write overlap checks in cache_destroy may crash */
s->l1_table = NULL;
cache_clean_timer_del(bs);
if (s->l2_table_cache) {
qcow2_cache_destroy(s->l2_table_cache);
}
if (s->refcount_block_cache) {
qcow2_cache_destroy(s->refcount_block_cache);
}
qcrypto_block_free(s->crypto);
qapi_free_QCryptoBlockOpenOptions(s->crypto_opts);
return ret;
}