in dm-writecache.c [2204:2647]
/*
 * writecache_ctr - construct a writecache target from its table arguments.
 * @ti:   target being constructed; ti->private is set to the new dm_writecache
 * @argc: number of entries in @argv
 * @argv: positional arguments, consumed in this order:
 *          1. mode: "s" (SSD) or "p" (persistent memory)
 *          2. origin data device
 *          3. cache data device
 *          4. cache block size (power of two, 512..PAGE_SIZE)
 *          5. count of optional arguments, followed by the optional arguments
 *             (start_sector, high_watermark, low_watermark, writeback_jobs,
 *              autocommit_blocks, autocommit_time, max_age, cleaner, fua,
 *              nofua, metadata_only, pause_writeback)
 *
 * Returns 0 on success or a negative errno.  On failure ti->error describes
 * the problem and, once the dm_writecache structure has been allocated,
 * writecache_dtr() is called to release whatever was set up so far.
 */
static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv)
{
	struct dm_writecache *wc;
	struct dm_arg_set as;
	const char *string;
	unsigned opt_params;
	size_t offset, data_size;
	int i, r;
	char dummy;	/* catches trailing garbage after a number in sscanf() */
	int high_wm_percent = HIGH_WATERMARK;
	int low_wm_percent = LOW_WATERMARK;
	uint64_t x;
	struct wc_memory_superblock s;	/* private snapshot of the on-media superblock */

	static struct dm_arg _args[] = {
		{0, 18, "Invalid number of feature args"},
	};

	as.argc = argc;
	as.argv = argv;

	wc = kzalloc(sizeof(struct dm_writecache), GFP_KERNEL);
	if (!wc) {
		/*
		 * Nothing has been set up yet, so there is nothing to tear
		 * down.  Return directly instead of relying on
		 * writecache_dtr() coping with a target whose ti->private
		 * has not been assigned.
		 */
		ti->error = "Cannot allocate writecache structure";
		return -ENOMEM;
	}
	ti->private = wc;
	wc->ti = ti;

	mutex_init(&wc->lock);
	wc->max_age = MAX_AGE_UNSPECIFIED;
	writecache_poison_lists(wc);
	init_waitqueue_head(&wc->freelist_wait);
	timer_setup(&wc->autocommit_timer, writecache_autocommit_timer, 0);
	timer_setup(&wc->max_age_timer, writecache_max_age_timer, 0);

	for (i = 0; i < 2; i++) {
		atomic_set(&wc->bio_in_progress[i], 0);
		init_waitqueue_head(&wc->bio_in_progress_wait[i]);
	}

	wc->dm_io = dm_io_client_create();
	if (IS_ERR(wc->dm_io)) {
		r = PTR_ERR(wc->dm_io);
		ti->error = "Unable to allocate dm-io client";
		/* clear the ERR_PTR so the teardown path sees "not allocated" */
		wc->dm_io = NULL;
		goto bad;
	}

	wc->writeback_wq = alloc_workqueue("writecache-writeback", WQ_MEM_RECLAIM, 1);
	if (!wc->writeback_wq) {
		r = -ENOMEM;
		ti->error = "Could not allocate writeback workqueue";
		goto bad;
	}
	INIT_WORK(&wc->writeback_work, writecache_writeback);
	INIT_WORK(&wc->flush_work, writecache_flush_work);

	dm_iot_init(&wc->iot);

	raw_spin_lock_init(&wc->endio_list_lock);
	INIT_LIST_HEAD(&wc->endio_list);
	wc->endio_thread = kthread_run(writecache_endio_thread, wc, "writecache_endio");
	if (IS_ERR(wc->endio_thread)) {
		r = PTR_ERR(wc->endio_thread);
		wc->endio_thread = NULL;
		ti->error = "Couldn't spawn endio thread";
		goto bad;
	}

	/*
	 * Parse the mode (pmem or ssd)
	 */
	string = dm_shift_arg(&as);
	if (!string)
		goto bad_arguments;

	if (!strcasecmp(string, "s")) {
		wc->pmem_mode = false;
	} else if (!strcasecmp(string, "p")) {
#ifdef DM_WRITECACHE_HAS_PMEM
		wc->pmem_mode = true;
		/* default for pmem mode; "fua"/"nofua" below may override it */
		wc->writeback_fua = true;
#else
		/*
		 * If the architecture doesn't support persistent memory or
		 * the kernel doesn't support any DAX drivers, this driver can
		 * only be used in SSD-only mode.
		 */
		r = -EOPNOTSUPP;
		ti->error = "Persistent memory or DAX not supported on this system";
		goto bad;
#endif
	} else {
		goto bad_arguments;
	}

	/* Mode-specific pools: a bio set for pmem, a copy_struct mempool for SSD. */
	if (WC_MODE_PMEM(wc)) {
		r = bioset_init(&wc->bio_set, BIO_POOL_SIZE,
				offsetof(struct writeback_struct, bio),
				BIOSET_NEED_BVECS);
		if (r) {
			ti->error = "Could not allocate bio set";
			goto bad;
		}
	} else {
		wc->pause = PAUSE_WRITEBACK;
		r = mempool_init_kmalloc_pool(&wc->copy_pool, 1, sizeof(struct copy_struct));
		if (r) {
			ti->error = "Could not allocate mempool";
			goto bad;
		}
	}

	/*
	 * Parse the origin data device
	 */
	string = dm_shift_arg(&as);
	if (!string)
		goto bad_arguments;
	r = dm_get_device(ti, string, dm_table_get_mode(ti->table), &wc->dev);
	if (r) {
		ti->error = "Origin data device lookup failed";
		goto bad;
	}

	/*
	 * Parse cache data device (be it pmem or ssd)
	 */
	string = dm_shift_arg(&as);
	if (!string)
		goto bad_arguments;
	r = dm_get_device(ti, string, dm_table_get_mode(ti->table), &wc->ssd_dev);
	if (r) {
		ti->error = "Cache data device lookup failed";
		goto bad;
	}
	wc->memory_map_size = bdev_nr_bytes(wc->ssd_dev->bdev);

	/*
	 * Parse the cache block size
	 */
	string = dm_shift_arg(&as);
	if (!string)
		goto bad_arguments;
	/* must be a lone number, a power of two, and within 512..PAGE_SIZE */
	if (sscanf(string, "%u%c", &wc->block_size, &dummy) != 1 ||
	    wc->block_size < 512 || wc->block_size > PAGE_SIZE ||
	    (wc->block_size & (wc->block_size - 1))) {
		r = -EINVAL;
		ti->error = "Invalid block size";
		goto bad;
	}
	if (wc->block_size < bdev_logical_block_size(wc->dev->bdev) ||
	    wc->block_size < bdev_logical_block_size(wc->ssd_dev->bdev)) {
		r = -EINVAL;
		ti->error = "Block size is smaller than device logical block size";
		goto bad;
	}
	wc->block_size_bits = __ffs(wc->block_size);

	/* Defaults that the optional arguments below may override. */
	wc->max_writeback_jobs = MAX_WRITEBACK_JOBS;
	wc->autocommit_blocks = !WC_MODE_PMEM(wc) ? AUTOCOMMIT_BLOCKS_SSD : AUTOCOMMIT_BLOCKS_PMEM;
	wc->autocommit_jiffies = msecs_to_jiffies(AUTOCOMMIT_MSEC);

	/*
	 * Parse optional arguments
	 */
	r = dm_read_arg_group(_args, &as, &opt_params, &ti->error);
	if (r)
		goto bad;

	/*
	 * Options that take a value are only recognised while at least one
	 * more optional argument remains ("opt_params >= 1"); otherwise they
	 * fall through to the final else and are rejected.
	 */
	while (opt_params) {
		string = dm_shift_arg(&as);
		opt_params--;
		if (!strcasecmp(string, "start_sector") && opt_params >= 1) {
			unsigned long long start_sector;

			string = dm_shift_arg(&as);
			opt_params--;
			if (sscanf(string, "%llu%c", &start_sector, &dummy) != 1)
				goto invalid_optional;
			wc->start_sector = start_sector;
			wc->start_sector_set = true;
			/*
			 * Reject values that did not survive the assignment
			 * to wc->start_sector or that lie beyond the cache
			 * device.
			 */
			if (wc->start_sector != start_sector ||
			    wc->start_sector >= wc->memory_map_size >> SECTOR_SHIFT)
				goto invalid_optional;
		} else if (!strcasecmp(string, "high_watermark") && opt_params >= 1) {
			string = dm_shift_arg(&as);
			opt_params--;
			if (sscanf(string, "%d%c", &high_wm_percent, &dummy) != 1)
				goto invalid_optional;
			if (high_wm_percent < 0 || high_wm_percent > 100)
				goto invalid_optional;
			wc->high_wm_percent_value = high_wm_percent;
			wc->high_wm_percent_set = true;
		} else if (!strcasecmp(string, "low_watermark") && opt_params >= 1) {
			string = dm_shift_arg(&as);
			opt_params--;
			if (sscanf(string, "%d%c", &low_wm_percent, &dummy) != 1)
				goto invalid_optional;
			if (low_wm_percent < 0 || low_wm_percent > 100)
				goto invalid_optional;
			wc->low_wm_percent_value = low_wm_percent;
			wc->low_wm_percent_set = true;
		} else if (!strcasecmp(string, "writeback_jobs") && opt_params >= 1) {
			string = dm_shift_arg(&as);
			opt_params--;
			if (sscanf(string, "%u%c", &wc->max_writeback_jobs, &dummy) != 1)
				goto invalid_optional;
			wc->max_writeback_jobs_set = true;
		} else if (!strcasecmp(string, "autocommit_blocks") && opt_params >= 1) {
			string = dm_shift_arg(&as);
			opt_params--;
			if (sscanf(string, "%u%c", &wc->autocommit_blocks, &dummy) != 1)
				goto invalid_optional;
			wc->autocommit_blocks_set = true;
		} else if (!strcasecmp(string, "autocommit_time") && opt_params >= 1) {
			unsigned autocommit_msecs;

			string = dm_shift_arg(&as);
			opt_params--;
			if (sscanf(string, "%u%c", &autocommit_msecs, &dummy) != 1)
				goto invalid_optional;
			/* at most one hour, in milliseconds */
			if (autocommit_msecs > 3600000)
				goto invalid_optional;
			wc->autocommit_jiffies = msecs_to_jiffies(autocommit_msecs);
			wc->autocommit_time_value = autocommit_msecs;
			wc->autocommit_time_set = true;
		} else if (!strcasecmp(string, "max_age") && opt_params >= 1) {
			unsigned max_age_msecs;

			string = dm_shift_arg(&as);
			opt_params--;
			if (sscanf(string, "%u%c", &max_age_msecs, &dummy) != 1)
				goto invalid_optional;
			/* at most one day, in milliseconds */
			if (max_age_msecs > 86400000)
				goto invalid_optional;
			wc->max_age = msecs_to_jiffies(max_age_msecs);
			wc->max_age_set = true;
			wc->max_age_value = max_age_msecs;
		} else if (!strcasecmp(string, "cleaner")) {
			wc->cleaner_set = true;
			wc->cleaner = true;
		} else if (!strcasecmp(string, "fua")) {
			/* only accepted in pmem mode */
			if (!WC_MODE_PMEM(wc))
				goto invalid_optional;
			wc->writeback_fua = true;
			wc->writeback_fua_set = true;
		} else if (!strcasecmp(string, "nofua")) {
			/* only accepted in pmem mode */
			if (!WC_MODE_PMEM(wc))
				goto invalid_optional;
			wc->writeback_fua = false;
			wc->writeback_fua_set = true;
		} else if (!strcasecmp(string, "metadata_only")) {
			wc->metadata_only = true;
		} else if (!strcasecmp(string, "pause_writeback") && opt_params >= 1) {
			unsigned pause_msecs;

			/* only accepted in SSD mode */
			if (WC_MODE_PMEM(wc))
				goto invalid_optional;
			string = dm_shift_arg(&as);
			opt_params--;
			if (sscanf(string, "%u%c", &pause_msecs, &dummy) != 1)
				goto invalid_optional;
			/* at most one minute, in milliseconds */
			if (pause_msecs > 60000)
				goto invalid_optional;
			wc->pause = msecs_to_jiffies(pause_msecs);
			wc->pause_set = true;
			wc->pause_value = pause_msecs;
		} else {
			goto invalid_optional;
		}
	}

	if (high_wm_percent < low_wm_percent) {
		r = -EINVAL;
		ti->error = "High watermark must be greater than or equal to low watermark";
		goto bad;
	}

	if (WC_MODE_PMEM(wc)) {
		if (!dax_synchronous(wc->ssd_dev->dax_dev)) {
			r = -EOPNOTSUPP;
			ti->error = "Asynchronous persistent memory not supported as pmem cache";
			goto bad;
		}

		r = persistent_memory_claim(wc);
		if (r) {
			ti->error = "Unable to map persistent memory for cache";
			goto bad;
		}
	} else {
		size_t n_blocks, n_metadata_blocks;
		uint64_t n_bitmap_bits;

		/* the usable area starts at start_sector on the cache device */
		wc->memory_map_size -= (uint64_t)wc->start_sector << SECTOR_SHIFT;

		bio_list_init(&wc->flush_list);
		wc->flush_thread = kthread_run(writecache_flush_thread, wc, "dm_writecache_flush");
		if (IS_ERR(wc->flush_thread)) {
			r = PTR_ERR(wc->flush_thread);
			wc->flush_thread = NULL;
			ti->error = "Couldn't spawn flush thread";
			goto bad;
		}

		r = calculate_memory_size(wc->memory_map_size, wc->block_size,
					  &n_blocks, &n_metadata_blocks);
		if (r) {
			ti->error = "Invalid device size";
			goto bad;
		}

		/* one dirty bit per BITMAP_GRANULARITY bytes of metadata, rounded up */
		n_bitmap_bits = (((uint64_t)n_metadata_blocks << wc->block_size_bits) +
				 BITMAP_GRANULARITY - 1) / BITMAP_GRANULARITY;
		/* this is limitation of test_bit functions */
		if (n_bitmap_bits > 1U << 31) {
			r = -EFBIG;
			ti->error = "Invalid device size";
			goto bad;
		}

		/* in-memory copy of the metadata area */
		wc->memory_map = vmalloc(n_metadata_blocks << wc->block_size_bits);
		if (!wc->memory_map) {
			r = -ENOMEM;
			ti->error = "Unable to allocate memory for metadata";
			goto bad;
		}

		wc->dm_kcopyd = dm_kcopyd_client_create(&dm_kcopyd_throttle);
		if (IS_ERR(wc->dm_kcopyd)) {
			r = PTR_ERR(wc->dm_kcopyd);
			ti->error = "Unable to allocate dm-kcopyd client";
			wc->dm_kcopyd = NULL;
			goto bad;
		}

		wc->metadata_sectors = n_metadata_blocks << (wc->block_size_bits - SECTOR_SHIFT);
		/* bitmap size in bytes, rounded up to a whole number of longs */
		wc->dirty_bitmap_size = (n_bitmap_bits + BITS_PER_LONG - 1) /
			BITS_PER_LONG * sizeof(unsigned long);
		wc->dirty_bitmap = vzalloc(wc->dirty_bitmap_size);
		if (!wc->dirty_bitmap) {
			r = -ENOMEM;
			ti->error = "Unable to allocate dirty bitmap";
			goto bad;
		}

		/* read just the first block for now; it holds the superblock */
		r = writecache_read_metadata(wc, wc->block_size >> SECTOR_SHIFT);
		if (r) {
			ti->error = "Unable to read first block of metadata";
			goto bad;
		}
	}

	/*
	 * copy_mc_to_kernel() reports the number of bytes it could not copy,
	 * not an errno, so translate a failure into -EIO rather than handing
	 * a positive value back to the caller as the constructor's status.
	 */
	if (copy_mc_to_kernel(&s, sb(wc), sizeof(struct wc_memory_superblock))) {
		r = -EIO;
		ti->error = "Hardware memory error when reading superblock";
		goto bad;
	}

	/* Zero magic and version: initialize the area, then re-read it. */
	if (!le32_to_cpu(s.magic) && !le32_to_cpu(s.version)) {
		r = init_memory(wc);
		if (r) {
			ti->error = "Unable to initialize device";
			goto bad;
		}
		if (copy_mc_to_kernel(&s, sb(wc),
				      sizeof(struct wc_memory_superblock))) {
			r = -EIO;
			ti->error = "Hardware memory error when reading superblock";
			goto bad;
		}
	}

	if (le32_to_cpu(s.magic) != MEMORY_SUPERBLOCK_MAGIC) {
		ti->error = "Invalid magic in the superblock";
		r = -EINVAL;
		goto bad;
	}

	if (le32_to_cpu(s.version) != MEMORY_SUPERBLOCK_VERSION) {
		ti->error = "Invalid version in the superblock";
		r = -EINVAL;
		goto bad;
	}

	if (le32_to_cpu(s.block_size) != wc->block_size) {
		ti->error = "Block size does not match superblock";
		r = -EINVAL;
		goto bad;
	}

	wc->n_blocks = le64_to_cpu(s.n_blocks);

	/*
	 * Compute where the data blocks start (superblock plus entry table,
	 * rounded up to a block boundary) and how much space they need,
	 * checking every step for arithmetic overflow.
	 *
	 * The block count is taken from the snapshot "s" rather than re-read
	 * through sb(wc), so the check uses the same value that was loaded
	 * into wc->n_blocks.  Comparing against the full 64-bit field also
	 * catches truncation should wc->n_blocks be narrower than 64 bits.
	 */
	offset = wc->n_blocks * sizeof(struct wc_memory_entry);
	if (offset / sizeof(struct wc_memory_entry) != le64_to_cpu(s.n_blocks))
		goto overflow;
	offset += sizeof(struct wc_memory_superblock);
	if (offset < sizeof(struct wc_memory_superblock))
		goto overflow;
	offset = (offset + wc->block_size - 1) & ~(size_t)(wc->block_size - 1);
	data_size = wc->n_blocks * (size_t)wc->block_size;
	if (!offset || (data_size / wc->block_size != wc->n_blocks) ||
	    (offset + data_size < offset))
		goto overflow;
	if (offset + data_size > wc->memory_map_size) {
		ti->error = "Memory area is too small";
		r = -EINVAL;
		goto bad;
	}

	wc->metadata_sectors = offset >> SECTOR_SHIFT;
	wc->block_start = (char *)sb(wc) + offset;

	/*
	 * Convert the "percent used" watermarks into free-list thresholds:
	 * n_blocks * (100 - percent) / 100, rounded to nearest.
	 */
	x = (uint64_t)wc->n_blocks * (100 - high_wm_percent);
	x += 50;
	do_div(x, 100);
	wc->freelist_high_watermark = x;
	x = (uint64_t)wc->n_blocks * (100 - low_wm_percent);
	x += 50;
	do_div(x, 100);
	wc->freelist_low_watermark = x;

	if (wc->cleaner)
		activate_cleaner(wc);

	r = writecache_alloc_entries(wc);
	if (r) {
		ti->error = "Cannot allocate memory";
		goto bad;
	}

	ti->num_flush_bios = WC_MODE_PMEM(wc) ? 1 : 2;
	ti->flush_supported = true;
	ti->num_discard_bios = 1;

	if (WC_MODE_PMEM(wc))
		persistent_memory_flush_cache(wc->memory_map, wc->memory_map_size);

	return 0;

invalid_optional:
	r = -EINVAL;
	ti->error = "Invalid optional argument";
	goto bad;
overflow:
	ti->error = "Overflow in size calculation";
	r = -EINVAL;
	goto bad;
bad_arguments:
	r = -EINVAL;
	ti->error = "Bad arguments";
bad:
	writecache_dtr(ti);
	return r;
}