[Devel] [PATCH RH8 37/61] ploop: Convert COW
Kirill Tkhai
ktkhai at virtuozzo.com
Fri May 14 18:57:55 MSK 2021
Make COW (copy-on-write) proceed via struct pio instead of struct bio.
Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
---
drivers/md/dm-ploop-cmd.c | 54 ++++++-------
drivers/md/dm-ploop-map.c | 180 +++++++++++++++++++++---------------------
drivers/md/dm-ploop-target.c | 2
drivers/md/dm-ploop.h | 22 ++---
4 files changed, 124 insertions(+), 134 deletions(-)
diff --git a/drivers/md/dm-ploop-cmd.c b/drivers/md/dm-ploop-cmd.c
index fc106204e654..ecea3040b1cb 100644
--- a/drivers/md/dm-ploop-cmd.c
+++ b/drivers/md/dm-ploop-cmd.c
@@ -197,20 +197,20 @@ static unsigned int ploop_find_bat_entry(struct ploop *ploop,
return cluster;
}
-void bio_prepare_offsets(struct ploop *ploop, struct bio *bio,
+void pio_prepare_offsets(struct ploop *ploop, struct pio *pio,
unsigned int cluster)
{
unsigned int cluster_log = ploop->cluster_log;
int i, nr_pages = nr_pages_in_cluster(ploop);
- bio->bi_vcnt = nr_pages;
+ pio->bi_vcnt = nr_pages;
for (i = 0; i < nr_pages; i++) {
- bio->bi_io_vec[i].bv_offset = 0;
- bio->bi_io_vec[i].bv_len = PAGE_SIZE;
+ pio->bi_io_vec[i].bv_offset = 0;
+ pio->bi_io_vec[i].bv_len = PAGE_SIZE;
}
- bio->bi_iter.bi_sector = cluster << cluster_log;
- bio->bi_iter.bi_size = 1 << (cluster_log + 9);
+ pio->bi_iter.bi_sector = cluster << cluster_log;
+ pio->bi_iter.bi_size = 1 << (cluster_log + 9);
}
static int rw_pages_sync(int rw, struct file *file, u64 page_id, void *data)
@@ -445,41 +445,41 @@ static void process_resize_cmd(struct ploop *ploop, struct ploop_index_wb *piwb,
complete(&cmd->comp); /* Last touch of cmd memory */
}
-struct bio *alloc_bio_with_pages(struct ploop *ploop)
+struct pio *alloc_pio_with_pages(struct ploop *ploop)
{
unsigned int cluster_log = ploop->cluster_log;
int i, nr_pages = nr_pages_in_cluster(ploop);
- struct bio *bio;
+ struct pio *pio;
+ u32 size;
- if (nr_pages <= BIO_MAX_PAGES)
- bio = bio_alloc(GFP_NOIO, nr_pages);
- else
- bio = bio_kmalloc(GFP_NOIO, nr_pages);
- if (!bio)
+ size = sizeof(*pio) + sizeof(*pio->bi_io_vec) * nr_pages;
+ pio = kmalloc(size, GFP_NOIO);
+ if (!pio)
return NULL;
+ pio->bi_io_vec = (void *)(pio + 1);
for (i = 0; i < nr_pages; i++) {
- bio->bi_io_vec[i].bv_page = alloc_page(GFP_NOIO);
- if (!bio->bi_io_vec[i].bv_page)
+ pio->bi_io_vec[i].bv_page = alloc_page(GFP_NOIO);
+ if (!pio->bi_io_vec[i].bv_page)
goto err;
- bio->bi_io_vec[i].bv_offset = 0;
- bio->bi_io_vec[i].bv_len = PAGE_SIZE;
+ pio->bi_io_vec[i].bv_offset = 0;
+ pio->bi_io_vec[i].bv_len = PAGE_SIZE;
}
- bio->bi_vcnt = nr_pages;
- bio->bi_iter.bi_size = 1 << (cluster_log + 9);
+ pio->bi_vcnt = nr_pages;
+ pio->bi_iter.bi_size = 1 << (cluster_log + 9);
- return bio;
+ return pio;
err:
while (i-- > 0)
- put_page(bio->bi_io_vec[i].bv_page);
- bio_put(bio);
+ put_page(pio->bi_io_vec[i].bv_page);
+ kfree(pio);
return NULL;
}
-void free_bio_with_pages(struct ploop *ploop, struct bio *bio)
+void free_pio_with_pages(struct ploop *ploop, struct pio *pio)
{
- int i, nr_pages = bio->bi_vcnt;
+ int i, nr_pages = pio->bi_vcnt;
struct page *page;
/*
@@ -489,11 +489,11 @@ void free_bio_with_pages(struct ploop *ploop, struct bio *bio)
WARN_ON_ONCE(nr_pages != nr_pages_in_cluster(ploop));
for (i = 0; i < nr_pages; i++) {
- page = bio->bi_io_vec[i].bv_page;
+ page = pio->bi_io_vec[i].bv_page;
put_page(page);
}
- bio_put(bio);
+ kfree(pio);
}
/* @new_size is in sectors */
@@ -1516,7 +1516,7 @@ static bool ploop_has_pending_activity(struct ploop *ploop)
has = ploop->deferred_cmd;
has |= !list_empty(&ploop->deferred_pios);
has |= !list_empty(&ploop->discard_pios);
- has |= !bio_list_empty(&ploop->delta_cow_action_list);
+ has |= !list_empty(&ploop->delta_cow_action_list);
spin_unlock_irq(&ploop->deferred_lock);
return has;
diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
index 46cc98c945e8..5c2b4e32e525 100644
--- a/drivers/md/dm-ploop-map.c
+++ b/drivers/md/dm-ploop-map.c
@@ -74,7 +74,7 @@ static void ploop_index_wb_init(struct ploop_index_wb *piwb, struct ploop *ploop
piwb->bat_page = NULL;
piwb->bi_status = 0;
INIT_LIST_HEAD(&piwb->ready_data_pios);
- bio_list_init(&piwb->cow_list);
+ INIT_LIST_HEAD(&piwb->cow_list);
/* For ploop_bat_write_complete() */
atomic_set(&piwb->count, 1);
piwb->completed = false;
@@ -522,12 +522,12 @@ static int ploop_discard_index_pio_end(struct ploop *ploop, struct pio *pio)
static void complete_cow(struct ploop_cow *cow, blk_status_t bi_status)
{
unsigned int dst_cluster = cow->dst_cluster;
- struct bio *cluster_bio = cow->cluster_bio;
+ struct pio *cluster_pio = cow->cluster_pio;
struct ploop *ploop = cow->ploop;
unsigned long flags;
struct pio *h;
- WARN_ON_ONCE(cluster_bio->bi_next);
+ WARN_ON_ONCE(!list_empty(&cluster_pio->list));
h = &cow->hook;
del_cluster_lk(ploop, h);
@@ -542,7 +542,7 @@ static void complete_cow(struct ploop_cow *cow, blk_status_t bi_status)
cow->end_fn(ploop, blk_status_to_errno(bi_status), cow->data);
queue_work(ploop->wq, &ploop->worker);
- free_bio_with_pages(ploop, cow->cluster_bio);
+ free_pio_with_pages(ploop, cow->cluster_pio);
kmem_cache_free(cow_cache, cow);
}
@@ -668,7 +668,7 @@ static void ploop_bat_write_complete(struct ploop_index_wb *piwb,
blk_status_t bi_status)
{
struct ploop *ploop = piwb->ploop;
- struct bio *cluster_bio;
+ struct pio *cluster_pio;
struct ploop_cow *cow;
struct pio *data_pio;
unsigned long flags;
@@ -700,8 +700,8 @@ static void ploop_bat_write_complete(struct ploop_index_wb *piwb,
pio_endio(data_pio);
}
- while ((cluster_bio = bio_list_pop(&piwb->cow_list))) {
- cow = cluster_bio->bi_private;
+ while ((cluster_pio = pio_list_pop(&piwb->cow_list))) {
+ cow = cluster_pio->endio_cb_data;
complete_cow(cow, bi_status);
}
@@ -883,33 +883,62 @@ static bool ploop_attach_end_action(struct pio *h, struct ploop_index_wb *piwb)
static void ploop_read_aio_do_completion(struct ploop_iocb *piocb)
{
- struct bio *bio = piocb->bio;
+ struct pio *pio = piocb->pio;
if (!atomic_dec_and_test(&piocb->count))
return;
- bio_endio(bio);
+ pio_endio(pio);
kmem_cache_free(piocb_cache, piocb);
}
static void ploop_read_aio_complete(struct kiocb *iocb, long ret, long ret2)
{
struct ploop_iocb *piocb = container_of(iocb, struct ploop_iocb, iocb);
- struct bio *bio = piocb->bio;
+ struct pio *pio = piocb->pio;
- if (ret != bio->bi_iter.bi_size)
- bio->bi_status = BLK_STS_IOERR;
- else
- bio->bi_status = BLK_STS_OK;
+ if (ret != pio->bi_iter.bi_size)
+ pio->bi_status = BLK_STS_IOERR;
ploop_read_aio_do_completion(piocb);
}
+
+static void data_rw_complete(struct pio *pio)
+{
+ if (pio->ret != pio->bi_iter.bi_size)
+ pio->bi_status = BLK_STS_IOERR;
+
+ pio_endio(pio);
+}
+
+static void submit_rw_mapped(struct ploop *ploop, u32 dst_clu, struct pio *pio)
+{
+ unsigned int rw, nr_segs;
+ struct bio_vec *bvec;
+ struct iov_iter iter;
+ loff_t pos;
+
+ pio->complete = data_rw_complete;
+
+ rw = (op_is_write(pio->bi_opf) ? WRITE : READ);
+ nr_segs = pio_nr_segs(pio);
+ bvec = __bvec_iter_bvec(pio->bi_io_vec, pio->bi_iter);
+
+ iov_iter_bvec(&iter, rw, bvec, nr_segs, pio->bi_iter.bi_size);
+ iter.iov_offset = pio->bi_iter.bi_bvec_done;
+
+ remap_to_cluster(ploop, pio, dst_clu);
+ pos = to_bytes(pio->bi_iter.bi_sector);
+
+ call_rw_iter(top_delta(ploop)->file, pos, rw, &iter, pio);
+}
+
/*
* Read cluster or its part from secondary delta.
- * @bio is dm's or plain (w/o pio container and ploop_endio()).
+ * @pio is dm's or plain (w/o bio container and ploop_endio()).
* Note, that nr inflight is not incremented here, so delegate this to caller
* (if you need).
*/
static void submit_delta_read(struct ploop *ploop, unsigned int level,
- unsigned int dst_cluster, struct bio *bio)
+ unsigned int dst_cluster, struct pio *pio)
{
unsigned int flags, offset;
struct ploop_iocb *piocb;
@@ -921,22 +950,22 @@ static void submit_delta_read(struct ploop *ploop, unsigned int level,
piocb = kmem_cache_zalloc(piocb_cache, GFP_NOIO);
if (!piocb) {
- bio->bi_status = BLK_STS_RESOURCE;
- bio_endio(bio);
+ pio->bi_status = BLK_STS_RESOURCE;
+ pio_endio(pio);
return;
}
atomic_set(&piocb->count, 2);
- piocb->bio = bio;
+ piocb->pio = pio;
- remap_to_cluster_bio(ploop, bio, dst_cluster);
+ remap_to_cluster(ploop, pio, dst_cluster);
- bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
- offset = bio->bi_iter.bi_bvec_done;
+ bvec = __bvec_iter_bvec(pio->bi_io_vec, pio->bi_iter);
+ offset = pio->bi_iter.bi_bvec_done;
- iov_iter_bvec(&iter, READ, bvec, 1, bio->bi_iter.bi_size);
+ iov_iter_bvec(&iter, READ, bvec, 1, pio->bi_iter.bi_size);
iter.iov_offset = offset;
- pos = (bio->bi_iter.bi_sector << SECTOR_SHIFT);
+ pos = (pio->bi_iter.bi_sector << SECTOR_SHIFT);
file = ploop->deltas[level].file;
piocb->iocb.ki_pos = pos;
@@ -958,29 +987,27 @@ static void submit_delta_read(struct ploop *ploop, unsigned int level,
static void initiate_delta_read(struct ploop *ploop, unsigned int level,
unsigned int dst_cluster, struct pio *pio)
{
- struct bio *bio = dm_bio_from_per_bio_data(pio, sizeof(*pio));
-
if (dst_cluster == BAT_ENTRY_NONE) {
+ struct bio *bio = dm_bio_from_per_bio_data(pio, sizeof(*pio));
/* No one delta contains dst_cluster. */
zero_fill_bio(bio);
pio_endio(pio);
return;
}
- submit_delta_read(ploop, level, dst_cluster, bio);
+ submit_delta_read(ploop, level, dst_cluster, pio);
}
-static void ploop_cow_endio(struct bio *cluster_bio)
+static void ploop_cow_endio(struct pio *cluster_pio, void *data, blk_status_t bi_status)
{
- struct ploop_cow *cow = cluster_bio->bi_private;
+ struct ploop_cow *cow = data;
struct ploop *ploop = cow->ploop;
- unsigned int dst_cluster = cluster_bio->bi_iter.bi_sector >> ploop->cluster_log;
unsigned long flags;
- track_dst_cluster(ploop, dst_cluster);
+ track_pio(ploop, cluster_pio);
spin_lock_irqsave(&ploop->deferred_lock, flags);
- bio_list_add(&ploop->delta_cow_action_list, cluster_bio);
+ list_add_tail(&cluster_pio->list, &ploop->delta_cow_action_list);
spin_unlock_irqrestore(&ploop->deferred_lock, flags);
dec_nr_inflight_raw(ploop, &cow->hook);
@@ -1050,13 +1077,14 @@ int submit_cluster_cow(struct ploop *ploop, unsigned int level,
unsigned int cluster, unsigned int dst_cluster,
void (*end_fn)(struct ploop *, int, void *), void *data)
{
- struct bio *bio = NULL;
+ struct pio *pio = NULL;
struct ploop_cow *cow;
/* Prepare new delta read */
- bio = alloc_bio_with_pages(ploop);
- if (!bio)
+ pio = alloc_pio_with_pages(ploop);
+ if (!pio)
goto err;
+ ploop_init_end_io(ploop, pio);
cow = kmem_cache_alloc(cow_cache, GFP_NOIO);
if (!cow)
@@ -1064,24 +1092,24 @@ int submit_cluster_cow(struct ploop *ploop, unsigned int level,
cow->ploop = ploop;
cow->dst_cluster = BAT_ENTRY_NONE;
- cow->cluster_bio = bio;
+ cow->cluster_pio = pio;
cow->end_fn = end_fn;
cow->data = data;
- bio_prepare_offsets(ploop, bio, cluster);
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
- bio->bi_end_io = ploop_cow_endio;
- bio->bi_private = cow;
+ pio_prepare_offsets(ploop, pio, cluster);
+ pio->bi_opf = REQ_OP_READ;
+ pio->endio_cb = ploop_cow_endio;
+ pio->endio_cb_data = cow;
ploop_init_end_io(ploop, &cow->hook);
add_cluster_lk(ploop, &cow->hook, cluster);
/* Stage #0: read secondary delta full cluster */
- submit_delta_read(ploop, level, dst_cluster, bio);
+ submit_delta_read(ploop, level, dst_cluster, pio);
return 0;
err:
- if (bio)
- free_bio_with_pages(ploop, bio);
+ if (pio)
+ free_pio_with_pages(ploop, pio);
return -ENOMEM;
}
@@ -1111,7 +1139,7 @@ static void initiate_cluster_cow(struct ploop *ploop, unsigned int level,
static void submit_cluster_write(struct ploop_cow *cow)
{
- struct bio *bio = cow->cluster_bio;
+ struct pio *pio = cow->cluster_pio;
struct ploop *ploop = cow->ploop;
unsigned int dst_cluster;
@@ -1119,19 +1147,17 @@ static void submit_cluster_write(struct ploop_cow *cow)
goto error;
cow->dst_cluster = dst_cluster;
- bio_reset(bio);
- bio_prepare_offsets(ploop, bio, dst_cluster);
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
- remap_to_origin(ploop, bio);
+ pio_prepare_offsets(ploop, pio, dst_cluster);
+ pio->bi_opf = REQ_OP_WRITE;
BUG_ON(irqs_disabled());
read_lock_irq(&ploop->bat_rwlock);
inc_nr_inflight_raw(ploop, &cow->hook);
read_unlock_irq(&ploop->bat_rwlock);
- bio->bi_end_io = ploop_cow_endio;
- bio->bi_private = cow;
+ pio->endio_cb = ploop_cow_endio;
+ pio->endio_cb_data = cow;
- submit_bio(bio);
+ submit_rw_mapped(ploop, dst_cluster, pio);
return;
error:
complete_cow(cow, BLK_STS_IOERR);
@@ -1157,7 +1183,8 @@ static void submit_cow_index_wb(struct ploop_cow *cow,
if (piwb->page_nr != page_nr || piwb->type != PIWB_TYPE_ALLOC) {
/* Another BAT page wb is in process */
spin_lock_irq(&ploop->deferred_lock);
- bio_list_add(&ploop->delta_cow_action_list, cow->cluster_bio);
+ list_add_tail(&cow->cluster_pio->list,
+ &ploop->delta_cow_action_list);
spin_unlock_irq(&ploop->deferred_lock);
queue_work(ploop->wq, &ploop->worker);
goto out;
@@ -1173,7 +1200,7 @@ static void submit_cow_index_wb(struct ploop_cow *cow,
/* Prevent double clearing of holes_bitmap bit on complete_cow() */
cow->dst_cluster = BAT_ENTRY_NONE;
spin_lock_irq(&ploop->deferred_lock);
- bio_list_add(&piwb->cow_list, cow->cluster_bio);
+ list_add_tail(&cow->cluster_pio->list, &piwb->cow_list);
spin_unlock_irq(&ploop->deferred_lock);
out:
return;
@@ -1183,20 +1210,19 @@ static void submit_cow_index_wb(struct ploop_cow *cow,
static void process_delta_wb(struct ploop *ploop, struct ploop_index_wb *piwb)
{
- struct bio_list cow_list = BIO_EMPTY_LIST;
- struct bio *cluster_bio;
+ struct pio *cluster_pio;
struct ploop_cow *cow;
+ LIST_HEAD(cow_list);
- if (bio_list_empty(&ploop->delta_cow_action_list))
+ if (list_empty(&ploop->delta_cow_action_list))
return;
- bio_list_merge(&cow_list, &ploop->delta_cow_action_list);
- bio_list_init(&ploop->delta_cow_action_list);
+ list_splice_tail_init(&ploop->delta_cow_action_list, &cow_list);
spin_unlock_irq(&ploop->deferred_lock);
- while ((cluster_bio = bio_list_pop(&cow_list)) != NULL) {
- cow = cluster_bio->bi_private;
- if (unlikely(cluster_bio->bi_status != BLK_STS_OK)) {
- complete_cow(cow, cluster_bio->bi_status);
+ while ((cluster_pio = pio_list_pop(&cow_list)) != NULL) {
+ cow = cluster_pio->endio_cb_data;
+ if (unlikely(cluster_pio->bi_status != BLK_STS_OK)) {
+ complete_cow(cow, cluster_pio->bi_status);
continue;
}
@@ -1283,36 +1309,6 @@ static bool locate_new_cluster_and_attach_pio(struct ploop *ploop,
return false;
}
-static void data_rw_complete(struct pio *pio)
-{
- if (pio->ret != pio->bi_iter.bi_size)
- pio->bi_status = BLK_STS_IOERR;
-
- pio_endio(pio);
-}
-
-static void submit_rw_mapped(struct ploop *ploop, loff_t clu_pos, struct pio *pio)
-{
- unsigned int rw, nr_segs;
- struct bio_vec *bvec;
- struct iov_iter iter;
- loff_t pos;
-
- pio->complete = data_rw_complete;
-
- rw = (op_is_write(pio->bi_opf) ? WRITE : READ);
- nr_segs = pio_nr_segs(pio);
- bvec = __bvec_iter_bvec(pio->bi_io_vec, pio->bi_iter);
-
- iov_iter_bvec(&iter, rw, bvec, nr_segs, pio->bi_iter.bi_size);
- iter.iov_offset = pio->bi_iter.bi_bvec_done;
-
- remap_to_cluster(ploop, pio, clu_pos);
- pos = to_bytes(pio->bi_iter.bi_sector);
-
- call_rw_iter(top_delta(ploop)->file, pos, rw, &iter, pio);
-}
-
static int process_one_deferred_bio(struct ploop *ploop, struct pio *pio,
struct ploop_index_wb *piwb)
{
diff --git a/drivers/md/dm-ploop-target.c b/drivers/md/dm-ploop-target.c
index 14bd37610b25..bac1d74c4dcf 100644
--- a/drivers/md/dm-ploop-target.c
+++ b/drivers/md/dm-ploop-target.c
@@ -312,7 +312,7 @@ static int ploop_ctr(struct dm_target *ti, unsigned int argc, char **argv)
INIT_LIST_HEAD(&ploop->flush_pios);
INIT_LIST_HEAD(&ploop->discard_pios);
INIT_LIST_HEAD(&ploop->cluster_lk_list);
- bio_list_init(&ploop->delta_cow_action_list);
+ INIT_LIST_HEAD(&ploop->delta_cow_action_list);
atomic_set(&ploop->nr_discard_bios, 0);
ploop->bat_entries = RB_ROOT;
ploop->exclusive_bios_rbtree = RB_ROOT;
diff --git a/drivers/md/dm-ploop.h b/drivers/md/dm-ploop.h
index dad0e5857a50..550550dfb435 100644
--- a/drivers/md/dm-ploop.h
+++ b/drivers/md/dm-ploop.h
@@ -120,7 +120,7 @@ struct ploop_index_wb {
spinlock_t lock;
struct page *bat_page;
struct list_head ready_data_pios;
- struct bio_list cow_list;
+ struct list_head cow_list;
atomic_t count;
bool completed;
int bi_status;
@@ -216,7 +216,7 @@ struct ploop {
struct list_head cluster_lk_list;
/* List of COW requests requiring action. */
- struct bio_list delta_cow_action_list;
+ struct list_head delta_cow_action_list;
/* Resume is prohibited */
bool noresume;
@@ -242,6 +242,7 @@ struct pio {
struct bvec_iter bi_iter;
struct bio_vec *bi_io_vec;
unsigned int bi_opf;
+ unsigned int bi_vcnt;
blk_status_t bi_status;
ploop_endio_t endio_cb;
@@ -272,14 +273,14 @@ struct pio {
struct ploop_iocb {
struct kiocb iocb;
- struct bio *bio;
+ struct pio *pio;
atomic_t count;
};
/* Delta COW private */
struct ploop_cow {
struct ploop *ploop;
- struct bio *cluster_bio;
+ struct pio *cluster_pio;
unsigned int dst_cluster;
struct pio hook;
@@ -321,13 +322,6 @@ static inline void remap_to_cluster(struct ploop *ploop, struct pio *pio,
pio->bi_iter.bi_sector |= (cluster << ploop->cluster_log);
}
-static inline void remap_to_cluster_bio(struct ploop *ploop, struct bio *bio,
- unsigned int cluster)
-{
- bio->bi_iter.bi_sector &= ((1 << ploop->cluster_log) - 1);
- bio->bi_iter.bi_sector |= (cluster << ploop->cluster_log);
-}
-
static inline bool whole_cluster(struct ploop *ploop, struct pio *pio)
{
sector_t end_sector = bvec_iter_end_sector(pio->bi_iter);
@@ -552,9 +546,9 @@ extern int submit_cluster_cow(struct ploop *ploop, unsigned int level,
unsigned int cluster, unsigned int dst_cluster,
void (*end_fn)(struct ploop *, int, void *), void *data);
-extern struct bio * alloc_bio_with_pages(struct ploop *ploop);
-extern void free_bio_with_pages(struct ploop *ploop, struct bio *bio);
-extern void bio_prepare_offsets(struct ploop *, struct bio *, unsigned int);
+extern struct pio * alloc_pio_with_pages(struct ploop *ploop);
+extern void free_pio_with_pages(struct ploop *ploop, struct pio *pio);
+extern void pio_prepare_offsets(struct ploop *, struct pio *, unsigned int);
extern void ploop_free_pb(struct push_backup *pb);
extern void cleanup_backup(struct ploop *ploop);
More information about the Devel mailing list