[Devel] [PATCH RHEL7 COMMIT] ploop: ploop_grow must nullify holes
Konstantin Khorenko
khorenko at virtuozzo.com
Fri Nov 25 06:32:58 PST 2016
The commit is pushed to "branch-rh7-3.10.0-327.36.1.vz7.20.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-327.36.1.vz7.20.3
------>
commit 34c7bf175575f737c0e7a8fbbfce8ead3b642cbf
Author: Maxim Patlasov <mpatlasov at virtuozzo.com>
Date: Fri Nov 25 18:32:58 2016 +0400
ploop: ploop_grow must nullify holes
Before the patch, ploop_grow nullified only those image-blocks of
future extended BAT, which were referenced from the former BAT. That was
obviously wrong: such blocks might contain garbage that would be
interpreted as a cluster-to-image block mapping after ploop_grow.
The patch splits ploop_grow into two parts: firstly, relocate all
referenced image-blocks of future BAT to the end of image file. Then,
secondly, nullify the whole range of future BAT.
https://jira.sw.ru/browse/PSBM-55685
Signed-off-by: Maxim Patlasov <mpatlasov at virtuozzo.com>
---
drivers/block/ploop/dev.c | 137 +++++++++++++++++++++++++++++++++++-------
drivers/block/ploop/events.h | 1 +
drivers/block/ploop/io_kaio.c | 4 +-
drivers/block/ploop/map.c | 31 +++-------
include/linux/ploop/ploop.h | 5 +-
5 files changed, 132 insertions(+), 46 deletions(-)
diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index 921ec8b..26017eb 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -1359,7 +1359,8 @@ static void ploop_complete_request(struct ploop_request * preq)
WARN_ON(!preq->error && test_bit(PLOOP_REQ_ISSUE_FLUSH, &preq->state));
if (test_bit(PLOOP_REQ_RELOC_A, &preq->state) ||
- test_bit(PLOOP_REQ_RELOC_S, &preq->state)) {
+ test_bit(PLOOP_REQ_RELOC_S, &preq->state) ||
+ test_bit(PLOOP_REQ_RELOC_N, &preq->state)) {
if (preq->error)
set_bit(PLOOP_S_ABORT, &plo->state);
@@ -1403,8 +1404,11 @@ static void ploop_complete_request(struct ploop_request * preq)
int i;
struct bio * bio = preq->aux_bio;
- for (i = 0; i < bio->bi_vcnt; i++)
- put_page(bio->bi_io_vec[i].bv_page);
+ for (i = 0; i < bio->bi_vcnt; i++) {
+ struct page *page = bio->bi_io_vec[i].bv_page;
+ if (page != ZERO_PAGE(0))
+ put_page(page);
+ }
bio_put(bio);
@@ -1985,6 +1989,61 @@ ploop_entry_reloc_req(struct ploop_request *preq, iblock_t *iblk)
BUG();
}
+static void fill_zero_bio(struct ploop_device *plo, struct bio * bio)
+{
+ int pages = block_vecs(plo);
+
+ for (; bio->bi_vcnt < pages; bio->bi_vcnt++) {
+ bio->bi_io_vec[bio->bi_vcnt].bv_page = ZERO_PAGE(0);
+ bio->bi_io_vec[bio->bi_vcnt].bv_offset = 0;
+ bio->bi_io_vec[bio->bi_vcnt].bv_len = PAGE_SIZE;
+ }
+ bio->bi_sector = 0;
+ bio->bi_size = (1 << (plo->cluster_log + 9));
+}
+
+/*
+ * Handle a RELOC_N (nullify) preq: write one cluster of zeroes to
+ * preq->iblock of the top delta.
+ *
+ * The zero-filled aux_bio is allocated on first use and reused afterwards.
+ * Returns 0 once the write has been submitted, or -ENOMEM if aux_bio
+ * could not be allocated.
+ */
+static int
+ploop_entry_nullify_req(struct ploop_request *preq)
+{
+ struct ploop_device *plo = preq->plo;
+ struct ploop_delta *top_delta = ploop_top_delta(plo);
+ struct bio_list sbl;
+
+ if (!preq->aux_bio) {
+ preq->aux_bio = bio_alloc(GFP_NOFS, block_vecs(plo));
+ if (!preq->aux_bio)
+ return -ENOMEM;
+ fill_zero_bio(plo, preq->aux_bio);
+ }
+
+ sbl.head = sbl.tail = preq->aux_bio;
+ preq->eng_state = PLOOP_E_RELOC_NULLIFY;
+ list_del_init(&preq->list);
+
+ /*
+ * Previously we assumed that nullified blocks get synced by the
+ * format driver's image fsync before the header update.
+ * But we write this data directly into the underlying device,
+ * bypassing EXT4 by using the extent map tree
+ * (see dio_submit()), so fsync of the EXT4 image doesn't help us.
+ * We need to force a sync of the nullified blocks.
+ */
+
+ preq->eng_io = &top_delta->io;
+ set_bit(PLOOP_REQ_ISSUE_FLUSH, &preq->state);
+ top_delta->io.ops->submit(&top_delta->io, preq, preq->req_rw,
+ &sbl, preq->iblock, 1<<plo->cluster_log);
+ return 0;
+}
+
static int discard_get_index(struct ploop_request *preq)
{
struct ploop_device *plo = preq->plo;
@@ -2077,6 +2136,7 @@ static inline bool preq_is_special(struct ploop_request * preq)
return state & (PLOOP_REQ_MERGE_FL |
PLOOP_REQ_RELOC_A_FL |
PLOOP_REQ_RELOC_S_FL |
+ PLOOP_REQ_RELOC_N_FL |
PLOOP_REQ_DISCARD_FL |
PLOOP_REQ_ZERO_FL);
}
@@ -2164,6 +2224,11 @@ restart:
if (iblk)
ploop_reloc_sched_read(preq, iblk);
return;
+ } else if (test_bit(PLOOP_REQ_RELOC_N, &preq->state)) {
+ err = ploop_entry_nullify_req(preq);
+ if (err)
+ goto error;
+ return;
} else if (preq->req_cluster == ~0U) {
BUG_ON(!test_bit(PLOOP_REQ_MERGE, &preq->state));
BUG_ON(preq->trans_map);
@@ -2710,7 +2775,7 @@ restart:
del_lockout(preq);
preq->eng_state = PLOOP_E_ENTRY;
- preq->req_cluster++;
+ preq->iblock++;
goto restart;
}
case PLOOP_E_TRANS_DELTA_READ:
@@ -2866,8 +2931,11 @@ static void ploop_handle_enospc_req(struct ploop_request *preq)
int i;
struct bio * bio = preq->aux_bio;
- for (i = 0; i < bio->bi_vcnt; i++)
- put_page(bio->bi_io_vec[i].bv_page);
+ for (i = 0; i < bio->bi_vcnt; i++) {
+ struct page *page = bio->bi_io_vec[i].bv_page;
+ if (page != ZERO_PAGE(0))
+ put_page(page);
+ }
bio_put(bio);
@@ -4131,6 +4199,7 @@ static int ploop_clear(struct ploop_device * plo, struct block_device * bdev)
clear_bit(PLOOP_S_DISCARD_LOADED, &plo->state);
clear_bit(PLOOP_S_DISCARD, &plo->state);
+ clear_bit(PLOOP_S_NULLIFY, &plo->state);
destroy_deltas(plo, &plo->map);
@@ -4190,15 +4259,29 @@ static int ploop_index_update_ioc(struct ploop_device *plo, unsigned long arg)
return 0;
}
-static void ploop_relocate(struct ploop_device * plo)
+enum {
+ PLOOP_GROW_RELOC = 0,
+ PLOOP_GROW_NULLIFY,
+ PLOOP_GROW_MAX,
+};
+
+static void ploop_relocate(struct ploop_device * plo, int grow_stage)
{
struct ploop_request * preq;
+ int reloc_type = (grow_stage == PLOOP_GROW_RELOC) ?
+ PLOOP_REQ_RELOC_A : PLOOP_REQ_RELOC_N;
+
+ BUG_ON(grow_stage != PLOOP_GROW_RELOC &&
+ grow_stage != PLOOP_GROW_NULLIFY);
spin_lock_irq(&plo->lock);
atomic_set(&plo->maintenance_cnt, 1);
plo->grow_relocated = 0;
+ if (grow_stage == PLOOP_GROW_NULLIFY)
+ set_bit(PLOOP_S_NULLIFY, &plo->state);
+
init_completion(&plo->maintenance_comp);
preq = ploop_alloc_request(plo);
@@ -4208,10 +4291,10 @@ static void ploop_relocate(struct ploop_device * plo)
preq->req_size = 0;
preq->req_rw = WRITE_SYNC;
preq->eng_state = PLOOP_E_ENTRY;
- preq->state = (1 << PLOOP_REQ_SYNC) | (1 << PLOOP_REQ_RELOC_A);
+ preq->state = (1 << PLOOP_REQ_SYNC) | (1 << reloc_type);
preq->error = 0;
preq->tstamp = jiffies;
- preq->iblock = 0;
+ preq->iblock = (reloc_type == PLOOP_REQ_RELOC_A) ? 0 : plo->grow_start;
preq->prealloc_size = 0;
atomic_inc(&plo->maintenance_cnt);
@@ -4235,12 +4318,16 @@ static int ploop_grow(struct ploop_device *plo, struct block_device *bdev,
struct ploop_delta *delta = ploop_top_delta(plo);
int reloc = 0; /* 'relocation needed' flag */
int err;
+ int grow_stage = PLOOP_GROW_RELOC;
if (!delta)
return -ENOENT;
- if (plo->maintenance_type == PLOOP_MNTN_GROW)
+ if (plo->maintenance_type == PLOOP_MNTN_GROW) {
+ if (test_bit(PLOOP_S_NULLIFY, &plo->state))
+ grow_stage = PLOOP_GROW_NULLIFY;
goto already;
+ }
if (plo->maintenance_type != PLOOP_MNTN_OFF)
return -EBUSY;
@@ -4276,24 +4363,28 @@ static int ploop_grow(struct ploop_device *plo, struct block_device *bdev,
if (reloc) {
plo->maintenance_type = PLOOP_MNTN_GROW;
ploop_relax(plo);
- ploop_relocate(plo);
+ for (; grow_stage < PLOOP_GROW_MAX; grow_stage++) {
+ ploop_relocate(plo, grow_stage);
already:
- err = ploop_maintenance_wait(plo);
- if (err)
- return err;
+ err = ploop_maintenance_wait(plo);
+ if (err)
+ return err;
- BUG_ON(atomic_read(&plo->maintenance_cnt));
+ BUG_ON(atomic_read(&plo->maintenance_cnt));
- if (plo->maintenance_type != PLOOP_MNTN_GROW)
- return -EALREADY;
+ if (plo->maintenance_type != PLOOP_MNTN_GROW)
+ return -EALREADY;
- if (test_bit(PLOOP_S_ABORT, &plo->state)) {
- plo->maintenance_type = PLOOP_MNTN_OFF;
- return -EIO;
+ if (test_bit(PLOOP_S_ABORT, &plo->state)) {
+ clear_bit(PLOOP_S_NULLIFY, &plo->state);
+ plo->maintenance_type = PLOOP_MNTN_OFF;
+ return -EIO;
+ }
}
ploop_quiesce(plo);
new_size = plo->grow_new_size;
+ clear_bit(PLOOP_S_NULLIFY, &plo->state);
plo->maintenance_type = PLOOP_MNTN_OFF;
}
@@ -5278,8 +5369,11 @@ static struct ploop_device *ploop_dev_init(int index)
{
struct ploop_device *plo = ploop_dev_search(index);
- if (plo)
+ if (plo) {
+ BUG_ON(list_empty(&plo->map.delta_list) &&
+ test_bit(PLOOP_S_NULLIFY, &plo->state));
return plo;
+ }
plo = __ploop_dev_alloc(index);
if (plo) {
@@ -5387,6 +5481,7 @@ static int ploop_minor_open(struct inode *inode, struct file *file)
ploop_sysfs_init(plo);
ploop_dev_insert(plo);
}
+ BUG_ON(test_bit(PLOOP_S_NULLIFY, &plo->state));
set_bit(PLOOP_S_LOCKED, &plo->locking_state);
mutex_unlock(&ploop_devices_mutex);
diff --git a/drivers/block/ploop/events.h b/drivers/block/ploop/events.h
index ac0f343..62144e1 100644
--- a/drivers/block/ploop/events.h
+++ b/drivers/block/ploop/events.h
@@ -43,6 +43,7 @@
{ 1 << PLOOP_REQ_MERGE, "M"}, \
{ 1 << PLOOP_REQ_RELOC_A, "RA"}, \
{ 1 << PLOOP_REQ_RELOC_S, "RS"}, \
+ { 1 << PLOOP_REQ_RELOC_N, "RN"}, \
{ 1 << PLOOP_REQ_ZERO, "Z"}, \
{ 1 << PLOOP_REQ_DISCARD, "D"})
diff --git a/drivers/block/ploop/io_kaio.c b/drivers/block/ploop/io_kaio.c
index 85863df..ee9ba26 100644
--- a/drivers/block/ploop/io_kaio.c
+++ b/drivers/block/ploop/io_kaio.c
@@ -70,7 +70,9 @@ static void kaio_complete_io_state(struct ploop_request * preq)
int post_fsync = 0;
int need_fua = !!(preq->req_rw & REQ_FUA);
unsigned long state = READ_ONCE(preq->state);
- int reloc = !!(state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL));
+ int reloc = !!(state & (PLOOP_REQ_RELOC_A_FL|
+ PLOOP_REQ_RELOC_S_FL|
+ PLOOP_REQ_RELOC_N_FL));
if (preq->error || !(preq->req_rw & REQ_FUA) ||
preq->eng_state == PLOOP_E_INDEX_READ ||
diff --git a/drivers/block/ploop/map.c b/drivers/block/ploop/map.c
index 715dc15..f21b9ab 100644
--- a/drivers/block/ploop/map.c
+++ b/drivers/block/ploop/map.c
@@ -1072,9 +1072,6 @@ static void map_wb_complete_post_process(struct ploop_map *map,
struct ploop_request *preq, int err)
{
struct ploop_device *plo = map->plo;
- struct ploop_delta *top_delta = map_top_delta(map);
- struct bio_list sbl;
- int i;
if (likely(err ||
(!test_bit(PLOOP_REQ_RELOC_A, &preq->state) &&
@@ -1098,26 +1095,14 @@ static void map_wb_complete_post_process(struct ploop_map *map,
BUG_ON (!test_bit(PLOOP_REQ_RELOC_A, &preq->state));
BUG_ON (!preq->aux_bio);
- sbl.head = sbl.tail = preq->aux_bio;
- preq->eng_state = PLOOP_E_RELOC_NULLIFY;
- list_del_init(&preq->list);
- for (i = 0; i < preq->aux_bio->bi_vcnt; i++)
- memset(page_address(preq->aux_bio->bi_io_vec[i].bv_page),
- 0, PAGE_SIZE);
-
- /*
- * Lately we think we does sync of nullified blocks at format
- * driver by image fsync before header update.
- * But we write this data directly into underlying device
- * bypassing EXT4 by usage of extent map tree
- * (see dio_submit()). So fsync of EXT4 image doesnt help us.
- * We need to force sync of nullified blocks.
- */
- preq->eng_io = &top_delta->io;
- BUG_ON(test_bit(PLOOP_REQ_POST_SUBMIT, &preq->state));
- set_bit(PLOOP_REQ_ISSUE_FLUSH, &preq->state);
- top_delta->io.ops->submit(&top_delta->io, preq, preq->req_rw,
- &sbl, preq->iblock, 1<<plo->cluster_log);
+ if (++plo->grow_relocated > plo->grow_end - plo->grow_start) {
+ requeue_req(preq, PLOOP_E_COMPLETE);
+ return;
+ }
+
+ del_lockout(preq);
+ preq->req_cluster++;
+ requeue_req(preq, PLOOP_E_ENTRY);
}
static void map_wb_complete(struct map_node * m, int err)
diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
index b8c480a..8abc6f9 100644
--- a/include/linux/ploop/ploop.h
+++ b/include/linux/ploop/ploop.h
@@ -61,6 +61,7 @@ enum {
(for minor mgmt only) */
PLOOP_S_ONCE, /* An event (e.g. printk once) happened */
PLOOP_S_PUSH_BACKUP, /* Push_backup is in progress */
+ PLOOP_S_NULLIFY, /* Nullifying BAT is in progress */
};
enum {
@@ -486,6 +487,7 @@ enum
PLOOP_REQ_MERGE,
PLOOP_REQ_RELOC_A, /* 'A' stands for allocate() */
PLOOP_REQ_RELOC_S, /* 'S' stands for submit() */
+ PLOOP_REQ_RELOC_N, /* 'N' stands for "nullify" */
PLOOP_REQ_ZERO,
PLOOP_REQ_DISCARD,
PLOOP_REQ_RSYNC,
@@ -500,6 +502,7 @@ enum
#define PLOOP_REQ_MERGE_FL (1 << PLOOP_REQ_MERGE)
#define PLOOP_REQ_RELOC_A_FL (1 << PLOOP_REQ_RELOC_A)
#define PLOOP_REQ_RELOC_S_FL (1 << PLOOP_REQ_RELOC_S)
+#define PLOOP_REQ_RELOC_N_FL (1 << PLOOP_REQ_RELOC_N)
#define PLOOP_REQ_DISCARD_FL (1 << PLOOP_REQ_DISCARD)
#define PLOOP_REQ_ZERO_FL (1 << PLOOP_REQ_ZERO)
@@ -514,7 +517,7 @@ enum
PLOOP_E_DELTA_COPIED, /* Data from previos delta was bcopy-ied */
PLOOP_E_TRANS_DELTA_READ,/* Write request reads data from trans delta */
PLOOP_E_RELOC_DATA_READ,/* Read user data to relocate */
- PLOOP_E_RELOC_NULLIFY, /* Zeroing relocated block is in progress */
+ PLOOP_E_RELOC_NULLIFY, /* Zeroing given iblock is in progress */
PLOOP_E_INDEX_DELAY, /* Index update is blocked by already queued
* index update.
*/
More information about the Devel
mailing list