[Devel] [PATCH RHEL7 COMMIT] Revert "ploop: Remove obsolete ioctls"
Konstantin Khorenko
khorenko at virtuozzo.com
Tue May 21 17:44:30 MSK 2019
The commit is pushed to "branch-rh7-3.10.0-957.12.2.vz7.96.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-957.12.2.vz7.96.1
------>
commit e865278c5477598565bf62bce5179f769c58c49c
Author: Kirill Tkhai <ktkhai at virtuozzo.com>
Date: Tue May 21 17:44:28 2019 +0300
Revert "ploop: Remove obsolete ioctls"
This reverts commit 29859bbe8d14113dcad7d6613f19c7d1f856d333.
Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
=====================
Patchset description:
ploop: Return maintenance mode
This patch set enables it for vstorage.
Also, added the file /sys/block/ploopXXX/pstate/native_discard,
which shows whether we should use maintenance-mode based
discard or not.
https://jira.sw.ru/browse/PSBM-94662
Kirill Tkhai (8):
Revert "ploop: Remove now unused PLOOP_E_ZERO_INDEX and PLOOP_E_DELTA_ZERO_INDEX branches"
Revert "ploop: Remove now unused PLOOP_REQ_RELOC_S branches"
Revert "ploop: Remove now unused PLOOP_REQ_DISCARD branches"
Revert "ploop: Remove now unused PLOOP_REQ_ZERO branches"
Revert "ploop: Remove obsolete ioctls"
Partial revert "ploop: Enable native discard support for kaio engine"
ploop: Return maintaince mode ioctls again
ploop: Show whether device supports native discard
---
drivers/block/ploop/Makefile | 2 +-
drivers/block/ploop/dev.c | 830 +++++++++++++++++++++++++++++-
drivers/block/ploop/discard.c | 121 +++++
drivers/block/ploop/discard.h | 15 +
drivers/block/ploop/freeblks.c | 1110 ++++++++++++++++++++++++++++++++++++++++
drivers/block/ploop/freeblks.h | 58 +++
include/linux/ploop/ploop.h | 19 +-
include/linux/ploop/ploop_if.h | 20 +-
8 files changed, 2130 insertions(+), 45 deletions(-)
diff --git a/drivers/block/ploop/Makefile b/drivers/block/ploop/Makefile
index 7741ec584164..e572dd54a9b5 100644
--- a/drivers/block/ploop/Makefile
+++ b/drivers/block/ploop/Makefile
@@ -8,7 +8,7 @@ CFLAGS_io_direct.o = -I$(src)
CFLAGS_ploop_events.o = -I$(src)
obj-$(CONFIG_BLK_DEV_PLOOP) += ploop.o
-ploop-objs := dev.o map.o io.o sysfs.o tracker.o ploop_events.o push_backup.o
+ploop-objs := dev.o map.o io.o sysfs.o tracker.o freeblks.o ploop_events.o discard.o push_backup.o
obj-$(CONFIG_BLK_DEV_PLOOP) += pfmt_ploop1.o
pfmt_ploop1-objs := fmt_ploop1.o
diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index 4d3b62d85251..4bf0240df622 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -25,6 +25,8 @@
#include <bc/beancounter.h>
#include <linux/ploop/ploop.h>
#include "ploop_events.h"
+#include "freeblks.h"
+#include "discard.h"
#include "push_backup.h"
/* Structures and terms:
@@ -526,7 +528,42 @@ ploop_bio_queue(struct ploop_device * plo, struct bio * bio,
ploop_pb_check_and_clear_bit(plo->pbd, preq->req_cluster))
ploop_set_blockable(plo, preq);
- preq->bl.head = preq->bl.tail = bio;
+ if (test_bit(PLOOP_S_DISCARD, &plo->state) &&
+ unlikely(bio->bi_rw & REQ_DISCARD)) {
+ int clu_size = cluster_size_in_sec(plo);
+ int i = (clu_size - 1) & bio->bi_sector;
+ int err = 0;
+
+ if (i) {
+ preq->req_cluster++;
+ if (preq->req_size >= clu_size)
+ preq->req_size -= clu_size - i;
+ }
+
+ if (preq->req_size < clu_size ||
+ (err = ploop_discard_add_bio(plo->fbd, bio))) {
+ if (test_bit(BIO_BDEV_REUSED, &bio->bi_flags)) {
+ struct io_context *ioc;
+ ioc = (struct io_context *)(bio->bi_bdev);
+ atomic_dec(&ioc->nr_tasks);
+ put_io_context_active(ioc);
+
+ bio->bi_bdev = plo->bdev;
+ clear_bit(BIO_BDEV_REUSED, &bio->bi_flags);
+ }
+ BIO_ENDIO(plo->queue, bio, err);
+ list_add(&preq->list, &plo->free_list);
+ plo->free_qlen++;
+ plo->bio_discard_qlen--;
+ plo->bio_total--;
+ return;
+ }
+
+ preq->state = (1 << PLOOP_REQ_SYNC) | (1 << PLOOP_REQ_DISCARD);
+ preq->dst_iblock = 0;
+ preq->bl.head = preq->bl.tail = NULL;
+ } else
+ preq->bl.head = preq->bl.tail = bio;
if (test_bit(BIO_BDEV_REUSED, &bio->bi_flags)) {
preq->ioc = (struct io_context *)(bio->bi_bdev);
@@ -786,9 +823,15 @@ static void ploop_unplug(struct blk_plug_cb *cb, bool from_schedule)
static void
process_discard_bio_queue(struct ploop_device * plo, struct list_head *drop_list)
{
+ bool discard = test_bit(PLOOP_S_DISCARD, &plo->state);
+
while (!list_empty(&plo->free_list)) {
struct bio *tmp;
+ /* Only one discard bio can be handled concurrently */
+ if (discard && ploop_discard_is_inprogress(plo->fbd))
+ return;
+
tmp = bio_list_pop(&plo->bio_discard_list);
if (tmp == NULL)
break;
@@ -1273,6 +1316,22 @@ static void del_pb_lockout(struct ploop_request *preq)
__del_lockout(preq, true);
}
+static void ploop_discard_wakeup(struct ploop_request *preq, int err)
+{
+ struct ploop_device *plo = preq->plo;
+
+ if (err || !ploop_fb_get_n_free(plo->fbd)) {
+ /* Only one discard request is processed */
+ ploop_fb_reinit(plo->fbd, err);
+ } else
+ set_bit(PLOOP_S_DISCARD_LOADED, &plo->state);
+
+ if (atomic_dec_and_test(&plo->maintenance_cnt))
+ if (test_bit(PLOOP_S_DISCARD_LOADED, &plo->state) ||
+ !test_bit(PLOOP_S_DISCARD, &plo->state))
+ complete(&plo->maintenance_comp);
+}
+
static void ploop_complete_request(struct ploop_request * preq)
{
struct ploop_device * plo = preq->plo;
@@ -1334,7 +1393,8 @@ static void ploop_complete_request(struct ploop_request * preq)
if (atomic_dec_and_test(&plo->maintenance_cnt))
complete(&plo->maintenance_comp);
- }
+ } else if (test_bit(PLOOP_REQ_DISCARD, &preq->state))
+ ploop_discard_wakeup(preq, preq->error);
if (preq->aux_bio) {
int i;
@@ -1378,15 +1438,19 @@ static void ploop_complete_request(struct ploop_request * preq)
plo->active_reqs--;
- ploop_uncongest(plo);
- list_add(&preq->list, &plo->free_list);
- plo->free_qlen++;
- if (waitqueue_active(&plo->req_waitq))
- wake_up(&plo->req_waitq);
- else if (waitqueue_active(&plo->waitq) &&
- (plo->bio_head ||
- !bio_list_empty(&plo->bio_discard_list)))
- wake_up_interruptible(&plo->waitq);
+ if (unlikely(test_bit(PLOOP_REQ_ZERO, &preq->state))) {
+ ploop_fb_put_zero_request(plo->fbd, preq);
+ } else {
+ ploop_uncongest(plo);
+ list_add(&preq->list, &plo->free_list);
+ plo->free_qlen++;
+ if (waitqueue_active(&plo->req_waitq))
+ wake_up(&plo->req_waitq);
+ else if (waitqueue_active(&plo->waitq) &&
+ (plo->bio_head ||
+ !bio_list_empty(&plo->bio_discard_list)))
+ wake_up_interruptible(&plo->waitq);
+ }
plo->bio_total -= nr_completed;
if (plo->tune.congestion_detection &&
@@ -1606,6 +1670,44 @@ static int prepare_merge_req(struct ploop_request * preq)
return 1;
}
+void ploop_queue_zero_request(struct ploop_device *plo,
+ struct ploop_request *orig_preq, cluster_t clu)
+{
+ struct ploop_request * preq;
+
+ spin_lock_irq(&plo->lock);
+
+ preq = ploop_fb_get_zero_request(plo->fbd);
+ preq->bl.tail = preq->bl.head = NULL;
+ preq->req_cluster = clu;
+ preq->req_size = 0;
+ preq->req_rw = WRITE_SYNC;
+ preq->eng_state = PLOOP_E_ENTRY;
+ preq->state = (1 << PLOOP_REQ_ZERO);
+ if (test_bit(PLOOP_REQ_SYNC, &orig_preq->state))
+ preq->state |= (1 << PLOOP_REQ_SYNC);
+ preq->error = 0;
+ preq->tstamp = jiffies;
+ preq->iblock = 0;
+ preq->preq_ub = get_beancounter(get_exec_ub());
+
+ if (test_bit(PLOOP_REQ_RELOC_S, &orig_preq->state)) {
+ if (orig_preq->dst_iblock == ~0U)
+ orig_preq->eng_state = PLOOP_E_RELOC_COMPLETE;
+ } else {
+ orig_preq->eng_state = orig_preq->iblock ?
+ PLOOP_E_DELTA_ZERO_INDEX : PLOOP_E_ZERO_INDEX;
+ }
+ orig_preq->iblock = 0;
+ INIT_LIST_HEAD(&preq->delay_list);
+ list_add_tail(&orig_preq->list, &preq->delay_list);
+
+ list_add(&preq->list, &plo->ready_queue);
+ plo->active_reqs++;
+
+ spin_unlock_irq(&plo->lock);
+}
+
static void
ploop_reloc_sched_read(struct ploop_request *preq, iblock_t iblk)
{
@@ -1639,6 +1741,100 @@ ploop_reloc_sched_read(struct ploop_request *preq, iblock_t iblk)
&sbl, iblk, cluster_size_in_sec(plo));
}
+/*
+ * Returns 0 if and only if a free block was successfully reused
+ */
+static int
+ploop_reuse_free_block(struct ploop_request *preq)
+{
+ struct ploop_device *plo = preq->plo;
+ struct ploop_delta *top_delta = ploop_top_delta(plo);
+ iblock_t iblk;
+ cluster_t clu;
+ int rc;
+ unsigned long pin_state;
+
+ if (plo->maintenance_type != PLOOP_MNTN_FBLOADED &&
+ plo->maintenance_type != PLOOP_MNTN_RELOC)
+ return -1;
+
+ rc = ploop_fb_get_free_block(plo->fbd, &clu, &iblk);
+
+ /* simple case - no free blocks left */
+ if (rc < 0)
+ return rc;
+
+ /* a free block to reuse requires zeroing index */
+ if (rc > 0) {
+ ploop_queue_zero_request(plo, preq, clu);
+ return 0;
+ }
+
+ /* 'rc == 0' - use iblk as a lost block */
+ pin_state = preq->iblock ? PLOOP_E_DELTA_ZERO_INDEX :
+ PLOOP_E_ZERO_INDEX;
+ preq->iblock = iblk;
+
+ /* pin preq to some reloc request processing iblk ? */
+ if (ploop_fb_check_reloc_req(plo->fbd, preq, pin_state))
+ return 0;
+
+ /* iblk is a lost block and nobody is relocating it now */
+ preq->eng_state = PLOOP_E_DATA_WBI;
+ __TRACE("T2 %p %u\n", preq, preq->req_cluster);
+ plo->st.bio_out++;
+
+ if (pin_state == PLOOP_E_ZERO_INDEX) {
+ top_delta->io.ops->submit(&top_delta->io, preq, preq->req_rw,
+ &preq->bl, preq->iblock,
+ preq->req_size);
+ } else { /* PLOOP_E_DELTA_READ */
+ struct bio_list sbl;
+
+ BUG_ON (preq->aux_bio == NULL);
+ sbl.head = sbl.tail = preq->aux_bio;
+
+ top_delta->io.ops->submit(&top_delta->io, preq, preq->req_rw,
+ &sbl, preq->iblock, cluster_size_in_sec(plo));
+ }
+
+ return 0;
+}
+
+/*
+ * Returns 0 if and only if zero preq was successfully processed
+ */
+static int
+ploop_entry_zero_req(struct ploop_request *preq)
+{
+ struct ploop_device *plo = preq->plo;
+ struct ploop_delta *top_delta = ploop_top_delta(plo);
+ int level;
+ iblock_t iblk = 0;
+ int err;
+
+ err = ploop_find_map(&plo->map, preq);
+ if (err) {
+ if (err == 1) {
+ __TRACE("m %p %u\n", preq, *clu);
+ return 0;
+ }
+ return err;
+ }
+
+ level = map_get_index(preq, preq->req_cluster, &iblk);
+ if (level != top_delta->level) {
+ printk("Can't zero index on wrong level=%d "
+ "(top_level=%d req_cluster=%u iblk=%u/%u)\n",
+ level, top_delta->level, preq->req_cluster,
+ iblk, preq->iblock);
+ return -EIO;
+ }
+
+ ploop_index_update(preq);
+ return 0;
+}
+
#define MAP_MAX_IND(preq) min(map_get_mn_end(preq->map), \
preq->plo->map.max_index - 1)
@@ -1699,12 +1895,96 @@ ploop_entry_reloc_a_req(struct ploop_request *preq, iblock_t *iblk)
return 0;
}
+/*
+ * Returns 0 if and only if RELOC_S preq was successfully processed.
+ *
+ * Sets preq->req_cluster to the block we're going to relocate.
+ * Returning 0, always set *iblk to a meaningful value: either
+ * zero (if no more blocks to relocate or block to relocate is free
+ * (and zero-index op is scheduled) or map is being read)
+ * or iblock that preq->req_cluster points to.
+ */
+static int
+ploop_entry_reloc_s_req(struct ploop_request *preq, iblock_t *iblk)
+{
+ struct ploop_device *plo = preq->plo;
+ struct ploop_delta *top_delta = ploop_top_delta(plo);
+
+ cluster_t from_clu, to_clu;
+ iblock_t from_iblk, to_iblk;
+ u32 free;
+ int level;
+ int err;
+
+ *iblk = 0;
+
+ if (preq->req_cluster == ~0U) {
+ cluster_t zero_cluster;
+
+ BUG_ON (preq->error);
+ err = ploop_fb_get_reloc_block(plo->fbd, &from_clu, &from_iblk,
+ &to_clu, &to_iblk, &free);
+ if (err < 0) {
+ preq->eng_state = PLOOP_E_COMPLETE;
+ ploop_complete_request(preq);
+ return 0;
+ }
+
+ preq->req_cluster = from_clu;
+ preq->src_iblock = from_iblk;
+ ploop_fb_add_reloc_req(plo->fbd, preq);
+
+ if (free) {
+ preq->dst_iblock = ~0U;
+ preq->dst_cluster = ~0U;
+ zero_cluster = preq->req_cluster;
+ } else {
+ preq->dst_iblock = to_iblk;
+ preq->dst_cluster = to_clu;
+ zero_cluster = preq->dst_cluster;
+ }
+
+ ploop_queue_zero_request(plo, preq, zero_cluster);
+ return 0;
+ }
+
+ err = ploop_find_map(&plo->map, preq);
+ if (err) {
+ if (err == 1) {
+ __TRACE("m %p %u\n", preq, *clu);
+ return 0;
+ }
+ return err;
+ }
+ BUG_ON (preq->map == NULL);
+
+ level = map_get_index(preq, preq->req_cluster, iblk);
+ if (level != top_delta->level) {
+ printk("Can't relocate block on wrong level=%d "
+ "(top_level=%d req_cluster=%u iblk=%u/%u)\n",
+ level, top_delta->level, preq->req_cluster,
+ *iblk, preq->iblock);
+ return -EIO;
+ }
+ if (preq->src_iblock != *iblk) {
+ printk("Can't relocate block due to wrong mapping: "
+ "req_cluster=%u should point to iblk=%u while "
+ "map_get_index() calculated iblk=%u\n",
+ preq->req_cluster, preq->src_iblock, *iblk);
+ return -EIO;
+ }
+
+ return 0;
+}
+
/* dummy wrapper around ploop_entry_reloc_[a|s]_req() */
static int
ploop_entry_reloc_req(struct ploop_request *preq, iblock_t *iblk)
{
if (test_bit(PLOOP_REQ_RELOC_A, &preq->state))
return ploop_entry_reloc_a_req(preq, iblk);
+ else if (test_bit(PLOOP_REQ_RELOC_S, &preq->state))
+ return ploop_entry_reloc_s_req(preq, iblk);
else
BUG();
}
@@ -1766,6 +2046,106 @@ ploop_entry_nullify_req(struct ploop_request *preq)
return 0;
}
+static int discard_get_index(struct ploop_request *preq)
+{
+ struct ploop_device *plo = preq->plo;
+ struct ploop_delta *top_delta = ploop_top_delta(plo);
+ int level;
+ int err;
+
+ preq->iblock = 0;
+
+ err = ploop_find_map(&plo->map, preq);
+ if (err)
+ return err;
+
+ level = map_get_index(preq, preq->req_cluster, &preq->iblock);
+ if (level != top_delta->level)
+ preq->iblock = 0;
+
+ if (preq->map) {
+ spin_lock_irq(&plo->lock);
+ map_release(preq->map);
+ preq->map = NULL;
+ spin_unlock_irq(&plo->lock);
+ }
+
+ return 0;
+}
+
+static int ploop_entry_discard_req(struct ploop_request *preq)
+{
+ int err = 0;
+ struct ploop_device * plo = preq->plo;
+ unsigned int len = 0;
+ cluster_t last_clu;
+
+ if (!test_bit(PLOOP_S_DISCARD, &plo->state)) {
+ err = -EOPNOTSUPP;
+ goto err;
+ }
+
+ BUG_ON(plo->maintenance_type != PLOOP_MNTN_DISCARD);
+
+ last_clu = (preq->req_sector + preq->req_size) >> plo->cluster_log;
+
+ for (; preq->req_cluster < last_clu; preq->req_cluster++) {
+ len = preq->req_cluster - preq->dst_cluster;
+
+ err = discard_get_index(preq);
+ if (err) {
+ if (err == 1)
+ return 0;
+ goto err;
+ }
+
+ if (preq->dst_iblock &&
+ (!preq->iblock || preq->dst_iblock + len != preq->iblock)) {
+ err = ploop_fb_add_free_extent(plo->fbd,
+ preq->dst_cluster,
+ preq->dst_iblock, len);
+ preq->dst_iblock = 0;
+ if (err) {
+ if (err == -EINVAL) {
+ printk("ploop_entry_discard_req1: "
+ "(%lu %u; %u %u; %u %u)\n",
+ preq->req_sector, preq->req_size,
+ preq->req_cluster, preq->iblock,
+ preq->dst_cluster, preq->dst_iblock);
+ WARN_ONCE(1, "add_free_extent failed\n");
+ }
+ goto err;
+ }
+ }
+
+ if (!preq->dst_iblock && preq->iblock) {
+ preq->dst_cluster = preq->req_cluster;
+ preq->dst_iblock = preq->iblock;
+ }
+ }
+
+ if (preq->dst_iblock) {
+ len = preq->req_cluster - preq->dst_cluster;
+ err = ploop_fb_add_free_extent(plo->fbd, preq->dst_cluster,
+ preq->dst_iblock, len);
+ if (err == -EINVAL) {
+ printk("ploop_entry_discard_req2: "
+ "(%lu %u; %u %u; %u %u)\n",
+ preq->req_sector, preq->req_size,
+ preq->req_cluster, preq->iblock,
+ preq->dst_cluster, preq->dst_iblock);
+ WARN_ONCE(1, "add_free_extent failed\n");
+ }
+ }
+
+err:
+ preq->error = err;
+ preq->eng_state = PLOOP_E_COMPLETE;
+ ploop_complete_request(preq);
+
+ return 0;
+}
+
/* Main preq state machine */
static inline bool preq_is_special(struct ploop_request * preq)
@@ -1877,7 +2257,17 @@ ploop_entry_request(struct ploop_request * preq)
preq->req_rw |= REQ_SYNC;
restart:
- if (test_bit(PLOOP_REQ_RELOC_A, &preq->state) ||
+ if (test_bit(PLOOP_REQ_DISCARD, &preq->state)) {
+ err = ploop_entry_discard_req(preq);
+ if (err)
+ goto error;
+ return;
+ } else if (test_bit(PLOOP_REQ_ZERO, &preq->state)) {
+ err = ploop_entry_zero_req(preq);
+ if (err)
+ goto error;
+ return;
+ } else if (test_bit(PLOOP_REQ_RELOC_A, &preq->state) ||
test_bit(PLOOP_REQ_RELOC_S, &preq->state)) {
err = ploop_entry_reloc_req(preq, &iblk);
if (err)
@@ -2099,9 +2489,10 @@ ploop_entry_request(struct ploop_request * preq)
ploop_add_lockout(preq, 0);
spin_unlock_irq(&plo->lock);
- top_delta->ops->allocate(top_delta,
- preq, &preq->bl,
- preq->req_size);
+ if (likely(ploop_reuse_free_block(preq)))
+ top_delta->ops->allocate(top_delta,
+ preq, &preq->bl,
+ preq->req_size);
} else {
struct bio_list sbl;
@@ -2161,11 +2552,13 @@ ploop_entry_request(struct ploop_request * preq)
spin_unlock_irq(&plo->lock);
/* Block does not exist. */
- __TRACE("K %p %u\n", preq, preq->req_cluster);
- plo->st.bio_alloc++;
- top_delta->ops->allocate(top_delta, preq,
- &preq->bl,
- preq->req_size);
+ if (likely(ploop_reuse_free_block(preq))) {
+ __TRACE("K %p %u\n", preq, preq->req_cluster);
+ plo->st.bio_alloc++;
+ top_delta->ops->allocate(top_delta, preq,
+ &preq->bl,
+ preq->req_size);
+ }
}
}
return;
@@ -2258,6 +2651,8 @@ static void ploop_req_state_process(struct ploop_request * preq)
case PLOOP_E_RELOC_COMPLETE:
BUG_ON (!test_bit(PLOOP_REQ_RELOC_S, &preq->state));
if (!preq->error) {
+ ploop_fb_relocate_req_completed(plo->fbd);
+ ploop_fb_del_reloc_req(plo->fbd, preq);
spin_lock_irq(&plo->lock);
if (!list_empty(&preq->delay_list)) {
struct ploop_request *pr;
@@ -2283,6 +2678,7 @@ static void ploop_req_state_process(struct ploop_request * preq)
preq->error, preq->req_cluster, preq->iblock,
preq->src_iblock, preq->dst_cluster,
preq->dst_iblock);
+ ploop_fb_del_reloc_req(plo->fbd, preq);
}
if (!preq->error &&
@@ -2332,11 +2728,13 @@ static void ploop_req_state_process(struct ploop_request * preq)
}
case PLOOP_E_DELTA_COPIED:
{
- struct bio_list sbl;
- sbl.head = sbl.tail = preq->aux_bio;
- top_delta = ploop_top_delta(plo);
- top_delta->ops->allocate(top_delta, preq,
- &sbl, cluster_size_in_sec(plo));
+ if (likely(ploop_reuse_free_block(preq))) {
+ struct bio_list sbl;
+ sbl.head = sbl.tail = preq->aux_bio;
+ top_delta = ploop_top_delta(plo);
+ top_delta->ops->allocate(top_delta, preq,
+ &sbl, cluster_size_in_sec(plo));
+ }
break;
}
case PLOOP_E_ZERO_INDEX:
@@ -2551,7 +2949,8 @@ static void ploop_wait(struct ploop_device * plo, int once, struct blk_plug *plu
!plo->active_reqs))
break;
} else if (plo->bio_head ||
- !bio_list_empty(&plo->bio_discard_list)) {
+ (!bio_list_empty(&plo->bio_discard_list) &&
+ !ploop_discard_is_inprogress(plo->fbd))) {
/* ready_queue and entry_queue are empty, but
* bio list not. Obviously, we'd like to process
* bio_list instead of sleeping */
@@ -3901,6 +4300,8 @@ static int ploop_clear(struct ploop_device * plo, struct block_device * bdev)
return -EBUSY;
}
+ clear_bit(PLOOP_S_DISCARD_LOADED, &plo->state);
+ clear_bit(PLOOP_S_DISCARD, &plo->state);
clear_bit(PLOOP_S_NULLIFY, &plo->state);
destroy_deltas(plo, &plo->map);
@@ -3913,6 +4314,7 @@ static int ploop_clear(struct ploop_device * plo, struct block_device * bdev)
kfree(map);
}
+ ploop_fb_fini(plo->fbd, 0);
ploop_pb_fini(plo->pbd);
plo->maintenance_type = PLOOP_MNTN_OFF;
@@ -4155,6 +4557,357 @@ static int ploop_balloon_ioc(struct ploop_device *plo, unsigned long arg)
return copy_to_user((void*)arg, &ctl, sizeof(ctl));
}
+static int ploop_freeblks_ioc(struct ploop_device *plo, unsigned long arg)
+{
+ struct ploop_delta *delta;
+ struct ploop_freeblks_ctl ctl;
+ struct ploop_freeblks_ctl_extent __user *extents;
+ struct ploop_freeblks_desc *fbd;
+ int i;
+ int rc = 0;
+
+ return 0;
+
+ if (list_empty(&plo->map.delta_list))
+ return -ENOENT;
+
+ if (plo->maintenance_type == PLOOP_MNTN_OFF)
+ return -EINVAL;
+ if (plo->maintenance_type != PLOOP_MNTN_BALLOON)
+ return -EBUSY;
+ BUG_ON (plo->fbd);
+
+ if (copy_from_user(&ctl, (void*)arg, sizeof(ctl)))
+ return -EFAULT;
+
+ delta = ploop_top_delta(plo);
+ if (delta->level != ctl.level) {
+ rc = -EINVAL;
+ goto exit;
+ }
+
+ fbd = ploop_fb_init(plo);
+ if (!fbd) {
+ rc = -ENOMEM;
+ goto exit;
+ }
+
+ extents = (void __user *)(arg + sizeof(ctl));
+
+ for (i = 0; i < ctl.n_extents; i++) {
+ struct ploop_freeblks_ctl_extent extent;
+
+ if (copy_from_user(&extent, &extents[i],
+ sizeof(extent))) {
+ rc = -EFAULT;
+ ploop_fb_fini(fbd, rc);
+ goto exit;
+ }
+
+ rc = ploop_fb_add_free_extent(fbd, extent.clu,
+ extent.iblk, extent.len);
+ if (rc) {
+ if (rc == -EINVAL) {
+ printk("ploop_freeblks_ioc: n=%d\n", ctl.n_extents);
+ for (i = 0; i < ctl.n_extents; i++) {
+ if (copy_from_user(&extent, &extents[i],
+ sizeof(extent))) {
+ printk("copy failed: i=%d\n", i);
+ break;
+ }
+ printk("ploop_freeblks_ioc: i=%d: %u %u %u\n",
+ i, extent.clu, extent.iblk, extent.len);
+ }
+ WARN_ONCE(1, "add_free_extent failed\n");
+ }
+ ploop_fb_fini(fbd, rc);
+ goto exit;
+ }
+ }
+
+ ploop_quiesce(plo);
+
+ ctl.alloc_head = delta->io.alloc_head;
+ if (copy_to_user((void*)arg, &ctl, sizeof(ctl))) {
+ rc = -EFAULT;
+ ploop_fb_fini(fbd, rc);
+ } else {
+ iblock_t a_h = delta->io.alloc_head;
+ /* make fbd visible to ploop engine */
+ plo->fbd = fbd;
+ plo->maintenance_type = PLOOP_MNTN_FBLOADED;
+ BUG_ON (a_h != ctl.alloc_head); /* quiesce sanity */
+ ploop_fb_lost_range_init(fbd, a_h);
+ ploop_fb_set_freezed_level(fbd, delta->level);
+ }
+
+ ploop_relax(plo);
+exit:
+ return rc;
+}
+
+static int ploop_fbget_ioc(struct ploop_device *plo, unsigned long arg)
+{
+ struct ploop_freeblks_ctl ctl;
+ int rc = 0;
+
+ return -EINVAL;
+
+ if (list_empty(&plo->map.delta_list))
+ return -ENOENT;
+
+ if (plo->maintenance_type == PLOOP_MNTN_DISCARD) {
+ if (!test_bit(PLOOP_S_DISCARD_LOADED, &plo->state))
+ return -EINVAL;
+ } else if (plo->maintenance_type != PLOOP_MNTN_FBLOADED)
+ return -EINVAL;
+ BUG_ON (!plo->fbd);
+
+ if (copy_from_user(&ctl, (void*)arg, sizeof(ctl)))
+ return -EFAULT;
+
+ ploop_quiesce(plo);
+ rc = ploop_fb_copy_freeblks_to_user(plo->fbd, (void*)arg, &ctl);
+ ploop_relax(plo);
+
+ return rc;
+}
+
+static int ploop_fbfilter_ioc(struct ploop_device *plo, unsigned long arg)
+{
+ int rc = 0;
+
+ return -EINVAL;
+
+ if (plo->maintenance_type != PLOOP_MNTN_DISCARD ||
+ !test_bit(PLOOP_S_DISCARD_LOADED, &plo->state))
+ return -EINVAL;
+
+ BUG_ON (!plo->fbd);
+
+ ploop_quiesce(plo);
+ rc = ploop_fb_filter_freeblks(plo->fbd, arg);
+ ploop_relax(plo);
+
+ return rc;
+}
+
+static void ploop_relocblks_process(struct ploop_device *plo)
+{
+ int num_reqs;
+ struct ploop_request *preq;
+
+ num_reqs = plo->tune.fsync_max;
+ if (num_reqs > plo->tune.max_requests/2)
+ num_reqs = plo->tune.max_requests/2;
+ if (num_reqs < 1)
+ num_reqs = 1;
+
+ spin_lock_irq(&plo->lock);
+
+ atomic_set(&plo->maintenance_cnt, 1);
+
+ init_completion(&plo->maintenance_comp);
+
+ for (; num_reqs; num_reqs--) {
+ preq = ploop_alloc_request(plo);
+
+ preq->bl.tail = preq->bl.head = NULL;
+ preq->req_cluster = ~0U; /* uninitialized */
+ preq->req_size = 0;
+ preq->req_rw = WRITE_SYNC;
+ preq->eng_state = PLOOP_E_ENTRY;
+ preq->state = (1 << PLOOP_REQ_SYNC) | (1 << PLOOP_REQ_RELOC_S);
+ preq->error = 0;
+ preq->tstamp = jiffies;
+ preq->iblock = 0;
+ preq->prealloc_size = 0;
+ preq->preq_ub = get_beancounter(get_exec_ub());
+
+ atomic_inc(&plo->maintenance_cnt);
+
+ ploop_entry_add(plo, preq);
+
+ if (waitqueue_active(&plo->waitq))
+ wake_up_interruptible(&plo->waitq);
+ }
+
+ if (atomic_dec_and_test(&plo->maintenance_cnt))
+ complete(&plo->maintenance_comp);
+
+ spin_unlock_irq(&plo->lock);
+}
+
+static int release_fbd(struct ploop_device *plo, int err)
+{
+ clear_bit(PLOOP_S_DISCARD, &plo->state);
+
+ ploop_quiesce(plo);
+ ploop_fb_fini(plo->fbd, err);
+ plo->maintenance_type = PLOOP_MNTN_OFF;
+ ploop_relax(plo);
+
+ return err;
+}
+
+static void ploop_discard_restart(struct ploop_device *plo, int err)
+{
+ if (!err && test_bit(PLOOP_S_DISCARD, &plo->state)) {
+ ploop_fb_reinit(plo->fbd, 0);
+ atomic_set(&plo->maintenance_cnt, 0);
+ init_completion(&plo->maintenance_comp);
+ plo->maintenance_type = PLOOP_MNTN_DISCARD;
+ } else {
+ clear_bit(PLOOP_S_DISCARD, &plo->state);
+ ploop_fb_fini(plo->fbd, err);
+ plo->maintenance_type = PLOOP_MNTN_OFF;
+ }
+}
+
+static int ploop_fbdrop_ioc(struct ploop_device *plo)
+{
+ return -EINVAL;
+
+ if (list_empty(&plo->map.delta_list))
+ return -ENOENT;
+
+ if (plo->maintenance_type == PLOOP_MNTN_DISCARD) {
+ if (!test_bit(PLOOP_S_DISCARD_LOADED, &plo->state))
+ return -EINVAL;
+ } else if (plo->maintenance_type != PLOOP_MNTN_FBLOADED)
+ return -EINVAL;
+ BUG_ON (!plo->fbd);
+
+ ploop_quiesce(plo);
+ ploop_discard_restart(plo, 0);
+ ploop_relax(plo);
+
+ return 0;
+}
+
+static int ploop_relocblks_ioc(struct ploop_device *plo, unsigned long arg)
+{
+ struct ploop_delta *delta = ploop_top_delta(plo);
+ struct ploop_relocblks_ctl ctl;
+ struct ploop_freeblks_desc *fbd = plo->fbd;
+ int i;
+ int err = 0;
+ int n_free;
+
+ return -EINVAL;
+
+ if (list_empty(&plo->map.delta_list))
+ return -ENOENT;
+
+ if (!fbd || (plo->maintenance_type != PLOOP_MNTN_FBLOADED &&
+ plo->maintenance_type != PLOOP_MNTN_RELOC))
+ return -EINVAL;
+
+ BUG_ON(test_bit(PLOOP_S_DISCARD_LOADED, &plo->state));
+
+ if (copy_from_user(&ctl, (void*)arg, sizeof(ctl)))
+ return -EFAULT;
+
+ if (delta->level != ctl.level ||
+ ploop_fb_get_freezed_level(plo->fbd) != ctl.level ||
+ ploop_fb_get_alloc_head(plo->fbd) != ctl.alloc_head) {
+ return -EINVAL;
+ }
+
+ if (plo->maintenance_type == PLOOP_MNTN_RELOC)
+ goto already;
+
+ if (ctl.n_extents) {
+ struct ploop_relocblks_ctl_extent __user *extents;
+
+ extents = (void __user *)(arg + sizeof(ctl));
+
+ for (i = 0; i < ctl.n_extents; i++) {
+ struct ploop_relocblks_ctl_extent extent;
+
+ if (copy_from_user(&extent, &extents[i],
+ sizeof(extent)))
+ return release_fbd(plo, -EFAULT);
+
+ /* this extent is also present in freemap */
+ err = ploop_fb_add_reloc_extent(fbd, extent.clu,
+ extent.iblk, extent.len, extent.free);
+ if (err)
+ return release_fbd(plo, err);
+ }
+ }
+
+ ploop_quiesce(plo);
+
+ /* alloc_head must never decrease */
+ BUG_ON (delta->io.alloc_head < ploop_fb_get_alloc_head(plo->fbd));
+ n_free = ploop_fb_get_n_free(plo->fbd);
+
+ /*
+ * before relocation start, freeblks engine could provide only
+ * free blocks. However delta.io.alloc_head can legaly increased
+ * in maintenance mode due to processing of interleaving WRITEs.
+ */
+ WARN_ON (delta->io.alloc_head > ploop_fb_get_alloc_head(plo->fbd) &&
+ n_free);
+ ploop_fb_relocation_start(plo->fbd, ctl.n_scanned);
+
+ if (!n_free || !ctl.n_extents)
+ goto truncate;
+
+ plo->maintenance_type = PLOOP_MNTN_RELOC;
+
+ ploop_relax(plo);
+
+ ploop_relocblks_process(plo);
+already:
+ err = ploop_maintenance_wait(plo);
+ if (err)
+ return err;
+
+ BUG_ON(atomic_read(&plo->maintenance_cnt));
+
+ if (plo->maintenance_type != PLOOP_MNTN_RELOC)
+ return -EALREADY;
+
+ fbd = plo->fbd;
+ BUG_ON (!fbd);
+
+ if (test_bit(PLOOP_S_ABORT, &plo->state)) {
+ clear_bit(PLOOP_S_DISCARD,&plo->state);
+
+ ploop_fb_fini(plo->fbd, -EIO);
+ plo->maintenance_type = PLOOP_MNTN_OFF;
+ return -EIO;
+ }
+
+ if (ploop_fb_get_n_relocated(fbd) != ploop_fb_get_n_relocating(fbd))
+ return release_fbd(plo, -EIO);
+
+ /* time to truncate */
+ ploop_quiesce(plo);
+truncate:
+ if ((ploop_fb_get_lost_range_len(plo->fbd) != 0) &&
+ (delta->io.alloc_head == ploop_fb_get_alloc_head(plo->fbd))) {
+ err = delta->ops->truncate(delta, NULL,
+ ploop_fb_get_first_lost_iblk(plo->fbd));
+ if (!err) {
+ /* See comment in dio_release_prealloced */
+ delta->io.prealloced_size = 0;
+ ctl.alloc_head = ploop_fb_get_lost_range_len(plo->fbd);
+ err = copy_to_user((void*)arg, &ctl, sizeof(ctl));
+ }
+ } else {
+ ctl.alloc_head = 0;
+ err = copy_to_user((void*)arg, &ctl, sizeof(ctl));
+ }
+
+ ploop_discard_restart(plo, err);
+
+ ploop_relax(plo);
+ return err;
+}
+
static int ploop_getdevice_ioc(unsigned long arg)
{
int err;
@@ -4514,10 +5267,34 @@ static int ploop_ioctl(struct block_device *bdev, fmode_t fmode, unsigned int cm
case PLOOP_IOC_BALLOON:
err = ploop_balloon_ioc(plo, arg);
break;
+ case PLOOP_IOC_FREEBLKS:
+ err = ploop_freeblks_ioc(plo, arg);
+ break;
+ case PLOOP_IOC_FBGET:
+ err = ploop_fbget_ioc(plo, arg);
+ break;
+ case PLOOP_IOC_FBFILTER:
+ err = ploop_fbfilter_ioc(plo, arg);
+ break;
+ case PLOOP_IOC_FBDROP:
+ err = ploop_fbdrop_ioc(plo);
+ break;
+ case PLOOP_IOC_RELOCBLKS:
+ err = ploop_relocblks_ioc(plo, arg);
+ break;
case PLOOP_IOC_GETDEVICE:
err = ploop_getdevice_ioc(arg);
break;
+ case PLOOP_IOC_DISCARD_INIT:
+ err = ploop_discard_init_ioc(plo);
+ break;
+ case PLOOP_IOC_DISCARD_FINI:
+ err = ploop_discard_fini_ioc(plo);
+ break;
+ case PLOOP_IOC_DISCARD_WAIT:
+ err = ploop_discard_wait_ioc(plo);
+ break;
case PLOOP_IOC_MAX_DELTA_SIZE:
err = ploop_set_max_delta_size(plo, arg);
break;
@@ -4661,6 +5438,7 @@ static void ploop_dev_del(struct ploop_device *plo)
blk_cleanup_queue(plo->queue);
put_disk(plo->disk);
rb_erase(&plo->link, &ploop_devices_tree);
+ ploop_fb_fini(plo->fbd, 0);
kobject_put(&plo->kobj);
}
diff --git a/drivers/block/ploop/discard.c b/drivers/block/ploop/discard.c
new file mode 100644
index 000000000000..1920ab05e20b
--- /dev/null
+++ b/drivers/block/ploop/discard.c
@@ -0,0 +1,121 @@
+/*
+ * drivers/block/ploop/discard.c
+ *
+ * Copyright (c) 2010-2015 Parallels IP Holdings GmbH
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/bio.h>
+
+#include <linux/ploop/ploop.h>
+#include "discard.h"
+#include "freeblks.h"
+
+int ploop_discard_init_ioc(struct ploop_device *plo)
+{
+ struct ploop_freeblks_desc *fbd;
+ struct ploop_delta *delta = ploop_top_delta(plo);
+
+ return -EINVAL;
+
+ if (delta == NULL)
+ return -EINVAL;
+
+ if (delta->ops->id != PLOOP_FMT_PLOOP1)
+ return -EOPNOTSUPP;
+
+ if (plo->maintenance_type != PLOOP_MNTN_OFF)
+ return -EBUSY;
+
+ fbd = ploop_fb_init(plo);
+ if (!fbd)
+ return -ENOMEM;
+
+ ploop_quiesce(plo);
+
+ ploop_fb_set_freezed_level(fbd, delta->level);
+
+ plo->fbd = fbd;
+
+ atomic_set(&plo->maintenance_cnt, 0);
+ init_completion(&plo->maintenance_comp);
+ plo->maintenance_type = PLOOP_MNTN_DISCARD;
+ set_bit(PLOOP_S_DISCARD, &plo->state);
+
+ ploop_relax(plo);
+
+ return 0;
+}
+
+int ploop_discard_fini_ioc(struct ploop_device *plo)
+{
+ int ret = 0;
+ struct ploop_request *preq, *tmp;
+ LIST_HEAD(drop_list);
+
+ return -EINVAL;
+
+ if (!test_and_clear_bit(PLOOP_S_DISCARD, &plo->state))
+ return 0;
+
+ ploop_quiesce(plo);
+
+ spin_lock_irq(&plo->lock);
+ list_for_each_entry_safe(preq, tmp, &plo->entry_queue, list)
+ if (test_bit(PLOOP_REQ_DISCARD, &preq->state)) {
+ list_move(&preq->list, &drop_list);
+ ploop_entry_qlen_dec(preq);
+ }
+ spin_unlock_irq(&plo->lock);
+
+ if (!list_empty(&drop_list))
+ ploop_preq_drop(plo, &drop_list);
+
+ if (plo->maintenance_type != PLOOP_MNTN_DISCARD) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ ploop_fb_fini(plo->fbd, -EOPNOTSUPP);
+
+ clear_bit(PLOOP_S_DISCARD_LOADED, &plo->state);
+
+ plo->maintenance_type = PLOOP_MNTN_OFF;
+ complete(&plo->maintenance_comp);
+
+out:
+ ploop_relax(plo);
+
+ return ret;
+}
+
+int ploop_discard_wait_ioc(struct ploop_device *plo)
+{
+ int err;
+
+ return -EINVAL;
+
+ if (!test_bit(PLOOP_S_DISCARD, &plo->state))
+ return 0;
+
+ if (plo->maintenance_type == PLOOP_MNTN_FBLOADED)
+ return 1;
+
+ if (plo->maintenance_type != PLOOP_MNTN_DISCARD)
+ return -EINVAL;
+
+ err = ploop_maintenance_wait(plo);
+ if (err)
+ goto out;
+
+ /* maintenance_cnt is zero without discard requests,
+ * in this case ploop_maintenance_wait returns 0
+ * instead of ERESTARTSYS */
+ if (test_bit(PLOOP_S_DISCARD_LOADED, &plo->state)) {
+ err = 1;
+ } else if (signal_pending(current))
+ err = -ERESTARTSYS;
+out:
+ return err;
+}
diff --git a/drivers/block/ploop/discard.h b/drivers/block/ploop/discard.h
new file mode 100644
index 000000000000..a8e7e4e59b09
--- /dev/null
+++ b/drivers/block/ploop/discard.h
@@ -0,0 +1,15 @@
+/*
+ * drivers/block/ploop/discard.h
+ *
+ * Copyright (c) 2010-2015 Parallels IP Holdings GmbH
+ *
+ */
+
+#ifndef _LINUX_PLOOP_DISCARD_H_
+#define _LINUX_PLOOP_DISCARD_H_
+
+extern int ploop_discard_init_ioc(struct ploop_device *plo);
+extern int ploop_discard_fini_ioc(struct ploop_device *plo);
+extern int ploop_discard_wait_ioc(struct ploop_device *plo);
+
+#endif // _LINUX_PLOOP_DISCARD_H_
diff --git a/drivers/block/ploop/freeblks.c b/drivers/block/ploop/freeblks.c
new file mode 100644
index 000000000000..ab907a39ac65
--- /dev/null
+++ b/drivers/block/ploop/freeblks.c
@@ -0,0 +1,1110 @@
+/*
+ * drivers/block/ploop/freeblks.c
+ *
+ * Copyright (c) 2010-2015 Parallels IP Holdings GmbH
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/bio.h>
+#include <linux/interrupt.h>
+#include <linux/buffer_head.h>
+#include <linux/kthread.h>
+
+#include <trace/events/block.h>
+
+#include <linux/ploop/ploop.h>
+#include "freeblks.h"
+
+/*
+ * NOTE(review): MIN() does not parenthesize its arguments or expansion;
+ * MIN(a + 1, b) would mis-expand.  All call sites in this file pass
+ * simple expressions, and this is a verbatim revert, so it is left as-is.
+ */
+#define MIN(a, b) (a < b ? a : b)
+
+/* A contiguous run of free clusters/iblocks, kept in fbd_free_list. */
+struct ploop_freeblks_extent
+{
+ struct list_head list; /* List link */
+
+ cluster_t clu;
+ iblock_t iblk;
+ u32 len;
+
+};
+
+/* A contiguous run of blocks to relocate, kept in fbd_reloc_list. */
+struct ploop_relocblks_extent
+{
+ struct list_head list; /* List link */
+
+ cluster_t clu;
+ iblock_t iblk;
+ u32 len;
+ u32 free; /* this extent is also present in freemap */
+};
+
+/* Cursor into the free-extent list: extent + offset within it. */
+struct ploop_fextent_ptr {
+ struct ploop_freeblks_extent *ext;
+ u32 off;
+};
+
+/* Cursor into the reloc-extent list: extent + offset within it. */
+struct ploop_rextent_ptr {
+ struct ploop_relocblks_extent *ext;
+ u32 off;
+};
+
+/*
+ * Per-device free-block / relocation state ("in-kernel balloon").
+ * Owned by struct ploop_device::fbd; created by ploop_fb_init() and
+ * destroyed by ploop_fb_fini().  A NULL cursor ->ext means the cursor
+ * is invalidated (no more blocks in that role).
+ */
+struct ploop_freeblks_desc {
+ struct ploop_device *plo;
+
+ int fbd_n_free; /* # free blocks remaining
+ (i.e. "not re-used") */
+
+ /* fbd_ffb.ext->clu + fbd_ffb.off can be used as
+ * 'clu of first free block to reuse' for WRITE ops */
+ struct ploop_fextent_ptr fbd_ffb; /* 'ffb' stands for
+ 'first free block' */
+
+ /* fbd_lfb.ext->clu + fbd_lfb.off can be used as
+ * 'clu of first block to overwrite' (draining reloc range from end) */
+ struct ploop_fextent_ptr fbd_lfb; /* 'lfb' stands for
+ 'last free block for relocation'*/
+
+ /* fbd_reloc_extents[fbd->fbd_last_reloc_extent].clu +
+ * fbd_last_reloc_off can be used as 'clu of first block to relocate'
+ * (draining reloc range from end)
+ * NB: ffb and lfb above deal with free_list, while lrb deals with
+ * reloc_list! */
+ struct ploop_rextent_ptr fbd_lrb; /* 'lrb' stands for
+ 'last block to relocate' */
+
+ /* counters to trace the progress of relocation */
+ int fbd_n_relocated; /* # blocks actually relocated */
+ int fbd_n_relocating; /* # blocks whose relocation was at
+ least started */
+
+ /* lost_range: [fbd_first_lost_iblk ..
+ * fbd_first_lost_iblk + fbd_lost_range_len - 1] */
+ iblock_t fbd_first_lost_iblk;
+ int fbd_lost_range_len;
+ int fbd_lost_range_addon; /* :)) */
+
+ /* any reloc request resides there while it's "in progress" */
+ struct rb_root reloc_tree;
+
+ /* list of ploop_request-s for PLOOP_REQ_ZERO ops: firstly zero index
+ * for PLOOP_REQ_ZERO req_cluster, then schedule ordinary request
+ * pinned to given PLOOP_REQ_ZERO request */
+ struct list_head free_zero_list;
+
+ /* storage for free-block extents: list for now */
+ struct list_head fbd_free_list;
+
+ /* storage for reloc-block extents: list for now */
+ struct list_head fbd_reloc_list;
+
+ int fbd_freezed_level; /* for sanity - level on
+ * PLOOP_IOC_FREEBLKS stage */
+
+ struct bio_list fbd_dbl; /* dbl stands for 'discard bio list' */
+};
+
+/*
+ * Trivial accessors hiding freeblks internals from dev.c.
+ * NOTE(review): unlike ploop_fb_add_reloc_req() below, none of these
+ * NULL-check fbd -- callers are presumably expected to do it; verify.
+ */
+int ploop_fb_get_n_relocated(struct ploop_freeblks_desc *fbd)
+{
+ return fbd->fbd_n_relocated;
+}
+int ploop_fb_get_n_relocating(struct ploop_freeblks_desc *fbd)
+{
+ return fbd->fbd_n_relocating;
+}
+int ploop_fb_get_n_free(struct ploop_freeblks_desc *fbd)
+{
+ return fbd->fbd_n_free;
+}
+/* First iblock past the lost range, i.e. the allocation head. */
+iblock_t ploop_fb_get_alloc_head(struct ploop_freeblks_desc *fbd)
+{
+ return fbd->fbd_first_lost_iblk + fbd->fbd_lost_range_len;
+}
+int ploop_fb_get_lost_range_len(struct ploop_freeblks_desc *fbd)
+{
+ return fbd->fbd_lost_range_len;
+}
+iblock_t ploop_fb_get_first_lost_iblk(struct ploop_freeblks_desc *fbd)
+{
+ return fbd->fbd_first_lost_iblk;
+}
+
+/* Delta level frozen at PLOOP_IOC_FREEBLKS time (-1 = none); sanity only. */
+int ploop_fb_get_freezed_level(struct ploop_freeblks_desc *fbd)
+{
+ return fbd->fbd_freezed_level;
+}
+void ploop_fb_set_freezed_level(struct ploop_freeblks_desc *fbd, int level)
+{
+ fbd->fbd_freezed_level = level;
+}
+
+/*
+ * Insert an in-progress relocation request into reloc_tree, keyed by
+ * src_iblock (duplicates are a BUG).  No-op when fbd is NULL.
+ */
+void ploop_fb_add_reloc_req(struct ploop_freeblks_desc *fbd,
+ struct ploop_request *preq)
+{
+ struct rb_node ** p;
+ struct rb_node *parent = NULL;
+ struct ploop_request * pr;
+
+ if (fbd == NULL)
+ return;
+
+ p = &fbd->reloc_tree.rb_node;
+ while (*p) {
+ parent = *p;
+ pr = rb_entry(parent, struct ploop_request, reloc_link);
+ BUG_ON (preq->src_iblock == pr->src_iblock);
+
+ if (preq->src_iblock < pr->src_iblock)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+
+ rb_link_node(&preq->reloc_link, parent, p);
+ rb_insert_color(&preq->reloc_link, &fbd->reloc_tree);
+}
+
+/* Remove a completed relocation request from reloc_tree. */
+void ploop_fb_del_reloc_req(struct ploop_freeblks_desc *fbd,
+ struct ploop_request *preq)
+{
+ BUG_ON (fbd == NULL);
+
+ rb_erase(&preq->reloc_link, &fbd->reloc_tree);
+}
+
+/*
+ * If preq->iblock is currently being relocated, park preq on the
+ * relocating request's delay_list (eng_state := pin_state) and return 1;
+ * otherwise return 0 and let the caller proceed.
+ */
+int ploop_fb_check_reloc_req(struct ploop_freeblks_desc *fbd,
+ struct ploop_request *preq,
+ unsigned long pin_state)
+{
+ struct rb_node *n;
+ struct ploop_request * p;
+
+ BUG_ON (fbd == NULL);
+ BUG_ON (preq->iblock == 0);
+ BUG_ON (preq->iblock >= fbd->fbd_first_lost_iblk);
+
+ n = fbd->reloc_tree.rb_node;
+ if (n == NULL)
+ return 0;
+
+ while (n) {
+ p = rb_entry(n, struct ploop_request, reloc_link);
+
+ if (preq->iblock < p->src_iblock)
+ n = n->rb_left;
+ else if (preq->iblock > p->src_iblock)
+ n = n->rb_right;
+ else {
+ /* hit: pin preq behind the in-flight relocation */
+ spin_lock_irq(&fbd->plo->lock);
+ preq->eng_state = pin_state;
+ list_add_tail(&preq->list, &p->delay_list);
+ spin_unlock_irq(&fbd->plo->lock);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Helper for ioctl(PLOOP_IOC_FBGET): copy free extents to userspace.
+ * If ctl->n_extents is 0, only counts extents (no per-extent copy).
+ *
+ * NOTE(review): copy_to_user() returns the number of bytes NOT copied;
+ * that positive residue is propagated to the caller as rc here instead
+ * of -EFAULT -- verify the ioctl layer converts it.
+ */
+int ploop_fb_copy_freeblks_to_user(struct ploop_freeblks_desc *fbd, void *arg,
+ struct ploop_freeblks_ctl *ctl)
+{
+ int rc = 0;
+ int n = 0;
+ struct ploop_freeblks_extent *fextent;
+ struct ploop_freeblks_ctl_extent cext;
+
+ memset(&cext, 0, sizeof(cext));
+ list_for_each_entry(fextent, &fbd->fbd_free_list, list)
+ if (ctl->n_extents) {
+ int off = offsetof(struct ploop_freeblks_ctl,
+ extents[n]);
+ if (n++ >= ctl->n_extents) {
+ rc = -ENOSPC; /* user buffer too small */
+ break;
+ }
+
+ cext.clu = fextent->clu;
+ cext.iblk = fextent->iblk;
+ cext.len = fextent->len;
+
+ rc = copy_to_user((u8*)arg + off, &cext, sizeof(cext));
+ if (rc)
+ break;
+ } else {
+ n++; /* counting pass only */
+ }
+
+ if (!rc) {
+ ctl->n_extents = n;
+ rc = copy_to_user((void*)arg, ctl, sizeof(*ctl));
+ }
+
+ return rc;
+}
+
+/*
+ * Drop free extents shorter than minlen (used before issuing discards),
+ * then reset the ffb cursor to the head of what remains.
+ * Returns the updated number of free blocks.
+ */
+int ploop_fb_filter_freeblks(struct ploop_freeblks_desc *fbd, unsigned long minlen)
+{
+ struct ploop_freeblks_extent *fextent, *n;
+
+ list_for_each_entry_safe(fextent, n, &fbd->fbd_free_list, list)
+ if (fextent->len < minlen) {
+ list_del(&fextent->list);
+ fbd->fbd_n_free -= fextent->len;
+ kfree(fextent);
+ }
+
+ if (list_empty(&fbd->fbd_free_list))
+ fbd->fbd_ffb.ext = NULL;
+ else
+ fbd->fbd_ffb.ext = list_entry(fbd->fbd_free_list.next,
+ struct ploop_freeblks_extent,
+ list);
+ fbd->fbd_ffb.off = 0;
+
+ return fbd->fbd_n_free;
+}
+
+/*
+ * Take a preallocated "zero index" request off free_zero_list.
+ * The pool is sized to plo->tune.max_requests, hence the BUG_ON
+ * rather than an allocation fallback.
+ */
+struct ploop_request *
+ploop_fb_get_zero_request(struct ploop_freeblks_desc *fbd)
+{
+ struct ploop_request * preq;
+
+ BUG_ON (fbd == NULL);
+ BUG_ON (list_empty(&fbd->free_zero_list));
+
+ preq = list_entry(fbd->free_zero_list.next,
+ struct ploop_request, list);
+ list_del(&preq->list);
+ return preq;
+}
+
+/* Return a "zero index" request to the pool. */
+void ploop_fb_put_zero_request(struct ploop_freeblks_desc *fbd,
+ struct ploop_request *preq)
+{
+ list_add(&preq->list, &fbd->free_zero_list);
+}
+
+/* Cursor position helpers: extent base + offset.  Callers must ensure
+ * the corresponding cursor ->ext is non-NULL. */
+static iblock_t ffb_iblk(struct ploop_freeblks_desc *fbd)
+{
+ return fbd->fbd_ffb.ext->iblk + fbd->fbd_ffb.off;
+}
+static cluster_t ffb_clu(struct ploop_freeblks_desc *fbd)
+{
+ return fbd->fbd_ffb.ext->clu + fbd->fbd_ffb.off;
+}
+static iblock_t lfb_iblk(struct ploop_freeblks_desc *fbd)
+{
+ return fbd->fbd_lfb.ext->iblk + fbd->fbd_lfb.off;
+}
+static cluster_t lfb_clu(struct ploop_freeblks_desc *fbd)
+{
+ return fbd->fbd_lfb.ext->clu + fbd->fbd_lfb.off;
+}
+static iblock_t lrb_iblk(struct ploop_freeblks_desc *fbd)
+{
+ return fbd->fbd_lrb.ext->iblk + fbd->fbd_lrb.off;
+}
+
+/* iblock of the first extent in the reloc list (list must be non-empty). */
+static iblock_t get_first_reloc_iblk(struct ploop_freeblks_desc *fbd)
+{
+ struct ploop_relocblks_extent *r_extent;
+
+ BUG_ON (list_empty(&fbd->fbd_reloc_list));
+ r_extent = list_entry(fbd->fbd_reloc_list.next,
+ struct ploop_relocblks_extent, list);
+ return r_extent->iblk;
+}
+
+/*
+ * Move the first-free-block cursor one block forward, stepping to the
+ * next extent when the current one is exhausted.  The cursor is
+ * invalidated (ext = NULL) when it runs off the list or crosses into
+ * the lost range.
+ */
+static void advance_ffb_simple(struct ploop_freeblks_desc *fbd)
+{
+ BUG_ON (fbd->fbd_ffb.ext == NULL);
+
+ if (fbd->fbd_ffb.off < fbd->fbd_ffb.ext->len - 1) {
+ fbd->fbd_ffb.off++;
+ } else {
+ if (fbd->fbd_ffb.ext->list.next == &fbd->fbd_free_list)
+ fbd->fbd_ffb.ext = NULL;
+ else
+ fbd->fbd_ffb.ext = list_entry(fbd->fbd_ffb.ext->list.next,
+ struct ploop_freeblks_extent,
+ list);
+ fbd->fbd_ffb.off = 0;
+ }
+
+ if (fbd->fbd_ffb.ext != NULL &&
+ ffb_iblk(fbd) >= fbd->fbd_first_lost_iblk) {
+ /* invalidate ffb */
+ fbd->fbd_ffb.ext = NULL;
+ fbd->fbd_ffb.off = 0;
+ }
+}
+
+/*
+ * Consume one block from the tail of the reloc range: move the
+ * last-reloc-block cursor backward and grow the lost range by the
+ * consumed block plus any gap ('skip') between reloc extents.
+ * Also decrements fbd_n_free (the relocated block reused a free one).
+ */
+static void advance_lrb(struct ploop_freeblks_desc *fbd)
+{
+ iblock_t skip = 0;
+ BUG_ON (fbd->fbd_lrb.ext == NULL);
+
+ if (likely(fbd->fbd_lrb.off)) {
+ fbd->fbd_lrb.off--;
+ } else {
+ struct ploop_relocblks_extent *r_extent = fbd->fbd_lrb.ext;
+ /* here 'skip' means: [new_lrb_ext]<--skip-->[r_extent] */
+
+ if (fbd->fbd_lrb.ext->list.prev == &fbd->fbd_reloc_list) {
+ BUG_ON (fbd->fbd_lost_range_addon < 0);
+ skip = fbd->fbd_lost_range_addon;
+ fbd->fbd_lrb.ext = NULL;
+ } else {
+ fbd->fbd_lrb.ext = list_entry(fbd->fbd_lrb.ext->list.prev,
+ struct ploop_relocblks_extent,
+ list);
+ fbd->fbd_lrb.off = fbd->fbd_lrb.ext->len - 1;
+ BUG_ON (r_extent->iblk < fbd->fbd_lrb.ext->iblk +
+ fbd->fbd_lrb.ext->len);
+ skip = r_extent->iblk - (fbd->fbd_lrb.ext->iblk +
+ fbd->fbd_lrb.ext->len);
+ }
+ }
+
+ fbd->fbd_first_lost_iblk -= 1 + skip;
+ fbd->fbd_lost_range_len += 1 + skip;
+
+ if (fbd->fbd_ffb.ext != NULL &&
+ ffb_iblk(fbd) >= fbd->fbd_first_lost_iblk) {
+ /* invalidate ffb */
+ fbd->fbd_ffb.ext = NULL;
+ fbd->fbd_ffb.off = 0;
+ }
+
+ BUG_ON(fbd->fbd_n_free <= 0);
+ fbd->fbd_n_free--;
+}
+
+/*
+ * Split a free extent at *off_p: the element at that offset is removed,
+ * the tail becomes a new extent linked right after the old one, and
+ * *off_p is decremented to point at the new end of the left part.
+ * Returns 0 or -ENOMEM.
+ */
+static int split_fb_extent(struct ploop_freeblks_extent *extent, u32 *off_p,
+ struct ploop_freeblks_desc *fbd)
+{
+ struct ploop_freeblks_extent *new_extent;
+
+ new_extent = kzalloc(sizeof(*new_extent), GFP_KERNEL);
+ if (new_extent == NULL) {
+ printk("Can't allocate new freeblks extent for splittig!\n");
+ return -ENOMEM;
+ }
+
+ new_extent->clu = extent->clu + *off_p + 1;
+ new_extent->iblk = extent->iblk + *off_p + 1;
+ new_extent->len = extent->len - *off_p - 1;
+
+ extent->len = *off_p;
+
+ list_add(&new_extent->list, &extent->list);
+
+ (*off_p)--;
+ return 0;
+}
+
+/*
+ * Consume the block under the last-free-block cursor (just used as a
+ * relocation destination) and move the cursor one free block to the
+ * left, splitting or shrinking extents as needed.  When lfb collides
+ * with ffb, lfb is invalidated and ffb advances instead.
+ * Returns 0 on success, -ENOMEM if an extent split fails.
+ */
+static int advance_lfb_left(struct ploop_freeblks_desc *fbd)
+{
+ int rc = 0;
+ struct ploop_freeblks_extent *lfb_ext = fbd->fbd_lfb.ext;
+
+ BUG_ON (fbd->fbd_ffb.ext == NULL);
+ BUG_ON (lfb_ext == NULL);
+ BUG_ON (ffb_iblk(fbd) > lfb_iblk(fbd));
+
+ if (ffb_iblk(fbd) == lfb_iblk(fbd)) {
+ /* invalidate lfb */
+ fbd->fbd_lfb.ext = NULL;
+ fbd->fbd_lfb.off = 0;
+ advance_ffb_simple(fbd);
+ return 0;
+ }
+
+ if (fbd->fbd_lfb.off) {
+ if (fbd->fbd_lfb.off == lfb_ext->len - 1) {
+ /* cursor at extent tail: just shrink the extent */
+ lfb_ext->len--;
+ fbd->fbd_lfb.off--;
+ } else {
+ /* cursor mid-extent: carve the block out */
+ rc = split_fb_extent(lfb_ext, &fbd->fbd_lfb.off, fbd);
+ }
+ } else {
+ /* cursor at extent head: shrink from the front and step
+ * to the tail of the previous extent */
+ BUG_ON (lfb_ext->list.prev == &fbd->fbd_free_list);
+ BUG_ON (lfb_ext == fbd->fbd_ffb.ext);
+
+ lfb_ext->clu++;
+ lfb_ext->iblk++;
+ lfb_ext->len--;
+
+ fbd->fbd_lfb.ext = list_entry(lfb_ext->list.prev,
+ struct ploop_freeblks_extent,
+ list);
+ fbd->fbd_lfb.off = fbd->fbd_lfb.ext->len - 1;
+
+ if (lfb_ext->len == 0) {
+ list_del(&lfb_ext->list);
+ kfree(lfb_ext);
+ }
+ }
+
+ BUG_ON (fbd->fbd_ffb.ext == NULL);
+ BUG_ON (fbd->fbd_lfb.ext == NULL);
+ BUG_ON (lfb_iblk(fbd) < ffb_iblk(fbd));
+ return rc;
+}
+
+/*
+ * Pick the next block to relocate, draining the reloc range from its
+ * end.  Outputs the source clu/iblk; for blocks also present in the
+ * freemap (*free_p != 0) only index zeroing is needed and to_* are set
+ * to ~0U, otherwise a free destination block is consumed via
+ * advance_lfb_left().  Returns 0 on success, -1 when nothing remains
+ * (or relocation must stop).
+ */
+int ploop_fb_get_reloc_block(struct ploop_freeblks_desc *fbd,
+ cluster_t *from_clu_p, iblock_t *from_iblk_p,
+ cluster_t *to_clu_p, iblock_t *to_iblk_p,
+ u32 *free_p)
+{
+ cluster_t from_clu, to_clu;
+ iblock_t from_iblk, to_iblk;
+ u32 free;
+ struct ploop_relocblks_extent *r_extent;
+
+ if (!fbd)
+ return -1;
+
+ /* Fixed: dereference fbd only after the NULL check (the original
+ * initialized r_extent at its declaration, before checking fbd). */
+ r_extent = fbd->fbd_lrb.ext; /* whole range is drained? */
+ if (r_extent == NULL)
+ return -1;
+
+ BUG_ON (fbd->fbd_lrb.off >= r_extent->len);
+
+ from_clu = r_extent->clu + fbd->fbd_lrb.off;
+ from_iblk = r_extent->iblk + fbd->fbd_lrb.off;
+ free = r_extent->free;
+
+ /* from_iblk is in range to relocate, but it's marked as free.
+ * This means that we only need to zero its index, no actual
+ * relocation needed. Such an operation doesn't consume free
+ * block that fbd_last_free refers to */
+ if (free) {
+ /* The block we're going to zero-index was already re-used? */
+ if (fbd->fbd_ffb.ext == NULL || ffb_iblk(fbd) > from_iblk)
+ return -1;
+
+ BUG_ON (fbd->fbd_ffb.off >= fbd->fbd_ffb.ext->len);
+
+ to_iblk = ~0U;
+ to_clu = ~0U;
+ } else {
+ /* run out of free blocks which can be used as destination
+ * for relocation ? */
+ if (fbd->fbd_lfb.ext == NULL)
+ return -1;
+
+ BUG_ON (fbd->fbd_ffb.ext == NULL);
+ BUG_ON (fbd->fbd_ffb.off >= fbd->fbd_ffb.ext->len);
+ BUG_ON (fbd->fbd_lfb.off >= fbd->fbd_lfb.ext->len);
+ BUG_ON (ffb_iblk(fbd) > lfb_iblk(fbd));
+
+ to_clu = lfb_clu(fbd);
+ to_iblk = lfb_iblk(fbd);
+
+ if (advance_lfb_left(fbd)) {
+ /* Error implies stopping relocation */
+ fbd->fbd_lrb.ext = NULL;
+ fbd->fbd_lrb.off = 0;
+ return -1;
+ }
+ }
+
+ /* consume one block from the end of reloc list */
+ advance_lrb(fbd);
+
+ fbd->fbd_n_relocating++;
+
+ *from_clu_p = from_clu;
+ *from_iblk_p = from_iblk;
+ *to_clu_p = to_clu;
+ *to_iblk_p = to_iblk;
+ *free_p = free;
+ return 0;
+}
+
+/* Bump the completed-relocations counter (progress reporting). */
+void ploop_fb_relocate_req_completed(struct ploop_freeblks_desc *fbd)
+{
+ fbd->fbd_n_relocated++;
+}
+
+/*
+ * Move the last-free-block cursor one free block to the right, but only
+ * while it stays strictly below the first reloc-extent iblock.  Called
+ * after ffb advanced; invalidates lfb if it fell behind ffb.
+ */
+static void advance_lfb_right(struct ploop_freeblks_desc *fbd)
+{
+ iblock_t iblk = get_first_reloc_iblk(fbd);
+
+ if (fbd->fbd_lfb.off < fbd->fbd_lfb.ext->len - 1) {
+ if (fbd->fbd_lfb.ext->iblk + fbd->fbd_lfb.off + 1 < iblk) {
+ fbd->fbd_lfb.off++;
+ }
+ } else if (fbd->fbd_lfb.ext->list.next != &fbd->fbd_free_list) {
+ struct ploop_freeblks_extent *f_extent;
+ f_extent = list_entry(fbd->fbd_lfb.ext->list.next,
+ struct ploop_freeblks_extent,
+ list);
+ if (f_extent->iblk < iblk) {
+ fbd->fbd_lfb.ext = f_extent;
+ fbd->fbd_lfb.off = 0;
+ }
+ }
+
+ /* invalidating ffb always implies invalidating lfb */
+ BUG_ON (fbd->fbd_ffb.ext == NULL && fbd->fbd_lfb.ext != NULL);
+
+ /* caller has just advanced ffb, but we must keep lfb intact
+ * if next-free-block (following to lfb) is in reloc-range */
+ if (fbd->fbd_ffb.ext != NULL && fbd->fbd_lfb.ext != NULL &&
+ lfb_iblk(fbd) < ffb_iblk(fbd)) {
+ fbd->fbd_lfb.ext = NULL;
+ fbd->fbd_lfb.off = 0;
+ }
+}
+
+/*
+ * Drop one block from the front of the reloc list (its destination free
+ * block was just re-used for a WRITE).  Shrinks or frees the first
+ * reloc extent; if the lrb cursor is overtaken, relocation is stopped
+ * by invalidating both lrb and lfb.
+ */
+static void trim_reloc_list_one_blk(struct ploop_freeblks_desc *fbd)
+{
+ struct ploop_relocblks_extent *r_extent_first;
+ iblock_t iblk = lrb_iblk(fbd);
+ int invalidate = 0;
+
+ BUG_ON (list_empty(&fbd->fbd_reloc_list));
+ r_extent_first = list_entry(fbd->fbd_reloc_list.next,
+ struct ploop_relocblks_extent, list);
+
+ if (r_extent_first->len > 1) {
+ fbd->fbd_lost_range_addon = 0;
+ r_extent_first->iblk++;
+ r_extent_first->clu++;
+ r_extent_first->len--;
+ if (iblk < r_extent_first->iblk) {
+ invalidate = 1;
+ } else if (r_extent_first == fbd->fbd_lrb.ext) {
+ BUG_ON (fbd->fbd_lrb.off == 0);
+ fbd->fbd_lrb.off--;
+ }
+ } else {
+ if (r_extent_first == fbd->fbd_lrb.ext) {
+ invalidate = 1;
+ } else {
+ struct ploop_relocblks_extent *r_extent;
+ BUG_ON (r_extent_first->list.next ==
+ &fbd->fbd_reloc_list);
+ r_extent = list_entry(r_extent_first->list.next,
+ struct ploop_relocblks_extent,
+ list);
+ /* remember the gap to the next extent */
+ fbd->fbd_lost_range_addon = r_extent->iblk -
+ (r_extent_first->iblk + r_extent_first->len);
+ }
+ list_del(&r_extent_first->list);
+ kfree(r_extent_first);
+ }
+
+ if (invalidate) {
+ /* invalidate both lfb and lrb */
+ fbd->fbd_lrb.ext = NULL;
+ fbd->fbd_lrb.off = 0;
+ if (fbd->fbd_lfb.ext != NULL) {
+ fbd->fbd_lfb.ext = NULL;
+ fbd->fbd_lfb.off = 0;
+ }
+ }
+}
+
+/*
+ * Advance the first-free-block cursor while relocation is active
+ * (PLOOP_MNTN_RELOC): like advance_ffb_simple(), plus keeps the reloc
+ * list and the lfb cursor consistent with the block just consumed.
+ */
+static void advance_ffb(struct ploop_freeblks_desc *fbd)
+{
+ BUG_ON (fbd->fbd_ffb.ext == NULL);
+ BUG_ON (fbd->fbd_lfb.ext != NULL && ffb_iblk(fbd) > lfb_iblk(fbd));
+
+ if (fbd->fbd_ffb.off < fbd->fbd_ffb.ext->len - 1) {
+ fbd->fbd_ffb.off++;
+ } else {
+ if (fbd->fbd_ffb.ext->list.next == &fbd->fbd_free_list) {
+ BUG_ON (fbd->fbd_lfb.ext != NULL &&
+ ffb_iblk(fbd) != lfb_iblk(fbd));
+ fbd->fbd_ffb.ext = NULL;
+ } else {
+ fbd->fbd_ffb.ext = list_entry(fbd->fbd_ffb.ext->list.next,
+ struct ploop_freeblks_extent,
+ list);
+ }
+ fbd->fbd_ffb.off = 0;
+ }
+
+ if (fbd->fbd_ffb.ext == NULL && fbd->fbd_lfb.ext != NULL) {
+ /* invalidate lfb */
+ fbd->fbd_lfb.ext = NULL;
+ fbd->fbd_lfb.off = 0;
+ return;
+ }
+
+ if (fbd->fbd_ffb.ext != NULL &&
+ ffb_iblk(fbd) >= fbd->fbd_first_lost_iblk) {
+ /* invalidate both ffb and lfb */
+ fbd->fbd_ffb.ext = NULL;
+ fbd->fbd_ffb.off = 0;
+ fbd->fbd_lfb.ext = NULL;
+ fbd->fbd_lfb.off = 0;
+ }
+
+ /* nothing to do anymore if relocation process is completed */
+ if (fbd->fbd_lrb.ext == NULL)
+ return;
+
+ trim_reloc_list_one_blk(fbd);
+
+ /* trim could invalidate both lrb and lfb */
+ if (fbd->fbd_lrb.ext == NULL || fbd->fbd_lfb.ext == NULL)
+ return;
+
+ advance_lfb_right(fbd);
+}
+
+/*
+ * Hand out a free block for reuse by a WRITE.
+ *
+ * Returns -1 when nothing is available, 1 when a free-list block was
+ * consumed (*clu is set; *iblk is NOT written on this path), and 0 when
+ * a block was taken from the lost range (*iblk is set; *clu is NOT
+ * written).  NOTE(review): callers must dispatch on the return value
+ * before touching the out-params -- confirm call sites in dev.c.
+ * Taking a lost-range block stops any ongoing relocation.
+ */
+int ploop_fb_get_free_block(struct ploop_freeblks_desc *fbd,
+ cluster_t *clu, iblock_t *iblk)
+{
+ if (!fbd)
+ return -1;
+
+ if (fbd->fbd_ffb.ext == NULL) {
+ BUG_ON (fbd->fbd_lfb.ext != NULL);
+ BUG_ON (fbd->fbd_lost_range_len < 0);
+
+ if (fbd->fbd_lost_range_len == 0)
+ return -1;
+
+ *iblk = fbd->fbd_first_lost_iblk++;
+ fbd->fbd_lost_range_len--;
+
+ if (fbd->fbd_lrb.ext != NULL) {
+ /* stop relocation process */
+ fbd->fbd_lrb.ext = NULL;
+ fbd->fbd_lrb.off = 0;
+ }
+
+ return 0;
+ }
+
+ BUG_ON (ffb_iblk(fbd) >= fbd->fbd_first_lost_iblk);
+ BUG_ON (fbd->fbd_n_free <= 0);
+
+ *clu = ffb_clu(fbd);
+ fbd->fbd_n_free--;
+
+ if (fbd->plo->maintenance_type == PLOOP_MNTN_RELOC)
+ advance_ffb(fbd);
+ else
+ advance_ffb_simple(fbd);
+
+ BUG_ON (fbd->fbd_ffb.ext == NULL && fbd->fbd_n_free != 0);
+ BUG_ON (fbd->fbd_ffb.ext != NULL && fbd->fbd_n_free == 0);
+
+ return 1;
+}
+
+/*
+ * Complete (with err) every discard bio parked on fbd_dbl, then adjust
+ * plo->bio_total and wake waiters if more discard bios are pending.
+ * Bios are completed outside plo->lock; only the counters are locked.
+ */
+static void fbd_complete_bio(struct ploop_freeblks_desc *fbd, int err)
+{
+ struct ploop_device *plo = fbd->plo;
+ unsigned int nr_completed = 0;
+
+ while (fbd->fbd_dbl.head) {
+ struct bio * bio = fbd->fbd_dbl.head;
+ fbd->fbd_dbl.head = bio->bi_next;
+ bio->bi_next = NULL;
+ BIO_ENDIO(plo->queue, bio, err);
+ nr_completed++;
+ }
+ fbd->fbd_dbl.tail = NULL;
+
+ spin_lock_irq(&plo->lock);
+ plo->bio_total -= nr_completed;
+ if (!bio_list_empty(&plo->bio_discard_list) &&
+ waitqueue_active(&plo->waitq))
+ wake_up_interruptible(&plo->waitq);
+ spin_unlock_irq(&plo->lock);
+}
+
+/*
+ * Reset fbd to its pristine state: fail pending discard bios with err,
+ * free all extents, and zero cursors/counters.  reloc_tree must already
+ * be empty.  NOTE(review): fbd_first_lost_iblk is deliberately NOT
+ * reset here -- it is (re)established by ploop_fb_lost_range_init().
+ */
+void ploop_fb_reinit(struct ploop_freeblks_desc *fbd, int err)
+{
+ fbd_complete_bio(fbd, err);
+
+ while (!list_empty(&fbd->fbd_free_list)) {
+ struct ploop_freeblks_extent *fblk_extent;
+
+ fblk_extent = list_first_entry(&fbd->fbd_free_list,
+ struct ploop_freeblks_extent,
+ list);
+ list_del(&fblk_extent->list);
+ kfree(fblk_extent);
+ }
+
+ while (!list_empty(&fbd->fbd_reloc_list)) {
+ struct ploop_relocblks_extent *rblk_extent;
+
+ rblk_extent = list_first_entry(&fbd->fbd_reloc_list,
+ struct ploop_relocblks_extent,
+ list);
+ list_del(&rblk_extent->list);
+ kfree(rblk_extent);
+ }
+
+ fbd->fbd_n_free = 0;
+ fbd->fbd_ffb.ext = NULL;
+ fbd->fbd_lfb.ext = NULL;
+ fbd->fbd_lrb.ext = NULL;
+ fbd->fbd_ffb.off = 0;
+ fbd->fbd_lfb.off = 0;
+ fbd->fbd_lrb.off = 0;
+ fbd->fbd_n_relocated = fbd->fbd_n_relocating = 0;
+ fbd->fbd_lost_range_len = 0;
+ fbd->fbd_lost_range_addon = 0;
+
+ BUG_ON(!RB_EMPTY_ROOT(&fbd->reloc_tree));
+}
+
+/*
+ * Allocate and initialize a freeblks descriptor for plo, including a
+ * pool of max_requests preallocated "zero index" requests.
+ * Returns NULL on allocation failure (partial state is torn down via
+ * ploop_fb_fini()).  NOTE(review): fbd is kmalloc'ed, not kzalloc'ed;
+ * fields not set here or in ploop_fb_reinit() (e.g. fbd_first_lost_iblk)
+ * stay uninitialized until ploop_fb_lost_range_init() -- confirm all
+ * users run that first.
+ */
+struct ploop_freeblks_desc *ploop_fb_init(struct ploop_device *plo)
+{
+ struct ploop_freeblks_desc *fbd;
+ int i;
+
+ fbd = kmalloc(sizeof(struct ploop_freeblks_desc), GFP_KERNEL);
+ if (fbd == NULL)
+ return NULL;
+
+ fbd->fbd_dbl.tail = fbd->fbd_dbl.head = NULL;
+ INIT_LIST_HEAD(&fbd->fbd_free_list);
+ INIT_LIST_HEAD(&fbd->fbd_reloc_list);
+ fbd->reloc_tree = RB_ROOT;
+ fbd->fbd_freezed_level = -1;
+
+ fbd->plo = plo;
+
+ ploop_fb_reinit(fbd, 0);
+
+ INIT_LIST_HEAD(&fbd->free_zero_list);
+ for (i = 0; i < plo->tune.max_requests; i++) {
+ struct ploop_request * preq;
+ preq = kzalloc(sizeof(struct ploop_request), GFP_KERNEL);
+ if (preq == NULL)
+ goto fb_init_failed;
+
+ preq->plo = plo;
+ INIT_LIST_HEAD(&preq->delay_list);
+ list_add(&preq->list, &fbd->free_zero_list);
+ }
+
+ return fbd;
+
+fb_init_failed:
+ ploop_fb_fini(fbd, -ENOMEM);
+ return NULL;
+}
+
+/*
+ * Tear down fbd: fail pending discard bios with err, free all extents
+ * and pooled zero-requests, free fbd itself and clear plo->fbd.
+ * Safe to call with fbd == NULL.
+ */
+void ploop_fb_fini(struct ploop_freeblks_desc *fbd, int err)
+{
+ struct ploop_device *plo;
+
+ if (fbd == NULL)
+ return;
+
+ plo = fbd->plo;
+ BUG_ON (plo == NULL);
+
+ fbd_complete_bio(fbd, err);
+
+ while (!list_empty(&fbd->fbd_free_list)) {
+ struct ploop_freeblks_extent *fblk_extent;
+
+ fblk_extent = list_first_entry(&fbd->fbd_free_list,
+ struct ploop_freeblks_extent,
+ list);
+ list_del(&fblk_extent->list);
+ kfree(fblk_extent);
+ }
+
+ while (!list_empty(&fbd->fbd_reloc_list)) {
+ struct ploop_relocblks_extent *rblk_extent;
+
+ rblk_extent = list_first_entry(&fbd->fbd_reloc_list,
+ struct ploop_relocblks_extent,
+ list);
+ list_del(&rblk_extent->list);
+ kfree(rblk_extent);
+ }
+
+ while (!list_empty(&fbd->free_zero_list)) {
+ struct ploop_request * preq;
+
+ preq = list_first_entry(&fbd->free_zero_list,
+ struct ploop_request,
+ list);
+ list_del(&preq->list);
+ kfree(preq);
+ }
+
+ kfree(fbd);
+ plo->fbd = NULL;
+}
+
+/*
+ * Insert a free extent, keeping fbd_free_list sorted by iblk, after
+ * verifying it does not intersect its neighbours.  Resets the ffb
+ * cursor to the list head.  Returns 0, -EINVAL on overlap, or -ENOMEM.
+ * Note: if the reverse scan finds no smaller iblk, 'ex' aliases the
+ * list head; the 'c' flag below guards the diagnostic printk for that.
+ */
+int ploop_fb_add_free_extent(struct ploop_freeblks_desc *fbd,
+ cluster_t clu, iblock_t iblk, u32 len)
+{
+ struct ploop_freeblks_extent *fblk_extent;
+ struct ploop_freeblks_extent *ex;
+
+ if (len == 0) {
+ printk("ploop_fb_add_free_extent(): empty extent! (%u/%u)\n",
+ clu, iblk);
+ return 0;
+ }
+
+ list_for_each_entry_reverse(ex, &fbd->fbd_free_list, list)
+ if (ex->iblk < iblk)
+ break;
+
+ if (ex->list.next != &fbd->fbd_free_list) {
+ struct ploop_freeblks_extent *tmp;
+ tmp = list_entry(ex->list.next, struct ploop_freeblks_extent, list);
+
+ if (iblk + len > tmp->iblk) {
+ int c = &ex->list != &fbd->fbd_free_list;
+ printk("ploop_fb_add_free_extent(): next (%u %u %u) "
+ "intersects with (%u %u %u); ex (%u %u %d)\n",
+ tmp->clu, tmp->iblk, tmp->len, clu, iblk, len,
+ c ? ex->clu : 0, c ? ex->iblk : 0, c ? ex->len : -1);
+ return -EINVAL;
+ }
+ }
+
+ if (&ex->list != &fbd->fbd_free_list) {
+ if (ex->iblk + ex->len > iblk) {
+ struct ploop_freeblks_extent *t = NULL;
+ if (ex->list.next != &fbd->fbd_free_list)
+ t = list_entry(ex->list.next, struct ploop_freeblks_extent, list);
+ printk("ploop_fb_add_free_extent(): ex (%u %u %u) "
+ "intersects with (%u %u %u); next (%u %u %d)\n",
+ ex->clu, ex->iblk, ex->len, clu, iblk, len,
+ t ? t->clu : 0, t ? t->iblk : 0, t ? t->len : -1);
+ return -EINVAL;
+ }
+ }
+
+ fblk_extent = kzalloc(sizeof(*fblk_extent), GFP_KERNEL);
+ if (fblk_extent == NULL)
+ return -ENOMEM;
+
+ fblk_extent->clu = clu;
+ fblk_extent->iblk = iblk;
+ fblk_extent->len = len;
+
+ list_add(&fblk_extent->list, &ex->list);
+
+ fbd->fbd_n_free += len;
+
+ fbd->fbd_ffb.ext = list_entry(fbd->fbd_free_list.next, struct ploop_freeblks_extent, list);
+ fbd->fbd_ffb.off = 0;
+
+ return 0;
+}
+
+/*
+ * Append a reloc extent; callers must supply extents already sorted by
+ * iblk (enforced against the current tail).
+ * NOTE(review): rblk_extent is set to the LAST element, so its
+ * list.next is always the list head and the inner "intersected
+ * extents" branch below is dead code -- kept verbatim for the revert.
+ * Returns 0, -EINVAL, or -ENOMEM.
+ */
+int ploop_fb_add_reloc_extent(struct ploop_freeblks_desc *fbd,
+ cluster_t clu, iblock_t iblk, u32 len, u32 free)
+{
+ struct ploop_relocblks_extent *rblk_extent;
+
+ if (len == 0) {
+ printk("ploop_fb_add_reloc_extent(): empty extent! (%u/%u)\n",
+ clu, iblk);
+ return 0;
+ }
+
+ if (!list_empty(&fbd->fbd_reloc_list)) {
+ rblk_extent = list_entry(fbd->fbd_reloc_list.prev,
+ struct ploop_relocblks_extent, list);
+ if (rblk_extent->iblk + rblk_extent->len > iblk) {
+ printk("ploop_fb_add_reloc_extent(): extents should be sorted\n");
+ return -EINVAL;
+ }
+
+ if (rblk_extent->list.next != &fbd->fbd_reloc_list) {
+ rblk_extent = list_entry(rblk_extent->list.next,
+ struct ploop_relocblks_extent, list);
+ if (iblk + len > rblk_extent->iblk) {
+ printk("ploop_fb_add_reloc_extent(): intersected extents\n");
+ return -EINVAL;
+ }
+ }
+ }
+
+ rblk_extent = kzalloc(sizeof(*rblk_extent), GFP_KERNEL);
+ if (rblk_extent == NULL)
+ return -ENOMEM;
+
+ rblk_extent->clu = clu;
+ rblk_extent->iblk = iblk;
+ rblk_extent->len = len;
+ rblk_extent->free = free;
+
+ list_add_tail(&rblk_extent->list, &fbd->fbd_reloc_list);
+
+ return 0;
+}
+
+/* Anchor the lost range at first_lost_iblk with zero length. */
+void ploop_fb_lost_range_init(struct ploop_freeblks_desc *fbd,
+ iblock_t first_lost_iblk)
+{
+ fbd->fbd_first_lost_iblk = first_lost_iblk;
+ fbd->fbd_lost_range_len = 0;
+}
+
+/*
+ * Prepare relocation after ploop-balloon scanned n_scanned blocks below
+ * the allocation head: compute where the head will land (new_a_h) given
+ * the available free blocks, trim reloc extents already below new_a_h,
+ * and position the lrb and lfb cursors.  Intricate pointer/extent
+ * arithmetic -- kept verbatim; see inline comments.
+ */
+void ploop_fb_relocation_start(struct ploop_freeblks_desc *fbd,
+ __u32 n_scanned)
+{
+ iblock_t a_h = fbd->fbd_first_lost_iblk;
+ iblock_t new_a_h; /* where a_h will be after relocation
+ if no WRITEs intervene */
+ struct ploop_relocblks_extent *r_extent;
+ struct ploop_relocblks_extent *r_extent_first;
+ int n_free = fbd->fbd_n_free;
+ u32 l;
+ struct ploop_freeblks_extent *fextent;
+
+ BUG_ON(fbd->fbd_lost_range_len != 0);
+ if (list_empty(&fbd->fbd_reloc_list)) {
+ /* nothing to relocate: the scanned tail is simply lost */
+ fbd->fbd_first_lost_iblk -= n_scanned;
+ fbd->fbd_lost_range_len += n_scanned;
+ return;
+ }
+
+ r_extent_first = list_entry(fbd->fbd_reloc_list.next,
+ struct ploop_relocblks_extent, list);
+ r_extent = list_entry(fbd->fbd_reloc_list.prev,
+ struct ploop_relocblks_extent, list);
+ new_a_h = r_extent->iblk + r_extent->len;
+
+ BUG_ON(fbd->fbd_first_lost_iblk < new_a_h);
+ fbd->fbd_lost_range_len = fbd->fbd_first_lost_iblk - new_a_h;
+ fbd->fbd_first_lost_iblk = new_a_h;
+
+ if (!n_free)
+ return;
+
+ /* walk reloc extents from the end, spending free blocks */
+ while (1) {
+ l = MIN(n_free, r_extent->len);
+
+ n_free -= l;
+ new_a_h -= l;
+
+ if (!n_free)
+ break;
+
+ if (r_extent->list.prev == &fbd->fbd_reloc_list) {
+ r_extent = NULL;
+ break;
+ } else {
+ r_extent = list_entry(r_extent->list.prev,
+ struct ploop_relocblks_extent,
+ list);
+ }
+ /* skip lost blocks */
+ new_a_h = r_extent->iblk + r_extent->len;
+ }
+
+ l = 0;
+
+ /* ploop-balloon scanned exactly range [a_h - n_scanned .. a_h - 1] */
+ if (n_free) {
+ l = r_extent_first->iblk - (a_h - n_scanned);
+ } else if (r_extent->iblk == new_a_h) {
+ if (r_extent == r_extent_first) {
+ l = r_extent->iblk - (a_h - n_scanned);
+ } else {
+ struct ploop_relocblks_extent *r_extent_prev;
+
+ BUG_ON (r_extent->list.prev == &fbd->fbd_reloc_list);
+ r_extent_prev = list_entry(r_extent->list.prev,
+ struct ploop_relocblks_extent,
+ list);
+ l = r_extent->iblk - (r_extent_prev->iblk +
+ r_extent_prev->len);
+ }
+ }
+
+ new_a_h -= l;
+
+ /* let's trim reloc_list a bit based on new_a_h */
+ while (r_extent_first->iblk < new_a_h) {
+
+ if (r_extent_first->iblk + r_extent_first->len > new_a_h) {
+ l = new_a_h - r_extent_first->iblk;
+ r_extent_first->iblk += l;
+ r_extent_first->clu += l;
+ r_extent_first->len -= l;
+ break;
+ }
+
+ if (r_extent_first->list.next == &fbd->fbd_reloc_list) {
+ list_del(&r_extent_first->list);
+ kfree(r_extent_first);
+ break;
+ }
+
+ list_del(&r_extent_first->list);
+ kfree(r_extent_first);
+ r_extent_first = list_entry(fbd->fbd_reloc_list.next,
+ struct ploop_relocblks_extent,
+ list);
+ }
+
+ if (!list_empty(&fbd->fbd_reloc_list)) {
+ fbd->fbd_lrb.ext = list_entry(fbd->fbd_reloc_list.prev,
+ struct ploop_relocblks_extent,
+ list);
+ fbd->fbd_lrb.off = fbd->fbd_lrb.ext->len - 1;
+
+ fbd->fbd_lost_range_addon = r_extent_first->iblk - new_a_h;
+ }
+
+ /* new_a_h is calculated. now, let's find "last free block" position */
+ if (ffb_iblk(fbd) < new_a_h) {
+ list_for_each_entry_reverse(fextent, &fbd->fbd_free_list, list)
+ if (fextent->iblk < new_a_h)
+ break;
+
+ BUG_ON(&fextent->list == &fbd->fbd_free_list);
+ } else
+ fextent = NULL;
+
+ fbd->fbd_lfb.ext = fextent; /* NULL means
+ "no free blocks for relocation" */
+ if (fextent != NULL)
+ fbd->fbd_lfb.off = MIN(new_a_h - fextent->iblk,
+ fextent->len) - 1;
+}
+
+/*
+ * Park a discard bio on fbd_dbl for userspace-driven processing.
+ * Only one bio may be in flight at a time.  Returns 0, -EOPNOTSUPP
+ * when discard is not enabled, or -EBUSY.
+ */
+int ploop_discard_add_bio(struct ploop_freeblks_desc *fbd, struct bio *bio)
+{
+ struct ploop_device *plo;
+
+ if (!fbd)
+ return -EOPNOTSUPP;
+
+ plo = fbd->plo;
+
+ if (!test_bit(PLOOP_S_DISCARD, &plo->state))
+ return -EOPNOTSUPP;
+ if (fbd->plo->maintenance_type != PLOOP_MNTN_DISCARD)
+ return -EBUSY;
+ /* only one request can be processed simultaneously */
+ if (fbd->fbd_dbl.head)
+ return -EBUSY;
+
+ fbd->fbd_dbl.head = fbd->fbd_dbl.tail = bio;
+
+ return 0;
+}
+
+/* True iff a parked discard bio is awaiting completion. */
+int ploop_discard_is_inprogress(struct ploop_freeblks_desc *fbd)
+{
+ return fbd && fbd->fbd_dbl.head != NULL;
+}
diff --git a/drivers/block/ploop/freeblks.h b/drivers/block/ploop/freeblks.h
new file mode 100644
index 000000000000..b37f23e3d6b0
--- /dev/null
+++ b/drivers/block/ploop/freeblks.h
@@ -0,0 +1,58 @@
+/*
+ * drivers/block/ploop/freeblks.h
+ *
+ * Copyright (c) 2010-2015 Parallels IP Holdings GmbH
+ *
+ */
+
+#ifndef __FREEBLKS_H__
+#define __FREEBLKS_H__
+
+/* freeblks API - in-kernel balloon support */
+
+/* init/fini stuff */
+struct ploop_freeblks_desc *ploop_fb_init(struct ploop_device *plo);
+void ploop_fb_fini(struct ploop_freeblks_desc *fbd, int err);
+void ploop_fb_reinit(struct ploop_freeblks_desc *fbd, int err);
+int ploop_fb_add_free_extent(struct ploop_freeblks_desc *fbd, cluster_t clu, iblock_t iblk, u32 len);
+int ploop_fb_add_reloc_extent(struct ploop_freeblks_desc *fbd, cluster_t clu, iblock_t iblk, u32 len, u32 free);
+void ploop_fb_lost_range_init(struct ploop_freeblks_desc *fbd, iblock_t first_lost_iblk);
+void ploop_fb_relocation_start(struct ploop_freeblks_desc *fbd, __u32 n_scanned);
+int ploop_discard_add_bio(struct ploop_freeblks_desc *fbd, struct bio *bio);
+int ploop_discard_is_inprogress(struct ploop_freeblks_desc *fbd);
+
+/* avoid direct access to freeblks internals */
+int ploop_fb_get_n_relocated(struct ploop_freeblks_desc *fbd);
+int ploop_fb_get_n_relocating(struct ploop_freeblks_desc *fbd);
+int ploop_fb_get_n_free(struct ploop_freeblks_desc *fbd);
+iblock_t ploop_fb_get_alloc_head(struct ploop_freeblks_desc *fbd);
+int ploop_fb_get_lost_range_len(struct ploop_freeblks_desc *fbd);
+iblock_t ploop_fb_get_first_lost_iblk(struct ploop_freeblks_desc *fbd);
+
+/* get/set freezed level (for sanity checks) */
+int ploop_fb_get_freezed_level(struct ploop_freeblks_desc *fbd);
+void ploop_fb_set_freezed_level(struct ploop_freeblks_desc *fbd, int level);
+
+/* maintain rb-tree of "in progress" relocation requests */
+void ploop_fb_add_reloc_req(struct ploop_freeblks_desc *fbd, struct ploop_request *preq);
+void ploop_fb_del_reloc_req(struct ploop_freeblks_desc *fbd, struct ploop_request *preq);
+int ploop_fb_check_reloc_req(struct ploop_freeblks_desc *fbd, struct ploop_request *preq, unsigned long pin_state);
+
+/* helper for ioctl(PLOOP_IOC_FBGET) */
+int ploop_fb_copy_freeblks_to_user(struct ploop_freeblks_desc *fbd, void *arg,
+ struct ploop_freeblks_ctl *ctl);
+int ploop_fb_filter_freeblks(struct ploop_freeblks_desc *fbd, unsigned long minlen);
+
+/* get/put "zero index" request */
+struct ploop_request *ploop_fb_get_zero_request(struct ploop_freeblks_desc *fbd);
+void ploop_fb_put_zero_request(struct ploop_freeblks_desc *fbd, struct ploop_request *preq);
+
+/* get/put block to relocate */
+int ploop_fb_get_reloc_block(struct ploop_freeblks_desc *fbd, cluster_t *from_clu, iblock_t *from_iblk,
+ cluster_t *to_clu, iblock_t *to_iblk, u32 *free);
+void ploop_fb_relocate_req_completed(struct ploop_freeblks_desc *fbd);
+
+/* get free block to reuse */
+int ploop_fb_get_free_block(struct ploop_freeblks_desc *fbd, cluster_t *clu, iblock_t *iblk);
+
+#endif
diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
index 1493e4138254..8c7a42e32864 100644
--- a/include/linux/ploop/ploop.h
+++ b/include/linux/ploop/ploop.h
@@ -53,8 +53,8 @@ enum {
* consumed by userspace yet */
PLOOP_S_CONGESTED, /* Too many bios submitted to us */
PLOOP_S_NO_FALLOC_DISCARD, /* FIXME: Remove this: Unable to handle discard requests by fallocate */
- PLOOP_S_DISCARD, /* Obsolete: ploop is ready to handle discard request */
- PLOOP_S_DISCARD_LOADED, /* Obsolete: A discard request was handled and
+ PLOOP_S_DISCARD, /* ploop is ready to handle discard request */
+ PLOOP_S_DISCARD_LOADED, /* A discard request was handled and
free blocks loaded */
PLOOP_S_LOCKED, /* ploop is locked by userspace
(for minor mgmt only) */
@@ -471,6 +471,7 @@ struct ploop_device
struct ploop_stats st;
char cookie[PLOOP_COOKIE_SIZE];
+ struct ploop_freeblks_desc *fbd;
struct ploop_pushbackup_desc *pbd;
struct block_device *dm_crypt_bdev;
@@ -489,10 +490,10 @@ enum
PLOOP_REQ_TRANS,
PLOOP_REQ_MERGE,
PLOOP_REQ_RELOC_A, /* 'A' stands for allocate() */
- PLOOP_REQ_RELOC_S, /* Obsolete: 'S' stands for submit() */
+ PLOOP_REQ_RELOC_S, /* 'S' stands for submit() */
PLOOP_REQ_RELOC_N, /* 'N' stands for "nullify" */
- PLOOP_REQ_ZERO, /* Obsolete */
- PLOOP_REQ_DISCARD, /* Obsolete */
+ PLOOP_REQ_ZERO,
+ PLOOP_REQ_DISCARD,
PLOOP_REQ_RSYNC,
PLOOP_REQ_KAIO_FSYNC, /*force image fsync by KAIO module */
PLOOP_REQ_POST_SUBMIT, /* preq needs post_submit processing */
@@ -504,10 +505,10 @@ enum
#define PLOOP_REQ_MERGE_FL (1 << PLOOP_REQ_MERGE)
#define PLOOP_REQ_RELOC_A_FL (1 << PLOOP_REQ_RELOC_A)
-#define PLOOP_REQ_RELOC_S_FL (1 << PLOOP_REQ_RELOC_S) /* Obsolete */
+#define PLOOP_REQ_RELOC_S_FL (1 << PLOOP_REQ_RELOC_S)
#define PLOOP_REQ_RELOC_N_FL (1 << PLOOP_REQ_RELOC_N)
-#define PLOOP_REQ_DISCARD_FL (1 << PLOOP_REQ_DISCARD) /* Obsolete */
-#define PLOOP_REQ_ZERO_FL (1 << PLOOP_REQ_ZERO) /* Obsolete */
+#define PLOOP_REQ_DISCARD_FL (1 << PLOOP_REQ_DISCARD)
+#define PLOOP_REQ_ZERO_FL (1 << PLOOP_REQ_ZERO)
enum
{
@@ -924,6 +925,8 @@ extern struct kobj_type ploop_delta_ktype;
void ploop_sysfs_init(struct ploop_device * plo);
void ploop_sysfs_uninit(struct ploop_device * plo);
+void ploop_queue_zero_request(struct ploop_device *plo, struct ploop_request *orig_preq, cluster_t clu);
+
int ploop_maintenance_wait(struct ploop_device * plo);
extern int max_map_pages;
diff --git a/include/linux/ploop/ploop_if.h b/include/linux/ploop/ploop_if.h
index f2a1e8ab4c70..3b5928cfb69e 100644
--- a/include/linux/ploop/ploop_if.h
+++ b/include/linux/ploop/ploop_if.h
@@ -234,14 +234,14 @@ enum {
PLOOP_MNTN_SNAPSHOT, /* bdev is freezed due to snapshot */
PLOOP_MNTN_TRACK, /* tracking is in progress */
- PLOOP_MNTN_DISCARD, /* Obsolete: ready to handle discard requests */
+ PLOOP_MNTN_DISCARD, /* ready to handle discard requests */
PLOOP_MNTN_NOFAST = 256,
/* all types below requires fast-path disabled ! */
PLOOP_MNTN_MERGE, /* merge is in progress */
PLOOP_MNTN_GROW, /* grow is in progress */
- PLOOP_MNTN_RELOC, /* Obsolete: relocation is in progress */
+ PLOOP_MNTN_RELOC, /* relocation is in progress */
PLOOP_MNTN_PUSH_BACKUP, /* push backup is in progress */
};
@@ -317,32 +317,32 @@ struct ploop_track_extent
/* Increase size of block device */
#define PLOOP_IOC_GROW _IOW(PLOOPCTLTYPE, 17, struct ploop_ctl)
-/* Obsolete: Inquire current state of free block extents */
+/* Inquire current state of free block extents */
#define PLOOP_IOC_FBGET _IOW(PLOOPCTLTYPE, 18, struct ploop_freeblks_ctl)
/* Start balloning or inquire maintenance_type or flush stale BALLON state */
#define PLOOP_IOC_BALLOON _IOW(PLOOPCTLTYPE, 19, struct ploop_balloon_ctl)
-/* Obsolete: Load free blocks to ploop */
+/* Load free blocks to ploop */
#define PLOOP_IOC_FREEBLKS _IOW(PLOOPCTLTYPE, 20, struct ploop_freeblks_ctl)
-/* Obsolete: Load blocks to relocate and initiate relocation process */
+/* Load blocks to relocate and initiate relocation process */
#define PLOOP_IOC_RELOCBLKS _IOW(PLOOPCTLTYPE, 21, struct ploop_relocblks_ctl)
/* Search ploop_device global tree for first unused minor number */
#define PLOOP_IOC_GETDEVICE _IOW(PLOOPCTLTYPE, 22, struct ploop_getdevice_ctl)
-/* Obsolete: Start handling discard requests */
+/* Start handling discard requests */
#define PLOOP_IOC_DISCARD_INIT _IO(PLOOPCTLTYPE, 23)
-/* Obsolete: Stop handling discard requests */
+/* Stop handling discard requests */
#define PLOOP_IOC_DISCARD_FINI _IO(PLOOPCTLTYPE, 24)
-/* Obsolete: Wait a discard request */
+/* Wait a discard request */
#define PLOOP_IOC_DISCARD_WAIT _IO(PLOOPCTLTYPE, 25)
-/* Obsolete: Drop current state of free block extents */
+/* Drop current state of free block extents */
#define PLOOP_IOC_FBDROP _IO(PLOOPCTLTYPE, 26)
-/* Obsolete: Filter extents with sizes less than arg */
+/* Filter extents with sizes less than arg */
#define PLOOP_IOC_FBFILTER _IOR(PLOOPCTLTYPE, 27, unsigned long)
/* Set maximum size for the top delta . */
More information about the Devel
mailing list