[Devel] [PATCH RHEL9 COMMIT] vhost-blk: rework iov and bio handling
Konstantin Khorenko
khorenko at virtuozzo.com
Thu Jan 9 17:58:35 MSK 2025
The commit is pushed to "branch-rh9-5.14.0-427.44.1.vz9.80.x-ovz" and will appear at git at bitbucket.org:openvz/vzkernel.git
after rh9-5.14.0-427.44.1.vz9.80.3
------>
commit 8d5a6070839db6fb61e22aa5a18f1d4f634cbaad
Author: Andrey Zhadchenko <andrey.zhadchenko at virtuozzo.com>
Date: Mon Dec 30 13:39:48 2024 +0800
vhost-blk: rework iov and bio handling
Manual page handling is tiresome and error-prone. Let's use iov
iterators and the bio_iov_iter_get_pages() helper, which
automatically fills a bio with pages from the iov. As this also
pins the pages, release them with bio_release_pages() on completion
of every bio.
While at it, remove VHOST_BLK_SECTOR_BITS and its friends, since
they merely duplicate the generic SECTOR_SHIFT/SECTOR_MASK
definitions.
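For context, a minimal sketch of the pattern the driver switches to
(illustrative only: the example_end_io()/example_submit_read() names are
not part of the patch, and handling of bios that were already submitted
when a later step fails is omitted):

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/uio.h>

/* Release the pages pinned by bio_iov_iter_get_pages(); mark them
 * dirty only for reads, since the device wrote into them.
 */
static void example_end_io(struct bio *bio)
{
        bio_release_pages(bio, bio_data_dir(bio) == READ);
        bio_put(bio);
}

/* Build and submit bios for a read of 'len' bytes described by 'iov'. */
static int example_submit_read(struct block_device *bdev, struct iovec *iov,
                               int iov_nr, size_t len, sector_t sector)
{
        struct iov_iter iter;
        struct bio *bio;
        int ret;

        iov_iter_init(&iter, READ, iov, iov_nr, len);

        do {
                unsigned int nr_vecs = bio_iov_vecs_to_alloc(&iter, BIO_MAX_VECS);

                bio = bio_alloc(bdev, nr_vecs, REQ_OP_READ, GFP_KERNEL);
                if (!bio)
                        return -ENOMEM;
                bio->bi_iter.bi_sector = sector;
                bio->bi_end_io = example_end_io;

                /* Pins the user pages and adds them to the bio. */
                ret = bio_iov_iter_get_pages(bio, &iter);
                if (ret) {
                        bio_put(bio);
                        return ret;
                }

                sector += bio->bi_iter.bi_size >> SECTOR_SHIFT;
                submit_bio(bio);
        } while (iov_iter_count(&iter));

        return 0;
}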
v2:
- fix the bio allocation size formula
- fix the bio_iov_iter_get_pages() error path to release only the pages
that were successfully pinned (see the sketch after this list)
- add an explicit BUG_ON() to check that each bio starts sector-aligned
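A minimal sketch of that error path in isolation (illustrative names;
the driver itself walks req->bio up to bio_nr):

#include <linux/bio.h>

/* Only the bios already stored in the array have successfully pinned
 * their pages, so only they need bio_release_pages(); the bio whose
 * bio_iov_iter_get_pages() call failed is dropped with a plain
 * bio_put() at the error site.
 */
static void example_cleanup_on_error(struct bio **bios, int nr_built)
{
        int i;

        for (i = 0; i < nr_built; i++) {
                bio_release_pages(bios[i], false);
                bio_put(bios[i]);
        }
}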
https://virtuozzo.atlassian.net/browse/PSBM-157752
Signed-off-by: Andrey Zhadchenko <andrey.zhadchenko at virtuozzo.com>
Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
======
Patchset description:
vhost-blk: bounce buffer for unaligned requests
Andrey Zhadchenko (2):
vhost-blk: rework iov and bio handling
vhost-blk: add bounce-buffer for non-aligned requests
David Howells (1):
iov_iter: Add a function to extract a page list from an iterator
Pavel Tikhomirov (1):
vhost-blk: remove excess vhost_blk_req.use_inline
Feature: vhost-blk: in-kernel accelerator for virtio-blk guests
---
drivers/vhost/blk.c | 161 ++++++++++++++--------------------------------------
1 file changed, 43 insertions(+), 118 deletions(-)
diff --git a/drivers/vhost/blk.c b/drivers/vhost/blk.c
index 90d20d0eb722..a289552d6f37 100644
--- a/drivers/vhost/blk.c
+++ b/drivers/vhost/blk.c
@@ -49,22 +49,10 @@ enum {
#define VHOST_MAX_METADATA_IOV 1
-#define VHOST_BLK_SECTOR_BITS 9
-#define VHOST_BLK_SECTOR_SIZE (1 << VHOST_BLK_SECTOR_BITS)
-#define VHOST_BLK_SECTOR_MASK (VHOST_BLK_SECTOR_SIZE - 1)
-
-struct req_page_list {
- struct page **pages;
- int pages_nr;
-};
-
#define NR_INLINE 16
struct vhost_blk_req {
- struct req_page_list inline_pl[NR_INLINE];
- struct page *inline_page[NR_INLINE];
struct bio *inline_bio[NR_INLINE];
- struct req_page_list *pl;
int req_bin;
bool use_inline;
@@ -137,12 +125,6 @@ static int move_iovec(struct iovec *from, struct iovec *to,
return len ? -1 : moved_seg;
}
-static inline int iov_num_pages(struct iovec *iov)
-{
- return (PAGE_ALIGN((unsigned long)iov->iov_base + iov->iov_len) -
- ((unsigned long)iov->iov_base & PAGE_MASK)) >> PAGE_SHIFT;
-}
-
static inline int vhost_blk_set_status(struct vhost_blk_req *req, u8 status)
{
struct iov_iter iter;
@@ -172,28 +154,14 @@ static void vhost_blk_req_done(struct bio *bio)
vhost_vq_work_queue(&req->blk_vq->vq, &req->blk_vq->work);
}
+ bio_release_pages(bio, !req->bi_opf);
bio_put(bio);
}
-static void vhost_blk_req_umap(struct vhost_blk_req *req)
+static void vhost_blk_req_cleanup(struct vhost_blk_req *req)
{
- struct req_page_list *pl;
- int i, j;
-
- if (req->pl) {
- for (i = 0; i < req->iov_nr; i++) {
- pl = &req->pl[i];
-
- for (j = 0; j < pl->pages_nr; j++) {
- if (!req->bi_opf)
- set_page_dirty_lock(pl->pages[j]);
- put_page(pl->pages[j]);
- }
- }
- }
-
if (!req->use_inline)
- kfree(req->pl);
+ kfree(req->bio);
}
static int vhost_blk_bio_make_simple(struct vhost_blk_req *req,
@@ -202,7 +170,6 @@ static int vhost_blk_bio_make_simple(struct vhost_blk_req *req,
struct bio *bio;
req->use_inline = true;
- req->pl = NULL;
req->bio = req->inline_bio;
bio = bio_alloc(bdev, 0, req->bi_opf, GFP_KERNEL);
@@ -219,111 +186,69 @@ static int vhost_blk_bio_make_simple(struct vhost_blk_req *req,
return 0;
}
-static struct page **vhost_blk_prepare_req(struct vhost_blk_req *req,
- int total_pages, int iov_nr)
-{
- int pl_len, page_len, bio_len;
- void *buf;
-
- req->use_inline = false;
- pl_len = iov_nr * sizeof(req->pl[0]);
- page_len = total_pages * sizeof(struct page *);
- bio_len = (total_pages + BIO_MAX_VECS - 1) / BIO_MAX_VECS * sizeof(struct bio *);
-
- buf = kmalloc(pl_len + page_len + bio_len, GFP_KERNEL);
- if (!buf)
- return NULL;
-
- req->pl = buf;
- req->bio = buf + pl_len + page_len;
-
- return buf + pl_len;
-}
-
static int vhost_blk_bio_make(struct vhost_blk_req *req,
struct block_device *bdev)
{
- int pages_nr_total, i, j, ret;
- struct iovec *iov = req->iov;
- int iov_nr = req->iov_nr;
- struct page **pages, *page;
- struct bio *bio = NULL;
- int bio_nr = 0;
+ int nr_pages, nr_pages_total = 0, bio_nr = 0, ret, i;
+ struct iov_iter iter;
+ struct bio *bio;
+ sector_t sector = req->sector;
+ unsigned long pos = 0;
if (unlikely(req->bi_opf == REQ_OP_FLUSH))
return vhost_blk_bio_make_simple(req, bdev);
- pages_nr_total = 0;
- for (i = 0; i < iov_nr; i++)
- pages_nr_total += iov_num_pages(&iov[i]);
+ iov_iter_init(&iter, req->bi_opf, req->iov, req->iov_nr, req->len);
- if (pages_nr_total > NR_INLINE) {
- pages = vhost_blk_prepare_req(req, pages_nr_total, iov_nr);
- if (!pages)
+ nr_pages_total = iov_iter_npages(&iter, INT_MAX);
+ if (nr_pages_total > NR_INLINE * BIO_MAX_VECS) {
+ req->bio = kmalloc(((nr_pages_total + BIO_MAX_VECS - 1) /
+ BIO_MAX_VECS) * sizeof(struct bio *),
+ GFP_KERNEL);
+ if (!req->bio)
return -ENOMEM;
+ req->use_inline = false;
} else {
req->use_inline = true;
- req->pl = req->inline_pl;
- pages = req->inline_page;
req->bio = req->inline_bio;
}
- req->iov_nr = 0;
- for (i = 0; i < iov_nr; i++) {
- int pages_nr = iov_num_pages(&iov[i]);
- unsigned long iov_base, iov_len;
- struct req_page_list *pl;
+ nr_pages = bio_iov_vecs_to_alloc(&iter, BIO_MAX_VECS);
+ do {
+ /* We can't handle the next bio if its start is not sector aligned */
+ BUG_ON(pos & SECTOR_MASK);
- iov_base = (unsigned long)iov[i].iov_base;
- iov_len = (unsigned long)iov[i].iov_len;
-
- ret = get_user_pages_fast(iov_base, pages_nr,
- !req->bi_opf, pages);
- if (ret != pages_nr)
+ bio = bio_alloc(bdev, nr_pages, req->bi_opf, GFP_KERNEL);
+ if (!bio)
goto fail;
- req->iov_nr++;
- pl = &req->pl[i];
- pl->pages_nr = pages_nr;
- pl->pages = pages;
-
- for (j = 0; j < pages_nr; j++) {
- unsigned int off, len, pos;
-
- page = pages[j];
- off = iov_base & ~PAGE_MASK;
- len = PAGE_SIZE - off;
- if (len > iov_len)
- len = iov_len;
-
- while (!bio || !bio_add_page(bio, page, len, off)) {
- bio = bio_alloc(bdev, bio_max_segs(pages_nr_total),
- req->bi_opf, GFP_KERNEL);
- if (!bio)
- goto fail;
- bio->bi_iter.bi_sector = req->sector;
- bio->bi_private = req;
- bio->bi_end_io = vhost_blk_req_done;
- req->bio[bio_nr++] = bio;
- }
-
- iov_base += len;
- iov_len -= len;
- pages_nr_total--;
+ bio->bi_iter.bi_sector = sector;
+ bio->bi_private = req;
+ bio->bi_end_io = vhost_blk_req_done;
- pos = (iov_base & VHOST_BLK_SECTOR_MASK) + iov_len;
- req->sector += pos >> VHOST_BLK_SECTOR_BITS;
+ ret = bio_iov_iter_get_pages(bio, &iter);
+ if (unlikely(ret)) {
+ bio_put(bio);
+ goto fail;
}
+ req->bio[bio_nr++] = bio;
+
+ pos += bio->bi_iter.bi_size;
+ sector = req->sector + (pos >> SECTOR_SHIFT);
+
+ nr_pages = bio_iov_vecs_to_alloc(&iter, BIO_MAX_VECS);
+ } while (nr_pages);
- pages += pages_nr;
- }
atomic_set(&req->bio_nr, bio_nr);
return 0;
-
fail:
- for (i = 0; i < bio_nr; i++)
+ for (i = 0; i < bio_nr; i++) {
+ bio_release_pages(req->bio[i], false);
bio_put(req->bio[i]);
- vhost_blk_req_umap(req);
+ }
+
+ vhost_blk_req_cleanup(req);
+
return -ENOMEM;
}
@@ -535,7 +460,7 @@ static void vhost_blk_handle_host_kick(struct vhost_work *work)
if (!blk)
blk = req->blk;
- vhost_blk_req_umap(req);
+ vhost_blk_req_cleanup(req);
status = req->bio_err == 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR;
ret = vhost_blk_set_status(req, status);