[Devel] [PATCH RHEL9 COMMIT] vhost-blk: rework iov and bio handling

Konstantin Khorenko khorenko at virtuozzo.com
Thu Jan 9 17:58:35 MSK 2025


The commit is pushed to "branch-rh9-5.14.0-427.44.1.vz9.80.x-ovz" and will appear at git at bitbucket.org:openvz/vzkernel.git
after rh9-5.14.0-427.44.1.vz9.80.3
------>
commit 8d5a6070839db6fb61e22aa5a18f1d4f634cbaad
Author: Andrey Zhadchenko <andrey.zhadchenko at virtuozzo.com>
Date:   Mon Dec 30 13:39:48 2024 +0800

    vhost-blk: rework iov and bio handling
    
    Manual page handling is tiresome and error-prone. Let's use iov
    iterators and the bio_iov_iter_get_pages() helper, which
    automatically fills a bio with pages from the iov. As this also
    pins the pages, call bio_release_pages() when each bio completes.
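
    In outline, the per-bio pattern this switches to is the following
    (a simplified sketch based on the diff below; bio batching and
    error handling are elided):

        iov_iter_init(&iter, dir, iov, iov_nr, len);
        bio = bio_alloc(bdev, bio_iov_vecs_to_alloc(&iter, BIO_MAX_VECS),
                        opf, GFP_KERNEL);
        /* Fill the bio from the iterator; this pins the user pages. */
        ret = bio_iov_iter_get_pages(bio, &iter);
        ...
        /* In the bi_end_io completion handler: */
        bio_release_pages(bio, mark_dirty); /* unpin; dirty the pages on reads */
        bio_put(bio);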
    
    While at it, remove VHOST_BLK_SECTOR_BITS and its friends, since
    they merely duplicate the generic SECTOR_SHIFT/SECTOR_MASK
    definitions.
    
    v2:
     - fix the bio allocation size formula (see the note after this list)
     - fix the bio_iov_iter_get_pages() error path to only put pages that
       were successfully pinned
     - add an explicit BUG_ON() to check that each bio starts sector-aligned
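
    The corrected allocation size is a plain ceiling division (the
    kernel's DIV_ROUND_UP()): one bio carries at most BIO_MAX_VECS
    pages, so the number of bio pointers to allocate is

        nr_bios = (nr_pages_total + BIO_MAX_VECS - 1) / BIO_MAX_VECS;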
    
    https://virtuozzo.atlassian.net/browse/PSBM-157752
    Signed-off-by: Andrey Zhadchenko <andrey.zhadchenko at virtuozzo.com>
    Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
    
    ======
    Patchset description:
    vhost-blk: bounce buffer for unaligned requests
    
    Andrey Zhadchenko (2):
      vhost-blk: rework iov and bio handling
      vhost-blk: add bounce-buffer for non-aligned requests
    
    David Howells (1):
      iov_iter: Add a function to extract a page list from an iterator
    
    Pavel Tikhomirov (1):
      vhost-blk: remove excess vhost_blk_req.use_inline
    
    Feature: vhost-blk: in-kernel accelerator for virtio-blk guests
---
 drivers/vhost/blk.c | 161 ++++++++++++++--------------------------------------
 1 file changed, 43 insertions(+), 118 deletions(-)

diff --git a/drivers/vhost/blk.c b/drivers/vhost/blk.c
index 90d20d0eb722..a289552d6f37 100644
--- a/drivers/vhost/blk.c
+++ b/drivers/vhost/blk.c
@@ -49,22 +49,10 @@ enum {
 
 #define VHOST_MAX_METADATA_IOV 1
 
-#define VHOST_BLK_SECTOR_BITS 9
-#define VHOST_BLK_SECTOR_SIZE (1 << VHOST_BLK_SECTOR_BITS)
-#define VHOST_BLK_SECTOR_MASK (VHOST_BLK_SECTOR_SIZE - 1)
-
-struct req_page_list {
-	struct page **pages;
-	int pages_nr;
-};
-
 #define NR_INLINE 16
 
 struct vhost_blk_req {
-	struct req_page_list inline_pl[NR_INLINE];
-	struct page *inline_page[NR_INLINE];
 	struct bio *inline_bio[NR_INLINE];
-	struct req_page_list *pl;
 	int req_bin;
 	bool use_inline;
 
@@ -137,12 +125,6 @@ static int move_iovec(struct iovec *from, struct iovec *to,
 	return len ? -1 : moved_seg;
 }
 
-static inline int iov_num_pages(struct iovec *iov)
-{
-	return (PAGE_ALIGN((unsigned long)iov->iov_base + iov->iov_len) -
-	       ((unsigned long)iov->iov_base & PAGE_MASK)) >> PAGE_SHIFT;
-}
-
 static inline int vhost_blk_set_status(struct vhost_blk_req *req, u8 status)
 {
 	struct iov_iter iter;
@@ -172,28 +154,14 @@ static void vhost_blk_req_done(struct bio *bio)
 		vhost_vq_work_queue(&req->blk_vq->vq, &req->blk_vq->work);
 	}
 
+	bio_release_pages(bio, !req->bi_opf);
 	bio_put(bio);
 }
 
-static void vhost_blk_req_umap(struct vhost_blk_req *req)
+static void vhost_blk_req_cleanup(struct vhost_blk_req *req)
 {
-	struct req_page_list *pl;
-	int i, j;
-
-	if (req->pl) {
-		for (i = 0; i < req->iov_nr; i++) {
-			pl = &req->pl[i];
-
-			for (j = 0; j < pl->pages_nr; j++) {
-				if (!req->bi_opf)
-					set_page_dirty_lock(pl->pages[j]);
-				put_page(pl->pages[j]);
-			}
-		}
-	}
-
 	if (!req->use_inline)
-		kfree(req->pl);
+		kfree(req->bio);
 }
 
 static int vhost_blk_bio_make_simple(struct vhost_blk_req *req,
@@ -202,7 +170,6 @@ static int vhost_blk_bio_make_simple(struct vhost_blk_req *req,
 	struct bio *bio;
 
 	req->use_inline = true;
-	req->pl = NULL;
 	req->bio = req->inline_bio;
 
 	bio = bio_alloc(bdev, 0, req->bi_opf, GFP_KERNEL);
@@ -219,111 +186,69 @@ static int vhost_blk_bio_make_simple(struct vhost_blk_req *req,
 	return 0;
 }
 
-static struct page **vhost_blk_prepare_req(struct vhost_blk_req *req,
-				 int total_pages, int iov_nr)
-{
-	int pl_len, page_len, bio_len;
-	void *buf;
-
-	req->use_inline = false;
-	pl_len = iov_nr * sizeof(req->pl[0]);
-	page_len = total_pages * sizeof(struct page *);
-	bio_len = (total_pages + BIO_MAX_VECS - 1) / BIO_MAX_VECS * sizeof(struct bio *);
-
-	buf = kmalloc(pl_len + page_len + bio_len, GFP_KERNEL);
-	if (!buf)
-		return NULL;
-
-	req->pl	= buf;
-	req->bio = buf + pl_len + page_len;
-
-	return buf + pl_len;
-}
-
 static int vhost_blk_bio_make(struct vhost_blk_req *req,
 			      struct block_device *bdev)
 {
-	int pages_nr_total, i, j, ret;
-	struct iovec *iov = req->iov;
-	int iov_nr = req->iov_nr;
-	struct page **pages, *page;
-	struct bio *bio = NULL;
-	int bio_nr = 0;
+	int nr_pages, nr_pages_total = 0, bio_nr = 0, ret, i;
+	struct iov_iter iter;
+	struct bio *bio;
+	sector_t sector = req->sector;
+	unsigned long pos = 0;
 
 	if (unlikely(req->bi_opf == REQ_OP_FLUSH))
 		return vhost_blk_bio_make_simple(req, bdev);
 
-	pages_nr_total = 0;
-	for (i = 0; i < iov_nr; i++)
-		pages_nr_total += iov_num_pages(&iov[i]);
+	iov_iter_init(&iter, req->bi_opf, req->iov, req->iov_nr, req->len);
 
-	if (pages_nr_total > NR_INLINE) {
-		pages = vhost_blk_prepare_req(req, pages_nr_total, iov_nr);
-		if (!pages)
+	nr_pages_total = iov_iter_npages(&iter, INT_MAX);
+	if (nr_pages_total > NR_INLINE * BIO_MAX_VECS) {
+		req->bio = kmalloc(((nr_pages_total + BIO_MAX_VECS - 1) /
+				    BIO_MAX_VECS) * sizeof(struct bio *),
+				   GFP_KERNEL);
+		if (!req->bio)
 			return -ENOMEM;
+		req->use_inline = false;
 	} else {
 		req->use_inline = true;
-		req->pl = req->inline_pl;
-		pages = req->inline_page;
 		req->bio = req->inline_bio;
 	}
 
-	req->iov_nr = 0;
-	for (i = 0; i < iov_nr; i++) {
-		int pages_nr = iov_num_pages(&iov[i]);
-		unsigned long iov_base, iov_len;
-		struct req_page_list *pl;
+	nr_pages = bio_iov_vecs_to_alloc(&iter, BIO_MAX_VECS);
+	do {
+		/* We can't handle the next bio if its start is not sector aligned */
+		BUG_ON(pos & SECTOR_MASK);
 
-		iov_base = (unsigned long)iov[i].iov_base;
-		iov_len  = (unsigned long)iov[i].iov_len;
-
-		ret = get_user_pages_fast(iov_base, pages_nr,
-					  !req->bi_opf, pages);
-		if (ret != pages_nr)
+		bio = bio_alloc(bdev, nr_pages, req->bi_opf, GFP_KERNEL);
+		if (!bio)
 			goto fail;
 
-		req->iov_nr++;
-		pl = &req->pl[i];
-		pl->pages_nr = pages_nr;
-		pl->pages = pages;
-
-		for (j = 0; j < pages_nr; j++) {
-			unsigned int off, len, pos;
-
-			page = pages[j];
-			off = iov_base & ~PAGE_MASK;
-			len = PAGE_SIZE - off;
-			if (len > iov_len)
-				len = iov_len;
-
-			while (!bio || !bio_add_page(bio, page, len, off)) {
-				bio = bio_alloc(bdev, bio_max_segs(pages_nr_total),
-						req->bi_opf, GFP_KERNEL);
-				if (!bio)
-					goto fail;
-				bio->bi_iter.bi_sector  = req->sector;
-				bio->bi_private = req;
-				bio->bi_end_io  = vhost_blk_req_done;
-				req->bio[bio_nr++] = bio;
-			}
-
-			iov_base	+= len;
-			iov_len		-= len;
-			pages_nr_total--;
+		bio->bi_iter.bi_sector = sector;
+		bio->bi_private = req;
+		bio->bi_end_io  = vhost_blk_req_done;
 
-			pos = (iov_base & VHOST_BLK_SECTOR_MASK) + iov_len;
-			req->sector += pos >> VHOST_BLK_SECTOR_BITS;
+		ret = bio_iov_iter_get_pages(bio, &iter);
+		if (unlikely(ret)) {
+			bio_put(bio);
+			goto fail;
 		}
+		req->bio[bio_nr++] = bio;
+
+		pos += bio->bi_iter.bi_size;
+		sector = req->sector + (pos >> SECTOR_SHIFT);
+
+		nr_pages = bio_iov_vecs_to_alloc(&iter, BIO_MAX_VECS);
+	} while (nr_pages);
 
-		pages += pages_nr;
-	}
 	atomic_set(&req->bio_nr, bio_nr);
 	return 0;
-
 fail:
-	for (i = 0; i < bio_nr; i++)
+	for (i = 0; i < bio_nr; i++) {
+		bio_release_pages(req->bio[i], false);
 		bio_put(req->bio[i]);
-	vhost_blk_req_umap(req);
+	}
+
+	vhost_blk_req_cleanup(req);
+
 	return -ENOMEM;
 }
 
@@ -535,7 +460,7 @@ static void vhost_blk_handle_host_kick(struct vhost_work *work)
 		if (!blk)
 			blk = req->blk;
 
-		vhost_blk_req_umap(req);
+		vhost_blk_req_cleanup(req);
 
 		status = req->bio_err == 0 ?  VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR;
 		ret = vhost_blk_set_status(req, status);

