[Devel] [PATCH RHEL9 COMMIT] fuse: skip bg_queue for async direct io pcs requests

Konstantin Khorenko khorenko at virtuozzo.com
Wed Nov 1 22:53:11 MSK 2023


The commit is pushed to "branch-rh9-5.14.0-284.25.1.vz9.30.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh9-5.14.0-284.25.1.vz9.30.8
------>
commit 54e40be2a9b78ed72cfa8fd7a059aad3d7936036
Author: Alexey Kuznetsov <kuznet at virtuozzo.com>
Date:   Fri Oct 6 18:44:14 2023 +0800

    fuse: skip bg_queue for async direct io pcs requests
    
    There is a major problem in the fuse pcs implementation.
    While requests scale by cpu, we still have contention on bg_lock
    and all the requests go through a single bottleneck at bg_queue.
    Of course we had inferior performance due to this, but we
    ignored the problem as the performance was still good.
    
    But recently it was found that under some realistic circumstances
    we get a collapse of performance: it drops by more than 10 times
    when the load on pcs increases. The reason is that the algorithm
    effectively reduces the number of cpus used to 1 or a few and,
    besides that, triggers extreme contention on bg_lock.
    
    Yet, bg_queue for kio pcs requests is entirely useless.
    The request is already allocated, resources are consumed.
    If we push it to kio pcs it will be handled by the pcs fine-grained
    congestion avoidance. So, we can skip bg_queue.
    
    This patch does this only for async direct io requests.
    The reason is that for page cache reqs we must conform to
    invalidation rules, which need some serialization.
    It is not impossible, but it requires some work and is in fact
    not very useful.
    
    The patch is extreme. It removes not only bg_queue,
    which is a good deal, no doubt, but also blocking at
    allocation of the aio request, which is dubious. Right
    now we are limited only by the system aio limit.
    
    https://pmc.acronis.work/browse/VSTOR-54040
    
    Signed-off-by: Alexey Kuznetsov <kuznet at acronis.com>
    
    Feature: vStorage
---
 fs/fuse/dev.c    | 26 +++++++++++++++++++++-----
 fs/fuse/file.c   |  1 +
 fs/fuse/fuse_i.h |  2 ++
 3 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index eb3fc44fe324..b93d77af2d24 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -362,7 +362,8 @@ void __fuse_request_end( struct fuse_req *req, bool flush_bg)
 			flush_bg_queue_and_unlock(fc);
 		else
 			spin_unlock(&fc->bg_lock);
-	}
+	} else if (test_bit(FR_NO_ACCT, &req->flags))
+		bg = true;
 
 	if (test_bit(FR_ASYNC, &req->flags)) {
 		req->args->end(fm, req->args, req->out.h.error);
@@ -465,9 +466,10 @@ static void __fuse_request_send(struct fuse_req *req)
 
 	if (fc->kio.op) {
 		int ret = fc->kio.op->req_classify(req, false, false);
-		if (likely(!ret))
-			return fc->kio.op->req_send(req, false);
-		else if (ret < 0)
+		if (likely(!ret)) {
+			fc->kio.op->req_send(req, false);
+			return;
+		} else if (ret < 0)
 			return;
 	}
 
@@ -600,6 +602,7 @@ static int fuse_request_queue_background(struct fuse_req *req)
 	struct fuse_conn *fc = fm->fc;
 	struct fuse_file *ff = req->args->ff;
 	struct fuse_iqueue *fiq = req->args->fiq;
+	int nonblocking = test_bit(FR_NONBLOCKING, &req->flags);
 	int ret = -ENOTCONN;
 
 	WARN_ON(!test_bit(FR_BACKGROUND, &req->flags));
@@ -609,6 +612,19 @@ static int fuse_request_queue_background(struct fuse_req *req)
 		atomic_inc(&fc->num_waiting);
 	}
 	__set_bit(FR_ISREPLY, &req->flags);
+
+	if (fc->kio.op && req->args->async && !nonblocking &&
+	    (!ff || !test_bit(FUSE_S_FAIL_IMMEDIATELY, &ff->ff_state))) {
+		int ret = fc->kio.op->req_classify(req, false, false);
+		if (likely(!ret)) {
+			__clear_bit(FR_BACKGROUND, &req->flags);
+			__set_bit(FR_NO_ACCT, &req->flags);
+			fc->kio.op->req_send(req, true);
+			return 0;
+		} else if (ret < 0)
+			return 0;
+	}
+
 	spin_lock(&fc->bg_lock);
 	if (ff && test_bit(FUSE_S_FAIL_IMMEDIATELY, &ff->ff_state)) {
 		ret = -EIO;
@@ -622,7 +638,7 @@ static int fuse_request_queue_background(struct fuse_req *req)
 			set_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC);
 		}
 
-		if (test_bit(FR_NONBLOCKING, &req->flags)) {
+		if (nonblocking) {
 			fc->active_background++;
 			spin_lock(&fiq->lock);
 			req->in.h.unique = fuse_get_unique(fiq);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 869274095664..c685e019073d 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -998,6 +998,7 @@ static ssize_t fuse_async_req_send(struct fuse_mount *fm,
 
 	ia->ap.args.end = fuse_aio_complete_req;
 	ia->ap.args.may_block = io->should_dirty;
+	ia->ap.args.async = true;
 	err = fuse_simple_background(fm, &ia->ap.args, GFP_KERNEL);
 	if (err)
 		fuse_aio_complete_req(fm, &ia->ap.args, err);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index e3654005abef..d9e27b36784a 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -313,6 +313,7 @@ struct fuse_args {
 	bool may_block:1;
 	bool nonblocking:1;
 	bool kio_internal:1;
+	bool async:1;
 	struct fuse_in_arg in_args[3];
 	struct fuse_arg out_args[3];
 	void (*end)(struct fuse_mount *fm, struct fuse_args *args, int error);
@@ -407,6 +408,7 @@ enum fuse_req_flag {
 	FR_ASYNC,
 	FR_NONBLOCKING,
 	FR_KIO_INTERNAL,
+	FR_NO_ACCT,
 };
 
 /**


More information about the Devel mailing list