[Devel] [PATCH RHEL9 COMMIT] fs/fuse: multithread fuse write

Konstantin Khorenko khorenko at virtuozzo.com
Thu Dec 28 23:52:00 MSK 2023


The commit is pushed to "branch-rh9-5.14.0-284.25.1.vz9.30.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh9-5.14.0-284.25.1.vz9.30.16
------>
commit 8108bd43ce97fa91bf3346e81144846ac732b0ff
Author: Alexey Kuznetsov <kuznet at virtuozzo.com>
Date:   Sat Dec 16 02:06:38 2023 +0800

    fs/fuse: multithread fuse write
    
    The fuse user-space daemon creates cloned channel devices and binds each
    one to a CPU. The kernel routes WRITE requests to these channels, which
    allows us to offload expensive reads from the fuse device to multiple threads.
    
    At the moment we see significant improvements: about 30% in some
    major ostor workloads.
    
    Signed-off-by: Alexey Kuznetsov <kuznet at acronis.com>
    
    Feature: fuse: multithread fuse write
---
 fs/fuse/dev.c             | 26 +++++++++---------------
 fs/fuse/file.c            | 16 ++++++++++++---
 fs/fuse/fuse_i.h          |  5 ++++-
 fs/fuse/inode.c           | 52 +++++++++++++++++++++++++++++++++++++++++------
 include/uapi/linux/fuse.h |  1 +
 5 files changed, 74 insertions(+), 26 deletions(-)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 8d15f76e0aea..ce5e72ce898e 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -2337,8 +2337,12 @@ void fuse_abort_conn(struct fuse_conn *fc)
 		fc->max_background = UINT_MAX;
 		flush_bg_queue_and_unlock(fc);
 
-		for_each_online_cpu(cpu)
-			fuse_abort_iqueue(per_cpu_ptr(fc->iqs, cpu), &to_end);
+		for_each_online_cpu(cpu) {
+			if (fc->riqs)
+				fuse_abort_iqueue(per_cpu_ptr(fc->riqs, cpu), &to_end);
+			if (fc->wiqs)
+				fuse_abort_iqueue(per_cpu_ptr(fc->wiqs, cpu), &to_end);
+		}
 		fuse_abort_iqueue(&fc->main_iq, &to_end);
 
 		end_polls(fc);
@@ -2451,20 +2455,10 @@ static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
 		}
 		break;
 	case FUSE_DEV_IOC_SETAFF:
-		res = -EINVAL;
-		if (arg < NR_CPUS && cpu_possible(arg)) {
-			struct fuse_dev *fud = fuse_get_dev(file);
-			spin_lock(&fud->fc->lock);
-
-			fud->fiq->handled_by_fud--;
-			BUG_ON(fud->fiq->handled_by_fud < 0);
-
-			fud->fiq = per_cpu_ptr(fud->fc->iqs, arg);
-
-			fud->fiq->handled_by_fud++;
-			spin_unlock(&fud->fc->lock);
-			res = 0;
-		}
+		res = fuse_install_percpu_iqs(fuse_get_dev(file), arg, 0);
+		break;
+	case FUSE_DEV_IOC_SETAFF_W:
+		res = fuse_install_percpu_iqs(fuse_get_dev(file), arg, 1);
 		break;
 	case FUSE_IOC_KIO_CALL:
 	{
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index c685e019073d..6ea5921d0865 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -789,9 +789,12 @@ void fuse_read_args_fill(struct fuse_io_args *ia, struct file *file, loff_t pos,
 	args->io_inode = file_inode(file);
 
 	if (opcode == FUSE_READ) {
-		struct fuse_iqueue *fiq = raw_cpu_ptr(ff->fm->fc->iqs);
-		if (fiq->handled_by_fud)
-			args->fiq = fiq;
+		if (ff->fm->fc->riqs) {
+			struct fuse_iqueue *fiq = raw_cpu_ptr(ff->fm->fc->riqs);
+
+			if (fiq->handled_by_fud)
+				args->fiq = fiq;
+		}
 		args->inode = file->f_path.dentry->d_inode;
 		args->ff = ff;
 	}
@@ -1299,6 +1302,13 @@ static void fuse_write_args_fill(struct fuse_io_args *ia, struct fuse_file *ff,
 	args->out_args[0].value = &ia->write.out;
 	args->io_inode = inode;
 	args->ff = ff;
+
+	if (ff->fm->fc->wiqs) {
+		struct fuse_iqueue *fiq = raw_cpu_ptr(ff->fm->fc->wiqs);
+
+		if (fiq->handled_by_fud)
+			args->fiq = fiq;
+	}
 }
 
 static unsigned int fuse_write_flags(struct kiocb *iocb)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 1633db46c6ef..74e95f508504 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -714,7 +714,8 @@ struct fuse_conn {
 	struct fuse_iqueue main_iq;
 
 	/** Per-cpu input queues */
-	struct fuse_iqueue __percpu *iqs;
+	struct fuse_iqueue __percpu *riqs;
+	struct fuse_iqueue __percpu *wiqs;
 
 	/** The next unique kernel file handle */
 	atomic64_t khctr;
@@ -1276,6 +1277,8 @@ int fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
 		    struct user_namespace *user_ns,
 		    const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv);
 
+int fuse_install_percpu_iqs(struct fuse_dev *fud, int cpu, int rw);
+
 /**
  * Release reference to fuse_conn
  */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 49993e1d20a5..c99dc3af5b10 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -963,6 +963,48 @@ static void fuse_iqueue_init(struct fuse_iqueue *fiq,
 	fiq->priv = priv;
 }
 
+/*
+ * Bind @fud to the @dest_cpu slot of fc->wiqs (@rw != 0) or fc->riqs (@rw == 0),
+ * allocating the per-cpu array on first use; 0, -EINVAL (bad cpu) or -ENOMEM.
+ */
+int fuse_install_percpu_iqs(struct fuse_dev *fud, int dest_cpu, int rw)
+{
+	struct fuse_iqueue __percpu **iqs_p;
+	struct fuse_iqueue __percpu *iqs;
+	struct fuse_conn *fc;
+	int cpu;
+
+	/* dest_cpu is signed: a negative value must never reach cpu_possible(). */
+	if (dest_cpu < 0 || dest_cpu >= NR_CPUS || !cpu_possible(dest_cpu))
+		return -EINVAL;
+
+	fc = fud->fc;
+	iqs_p = rw ? &fc->wiqs : &fc->riqs;
+	iqs = *iqs_p;
+	if (!iqs) {
+		iqs = alloc_percpu(struct fuse_iqueue);
+		if (!iqs)
+			return -ENOMEM;
+		for_each_possible_cpu(cpu)
+			fuse_iqueue_init(per_cpu_ptr(iqs, cpu), fc->main_iq.ops, fc->main_iq.priv);
+	}
+
+	spin_lock(&fc->lock);
+	if (!*iqs_p) {
+		*iqs_p = iqs;
+	} else if (*iqs_p != iqs) {
+		/* Another caller installed an array first: drop ours, use theirs. */
+		free_percpu(iqs);
+		iqs = *iqs_p;
+	}
+	fud->fiq->handled_by_fud--;
+	BUG_ON(fud->fiq->handled_by_fud < 0);
+	fud->fiq = per_cpu_ptr(iqs, dest_cpu);
+	fud->fiq->handled_by_fud++;
+	spin_unlock(&fc->lock);
+	return 0;
+}
+
 static void fuse_pqueue_init(struct fuse_pqueue *fpq)
 {
 	unsigned int i;
@@ -991,11 +1033,6 @@ int fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
 		init_waitqueue_head(&fc->qhash[cpu].waitq);
 	}
 	fuse_iqueue_init(&fc->main_iq, fiq_ops, fiq_priv);
-	fc->iqs = alloc_percpu(struct fuse_iqueue);
-	if (!fc->iqs)
-		return -ENOMEM;
-	for_each_online_cpu(cpu)
-		fuse_iqueue_init(per_cpu_ptr(fc->iqs, cpu), fiq_ops, fiq_priv);
 	INIT_LIST_HEAD(&fc->bg_queue);
 	INIT_LIST_HEAD(&fc->entry);
 	INIT_LIST_HEAD(&fc->devices);
@@ -1449,7 +1486,10 @@ EXPORT_SYMBOL_GPL(fuse_send_init);
 void fuse_free_conn(struct fuse_conn *fc)
 {
 	WARN_ON(!list_empty(&fc->devices));
-	free_percpu(fc->iqs);
+	if (fc->riqs)
+		free_percpu(fc->riqs);
+	if (fc->wiqs)
+		free_percpu(fc->wiqs);
 	kfree_rcu(fc, rcu);
 }
 EXPORT_SYMBOL_GPL(fuse_free_conn);
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index ace90d721038..8414d2ca23f7 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -950,6 +950,7 @@ struct fuse_notify_retrieve_in {
 #define FUSE_DEV_IOC_MAGIC		229
 #define FUSE_DEV_IOC_CLONE		_IOR(FUSE_DEV_IOC_MAGIC, 0, uint32_t)
 #define FUSE_DEV_IOC_SETAFF		_IO(FUSE_DEV_IOC_MAGIC, 1)
+#define FUSE_DEV_IOC_SETAFF_W		_IO(FUSE_DEV_IOC_MAGIC, 2)
 
 struct fuse_lseek_in {
 	uint64_t	fh;


More information about the Devel mailing list