[Devel] [PATCH RHEL9 COMMIT] fs/fuse: multithread fuse write

Konstantin Khorenko khorenko at virtuozzo.com
Thu Dec 28 23:51:04 MSK 2023


The commit is pushed to "branch-rh9-5.14.0-362.8.1.vz9.35.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh9-5.14.0-362.8.1.vz9.35.4
------>
commit c4e2490aa51609a7fff55cb0b379c2b72a53f5dd
Author: Alexey Kuznetsov <kuznet at virtuozzo.com>
Date:   Sat Dec 16 02:06:38 2023 +0800

    fs/fuse: multithread fuse write
    
    FUSE user space creates a cloned channel device and binds it to a CPU.
    The kernel routes WRITE requests to these channels, which allows us
    to offload expensive reads from the fuse device to multiple threads.
    
    At the moment we see significant improvements, about 30% in some
    major ostor workloads.
    
    Signed-off-by: Alexey Kuznetsov <kuznet at acronis.com>
    
    Feature: fuse: multithread fuse write
---
 fs/fuse/dev.c             | 26 +++++++++---------------
 fs/fuse/file.c            | 16 ++++++++++++---
 fs/fuse/fuse_i.h          |  5 ++++-
 fs/fuse/inode.c           | 52 +++++++++++++++++++++++++++++++++++++++++------
 include/uapi/linux/fuse.h |  1 +
 5 files changed, 74 insertions(+), 26 deletions(-)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 5cdc237e74cb..dd7fff50b914 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -2339,8 +2339,12 @@ void fuse_abort_conn(struct fuse_conn *fc)
 		fc->max_background = UINT_MAX;
 		flush_bg_queue_and_unlock(fc);
 
-		for_each_online_cpu(cpu)
-			fuse_abort_iqueue(per_cpu_ptr(fc->iqs, cpu), &to_end);
+		for_each_online_cpu(cpu) {
+			if (fc->riqs)
+				fuse_abort_iqueue(per_cpu_ptr(fc->riqs, cpu), &to_end);
+			if (fc->wiqs)
+				fuse_abort_iqueue(per_cpu_ptr(fc->wiqs, cpu), &to_end);
+		}
 		fuse_abort_iqueue(&fc->main_iq, &to_end);
 
 		end_polls(fc);
@@ -2453,20 +2457,10 @@ static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
 		}
 		break;
 	case FUSE_DEV_IOC_SETAFF:
-		res = -EINVAL;
-		if (arg < NR_CPUS && cpu_possible(arg)) {
-			struct fuse_dev *fud = fuse_get_dev(file);
-			spin_lock(&fud->fc->lock);
-
-			fud->fiq->handled_by_fud--;
-			BUG_ON(fud->fiq->handled_by_fud < 0);
-
-			fud->fiq = per_cpu_ptr(fud->fc->iqs, arg);
-
-			fud->fiq->handled_by_fud++;
-			spin_unlock(&fud->fc->lock);
-			res = 0;
-		}
+		res = fuse_install_percpu_iqs(fuse_get_dev(file), arg, 0);
+		break;
+	case FUSE_DEV_IOC_SETAFF_W:
+		res = fuse_install_percpu_iqs(fuse_get_dev(file), arg, 1);
 		break;
 	case FUSE_IOC_KIO_CALL:
 	{
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 32fd234e575b..691e8bbbdcc4 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -797,9 +797,12 @@ void fuse_read_args_fill(struct fuse_io_args *ia, struct file *file, loff_t pos,
 	args->io_inode = file_inode(file);
 
 	if (opcode == FUSE_READ) {
-		struct fuse_iqueue *fiq = raw_cpu_ptr(ff->fm->fc->iqs);
-		if (fiq->handled_by_fud)
-			args->fiq = fiq;
+		if (ff->fm->fc->riqs) {
+			struct fuse_iqueue *fiq = raw_cpu_ptr(ff->fm->fc->riqs);
+
+			if (fiq->handled_by_fud)
+				args->fiq = fiq;
+		}
 		args->inode = file->f_path.dentry->d_inode;
 		args->ff = ff;
 	}
@@ -1308,6 +1311,13 @@ static void fuse_write_args_fill(struct fuse_io_args *ia, struct fuse_file *ff,
 	args->out_args[0].value = &ia->write.out;
 	args->io_inode = inode;
 	args->ff = ff;
+
+	if (ff->fm->fc->wiqs) {
+		struct fuse_iqueue *fiq = raw_cpu_ptr(ff->fm->fc->wiqs);
+
+		if (fiq->handled_by_fud)
+			args->fiq = fiq;
+	}
 }
 
 static unsigned int fuse_write_flags(struct kiocb *iocb)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 236907b9f6e2..426f7b9e7e2f 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -723,7 +723,8 @@ struct fuse_conn {
 	struct fuse_iqueue main_iq;
 
 	/** Per-cpu input queues */
-	struct fuse_iqueue __percpu *iqs;
+	struct fuse_iqueue __percpu *riqs;
+	struct fuse_iqueue __percpu *wiqs;
 
 	/** The next unique kernel file handle */
 	atomic64_t khctr;
@@ -1300,6 +1301,8 @@ int fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
 		    struct user_namespace *user_ns,
 		    const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv);
 
+int fuse_install_percpu_iqs(struct fuse_dev *fud, int cpu, int rw);
+
 /**
  * Release reference to fuse_conn
  */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 8020c49d2717..1909583b5f37 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1016,6 +1016,48 @@ static void fuse_iqueue_init(struct fuse_iqueue *fiq,
 	fiq->priv = priv;
 }
 
+int fuse_install_percpu_iqs(struct fuse_dev *fud, int dest_cpu, int rw)
+{
+	int res = -EINVAL;
+
+	if (dest_cpu < NR_CPUS && cpu_possible(dest_cpu)) {
+		struct fuse_iqueue __percpu **iqs_p = rw ? &fud->fc->wiqs : &fud->fc->riqs;
+		struct fuse_iqueue __percpu *iqs;
+
+		iqs = *iqs_p;
+		if (iqs == NULL) {
+			int cpu;
+
+			iqs = alloc_percpu(struct fuse_iqueue);
+			if (!iqs)
+				return -ENOMEM;
+			for_each_possible_cpu(cpu) {
+				fuse_iqueue_init(per_cpu_ptr(iqs, cpu), fud->fc->main_iq.ops,
+								  fud->fc->main_iq.priv);
+			}
+		}
+
+		spin_lock(&fud->fc->lock);
+
+		if (*iqs_p == NULL) {
+			*iqs_p = iqs;
+		} else if (*iqs_p != iqs) {
+			free_percpu(iqs);
+			iqs = *iqs_p;
+		}
+
+		fud->fiq->handled_by_fud--;
+		BUG_ON(fud->fiq->handled_by_fud < 0);
+
+		fud->fiq = per_cpu_ptr(iqs, dest_cpu);
+
+		fud->fiq->handled_by_fud++;
+		spin_unlock(&fud->fc->lock);
+		res = 0;
+	}
+	return res;
+}
+
 static void fuse_pqueue_init(struct fuse_pqueue *fpq)
 {
 	unsigned int i;
@@ -1044,11 +1086,6 @@ int fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
 		init_waitqueue_head(&fc->qhash[cpu].waitq);
 	}
 	fuse_iqueue_init(&fc->main_iq, fiq_ops, fiq_priv);
-	fc->iqs = alloc_percpu(struct fuse_iqueue);
-	if (!fc->iqs)
-		return -ENOMEM;
-	for_each_online_cpu(cpu)
-		fuse_iqueue_init(per_cpu_ptr(fc->iqs, cpu), fiq_ops, fiq_priv);
 	INIT_LIST_HEAD(&fc->bg_queue);
 	INIT_LIST_HEAD(&fc->entry);
 	INIT_LIST_HEAD(&fc->devices);
@@ -1511,7 +1548,10 @@ EXPORT_SYMBOL_GPL(fuse_send_init);
 void fuse_free_conn(struct fuse_conn *fc)
 {
 	WARN_ON(!list_empty(&fc->devices));
-	free_percpu(fc->iqs);
+	if (fc->riqs)
+		free_percpu(fc->riqs);
+	if (fc->wiqs)
+		free_percpu(fc->wiqs);
 	kfree_rcu(fc, rcu);
 }
 EXPORT_SYMBOL_GPL(fuse_free_conn);
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 6a02da47e72c..4c8586669342 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -977,6 +977,7 @@ struct fuse_notify_retrieve_in {
 #define FUSE_DEV_IOC_MAGIC		229
 #define FUSE_DEV_IOC_CLONE		_IOR(FUSE_DEV_IOC_MAGIC, 0, uint32_t)
 #define FUSE_DEV_IOC_SETAFF		_IO(FUSE_DEV_IOC_MAGIC, 1)
+#define FUSE_DEV_IOC_SETAFF_W		_IO(FUSE_DEV_IOC_MAGIC, 2)
 
 struct fuse_lseek_in {
 	uint64_t	fh;


More information about the Devel mailing list