[Devel] [PATCH RHEL7 COMMIT] fuse kio: Introduce fiemap_work

Konstantin Khorenko khorenko at virtuozzo.com
Wed Sep 5 13:06:37 MSK 2018


The commit is pushed to "branch-rh7-3.10.0-862.11.6.vz7.71.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-862.11.6.vz7.71.8
------>
commit b00f4be216f806bd1f94783a2793fa1759ab3b4b
Author: Kirill Tkhai <ktkhai at virtuozzo.com>
Date:   Wed Sep 5 13:06:37 2018 +0300

    fuse kio: Introduce fiemap_work
    
    Make fiemap iterations async again, and move them
    to a new work item. A single fiemap work looks
    sufficient for now, since we also have a single work
    item each for main_job and completion_job (fiemap
    requests are a subset of all request types).
    
    If this turns out to be insufficient, we can
    simply scale it in the future (use 2 fiemap
    works, or introduce NR_CPUS works for fiemap).
    But one work looks like enough to me, and for the
    standard workflow it is more efficient than
    creating a kthread every time, as we did a
    couple of patches ago.
    
    The next patch makes fiemap queueing completely async.
    
    Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
    Acked-by: Alexey Kuznetsov <kuznet at virtuozzo.com>
    
    =====================
    Patchset description:
    
    Optimize fiemap ioctl
    
    https://jira.sw.ru/browse/HCI-90
    
    Summary:
      This patch set optimizes the fiemap ioctl by removing
      kthread creation. A static work item is used instead,
      so we save the time spent in copy_process().
    
    Fiemap does not require a separate kthread, since
    most of the time the kthread just waits for
    fiter->ireq.iocount to become 0. Instead of blocking,
    a generic worker can queue further fiemap requests
    during that time. This is what the patch set
    introduces.
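
    For illustration, the scheme this series removes looked
    roughly like the following (a hedged sketch only:
    fiemap_process_more() and pcs_ireq_iocount_read() are
    illustrative names, not the real helpers):

    static int fiemap_worker(void *arg)
    {
            struct fiemap_iterator *fiter = arg;

            /* One kthread per request: it exists mainly to block
             * here until the in-flight sub-requests of the current
             * iteration drain, i.e. iocount reaches 0. */
            while (fiemap_process_more(fiter))
                    wait_event(fiter->wq,
                               pcs_ireq_iocount_read(&fiter->ireq) == 0);
            return 0;
    }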
    
    Note that we used to have a kthread for every fiemap
    request, which may look more scalable. But it is not,
    since the work fiemap does is pretty small. So I think
    a single fiemap work is enough for most workloads.
    If we meet a workload where the single work is not
    enough, it will be pretty easy to turn fiemap_work
    into an array in struct pcs_cluster_core, making it
    per-node or even per-cpu (see the sketch after this
    paragraph). But I think it's not necessary, at least
    until main_job or completion_job become per-node or
    per-cpu (fiemap requests are a small subset of all
    requests going through main_job).
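
    A hypothetical sketch of that scaling (not part of this
    series; the per-lane lists and raw_smp_processor_id() as
    lane selector are assumptions):

    /* In struct pcs_cluster_core, instead of a single work: */
    struct work_struct fiemap_work[NR_CPUS];

    /* Per-lane queueing, mirroring queue_fiter_work() in the
     * patch below; each fiter_list[] entry needs INIT_LIST_HEAD()
     * at init time. */
    static struct list_head fiter_list[NR_CPUS];
    static DEFINE_SPINLOCK(fiter_lock);

    static void queue_fiter_work(struct fiemap_iterator *fiter)
    {
            struct pcs_cluster_core *cc = fiter->orig_ireq->cc;
            unsigned int lane = raw_smp_processor_id();
            bool was_empty;

            spin_lock(&fiter_lock);
            was_empty = list_empty(&fiter_list[lane]);
            list_add_tail(&fiter->list, &fiter_list[lane]);
            spin_unlock(&fiter_lock);

            /* Kick the lane's work only on 0->1 transition, as in
             * the single-work version. */
            if (was_empty)
                    queue_work(cc->wq, &cc->fiemap_work[lane]);
    }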
    
    The code at
    https://github.com/shekkbuilder/fiemap/blob/master/fiemap.c
    was taken as a base for the performance test and modified.
    The results are below.
    
    Test execution time on a file with 3 extents (the number
    of extents was chosen at random):
    
    Before: real 0m11.069s
    After:  real 0m9.112s
    
    The new version is ~17% faster; equivalently, the old
    one was ~21% slower than the new.
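
    For reference, the hot path of such a test is just repeated
    FS_IOC_FIEMAP ioctls on one file. A minimal standalone
    variant (an illustrative reconstruction, not the exact
    modified test; run it under time(1)) could look like:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/fs.h>
    #include <linux/fiemap.h>

    int main(int argc, char **argv)
    {
            struct fiemap *fm;
            int fd, i;

            if (argc != 2) {
                    fprintf(stderr, "usage: %s <file>\n", argv[0]);
                    return 1;
            }
            fd = open(argv[1], O_RDONLY);
            if (fd < 0) {
                    perror(argv[1]);
                    return 1;
            }
            /* Header plus room for 32 extent records. */
            fm = calloc(1, sizeof(*fm) + 32 * sizeof(struct fiemap_extent));
            if (!fm)
                    return 1;

            for (i = 0; i < 1000000; i++) {
                    memset(fm, 0, sizeof(*fm));
                    fm->fm_length = FIEMAP_MAX_OFFSET;
                    fm->fm_extent_count = 32;
                    if (ioctl(fd, FS_IOC_FIEMAP, fm) < 0) {
                            perror("FS_IOC_FIEMAP");
                            break;
                    }
            }
            free(fm);
            close(fd);
            return 0;
    }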
    
    Kirill Tkhai (7):
          fuse kio: Extract fiemap iteration from fiemap_worker() to separate function
          fuse kio: Move it variable from stack to struct fiemap_iterator
          fuse kio: Kill fiemap_worker() thread
          fuse kio: Move position advance in fiemap_process_one()
          fuse kio: Move fiter ireq iocount assignment
          fuse kio: Introduce fiemap_work
          fuse kio: Async queueing of fiemap from work
---
 fs/fuse/kio/pcs/pcs_cluster.c      | 38 +++++++++++++++++++++++++++++++++++++-
 fs/fuse/kio/pcs/pcs_cluster.h      |  2 ++
 fs/fuse/kio/pcs/pcs_cluster_core.c |  1 +
 fs/fuse/kio/pcs/pcs_req.h          |  1 +
 4 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/fs/fuse/kio/pcs/pcs_cluster.c b/fs/fuse/kio/pcs/pcs_cluster.c
index e182a34cd16c..cdd61b23dc2a 100644
--- a/fs/fuse/kio/pcs/pcs_cluster.c
+++ b/fs/fuse/kio/pcs/pcs_cluster.c
@@ -77,6 +77,7 @@ void pcs_sreq_complete(struct pcs_int_request *sreq)
 
 struct fiemap_iterator
 {
+	struct list_head	list;
 	struct pcs_int_request 	*orig_ireq;
 	wait_queue_head_t	wq;
 	char			*buffer;
@@ -88,6 +89,23 @@ struct fiemap_iterator
 	struct iov_iter		it;
 };
 
+static DEFINE_SPINLOCK(fiter_lock);
+static LIST_HEAD(fiter_list);
+
+static void queue_fiter_work(struct fiemap_iterator *fiter)
+{
+	struct pcs_cluster_core *cc = fiter->orig_ireq->cc;
+	bool was_empty;
+
+	spin_lock(&fiter_lock);
+	was_empty = list_empty(&fiter_list);
+	list_add_tail(&fiter->list, &fiter_list);
+	spin_unlock(&fiter_lock);
+
+	if (was_empty)
+		queue_work(cc->wq, &cc->fiemap_work);
+}
+
 static void fiemap_iter_done(struct pcs_int_request * ireq)
 {
 	struct fiemap_iterator * fiter = container_of(ireq, struct fiemap_iterator, ireq);
@@ -238,6 +256,24 @@ static void fiemap_process_one(struct fiemap_iterator *fiter)
 	ireq_complete(orig_ireq);
 }
 
+void fiemap_work_func(struct work_struct *w)
+{
+	struct fiemap_iterator *fiter;
+
+	spin_lock(&fiter_lock);
+	while (!list_empty(&fiter_list)) {
+		fiter = list_first_entry(&fiter_list,
+					 struct fiemap_iterator, list);
+		list_del_init(&fiter->list);
+		spin_unlock(&fiter_lock);
+
+		fiemap_process_one(fiter);
+
+		spin_lock(&fiter_lock);
+	}
+	spin_unlock(&fiter_lock);
+}
+
 static void process_ireq_fiemap(struct pcs_int_request *orig_ireq)
 {
 	struct pcs_dentry_info * di;
@@ -279,7 +315,7 @@ static void process_ireq_fiemap(struct pcs_int_request *orig_ireq)
 	fiter->apireq.size = 0;
 	fiter->apireq.pos = orig_ireq->apireq.req->pos;
 
-	fiemap_process_one(fiter);
+	queue_fiter_work(fiter);
 }
 
 void pcs_cc_process_ireq_chunk(struct pcs_int_request *ireq)
diff --git a/fs/fuse/kio/pcs/pcs_cluster.h b/fs/fuse/kio/pcs/pcs_cluster.h
index 191753ba316d..f4ec5ecf9571 100644
--- a/fs/fuse/kio/pcs/pcs_cluster.h
+++ b/fs/fuse/kio/pcs/pcs_cluster.h
@@ -46,6 +46,8 @@ int pcs_cluster_init(struct pcs_fuse_cluster *c, struct workqueue_struct *,
 		     PCS_NODE_ID_T *id);
 void pcs_cluster_fini(struct pcs_fuse_cluster *c);
 
+extern void fiemap_work_func(struct work_struct *w);
+
 static inline struct pcs_fuse_req *pcs_fuse_req_from_work(struct pcs_fuse_exec_ctx *ctx)
 {
 	return container_of(ctx, struct pcs_fuse_req, exec);
diff --git a/fs/fuse/kio/pcs/pcs_cluster_core.c b/fs/fuse/kio/pcs/pcs_cluster_core.c
index 4784f1d2915b..b99e01e5edfe 100644
--- a/fs/fuse/kio/pcs/pcs_cluster_core.c
+++ b/fs/fuse/kio/pcs/pcs_cluster_core.c
@@ -128,6 +128,7 @@ int pcs_cc_init(struct pcs_cluster_core *cc, struct workqueue_struct *wq,
 	INIT_LIST_HEAD(&cc->completion_queue); /* completion queue only for sanity */
 	INIT_WORK(&cc->main_job, cc_workqueue_handler);
 	INIT_WORK(&cc->completion_job, cc_completion_handler);
+	INIT_WORK(&cc->fiemap_work, fiemap_work_func);
 	cc->wq = wq;
 
 	pcs_csset_init(&cc->css);
diff --git a/fs/fuse/kio/pcs/pcs_req.h b/fs/fuse/kio/pcs/pcs_req.h
index fec6c1e1575c..e4b66d142734 100644
--- a/fs/fuse/kio/pcs/pcs_req.h
+++ b/fs/fuse/kio/pcs/pcs_req.h
@@ -197,6 +197,7 @@ struct pcs_cluster_core
 	struct list_head	completion_queue;/* Internal queue for ireqs to complete */
 	struct work_struct	main_job;
 	struct work_struct	completion_job;
+	struct work_struct	fiemap_work;
 
 	struct pcs_cs_set	css;		/* Table of all CSs */
 	struct pcs_map_set	maps;		/* Global map data */

