[Devel] [PATCH RHEL7 COMMIT] fuse kio: Introduce fiemap_work
Konstantin Khorenko
khorenko at virtuozzo.com
Wed Sep 5 13:06:37 MSK 2018
The commit is pushed to "branch-rh7-3.10.0-862.11.6.vz7.71.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-862.11.6.vz7.71.8
------>
commit b00f4be216f806bd1f94783a2793fa1759ab3b4b
Author: Kirill Tkhai <ktkhai at virtuozzo.com>
Date: Wed Sep 5 13:06:37 2018 +0300
fuse kio: Introduce fiemap_work
Make fiemap iterations async again, and move it
to a new work. It looks like it's enough to have
a single fiemap work for now, since we have
only a single work each for main_job and completion_job
(fiemap is a subset of all types of request).
If we find that it is not enough, we may
simply scale it in the future (to use 2 fiemap
works, or to introduce NR_CPUS works for fiemap).
But to me it looks like 1 work is enough, and
for the standard workflow it is more efficient
than creating a kthread every time, like we did
a couple of patches ago.
The next patch makes fiemap queueing completely async.
Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
Acked-by: Alexey Kuznetsov <kuznet at virtuozzo.com>
=====================
Patchset description:
Optimize fiemap ioctl
https://jira.sw.ru/browse/HCI-90
Summary:
This patch set optimizes fiemap ioctl by removing
kthread creation. Instead, a static work
is used, so we save some time on copy_process().
Fiemap does not require a separate kthread, since
most of the time the kthread is just waiting
for fiter->ireq.iocount to become 0. Instead,
the generic kthread could queue other fiemap
requests during this time. This is what the patch set
introduces.
Note that we had a kthread for every fiemap request,
and this may look more scalable. But this is not true,
since the work fiemap does is pretty small. So,
I think for most workloads the single fiemap work
is enough. If we meet a workload where the single
work is not enough, it will be pretty easy to make
fiemap_work an array in struct pcs_cluster_core
(to make it per-node or even per-cpu). But I think
it's not necessary, at least until main_job or completion_job
are per-node or per-cpu (fiemap requests are a small subset
of all requests going through main_job).
The code at https://github.com/shekkbuilder/fiemap/blob/master/fiemap.c
was taken as the basis for the performance test and modified.
Below are the results and the test's code.
Test execution time on a 3-extent file (an arbitrarily chosen
number of extents):
Before: real 0m11.069s
After: real 0m9.112s
It became 17% faster (put the other way, it was 21% slower before).
Kirill Tkhai (7):
fuse kio: Extract fiemap iteration from fiemap_worker() to separate function
fuse kio: Move it variable from stack to struct fiemap_iterator
fuse kio: Kill fiemap_worker() thread
fuse kio: Move position advance in fiemap_process_one()
fuse kio: Move fiter ireq iocount assignment
fuse kio: Introduce fiemap_work
fuse kio: Async queueing of fiemap from work
---
fs/fuse/kio/pcs/pcs_cluster.c | 38 +++++++++++++++++++++++++++++++++++++-
fs/fuse/kio/pcs/pcs_cluster.h | 2 ++
fs/fuse/kio/pcs/pcs_cluster_core.c | 1 +
fs/fuse/kio/pcs/pcs_req.h | 1 +
4 files changed, 41 insertions(+), 1 deletion(-)
diff --git a/fs/fuse/kio/pcs/pcs_cluster.c b/fs/fuse/kio/pcs/pcs_cluster.c
index e182a34cd16c..cdd61b23dc2a 100644
--- a/fs/fuse/kio/pcs/pcs_cluster.c
+++ b/fs/fuse/kio/pcs/pcs_cluster.c
@@ -77,6 +77,7 @@ void pcs_sreq_complete(struct pcs_int_request *sreq)
struct fiemap_iterator
{
+ struct list_head list;
struct pcs_int_request *orig_ireq;
wait_queue_head_t wq;
char *buffer;
@@ -88,6 +89,23 @@ struct fiemap_iterator
struct iov_iter it;
};
+static DEFINE_SPINLOCK(fiter_lock);
+static LIST_HEAD(fiter_list);
+
+static void queue_fiter_work(struct fiemap_iterator *fiter)
+{
+ struct pcs_cluster_core *cc = fiter->orig_ireq->cc;
+ bool was_empty;
+
+ spin_lock(&fiter_lock);
+ was_empty = list_empty(&fiter_list);
+ list_add_tail(&fiter->list, &fiter_list);
+ spin_unlock(&fiter_lock);
+
+ if (was_empty)
+ queue_work(cc->wq, &cc->fiemap_work);
+}
+
static void fiemap_iter_done(struct pcs_int_request * ireq)
{
struct fiemap_iterator * fiter = container_of(ireq, struct fiemap_iterator, ireq);
@@ -238,6 +256,24 @@ static void fiemap_process_one(struct fiemap_iterator *fiter)
ireq_complete(orig_ireq);
}
+void fiemap_work_func(struct work_struct *w)
+{
+ struct fiemap_iterator *fiter;
+
+ spin_lock(&fiter_lock);
+ while (!list_empty(&fiter_list)) {
+ fiter = list_first_entry(&fiter_list,
+ struct fiemap_iterator, list);
+ list_del_init(&fiter->list);
+ spin_unlock(&fiter_lock);
+
+ fiemap_process_one(fiter);
+
+ spin_lock(&fiter_lock);
+ }
+ spin_unlock(&fiter_lock);
+}
+
static void process_ireq_fiemap(struct pcs_int_request *orig_ireq)
{
struct pcs_dentry_info * di;
@@ -279,7 +315,7 @@ static void process_ireq_fiemap(struct pcs_int_request *orig_ireq)
fiter->apireq.size = 0;
fiter->apireq.pos = orig_ireq->apireq.req->pos;
- fiemap_process_one(fiter);
+ queue_fiter_work(fiter);
}
void pcs_cc_process_ireq_chunk(struct pcs_int_request *ireq)
diff --git a/fs/fuse/kio/pcs/pcs_cluster.h b/fs/fuse/kio/pcs/pcs_cluster.h
index 191753ba316d..f4ec5ecf9571 100644
--- a/fs/fuse/kio/pcs/pcs_cluster.h
+++ b/fs/fuse/kio/pcs/pcs_cluster.h
@@ -46,6 +46,8 @@ int pcs_cluster_init(struct pcs_fuse_cluster *c, struct workqueue_struct *,
PCS_NODE_ID_T *id);
void pcs_cluster_fini(struct pcs_fuse_cluster *c);
+extern void fiemap_work_func(struct work_struct *w);
+
static inline struct pcs_fuse_req *pcs_fuse_req_from_work(struct pcs_fuse_exec_ctx *ctx)
{
return container_of(ctx, struct pcs_fuse_req, exec);
diff --git a/fs/fuse/kio/pcs/pcs_cluster_core.c b/fs/fuse/kio/pcs/pcs_cluster_core.c
index 4784f1d2915b..b99e01e5edfe 100644
--- a/fs/fuse/kio/pcs/pcs_cluster_core.c
+++ b/fs/fuse/kio/pcs/pcs_cluster_core.c
@@ -128,6 +128,7 @@ int pcs_cc_init(struct pcs_cluster_core *cc, struct workqueue_struct *wq,
INIT_LIST_HEAD(&cc->completion_queue); /* completion queue only for sanity */
INIT_WORK(&cc->main_job, cc_workqueue_handler);
INIT_WORK(&cc->completion_job, cc_completion_handler);
+ INIT_WORK(&cc->fiemap_work, fiemap_work_func);
cc->wq = wq;
pcs_csset_init(&cc->css);
diff --git a/fs/fuse/kio/pcs/pcs_req.h b/fs/fuse/kio/pcs/pcs_req.h
index fec6c1e1575c..e4b66d142734 100644
--- a/fs/fuse/kio/pcs/pcs_req.h
+++ b/fs/fuse/kio/pcs/pcs_req.h
@@ -197,6 +197,7 @@ struct pcs_cluster_core
struct list_head completion_queue;/* Internal queue for ireqs to complete */
struct work_struct main_job;
struct work_struct completion_job;
+ struct work_struct fiemap_work;
struct pcs_cs_set css; /* Table of all CSs */
struct pcs_map_set maps; /* Global map data */
More information about the Devel
mailing list