[Devel] [PATCH RHEL7 COMMIT] fuse kio: Async queueing of fiemap from work
Konstantin Khorenko
khorenko at virtuozzo.com
Wed Sep 5 13:06:38 MSK 2018
The commit is pushed to "branch-rh7-3.10.0-862.11.6.vz7.71.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-862.11.6.vz7.71.8
------>
commit cf2b74b4c20c00e810cf31bca135c24f7463bddc
Author: Kirill Tkhai <ktkhai at virtuozzo.com>
Date: Wed Sep 5 13:06:38 2018 +0300
fuse kio: Async queueing of fiemap from work
There is no need for the work to wait until a chunk is complete;
it can queue further chunks during that time instead. So the scheme
becomes:
1) queue all fiters from the list;
2) wait until a new fiter appears in the list.
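A rough sketch of the work loop under this scheme (illustrative only,
not the actual code: the list, lock and work function names below are
assumptions, and the fiemap_iterator declarations from pcs_cluster.c
are assumed; only fiemap_process_one() and queue_fiter_work() exist in
the patch):

static void fiemap_work_func(struct work_struct *w)
{
	struct fiemap_iterator *fiter;

	spin_lock(&fiter_lock);
	while (!list_empty(&fiter_list)) {
		fiter = list_first_entry(&fiter_list,
					 struct fiemap_iterator, list);
		list_del_init(&fiter->list);
		spin_unlock(&fiter_lock);

		/*
		 * Queue the next chunk of this fiter and return without
		 * waiting; when the chunk completes, the completion callback
		 * calls queue_fiter_work() to put the fiter back on the list
		 * and re-arm the work.
		 */
		fiemap_process_one(fiter);

		spin_lock(&fiter_lock);
	}
	spin_unlock(&fiter_lock);
}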
Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
Acked-by: Alexey Kuznetsov <kuznet at virtuozzo.com>
=====================
Patchset description:
Optimize fiemap ioctl
https://jira.sw.ru/browse/HCI-90
Summary:
This patch set optimizes the fiemap ioctl by removing
the kthread creation. A static work is used instead,
so we save the time spent in copy_process().
Fiemap does not require a separate kthread, since
most of the time the kthread just waits for
fiter->ireq.iocount to become 0. Instead, the generic
work can queue further fiemap requests during that
time. This is what the patch set introduces.
Note that we used to have a kthread for every fiemap request,
which may look more scalable. But this is not true,
since the work fiemap does is pretty small. So
I think a single fiemap work is enough for most
workloads. If we meet a workload where the single
work is not enough, it will be pretty easy to make
fiemap_work an array in struct pcs_cluster_core
(to make it per-node or even per-cpu). But I think
it is not necessary at least until main_job or completion_job
become per-node or per-cpu (fiemap requests are a small subset
of all requests going through main_job).
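A purely hypothetical sketch of that possible extension (not part of
this patch set; the struct, field and helper names are made up for
illustration):

#include <linux/workqueue.h>
#include <linux/numa.h>
#include <linux/topology.h>

/* One fiemap work per NUMA node inside the cluster core. */
struct pcs_cluster_core_sketch {
	struct work_struct fiemap_work[MAX_NUMNODES];
};

static inline void queue_local_fiemap_work(struct pcs_cluster_core_sketch *cc)
{
	/* Queue to the work of the node the request was submitted from. */
	queue_work(system_wq, &cc->fiemap_work[numa_node_id()]);
}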
The code from https://github.com/shekkbuilder/fiemap/blob/master/fiemap.c
was taken as a base for the performance test and modified.
The results and the test's code follow.
Time of test execution on a file with 3 extents (just a randomly
chosen number of extents):
Before: real 0m11.069s
After:  real 0m9.112s
The new variant is ~17% faster (i.e. the old one was ~21% slower).
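For reference, a minimal sketch of the kind of FIEMAP loop such a test
runs (illustrative only, this is not the actual modified test; the file
name and iteration count are arbitrary):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : "testfile";
	size_t len = sizeof(struct fiemap) + 32 * sizeof(struct fiemap_extent);
	struct fiemap *fm = malloc(len);
	int fd = open(path, O_RDONLY);
	int i;

	if (fd < 0 || !fm)
		return 1;

	/*
	 * Repeat the ioctl many times so that the per-request setup cost
	 * (previously a kthread creation) dominates the runtime.
	 */
	for (i = 0; i < 100000; i++) {
		memset(fm, 0, len);
		fm->fm_length = FIEMAP_MAX_OFFSET;
		fm->fm_extent_count = 32;
		if (ioctl(fd, FS_IOC_FIEMAP, fm) < 0) {
			perror("FS_IOC_FIEMAP");
			break;
		}
	}

	free(fm);
	close(fd);
	return 0;
}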
Kirill Tkhai (7):
fuse kio: Extract fiemap iteration from fiemap_worker() to separate function
fuse kio: Move it variable from stack to struct fiemap_iterator
fuse kio: Kill fiemap_worker() thread
fuse kio: Move position advance in fiemap_process_one()
fuse kio: Move fiter ireq iocount assignment
fuse kio: Introduce fiemap_work
fuse kio: Async queueing of fiemap from work
---
fs/fuse/kio/pcs/pcs_cluster.c | 101 ++++++++++++++++++++----------------------
1 file changed, 49 insertions(+), 52 deletions(-)
diff --git a/fs/fuse/kio/pcs/pcs_cluster.c b/fs/fuse/kio/pcs/pcs_cluster.c
index cdd61b23dc2a..ee66adda3c2d 100644
--- a/fs/fuse/kio/pcs/pcs_cluster.c
+++ b/fs/fuse/kio/pcs/pcs_cluster.c
@@ -110,7 +110,7 @@ static void fiemap_iter_done(struct pcs_int_request * ireq)
{
struct fiemap_iterator * fiter = container_of(ireq, struct fiemap_iterator, ireq);
- wake_up(&fiter->wq);
+ queue_fiter_work(fiter);
}
static void fiemap_get_iter(void * datasource, unsigned int offset, struct iov_iter *it)
@@ -193,63 +193,60 @@ static void fiemap_process_one(struct fiemap_iterator *fiter)
pos = fiter->apireq.pos;
end = orig_ireq->apireq.req->pos + orig_ireq->apireq.req->size;
- while (1) {
- /*
- * We reuse zeroed fiter->apireq.size to detect first
- * iteration of the fiter. In this case we do not have
- * completed extents and just skip this business.
- */
- if (fiter->apireq.size != 0) {
- if (pcs_if_error(&fiter->ireq.error)) {
- fiter->orig_ireq->error = fiter->ireq.error;
- goto out;
- }
- if (fiter->ireq.apireq.aux)
- xfer_fiemap_extents(fiter, pos, fiter->buffer,
- fiter->ireq.apireq.aux);
- pos += fiter->apireq.size;
- }
-
- if (pos >= end)
- goto out;
- if (fiter->fiemap_max && *fiter->mapped >= fiter->fiemap_max)
- break;
-
- fiter->apireq.pos = pos;
- fiter->apireq.size = end - pos;
- fiter->ireq.ts = ktime_get();
-
- sreq = ireq_alloc(di);
- if (!sreq) {
- pcs_set_local_error(&orig_ireq->error, PCS_ERR_NOMEM);
+ /*
+ * We reuse zeroed fiter->apireq.size to detect first
+ * iteration of the fiter. In this case we do not have
+ * completed extents and just skip this business.
+ */
+ if (fiter->apireq.size != 0) {
+ /* Xfer previous chunk and advance pos */
+ if (pcs_if_error(&fiter->ireq.error)) {
+ fiter->orig_ireq->error = fiter->ireq.error;
goto out;
}
+ if (fiter->ireq.apireq.aux)
+ xfer_fiemap_extents(fiter, pos, fiter->buffer,
+ fiter->ireq.apireq.aux);
+ pos += fiter->apireq.size;
+ }
- sreq->dentry = di;
- sreq->type = PCS_IREQ_IOCHUNK;
- INIT_LIST_HEAD(&sreq->tok_list);
- sreq->tok_reserved = 0;
- sreq->tok_serno = 0;
- sreq->iochunk.map = NULL;
- sreq->iochunk.flow = pcs_flow_record(&di->mapping.ftab, 0, pos, end-pos, &di->cluster->maps.ftab);
- sreq->iochunk.cmd = PCS_REQ_T_FIEMAP;
- sreq->iochunk.cs_index = 0;
- sreq->iochunk.chunk = pos;
- sreq->iochunk.offset = 0;
- sreq->iochunk.dio_offset = 0;
- sreq->iochunk.size = end - pos;
- sreq->iochunk.csl = NULL;
- sreq->iochunk.banned_cs.val = 0;
- sreq->iochunk.msg.destructor = NULL;
- sreq->iochunk.msg.rpc = NULL;
-
- pcs_sreq_attach(sreq, &fiter->ireq);
- sreq->complete_cb = pcs_sreq_complete;
+ if (pos >= end)
+ goto out;
+ if (fiter->fiemap_max && *fiter->mapped >= fiter->fiemap_max)
+ goto out;
- pcs_cc_process_ireq_chunk(sreq);
+ /* Queue next chunk */
+ fiter->apireq.pos = pos;
+ fiter->apireq.size = end - pos;
+ fiter->ireq.ts = ktime_get();
- wait_event(fiter->wq, atomic_read(&fiter->ireq.iocount) == 0);
+ sreq = ireq_alloc(di);
+ if (!sreq) {
+ pcs_set_local_error(&orig_ireq->error, PCS_ERR_NOMEM);
+ goto out;
}
+ sreq->dentry = di;
+ sreq->type = PCS_IREQ_IOCHUNK;
+ INIT_LIST_HEAD(&sreq->tok_list);
+ sreq->tok_reserved = 0;
+ sreq->tok_serno = 0;
+ sreq->iochunk.map = NULL;
+ sreq->iochunk.flow = pcs_flow_record(&di->mapping.ftab, 0, pos, end-pos, &di->cluster->maps.ftab);
+ sreq->iochunk.cmd = PCS_REQ_T_FIEMAP;
+ sreq->iochunk.cs_index = 0;
+ sreq->iochunk.chunk = pos;
+ sreq->iochunk.offset = 0;
+ sreq->iochunk.dio_offset = 0;
+ sreq->iochunk.size = end - pos;
+ sreq->iochunk.csl = NULL;
+ sreq->iochunk.banned_cs.val = 0;
+ sreq->iochunk.msg.destructor = NULL;
+ sreq->iochunk.msg.rpc = NULL;
+
+ pcs_sreq_attach(sreq, &fiter->ireq);
+ sreq->complete_cb = pcs_sreq_complete;
+ pcs_cc_process_ireq_chunk(sreq);
+ return;
out:
kvfree(fiter->buffer);
kfree(fiter);