[Devel] [PATCH RHEL7 COMMIT] fs/fuse kio: add pending kio requests to kqueue

Konstantin Khorenko khorenko at virtuozzo.com
Tue May 21 19:02:18 MSK 2019


The commit is pushed to "branch-rh7-3.10.0-957.12.2.vz7.96.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-957.12.2.vz7.96.1
------>
commit 5276ee8173c90969e075830ffe88a178849f5f38
Author: Pavel Butsykin <pbutsykin at virtuozzo.com>
Date:   Tue May 21 19:02:16 2019 +0300

    fs/fuse kio: add pending kio requests to kqueue
    
    Pending kio requests are not added to the kqueue list and are therefore not
    tracked, which is a mistake. This patch fixes the mistake by adding pending
    requests to di->kq inside pcs_fuse_prep_rw(). It is also very important to
    be able to immediately interrupt pending kio requests and terminate them
    with an error in order to maintain synchronization with
    fuse_invalidate_files(). For this reason pcs_fuse_prep_rw() returns -EIO if
    FUSE_S_FAIL_IMMEDIATELY is set in ff->ff_state.
    
    Signed-off-by: Pavel Butsykin <pbutsykin at virtuozzo.com>
    
    =====================
    Patchset description:
    
    fix deadlock between synchronous reqs and fuse_invalidate_files
    
    One more deadlock with fuse_invalidate_files():
    
    [<ffffffff92ba7cc4>] __lock_page+0x74/0x90
    [<ffffffff92bb9a75>] invalidate_inode_pages2_range+0x445/0x470
    [<ffffffff92bb9ab7>] invalidate_inode_pages2+0x17/0x20
    [<ffffffffc034cde5>] fuse_invalidate_files+0x235/0x270 [fuse]
    [<ffffffffc033d3fb>] fuse_dev_do_write+0x7fb/0xe20 [fuse]
    [<ffffffffc033ddc1>] fuse_dev_write+0x71/0xa0 [fuse]
    [<ffffffff92c3c2e6>] do_sync_write+0x96/0xe0
    [<ffffffff92c3cdc0>] vfs_write+0xc0/0x1f0
    [<ffffffff92c3dbef>] SyS_write+0x7f/0xf0
    [<ffffffff9315589b>] system_call_fastpath+0x22/0x27
    
    This happened because a synchronous kio read request was not dropped by kill_requests:
    
    PID: 20684  TASK: ffff9543c2e71160  CPU: 5   COMMAND: "co_io"
    [ffff9547b0faf9f8] __schedule at ffffffff93148a9f
    [ffff9547b0fafa88] schedule at ffffffff93148fe9
    [ffff9547b0fafa98] kpcs_req_send at ffffffffc047dab5 [fuse_kio_pcs]
    [ffff9547b0fafb08] __fuse_request_send at ffffffffc033a987 [fuse]
    [ffff9547b0fafb40] fuse_request_check_and_send at ffffffffc033e097 [fuse]
    [ffff9547b0fafb50] fuse_send_read at ffffffffc03463ec [fuse]
    [ffff9547b0fafb90] __fuse_readpage at ffffffffc03475ad [fuse]
    [ffff9547b0fafc40] fuse_readpage at ffffffffc0347a3c [fuse]
    [ffff9547b0fafca0] generic_file_read_iter at ffffffff92baa186
    [ffff9547b0fafd58] generic_file_aio_read at ffffffff92baa5c5
    [ffff9547b0fafdc0] fuse_file_aio_read at ffffffffc0343788 [fuse]
    [ffff9547b0fafdf0] do_sync_read at ffffffff92c3c206
    [ffff9547b0fafed0] vfs_read at ffffffff92c3cc2f
    [ffff9547b0faff00] sys_pread64 at ffffffff92c3dcf2
    [ffff9547b0faff50] system_call_fastpath at ffffffff9315589b
    
     struct pcs_fuse_req {
     req = {
        list = {
          next = 0xffff95479374eb18,
          prev = 0xffff9547acf76e00
        },
        ...
        page_cache = 1,
        page_needs_release = 0,
        killed = 0,
        ...
        ff = 0x0,
        io_inode = 0xffff95445da0b740,
    
    The fuse_file pointer was not initialized in req->ff, which means we can't
    check ff->ff_state inside pcs_fuse_submit(), despite the fact that this
    request has locked pages. It was assumed that requests with an empty req->ff
    don't have pages and therefore don't need to be synchronized with
    fuse_invalidate_files(). However, synchronous requests don't store ff in
    req->ff; they pass it as a parameter to fuse_request_check_and_send().
    
    This patchset fixes it.
    
    https://pmc.acronis.com/browse/VSTOR-23034
    Reviewed-by: Kirill Tkhai <ktkhai at virtuozzo.com>
    
    Pavel Butsykin (4):
      fs/fuse kio: forward fuse_file pointer to kpcs_req_send()
      fs/fuse kio: add pending kio requests to kqueue
      fs/fuse kio: style fix in pcs_fuse_submit()
      fs/fuse kio: keep fuse_file for requests waiting for shrink
---
 fs/fuse/kio/pcs/fuse_io.c          |  5 +++
 fs/fuse/kio/pcs/pcs_fuse_kdirect.c | 82 +++++++++++++++++++++++++-------------
 2 files changed, 59 insertions(+), 28 deletions(-)

diff --git a/fs/fuse/kio/pcs/fuse_io.c b/fs/fuse/kio/pcs/fuse_io.c
index 219f4e3423af..ed5926eb5d4d 100644
--- a/fs/fuse/kio/pcs/fuse_io.c
+++ b/fs/fuse/kio/pcs/fuse_io.c
@@ -253,10 +253,15 @@ void pcs_fuse_prep_io(struct pcs_fuse_req *r, unsigned short type, off_t offset,
 static void falloc_req_complete(struct pcs_int_request *ireq)
 {
 	struct pcs_fuse_req * r = ireq->completion_data.priv;
+	struct pcs_dentry_info *di = get_pcs_inode(r->req.io_inode);
 	struct pcs_fuse_cluster *pfc = cl_from_req(r);
 
 	BUG_ON(ireq->type != PCS_IREQ_NOOP);
 
+	spin_lock(&di->kq_lock);
+	list_del_init(&r->req.list);
+	spin_unlock(&di->kq_lock);
+
 	DTRACE("do fuse_request_end req:%p op:%d err:%d\n", &r->req, r->req.in.h.opcode, r->req.out.h.error);
 	fuse_stat_account(pfc->fc, KFUSE_OP_FALLOCATE, ktime_sub(ktime_get(), ireq->ts));
 	inode_dio_end(r->req.io_inode);
diff --git a/fs/fuse/kio/pcs/pcs_fuse_kdirect.c b/fs/fuse/kio/pcs/pcs_fuse_kdirect.c
index a4ab56a1fd88..245fdee569c2 100644
--- a/fs/fuse/kio/pcs/pcs_fuse_kdirect.c
+++ b/fs/fuse/kio/pcs/pcs_fuse_kdirect.c
@@ -823,6 +823,19 @@ static void wait_shrink(struct pcs_fuse_req *r, struct pcs_dentry_info *di)
 	list_add_tail(&r->exec.ireq.list, &di->size.queue);
 }
 
+static bool kqueue_insert(struct pcs_dentry_info *di, struct fuse_file *ff,
+			  struct fuse_req *req)
+{
+	spin_lock(&di->kq_lock);
+	if (ff && test_bit(FUSE_S_FAIL_IMMEDIATELY, &ff->ff_state)) {
+		spin_unlock(&di->kq_lock);
+		return false;
+	}
+	list_add_tail(&req->list, &di->kq);
+	spin_unlock(&di->kq_lock);
+	return true;
+}
+
 /*
  * Check i size boundary and deffer request if necessary
  * Ret code
@@ -830,7 +843,7 @@ static void wait_shrink(struct pcs_fuse_req *r, struct pcs_dentry_info *di)
  * -1: should fail request
  * 1: request placed to pended queue
 */
-static int pcs_fuse_prep_rw(struct pcs_fuse_req *r)
+static int pcs_fuse_prep_rw(struct pcs_fuse_req *r, struct fuse_file *ff)
 {
 	struct fuse_inode *fi = get_fuse_inode(r->req.io_inode);
 	struct pcs_dentry_info *di = pcs_inode_from_fuse(fi);
@@ -840,8 +853,8 @@ static int pcs_fuse_prep_rw(struct pcs_fuse_req *r)
 	/* Deffer all requests if shrink requested to prevent livelock */
 	if (di->size.op == PCS_SIZE_SHRINK) {
 		wait_shrink(r, di);
-		spin_unlock(&di->lock);
-		return 1;
+		ret = 1;
+		goto out;
 	}
 	if (r->req.in.h.opcode == FUSE_READ) {
 		size_t size;
@@ -851,8 +864,8 @@ static int pcs_fuse_prep_rw(struct pcs_fuse_req *r)
 		if (in->offset + in->size > di->fileinfo.attr.size) {
 			if (in->offset >= di->fileinfo.attr.size) {
 				r->req.out.args[0].size = 0;
-				spin_unlock(&di->lock);
-				return -1;
+				ret = -EPERM;
+				goto out;
 			}
 			size = di->fileinfo.attr.size - in->offset;
 		}
@@ -861,6 +874,10 @@ static int pcs_fuse_prep_rw(struct pcs_fuse_req *r)
 		struct fuse_write_in *in = &r->req.misc.write.in;
 
 		if (in->offset + in->size > di->fileinfo.attr.size) {
+			if (!kqueue_insert(di, ff, &r->req)) {
+				ret = -EIO;
+				goto out;
+			}
 			wait_grow(r, di, in->offset + in->size);
 			ret = 1;
 		}
@@ -876,8 +893,8 @@ static int pcs_fuse_prep_rw(struct pcs_fuse_req *r)
 		size = in->fm_length;
 		if (in->fm_start + size > di->fileinfo.attr.size) {
 			if (in->fm_start >= di->fileinfo.attr.size) {
-				spin_unlock(&di->lock);
-				return -1;
+				ret = -EPERM;
+				goto out;
 			}
 			size = di->fileinfo.attr.size - in->fm_start;
 		}
@@ -888,6 +905,10 @@ static int pcs_fuse_prep_rw(struct pcs_fuse_req *r)
 		struct fuse_fallocate_in const *in = r->req.in.args[0].value;
 
 		if (in->offset + in->length > di->fileinfo.attr.size) {
+			if (!kqueue_insert(di, ff, &r->req)) {
+				ret = -EIO;
+				goto out;
+			}
 			wait_grow(r, di, in->offset + in->length);
 			ret = 1;
 		}
@@ -900,14 +921,14 @@ static int pcs_fuse_prep_rw(struct pcs_fuse_req *r)
 			if (ret) {
 				pcs_fuse_prep_fallocate(r);
 			} else {
-				spin_unlock(&di->lock);
-				return -1;
+				ret = -EPERM;
+				goto out;
 			}
 		}
 	}
 	inode_dio_begin(r->req.io_inode);
+out:
 	spin_unlock(&di->lock);
-
 	return ret;
 }
 
@@ -932,12 +953,15 @@ static void pcs_fuse_submit(struct pcs_fuse_cluster *pfc, struct fuse_req *req,
 	switch (r->req.in.h.opcode) {
 	case FUSE_WRITE:
 	case FUSE_READ:
-		ret = pcs_fuse_prep_rw(r);
-		if (!ret)
+		ret = pcs_fuse_prep_rw(r, ff);
+		if (likely(!ret))
 			goto submit;
 		if (ret > 0)
-			/* Pended, nothing to do. */
-			return;
+			return; /* Pended, nothing to do. */
+		if (ret != -EPERM) {
+			req->out.h.error = ret;
+			goto error;
+		}
 		break;
 	case FUSE_FALLOCATE: {
 		struct fuse_fallocate_in *inarg = (void*) req->in.args[0].value;
@@ -966,12 +990,15 @@ static void pcs_fuse_submit(struct pcs_fuse_cluster *pfc, struct fuse_req *req,
 				inarg->length = di->fileinfo.attr.size - inarg->offset;
 		}
 
-		ret = pcs_fuse_prep_rw(r);
-		if (!ret)
+		ret = pcs_fuse_prep_rw(r, ff);
+		if (likely(!ret))
 			goto submit;
 		if (ret > 0)
-			/* Pended, nothing to do. */
-			return;
+			return; /* Pended, nothing to do. */
+		if (ret != -EPERM) {
+			req->out.h.error = ret;
+			goto error;
+		}
 		break;
 	}
 	case FUSE_FSYNC:
@@ -984,12 +1011,15 @@ static void pcs_fuse_submit(struct pcs_fuse_cluster *pfc, struct fuse_req *req,
 			goto error;
 		}
 
-		ret = pcs_fuse_prep_rw(r);
-		if (!ret)
+		ret = pcs_fuse_prep_rw(r, ff);
+		if (likely(!ret))
 			goto submit;
 		if (ret > 0)
-			/* Pended, nothing to do. */
-			return;
+			return; /* Pended, nothing to do. */
+		if (ret != -EPERM) {
+			req->out.h.error = ret;
+			goto error;
+		}
 		break;
 	}
 	r->req.out.h.error = 0;
@@ -1004,14 +1034,10 @@ static void pcs_fuse_submit(struct pcs_fuse_cluster *pfc, struct fuse_req *req,
 	return;
 
 submit:
-	spin_lock(&di->kq_lock);
-	if (ff && test_bit(FUSE_S_FAIL_IMMEDIATELY, &ff->ff_state)) {
-		spin_unlock(&di->kq_lock);
+	if (!kqueue_insert(di, ff, req)) {
 		req->out.h.error = -EIO;
 		goto error;
 	}
-	list_add_tail(&req->list, &di->kq);
-	spin_unlock(&di->kq_lock);
 
 	if (async)
 		pcs_cc_submit(ireq->cc, ireq);
@@ -1079,7 +1105,7 @@ static void _pcs_shrink_end(struct fuse_conn *fc, struct fuse_req *req)
 
 		TRACE("resubmit %p\n", &r->req);
 		list_del_init(&ireq->list);
-		pcs_fuse_submit(pfc, &r->req, NULL, true, false);
+		pcs_fuse_submit(pfc, &r->req, r->req.ff, true, false);
 	}
 }
 



More information about the Devel mailing list