[Devel] [PATCH RHEL7 COMMIT] fs/fuse kio: add pending kio requests to kqueue
Konstantin Khorenko
khorenko at virtuozzo.com
Tue May 21 19:02:18 MSK 2019
The commit is pushed to "branch-rh7-3.10.0-957.12.2.vz7.96.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-957.12.2.vz7.96.1
------>
commit 5276ee8173c90969e075830ffe88a178849f5f38
Author: Pavel Butsykin <pbutsykin at virtuozzo.com>
Date: Tue May 21 19:02:16 2019 +0300
fs/fuse kio: add pending kio requests to kqueue
Pending kio requests are not added to the kqueue list and are therefore not
tracked, which is of course a mistake. This patch fixes the mistake, making it
possible to add pending requests to di->kq inside pcs_fuse_prep_rw(). It's also
very important to be able to immediately interrupt pending kio requests and
terminate them with an error in order to maintain synchronization with
fuse_invalidate_files(). For this reason pcs_fuse_prep_rw() will return -EIO if
the FUSE_S_FAIL_IMMEDIATELY bit is set in ff->ff_state.
Signed-off-by: Pavel Butsykin <pbutsykin at virtuozzo.com>
=====================
Patchset description:
fix deadlock between synchronous reqs and fuse_invalidate_files
One more deadlock with fuse_invalidate_files():
[<ffffffff92ba7cc4>] __lock_page+0x74/0x90
[<ffffffff92bb9a75>] invalidate_inode_pages2_range+0x445/0x470
[<ffffffff92bb9ab7>] invalidate_inode_pages2+0x17/0x20
[<ffffffffc034cde5>] fuse_invalidate_files+0x235/0x270 [fuse]
[<ffffffffc033d3fb>] fuse_dev_do_write+0x7fb/0xe20 [fuse]
[<ffffffffc033ddc1>] fuse_dev_write+0x71/0xa0 [fuse]
[<ffffffff92c3c2e6>] do_sync_write+0x96/0xe0
[<ffffffff92c3cdc0>] vfs_write+0xc0/0x1f0
[<ffffffff92c3dbef>] SyS_write+0x7f/0xf0
[<ffffffff9315589b>] system_call_fastpath+0x22/0x27
This happened because a synchronous kio read request was not dropped by kill_requests:
PID: 20684 TASK: ffff9543c2e71160 CPU: 5 COMMAND: "co_io"
[ffff9547b0faf9f8] __schedule at ffffffff93148a9f
[ffff9547b0fafa88] schedule at ffffffff93148fe9
[ffff9547b0fafa98] kpcs_req_send at ffffffffc047dab5 [fuse_kio_pcs]
[ffff9547b0fafb08] __fuse_request_send at ffffffffc033a987 [fuse]
[ffff9547b0fafb40] fuse_request_check_and_send at ffffffffc033e097 [fuse]
[ffff9547b0fafb50] fuse_send_read at ffffffffc03463ec [fuse]
[ffff9547b0fafb90] __fuse_readpage at ffffffffc03475ad [fuse]
[ffff9547b0fafc40] fuse_readpage at ffffffffc0347a3c [fuse]
[ffff9547b0fafca0] generic_file_read_iter at ffffffff92baa186
[ffff9547b0fafd58] generic_file_aio_read at ffffffff92baa5c5
[ffff9547b0fafdc0] fuse_file_aio_read at ffffffffc0343788 [fuse]
[ffff9547b0fafdf0] do_sync_read at ffffffff92c3c206
[ffff9547b0fafed0] vfs_read at ffffffff92c3cc2f
[ffff9547b0faff00] sys_pread64 at ffffffff92c3dcf2
[ffff9547b0faff50] system_call_fastpath at ffffffff9315589b
struct pcs_fuse_req {
req = {
list = {
next = 0xffff95479374eb18,
prev = 0xffff9547acf76e00
},
...
page_cache = 1,
page_needs_release = 0,
killed = 0,
...
ff = 0x0,
io_inode = 0xffff95445da0b740,
The fuse_file pointer was not initialized in req->ff, which means we can't check
ff->ff_state inside pcs_fuse_submit(), despite the fact that this request has
locked pages. It was assumed that requests with an empty req->ff don't have pages
and therefore need not be synchronized with fuse_invalidate_files().
However, synchronous requests don't store ff in req->ff; they pass it as a
parameter to fuse_request_check_and_send() instead.
This patchset fixes it.
https://pmc.acronis.com/browse/VSTOR-23034
Reviewed-by: Kirill Tkhai <ktkhai at virtuozzo.com>
Pavel Butsykin (4):
fs/fuse kio: forward fuse_file pointer to kpcs_req_send()
fs/fuse kio: add pending kio requests to kqueue
fs/fuse kio: style fix in pcs_fuse_submit()
fs/fuse kio: keep fuse_file for requests waiting for shrink
---
fs/fuse/kio/pcs/fuse_io.c | 5 +++
fs/fuse/kio/pcs/pcs_fuse_kdirect.c | 82 +++++++++++++++++++++++++-------------
2 files changed, 59 insertions(+), 28 deletions(-)
diff --git a/fs/fuse/kio/pcs/fuse_io.c b/fs/fuse/kio/pcs/fuse_io.c
index 219f4e3423af..ed5926eb5d4d 100644
--- a/fs/fuse/kio/pcs/fuse_io.c
+++ b/fs/fuse/kio/pcs/fuse_io.c
@@ -253,10 +253,15 @@ void pcs_fuse_prep_io(struct pcs_fuse_req *r, unsigned short type, off_t offset,
static void falloc_req_complete(struct pcs_int_request *ireq)
{
struct pcs_fuse_req * r = ireq->completion_data.priv;
+ struct pcs_dentry_info *di = get_pcs_inode(r->req.io_inode);
struct pcs_fuse_cluster *pfc = cl_from_req(r);
BUG_ON(ireq->type != PCS_IREQ_NOOP);
+ spin_lock(&di->kq_lock);
+ list_del_init(&r->req.list);
+ spin_unlock(&di->kq_lock);
+
DTRACE("do fuse_request_end req:%p op:%d err:%d\n", &r->req, r->req.in.h.opcode, r->req.out.h.error);
fuse_stat_account(pfc->fc, KFUSE_OP_FALLOCATE, ktime_sub(ktime_get(), ireq->ts));
inode_dio_end(r->req.io_inode);
diff --git a/fs/fuse/kio/pcs/pcs_fuse_kdirect.c b/fs/fuse/kio/pcs/pcs_fuse_kdirect.c
index a4ab56a1fd88..245fdee569c2 100644
--- a/fs/fuse/kio/pcs/pcs_fuse_kdirect.c
+++ b/fs/fuse/kio/pcs/pcs_fuse_kdirect.c
@@ -823,6 +823,19 @@ static void wait_shrink(struct pcs_fuse_req *r, struct pcs_dentry_info *di)
list_add_tail(&r->exec.ireq.list, &di->size.queue);
}
+static bool kqueue_insert(struct pcs_dentry_info *di, struct fuse_file *ff,
+ struct fuse_req *req)
+{
+ spin_lock(&di->kq_lock);
+ if (ff && test_bit(FUSE_S_FAIL_IMMEDIATELY, &ff->ff_state)) {
+ spin_unlock(&di->kq_lock);
+ return false;
+ }
+ list_add_tail(&req->list, &di->kq);
+ spin_unlock(&di->kq_lock);
+ return true;
+}
+
/*
* Check i size boundary and deffer request if necessary
* Ret code
@@ -830,7 +843,7 @@ static void wait_shrink(struct pcs_fuse_req *r, struct pcs_dentry_info *di)
* -1: should fail request
* 1: request placed to pended queue
*/
-static int pcs_fuse_prep_rw(struct pcs_fuse_req *r)
+static int pcs_fuse_prep_rw(struct pcs_fuse_req *r, struct fuse_file *ff)
{
struct fuse_inode *fi = get_fuse_inode(r->req.io_inode);
struct pcs_dentry_info *di = pcs_inode_from_fuse(fi);
@@ -840,8 +853,8 @@ static int pcs_fuse_prep_rw(struct pcs_fuse_req *r)
/* Deffer all requests if shrink requested to prevent livelock */
if (di->size.op == PCS_SIZE_SHRINK) {
wait_shrink(r, di);
- spin_unlock(&di->lock);
- return 1;
+ ret = 1;
+ goto out;
}
if (r->req.in.h.opcode == FUSE_READ) {
size_t size;
@@ -851,8 +864,8 @@ static int pcs_fuse_prep_rw(struct pcs_fuse_req *r)
if (in->offset + in->size > di->fileinfo.attr.size) {
if (in->offset >= di->fileinfo.attr.size) {
r->req.out.args[0].size = 0;
- spin_unlock(&di->lock);
- return -1;
+ ret = -EPERM;
+ goto out;
}
size = di->fileinfo.attr.size - in->offset;
}
@@ -861,6 +874,10 @@ static int pcs_fuse_prep_rw(struct pcs_fuse_req *r)
struct fuse_write_in *in = &r->req.misc.write.in;
if (in->offset + in->size > di->fileinfo.attr.size) {
+ if (!kqueue_insert(di, ff, &r->req)) {
+ ret = -EIO;
+ goto out;
+ }
wait_grow(r, di, in->offset + in->size);
ret = 1;
}
@@ -876,8 +893,8 @@ static int pcs_fuse_prep_rw(struct pcs_fuse_req *r)
size = in->fm_length;
if (in->fm_start + size > di->fileinfo.attr.size) {
if (in->fm_start >= di->fileinfo.attr.size) {
- spin_unlock(&di->lock);
- return -1;
+ ret = -EPERM;
+ goto out;
}
size = di->fileinfo.attr.size - in->fm_start;
}
@@ -888,6 +905,10 @@ static int pcs_fuse_prep_rw(struct pcs_fuse_req *r)
struct fuse_fallocate_in const *in = r->req.in.args[0].value;
if (in->offset + in->length > di->fileinfo.attr.size) {
+ if (!kqueue_insert(di, ff, &r->req)) {
+ ret = -EIO;
+ goto out;
+ }
wait_grow(r, di, in->offset + in->length);
ret = 1;
}
@@ -900,14 +921,14 @@ static int pcs_fuse_prep_rw(struct pcs_fuse_req *r)
if (ret) {
pcs_fuse_prep_fallocate(r);
} else {
- spin_unlock(&di->lock);
- return -1;
+ ret = -EPERM;
+ goto out;
}
}
}
inode_dio_begin(r->req.io_inode);
+out:
spin_unlock(&di->lock);
-
return ret;
}
@@ -932,12 +953,15 @@ static void pcs_fuse_submit(struct pcs_fuse_cluster *pfc, struct fuse_req *req,
switch (r->req.in.h.opcode) {
case FUSE_WRITE:
case FUSE_READ:
- ret = pcs_fuse_prep_rw(r);
- if (!ret)
+ ret = pcs_fuse_prep_rw(r, ff);
+ if (likely(!ret))
goto submit;
if (ret > 0)
- /* Pended, nothing to do. */
- return;
+ return; /* Pended, nothing to do. */
+ if (ret != -EPERM) {
+ req->out.h.error = ret;
+ goto error;
+ }
break;
case FUSE_FALLOCATE: {
struct fuse_fallocate_in *inarg = (void*) req->in.args[0].value;
@@ -966,12 +990,15 @@ static void pcs_fuse_submit(struct pcs_fuse_cluster *pfc, struct fuse_req *req,
inarg->length = di->fileinfo.attr.size - inarg->offset;
}
- ret = pcs_fuse_prep_rw(r);
- if (!ret)
+ ret = pcs_fuse_prep_rw(r, ff);
+ if (likely(!ret))
goto submit;
if (ret > 0)
- /* Pended, nothing to do. */
- return;
+ return; /* Pended, nothing to do. */
+ if (ret != -EPERM) {
+ req->out.h.error = ret;
+ goto error;
+ }
break;
}
case FUSE_FSYNC:
@@ -984,12 +1011,15 @@ static void pcs_fuse_submit(struct pcs_fuse_cluster *pfc, struct fuse_req *req,
goto error;
}
- ret = pcs_fuse_prep_rw(r);
- if (!ret)
+ ret = pcs_fuse_prep_rw(r, ff);
+ if (likely(!ret))
goto submit;
if (ret > 0)
- /* Pended, nothing to do. */
- return;
+ return; /* Pended, nothing to do. */
+ if (ret != -EPERM) {
+ req->out.h.error = ret;
+ goto error;
+ }
break;
}
r->req.out.h.error = 0;
@@ -1004,14 +1034,10 @@ static void pcs_fuse_submit(struct pcs_fuse_cluster *pfc, struct fuse_req *req,
return;
submit:
- spin_lock(&di->kq_lock);
- if (ff && test_bit(FUSE_S_FAIL_IMMEDIATELY, &ff->ff_state)) {
- spin_unlock(&di->kq_lock);
+ if (!kqueue_insert(di, ff, req)) {
req->out.h.error = -EIO;
goto error;
}
- list_add_tail(&req->list, &di->kq);
- spin_unlock(&di->kq_lock);
if (async)
pcs_cc_submit(ireq->cc, ireq);
@@ -1079,7 +1105,7 @@ static void _pcs_shrink_end(struct fuse_conn *fc, struct fuse_req *req)
TRACE("resubmit %p\n", &r->req);
list_del_init(&ireq->list);
- pcs_fuse_submit(pfc, &r->req, NULL, true, false);
+ pcs_fuse_submit(pfc, &r->req, r->req.ff, true, false);
}
}
More information about the Devel
mailing list