[Devel] [PATCH RHEL7 COMMIT] fs/fuse kio: don't wait read requests in case of fsync/flush
Konstantin Khorenko
khorenko at virtuozzo.com
Mon Oct 21 14:12:57 MSK 2019
The commit is pushed to "branch-rh7-3.10.0-1062.1.2.vz7.114.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1062.1.2.vz7.114.7
------>
commit b9ae1b8f967a39d1961f6d254047b59ec7a512d7
Author: Ildar Ismagilov <ildar.ismagilov at virtuozzo.com>
Date: Mon Oct 21 14:12:55 2019 +0300
fs/fuse kio: don't wait read requests in case of fsync/flush
In this patch, KIO requests are divided into two types: read and write.
In case of fsync/flush, we now wait only for write requests to complete.
https://pmc.acronis.com/browse/VSTOR-11372
Signed-off-by: Ildar Ismagilov <ildar.ismagilov at virtuozzo.com>
Acked-by: Alexey Kuznetsov <kuznet at acronis.com>
---
fs/fuse/dir.c | 3 +-
fs/fuse/file.c | 6 ++--
fs/fuse/fuse_i.h | 57 ++++++++++++++++++++++++++++++++++++++
fs/fuse/inode.c | 3 ++
fs/fuse/kio/pcs/fuse_io.c | 15 ++++++----
fs/fuse/kio/pcs/pcs_fuse_kdirect.c | 18 ++++++++----
6 files changed, 89 insertions(+), 13 deletions(-)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 4974ce801279..ccaf6058c76d 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1638,7 +1638,7 @@ void fuse_set_nowrite(struct inode *inode)
BUG_ON(fi->writectr < 0);
fi->writectr += FUSE_NOWRITE;
spin_unlock(&fi->lock);
- inode_dio_wait(inode);
+ fuse_write_dio_wait(fi);
wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
}
@@ -1778,6 +1778,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
if (is_truncate) {
fuse_set_nowrite(inode);
+ fuse_read_dio_wait(fi);
set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
if (trust_local_cmtime && attr->ia_size != inode->i_size)
attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index e46b8844dd49..cfafff050de8 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -480,11 +480,10 @@ static int fuse_release(struct inode *inode, struct file *file)
/*
* Flush pending requests before FUSE_RELEASE makes userspace
* to drop the lease of the file. Otherwise, they never finish.
- * Keep in mind, that in kio case fuse_sync_writes() currently
- * waits all type of requests (not only write).
*/
mutex_lock(&inode->i_mutex);
fuse_sync_writes(inode);
+ fuse_read_dio_wait(fi);
if (fi->num_openers == 0 && ff->fc->kio.op->file_close)
ff->fc->kio.op->file_close(ff->fc, file, inode);
@@ -3730,6 +3729,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
goto out;
fuse_sync_writes(inode);
+ fuse_read_dio_wait(fi);
}
}
@@ -3939,6 +3939,7 @@ int fuse_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len)
{
struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
int err = 0;
if (is_bad_inode(inode))
@@ -3970,6 +3971,7 @@ int fuse_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
mutex_lock(&inode->i_mutex);
fuse_sync_writes(inode);
+ fuse_read_dio_wait(fi);
if (fieinfo->fi_extents_max == 0) {
err = fuse_request_fiemap(inode, 0, &start, &len, NULL, fieinfo);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index cd9b997b885e..092916ce8c0e 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -143,6 +143,13 @@ struct fuse_inode {
/** Private kdirect io context */
void *private;
+
+ /** Direct IO operations */
+ struct {
+ wait_queue_head_t waitq;
+ atomic_t read_count;
+ atomic_t write_count;
+ } dio;
};
/** FUSE inode state bits */
@@ -1060,6 +1067,56 @@ void fuse_flush_writepages(struct inode *inode);
void fuse_set_nowrite(struct inode *inode);
void fuse_release_nowrite(struct inode *inode);
+static inline void fuse_read_dio_begin(struct fuse_inode *fi)
+{
+ atomic_inc(&fi->dio.read_count);
+}
+
+static inline void fuse_read_dio_end(struct fuse_inode *fi)
+{
+ if (atomic_dec_and_test(&fi->dio.read_count))
+ wake_up(&fi->dio.waitq);
+}
+
+static inline void fuse_read_dio_wait(struct fuse_inode *fi)
+{
+ wait_event(fi->dio.waitq,
+ atomic_read(&fi->dio.read_count) == 0);
+}
+
+static inline int fuse_read_dio_count(struct fuse_inode *fi)
+{
+ return atomic_read(&fi->dio.read_count);
+}
+
+static inline void fuse_write_dio_begin(struct fuse_inode *fi)
+{
+ atomic_inc(&fi->dio.write_count);
+}
+
+static inline void fuse_write_dio_end(struct fuse_inode *fi)
+{
+ if (atomic_dec_and_test(&fi->dio.write_count))
+ wake_up(&fi->dio.waitq);
+}
+
+static inline void fuse_write_dio_wait(struct fuse_inode *fi)
+{
+ wait_event(fi->dio.waitq,
+ atomic_read(&fi->dio.write_count) == 0);
+}
+
+static inline int fuse_write_dio_count(struct fuse_inode *fi)
+{
+ return atomic_read(&fi->dio.write_count);
+}
+
+static inline void fuse_dio_wait(struct fuse_inode *fi)
+{
+ fuse_read_dio_wait(fi);
+ fuse_write_dio_wait(fi);
+}
+
/**
* File-system tells the kernel to invalidate cache for the given node id.
*/
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 8c0d213c07b9..fc5f066d3e4d 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -109,6 +109,9 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
fi->writepages = RB_ROOT;
init_waitqueue_head(&fi->page_waitq);
spin_lock_init(&fi->lock);
+ init_waitqueue_head(&fi->dio.waitq);
+ atomic_set(&fi->dio.read_count, 0);
+ atomic_set(&fi->dio.write_count, 0);
fi->forget = fuse_alloc_forget();
if (!fi->forget) {
kmem_cache_free(fuse_inode_cachep, inode);
diff --git a/fs/fuse/kio/pcs/fuse_io.c b/fs/fuse/kio/pcs/fuse_io.c
index ed5926eb5d4d..fe70f6c02bc0 100644
--- a/fs/fuse/kio/pcs/fuse_io.c
+++ b/fs/fuse/kio/pcs/fuse_io.c
@@ -36,6 +36,7 @@ static void intreq_complete(struct pcs_int_request *ireq)
static void on_read_done(struct pcs_fuse_req *r, size_t size)
{
struct pcs_fuse_cluster *pfc = cl_from_req(r);
+ struct fuse_inode *fi = get_fuse_inode(r->req.io_inode);
DTRACE("do fuse_request_end req:%p op:%d err:%d\n", &r->req, r->req.in.h.opcode, r->req.out.h.error);
@@ -48,7 +49,7 @@ static void on_read_done(struct pcs_fuse_req *r, size_t size)
}
fuse_stat_account(pfc->fc, KFUSE_OP_READ, ktime_sub(ktime_get(), r->exec.ireq.ts));
r->req.out.args[0].size = size;
- inode_dio_end(r->req.io_inode);
+ fuse_read_dio_end(fi);
request_end(pfc->fc, &r->req);
}
@@ -65,22 +66,24 @@ static void on_write_done(struct pcs_fuse_req *r, off_t pos, size_t size)
{
struct fuse_write_out *out = &r->req.misc.write.out;
struct pcs_fuse_cluster *pfc = cl_from_req(r);
+ struct fuse_inode *fi = get_fuse_inode(r->req.io_inode);
out->size = size;
DTRACE("do fuse_request_end req:%p op:%d err:%d\n", &r->req, r->req.in.h.opcode, r->req.out.h.error);
fuse_stat_account(pfc->fc, KFUSE_OP_WRITE, ktime_sub(ktime_get(), r->exec.ireq.ts));
- inode_dio_end(r->req.io_inode);
+ fuse_write_dio_end(fi);
request_end(pfc->fc, &r->req);
}
static void on_fallocate_done(struct pcs_fuse_req *r, off_t pos, size_t size)
{
struct pcs_fuse_cluster *pfc = cl_from_req(r);
+ struct fuse_inode *fi = get_fuse_inode(r->req.io_inode);
DTRACE("do fuse_request_end req:%p op:%d err:%d\n", &r->req, r->req.in.h.opcode, r->req.out.h.error);
fuse_stat_account(pfc->fc, KFUSE_OP_FALLOCATE, ktime_sub(ktime_get(), r->exec.ireq.ts));
- inode_dio_end(r->req.io_inode);
+ fuse_write_dio_end(fi);
request_end(pfc->fc, &r->req);
}
@@ -88,10 +91,11 @@ static void on_fallocate_done(struct pcs_fuse_req *r, off_t pos, size_t size)
static void on_fiemap_done(struct pcs_fuse_req *r)
{
struct pcs_fuse_cluster *pfc = cl_from_req(r);
+ struct fuse_inode *fi = get_fuse_inode(r->req.io_inode);
DTRACE("do fuse_request_end req:%p op:%d err:%d\n", &r->req, r->req.in.h.opcode, r->req.out.h.error);
- inode_dio_end(r->req.io_inode);
+ fuse_write_dio_end(fi);
request_end(pfc->fc, &r->req);
}
@@ -255,6 +259,7 @@ static void falloc_req_complete(struct pcs_int_request *ireq)
struct pcs_fuse_req * r = ireq->completion_data.priv;
struct pcs_dentry_info *di = get_pcs_inode(r->req.io_inode);
struct pcs_fuse_cluster *pfc = cl_from_req(r);
+ struct fuse_inode *fi = get_fuse_inode(r->req.io_inode);
BUG_ON(ireq->type != PCS_IREQ_NOOP);
@@ -264,7 +269,7 @@ static void falloc_req_complete(struct pcs_int_request *ireq)
DTRACE("do fuse_request_end req:%p op:%d err:%d\n", &r->req, r->req.in.h.opcode, r->req.out.h.error);
fuse_stat_account(pfc->fc, KFUSE_OP_FALLOCATE, ktime_sub(ktime_get(), ireq->ts));
- inode_dio_end(r->req.io_inode);
+ fuse_write_dio_end(fi);
request_end(pfc->fc, &r->req);
}
diff --git a/fs/fuse/kio/pcs/pcs_fuse_kdirect.c b/fs/fuse/kio/pcs/pcs_fuse_kdirect.c
index 90e12bf20e41..2bda2381bb8e 100644
--- a/fs/fuse/kio/pcs/pcs_fuse_kdirect.c
+++ b/fs/fuse/kio/pcs/pcs_fuse_kdirect.c
@@ -713,6 +713,7 @@ void ireq_destroy(struct pcs_int_request *ireq)
static int submit_size_grow(struct inode *inode, unsigned long long size)
{
struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_file *ff;
struct fuse_setattr_in inarg;
struct fuse_attr_out outarg;
@@ -720,9 +721,9 @@ static int submit_size_grow(struct inode *inode, unsigned long long size)
int err;
/* Caller comes here w/o i_mutex, but vfs_truncate is blocked
- at inode_dio_wait() see fuse_set_nowrite
+ at fuse_write_dio_wait see fuse_set_nowrite
*/
- BUG_ON(!atomic_read(&inode->i_dio_count));
+ BUG_ON(!fuse_write_dio_count(fi));
TRACE("ino:%ld size:%lld \n",inode->i_ino, size);
@@ -880,11 +881,14 @@ static inline int req_wait_grow_queue(struct pcs_fuse_req *r,
off_t offset, size_t size)
{
struct pcs_dentry_info *di = get_pcs_inode(r->req.io_inode);
+ struct fuse_inode *fi = get_fuse_inode(r->req.io_inode);
if (!kqueue_insert(di, ff, &r->req))
return -EIO;
- inode_dio_begin(r->req.io_inode);
+ BUG_ON(r->req.in.h.opcode != FUSE_WRITE && r->req.in.h.opcode != FUSE_FALLOCATE);
+ fuse_write_dio_begin(fi);
+
wait_grow(r, di, offset + size);
return 1;
}
@@ -901,6 +905,7 @@ static int pcs_fuse_prep_rw(struct pcs_fuse_req *r, struct fuse_file *ff)
{
struct fuse_req *req = &r->req;
struct pcs_dentry_info *di = get_pcs_inode(req->io_inode);
+ struct fuse_inode *fi = get_fuse_inode(req->io_inode);
int ret;
spin_lock(&di->lock);
@@ -1005,7 +1010,10 @@ static int pcs_fuse_prep_rw(struct pcs_fuse_req *r, struct fuse_file *ff)
if (!kqueue_insert(di, ff, req))
return -EIO;
- inode_dio_begin(req->io_inode);
+ if (req->in.h.opcode == FUSE_READ)
+ fuse_read_dio_begin(fi);
+ else
+ fuse_write_dio_begin(fi);
return 0;
fail:
pending:
@@ -1205,7 +1213,7 @@ static void pcs_kio_setattr_handle(struct fuse_inode *fi, struct fuse_req *req)
if (di->size.op == PCS_SIZE_SHRINK) {
BUG_ON(!mutex_is_locked(&req->io_inode->i_mutex));
/* wait for aio reads in flight */
- inode_dio_wait(req->io_inode);
+ fuse_dio_wait(fi);
req->end = _pcs_shrink_end;
} else
More information about the Devel
mailing list