[Devel] [PATCH VZ9 1/2] fs: fuse: kio: resolve race condition fallocate vs shrink
Alexey Kuznetsov
kuznet at virtuozzo.com
Thu Nov 7 19:01:10 MSK 2024
It was missed kio/pcs module has warning on out of mutex execution
of fallocate request. It is not a bug per se, yet the situation
is dubious. The rationale was that logically fallocate(zerorange) is
_not_ different of aio direct write of zeros to the same area,
and that write used to be executed out of inode_lock. So asserting
the lock in fallocate might be a noise.
Yet we must comply completely. There is one difference, aio takes
dio_write counter, which blocks concurrent truncates until write
is completed. This is important mostly for shrinks, as expanding
writes are not executed asynchronously, fallocates likewise.
So, we take dio_write for async fallocate out of inode_lock,
making it 100% equivalent to write.
NOTE: the situation with expanding writes/fallocates is complicated.
We might even have a zero day bug here, I do not see one now, but this does
not mean we did not have there any holes. Just a note.
Affects: #VSTOR-94829
Signed-off-by: Alexey Kuznetsov <kuznet at virtuozzo.com>
---
fs/fuse/file.c | 11 +++++++++--
fs/fuse/kio/pcs/pcs_fuse_kdirect.c | 21 ++++++++++++++++-----
2 files changed, 25 insertions(+), 7 deletions(-)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 7bf7c9e..195b771 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -3450,8 +3450,13 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
if (!(mode & FALLOC_FL_KEEP_SIZE))
set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
- if (revoke_lock)
+ if (revoke_lock) {
+ /* This blocks truncates which might happen after
+ * inode_lock is released
+ */
+ fuse_write_dio_begin(fi);
inode_unlock(inode);
+ }
args.opcode = FUSE_FALLOCATE;
args.io_inode = inode;
@@ -3460,8 +3465,10 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
args.in_args[0].size = sizeof(inarg);
args.in_args[0].value = &inarg;
err = fuse_simple_request(fm, &args);
- if (revoke_lock)
+ if (revoke_lock) {
inode_lock(inode);
+ fuse_write_dio_end(fi);
+ }
if (err == -ENOSYS) {
fm->fc->no_fallocate = 1;
err = -EOPNOTSUPP;
diff --git a/fs/fuse/kio/pcs/pcs_fuse_kdirect.c b/fs/fuse/kio/pcs/pcs_fuse_kdirect.c
index 1bd43ec..21bfd82 100644
--- a/fs/fuse/kio/pcs/pcs_fuse_kdirect.c
+++ b/fs/fuse/kio/pcs/pcs_fuse_kdirect.c
@@ -1019,19 +1019,19 @@ static void pcs_fuse_submit(struct pcs_fuse_cluster *pfc, struct fuse_req *req)
break;
case FUSE_FALLOCATE: {
struct fuse_fallocate_in *inarg = (void*) args->in_args[0].value;
+ size_t sz = READ_ONCE(di->fileinfo.attr.size);
if (pfc->fc->no_fallocate) {
req->out.h.error = -EOPNOTSUPP;
goto error;
}
- if (inarg->offset >= di->fileinfo.attr.size)
+ if (inarg->offset >= sz)
inarg->mode &= ~FALLOC_FL_ZERO_RANGE;
if (inarg->mode == FALLOC_FL_KEEP_SIZE)
break; /* NOPE */
- WARN_ON_ONCE(!inode_is_locked(&fi->inode));
if (inarg->mode & (FALLOC_FL_ZERO_RANGE|FALLOC_FL_PUNCH_HOLE)) {
if ((inarg->offset & (PAGE_SIZE - 1)) || (inarg->length & (PAGE_SIZE - 1))) {
req->out.h.error = -EINVAL;
@@ -1040,10 +1040,21 @@ static void pcs_fuse_submit(struct pcs_fuse_cluster *pfc, struct fuse_req *req)
}
if (inarg->mode & FALLOC_FL_KEEP_SIZE) {
- if (inarg->offset > di->fileinfo.attr.size)
+ if (inarg->offset > sz)
break; /* NOPE */
- if (inarg->offset + inarg->length > di->fileinfo.attr.size)
- inarg->length = di->fileinfo.attr.size - inarg->offset;
+ if (inarg->offset + inarg->length > sz)
+ inarg->length = sz - inarg->offset;
+ } else {
+ /* attr.size can expand, but not shrink. If the file is expanded
+ * by fallocate inode_lock cannot be dropped.
+ *
+ * Interesting new situation to consider: update i_size happens of exit
+ * from allocating write/falloc and we have a window where attr.size is ahead
+ * of i_size. It looks safe as allocating call is deemed to be synchronous,
+ * and as i_size is still not advanced all the follwing ones are.
+ */
+ WARN_ON_ONCE(inarg->offset + inarg->length > sz &&
+ !inode_is_locked(&fi->inode));
}
ret = pcs_fuse_prep_rw(r);
--
1.8.3.1
More information about the Devel
mailing list