[Devel] [PATCH RHEL9 COMMIT] fs/ext4: introduce FALLOC_FL_PREALLOCATE flag to fallocate
Konstantin Khorenko
khorenko at virtuozzo.com
Thu Jan 23 21:53:03 MSK 2025
The commit is pushed to "branch-rh9-5.14.0-427.44.1.vz9.80.x-ovz" and will appear at git at bitbucket.org:openvz/vzkernel.git
after rh9-5.14.0-427.44.1.vz9.80.4
------>
commit a797e74b46ad0dfc852d4f24f31636d6fc4a2a7e
Author: Alexey Kuznetsov <kuznet at virtuozzo.com>
Date: Sat Jan 18 02:08:52 2025 +0800
fs/ext4: introduce FALLOC_FL_PREALLOCATE flag to fallocate
A new FALLOC_FL_PREALLOCATE flag to fallocate is added.
It behaves just like a regular fallocate(0), but with one important property:
from the viewpoint of preallocation it is treated exactly like write(0),
i.e. it uses preallocation. Normally preallocation is disabled for
fallocate, the argument being that an application calling fallocate
wishes to keep full control over space allocation. However, when
fallocate is used as an optimization to write zeros without memory copies
and block zeroing, there is no reason to suppress normal preallocation.
khorenko@: logic is the following:
if FALLOC_FL_PREALLOCATE
preallocation enabled for all "len"
if !FALLOC_FL_PREALLOCATE
if len <= falloc_prealloc
preallocation enabled
if falloc_prealloc < len <= EXT_UNWRITTEN_MAX_LEN
preallocation disabled
if len > EXT_UNWRITTEN_MAX_LEN
preallocation enabled
So if FALLOC_FL_PREALLOCATE is used, fallocate() behaves like write(),
i.e. it always uses preallocation and the falloc_prealloc value is ignored.
If FALLOC_FL_PREALLOCATE is NOT used:
- stock fallocate() disables preallocation for len sizes <= 32K.
- the newly introduced falloc_prealloc parameter allows an experiment:
if it is set to a non-zero value, preallocation is disabled only for
sizes in the range falloc_prealloc .. 32K (and enabled below it).
The idea behind this: if you perform a small fallocate, e.g. 4K, you probably
do not mean to fragment the file.
Signed-off-by: Alexey Kuznetsov <kuznet at virtuozzo.com>
Feature: ext4: preallocation flag for small fallocates
---
fs/ext4/extents.c | 17 +++++++++++------
include/linux/falloc.h | 3 ++-
include/uapi/linux/falloc.h | 2 ++
3 files changed, 15 insertions(+), 7 deletions(-)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 74251eebf831..e541d4a07fe7 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -35,6 +35,10 @@
#include <trace/events/ext4.h>
+static unsigned int falloc_prealloc;
+module_param(falloc_prealloc, uint, 0644);
+MODULE_PARM_DESC(falloc_prealloc, "Preallocate fallocate like it is a write for smaller sizes");
+
/*
* used by extent splitting.
*/
@@ -4423,7 +4427,7 @@ int ext4_ext_truncate(handle_t *handle, struct inode *inode)
static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
ext4_lblk_t len, loff_t new_size,
- int flags)
+ int flags, int mode)
{
struct inode *inode = file_inode(file);
handle_t *handle;
@@ -4442,7 +4446,8 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
* that it doesn't get unnecessarily split into multiple
* extents.
*/
- if (len <= EXT_UNWRITTEN_MAX_LEN)
+ if (!(mode & FALLOC_FL_PREALLOCATE) && len <= EXT_UNWRITTEN_MAX_LEN &&
+ len > falloc_prealloc)
flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
/*
@@ -4588,7 +4593,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
round_down(offset, 1 << blkbits) >> blkbits,
(round_up((offset + len), 1 << blkbits) -
round_down(offset, 1 << blkbits)) >> blkbits,
- new_size, flags);
+ new_size, flags, mode);
if (ret)
goto out_mutex;
@@ -4621,7 +4626,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
inode->i_mtime = inode->i_ctime = current_time(inode);
ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
- flags);
+ flags, mode);
filemap_invalidate_unlock(mapping);
if (ret)
goto out_mutex;
@@ -4694,7 +4699,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
/* Return error if mode is not supported */
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
- FALLOC_FL_INSERT_RANGE))
+ FALLOC_FL_INSERT_RANGE | FALLOC_FL_PREALLOCATE))
return -EOPNOTSUPP;
inode_lock(inode);
@@ -4754,7 +4759,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (ret)
goto out;
- ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags);
+ ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags, mode);
if (ret)
goto out;
diff --git a/include/linux/falloc.h b/include/linux/falloc.h
index f3f0b97b1675..5e689612a622 100644
--- a/include/linux/falloc.h
+++ b/include/linux/falloc.h
@@ -30,7 +30,8 @@ struct space_resv {
FALLOC_FL_COLLAPSE_RANGE | \
FALLOC_FL_ZERO_RANGE | \
FALLOC_FL_INSERT_RANGE | \
- FALLOC_FL_UNSHARE_RANGE)
+ FALLOC_FL_UNSHARE_RANGE | \
+ FALLOC_FL_PREALLOCATE)
/* on ia32 l_start is on a 32-bit boundary */
#if defined(CONFIG_X86_64)
diff --git a/include/uapi/linux/falloc.h b/include/uapi/linux/falloc.h
index 51398fa57f6c..62431833effa 100644
--- a/include/uapi/linux/falloc.h
+++ b/include/uapi/linux/falloc.h
@@ -77,4 +77,6 @@
*/
#define FALLOC_FL_UNSHARE_RANGE 0x40
+#define FALLOC_FL_PREALLOCATE 0x80
+
#endif /* _UAPI_FALLOC_H_ */
More information about the Devel
mailing list