[Devel] [PATCH RHEL8 COMMIT] ve/fs/files: Shrink big fdtable on close in is_pseudosuper mode
Konstantin Khorenko
khorenko at virtuozzo.com
Fri Apr 30 13:52:42 MSK 2021
The commit is pushed to "branch-rh8-4.18.0-240.1.1.vz8.5.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-240.1.1.vz8.5.24
------>
commit c4a4fada18ad4dab1b051e49e39065c569ad64c8
Author: Kirill Tkhai <ktkhai at virtuozzo.com>
Date: Fri Apr 30 13:52:42 2021 +0300
ve/fs/files: Shrink big fdtable on close in is_pseudosuper mode
Patchset description:
Shrink big fdtable on criu restore
This patchset avoids the memory overuse introduced by service fds on criu
restore.
The solution is simple: smartly check the number of the fd being closed, and
shrink the fdtable if that is possible. The checks happen only in is_pseudosuper
mode, so we do not affect performance in normal work mode.
The problem is that we can't solve this in 100% of cases in userspace.
The kernel allows us to fix that completely.
https://jira.sw.ru/browse/PSBM-78827
Eric Dumazet (1):
ms/fs/file.c: don't acquire files->file_lock in fd_install()
Kirill Tkhai (3):
files: Add new argument to expand_files()
files: Add fdtable_align() helper
files: Shrink big fdtable on close in is_pseudosuper mode
Mateusz Guzik (1):
ms/vfs: grab the lock instead of blocking in __fd_install during resizing
============================================================
This patch description:
This trick is going to be used for criu restore, to release excess memory
occupied by service files:
we check the fd being closed, and if it is at least half of the fdtable's
max_fds, we try to shrink the fdtable and decrease the amount of memory needed
to store the task's fds.
Currently is_pseudosuper state is used to detect restore, but it can be changed
later if needed.
Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
Reviewed-by: Cyrill Gorcunov <gorcunov at openvz.org>
Rebase to vz8:
- Used the vz7 commit 4b024fd120c5 ("ve/fs/files: Shrink big
fdtable on close in is_pseudosuper mode"), already rebased to RH7.9, which
handles the new copy_fd_bitmaps helper function.
(cherry picked from vz7 commit 4b024fd120c5 ("ve/fs/files: Shrink big fdtable on
close in is_pseudosuper mode"))
Signed-off-by: Andrey Zhadchenko <andrey.zhadchenko at virtuozzo.com>
---
fs/file.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++---------------
1 file changed, 48 insertions(+), 15 deletions(-)
diff --git a/fs/file.c b/fs/file.c
index 4f68ef0f6ee0..0ed1c4cd6eae 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -18,6 +18,7 @@
#include <linux/bitops.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
+#include <linux/ve.h>
unsigned int sysctl_nr_open __read_mostly = 1024*1024;
unsigned int sysctl_nr_open_min = BITS_PER_LONG;
@@ -47,21 +48,25 @@ static void free_fdtable_rcu(struct rcu_head *rcu)
* spinlock held for write.
*/
static void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt,
- unsigned int count)
+ unsigned int count, bool shrink)
{
unsigned int cpy, set;
- cpy = count / BITS_PER_BYTE;
+ cpy = min(count, nfdt->max_fds) / BITS_PER_BYTE;
set = (nfdt->max_fds - count) / BITS_PER_BYTE;
memcpy(nfdt->open_fds, ofdt->open_fds, cpy);
- memset((char *)nfdt->open_fds + cpy, 0, set);
+ if (!shrink)
+ memset((char *)nfdt->open_fds + cpy, 0, set);
+
memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy);
- memset((char *)nfdt->close_on_exec + cpy, 0, set);
+ if (!shrink)
+ memset((char *)nfdt->close_on_exec + cpy, 0, set);
- cpy = BITBIT_SIZE(count);
+ cpy = BITBIT_SIZE(min(count, nfdt->max_fds));
set = BITBIT_SIZE(nfdt->max_fds) - cpy;
memcpy(nfdt->full_fds_bits, ofdt->full_fds_bits, cpy);
- memset((char *)nfdt->full_fds_bits + cpy, 0, set);
+ if (!shrink)
+ memset((char *)nfdt->full_fds_bits + cpy, 0, set);
}
/*
@@ -72,14 +77,15 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt, bool shrink
{
unsigned int cpy, set;
- BUG_ON(nfdt->max_fds < ofdt->max_fds);
+ BUG_ON((nfdt->max_fds < ofdt->max_fds) != shrink);
- cpy = ofdt->max_fds * sizeof(struct file *);
+ cpy = min(ofdt->max_fds, nfdt->max_fds) * sizeof(struct file *);
set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *);
memcpy(nfdt->fd, ofdt->fd, cpy);
- memset((char *)nfdt->fd + cpy, 0, set);
+ if (!shrink)
+ memset((char *)nfdt->fd + cpy, 0, set);
- copy_fd_bitmaps(nfdt, ofdt, ofdt->max_fds);
+ copy_fd_bitmaps(nfdt, ofdt, ofdt->max_fds, shrink);
}
static unsigned int fdtable_align(unsigned int nr)
@@ -170,16 +176,26 @@ static int expand_fdtable(struct files_struct *files, unsigned int nr, bool shri
spin_lock(&files->file_lock);
if (!new_fdt)
return -ENOMEM;
+ cur_fdt = files_fdtable(files);
/*
* extremely unlikely race - sysctl_nr_open decreased between the check in
* caller and alloc_fdtable(). Cheaper to catch it here...
*/
- if (unlikely(new_fdt->max_fds <= nr)) {
+ if (unlikely((new_fdt->max_fds <= nr && !shrink) ||
+ (shrink && new_fdt->max_fds >= cur_fdt->max_fds))) {
__free_fdtable(new_fdt);
return -EMFILE;
}
- cur_fdt = files_fdtable(files);
- BUG_ON(nr < cur_fdt->max_fds);
+ if (unlikely(shrink)) {
+ int i;
+ i = find_last_bit(cur_fdt->open_fds, cur_fdt->max_fds);
+ i = fdtable_align(i);
+ if (i == cur_fdt->max_fds) {
+ __free_fdtable(new_fdt);
+ return 1;
+ }
+ }
+ BUG_ON((nr < cur_fdt->max_fds) != shrink);
copy_fdtable(new_fdt, cur_fdt, shrink);
rcu_assign_pointer(files->fdt, new_fdt);
if (cur_fdt != &files->fdtab)
@@ -208,7 +224,7 @@ static int expand_files(struct files_struct *files, unsigned int nr, bool shrink
fdt = files_fdtable(files);
/* Do we need to expand? */
- if (nr < fdt->max_fds)
+ if (nr < fdt->max_fds && !shrink)
return expanded;
/* Can we expand? */
@@ -223,6 +239,15 @@ static int expand_files(struct files_struct *files, unsigned int nr, bool shrink
goto repeat;
}
+ if (unlikely(shrink)) {
+ unsigned int i;
+ i = find_last_bit(fdt->open_fds, fdt->max_fds);
+ nr = i;
+ i = fdtable_align(i);
+ if (i >= fdt->max_fds)
+ return expanded;
+ }
+
/* All good, so we try */
files->resize_in_progress = true;
expanded = expand_fdtable(files, nr, shrink);
@@ -337,7 +362,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
open_files = count_open_files(old_fdt);
}
- copy_fd_bitmaps(new_fdt, old_fdt, open_files);
+ copy_fd_bitmaps(new_fdt, old_fdt, open_files, false);
old_fds = old_fdt->fd;
new_fds = new_fdt->fd;
@@ -643,6 +668,14 @@ int __close_fd(struct files_struct *files, unsigned fd)
goto out_unlock;
rcu_assign_pointer(fdt->fd[fd], NULL);
__put_unused_fd(files, fd);
+
+ /* Try to shrink fdt and to free memory */
+ if (unlikely(fd * 2 >= fdt->max_fds &&
+ fd > (1024 / sizeof(struct file *))) &&
+ get_exec_env() != get_ve0() &&
+ get_exec_env()->is_pseudosuper)
+ expand_files(files, fd, true);
+
spin_unlock(&files->file_lock);
return filp_close(file, files);
More information about the Devel
mailing list