[Devel] [PATCH RHEL7 COMMIT] files: Shrink big fdtable on close in is_pseudosuper mode
Konstantin Khorenko
khorenko at virtuozzo.com
Thu Jan 18 18:05:15 MSK 2018
The commit is pushed to "branch-rh7-3.10.0-693.11.6.vz7.42.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-693.11.6.vz7.42.1
------>
commit f070e04eb6d3e9e4a4b47ae8fac0b1517dd619c2
Author: Kirill Tkhai <ktkhai at virtuozzo.com>
Date: Thu Jan 18 18:05:15 2018 +0300
files: Shrink big fdtable on close in is_pseudosuper mode
Patchset description:
Shrink big fdtable on criu restore
This patchset avoids the memory overuse introduced by service fds on criu
restore.
The solution is simple: check the number of the fd being closed, and shrink the
fdtable if that is possible. The checks happen only in is_pseudosuper mode, so
performance in the normal mode of operation is not affected.
The problem is that we can't solve this for 100% of cases in userspace.
The kernel allows us to fix it completely.
https://jira.sw.ru/browse/PSBM-78827
Eric Dumazet (1):
ms/fs/file.c: don't acquire files->file_lock in fd_install()
Kirill Tkhai (3):
files: Add new argument to expand_files()
files: Add fdtable_align() helper
files: Shrink big fdtable on close in is_pseudosuper mode
Mateusz Guzik (1):
ms/vfs: grab the lock instead of blocking in __fd_install during resizing
============================================================
This patch description:
This trick is going to be used for criu restore, to release excess memory
occupied by service files:
we check the fd being closed, and if it is at least half of the fdtable's
current max_fds, we try to shrink the fdtable and decrease the amount of memory
needed to store the task's fds.
Currently is_pseudosuper state is used to detect restore, but it can be changed
later if needed.
Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
Reviewed-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
fs/file.c | 51 +++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 41 insertions(+), 10 deletions(-)
diff --git a/fs/file.c b/fs/file.c
index f009eb9bf1c8..b85e8ee6143b 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -22,6 +22,7 @@
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
+#include <linux/ve.h>
int sysctl_nr_open __read_mostly = 1024*1024;
int sysctl_nr_open_min = BITS_PER_LONG;
@@ -69,19 +70,22 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt, bool shrink
{
unsigned int cpy, set;
- BUG_ON(nfdt->max_fds < ofdt->max_fds);
+ BUG_ON((nfdt->max_fds < ofdt->max_fds) != shrink);
- cpy = ofdt->max_fds * sizeof(struct file *);
+ cpy = min(ofdt->max_fds, nfdt->max_fds) * sizeof(struct file *);
set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *);
memcpy(nfdt->fd, ofdt->fd, cpy);
- memset((char *)(nfdt->fd) + cpy, 0, set);
+ if (!shrink)
+ memset((char *)(nfdt->fd) + cpy, 0, set);
- cpy = ofdt->max_fds / BITS_PER_BYTE;
+ cpy = min(ofdt->max_fds, nfdt->max_fds) / BITS_PER_BYTE;
set = (nfdt->max_fds - ofdt->max_fds) / BITS_PER_BYTE;
memcpy(nfdt->open_fds, ofdt->open_fds, cpy);
- memset((char *)(nfdt->open_fds) + cpy, 0, set);
+ if (!shrink)
+ memset((char *)(nfdt->open_fds) + cpy, 0, set);
memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy);
- memset((char *)(nfdt->close_on_exec) + cpy, 0, set);
+ if (!shrink)
+ memset((char *)(nfdt->close_on_exec) + cpy, 0, set);
}
static unsigned int fdtable_align(unsigned int nr)
@@ -169,16 +173,26 @@ static int expand_fdtable(struct files_struct *files, int nr, bool shrink)
spin_lock(&files->file_lock);
if (!new_fdt)
return -ENOMEM;
+ cur_fdt = files_fdtable(files);
/*
* extremely unlikely race - sysctl_nr_open decreased between the check in
* caller and alloc_fdtable(). Cheaper to catch it here...
*/
- if (unlikely(new_fdt->max_fds <= nr)) {
+ if (unlikely((new_fdt->max_fds <= nr && !shrink) ||
+ (shrink && new_fdt->max_fds >= cur_fdt->max_fds))) {
__free_fdtable(new_fdt);
return -EMFILE;
}
- cur_fdt = files_fdtable(files);
- BUG_ON(nr < cur_fdt->max_fds);
+ if (unlikely(shrink)) {
+ int i;
+ i = find_last_bit(cur_fdt->open_fds, cur_fdt->max_fds);
+ i = fdtable_align(i);
+ if (i == cur_fdt->max_fds) {
+ __free_fdtable(new_fdt);
+ return 1;
+ }
+ }
+ BUG_ON((nr < cur_fdt->max_fds) != shrink);
copy_fdtable(new_fdt, cur_fdt, shrink);
rcu_assign_pointer(files->fdt, new_fdt);
if (cur_fdt != &files->fdtab)
@@ -207,7 +221,7 @@ static int expand_files(struct files_struct *files, int nr, bool shrink)
fdt = files_fdtable(files);
/* Do we need to expand? */
- if (nr < fdt->max_fds)
+ if (nr < fdt->max_fds && !shrink)
return expanded;
/* Can we expand? */
@@ -222,6 +236,15 @@ static int expand_files(struct files_struct *files, int nr, bool shrink)
goto repeat;
}
+ if (unlikely(shrink)) {
+ unsigned int i;
+ i = find_last_bit(fdt->open_fds, fdt->max_fds);
+ nr = i;
+ i = fdtable_align(i);
+ if (i >= fdt->max_fds)
+ return expanded;
+ }
+
/* All good, so we try */
files->resize_in_progress = true;
expanded = expand_fdtable(files, nr, shrink);
@@ -637,6 +660,14 @@ int __close_fd(struct files_struct *files, unsigned fd)
rcu_assign_pointer(fdt->fd[fd], NULL);
__clear_close_on_exec(fd, fdt);
__put_unused_fd(files, fd);
+
+ /* Try to shrink fdt and to free memory */
+ if (unlikely(fd * 2 >= fdt->max_fds &&
+ fd > (1024 / sizeof(struct file *))) &&
+ get_exec_env() != get_ve0() &&
+ get_exec_env()->is_pseudosuper)
+ expand_files(files, fd, true);
+
spin_unlock(&files->file_lock);
return filp_close(file, files);
More information about the Devel
mailing list