[Devel] [PATCH RH8 3/3] ve/fs/files: Shrink big fdtable on close in is_pseudosuper mode

Andrey Zhadchenko andrey.zhadchenko at virtuozzo.com
Mon Apr 19 13:31:55 MSK 2021


From: Kirill Tkhai <ktkhai at virtuozzo.com>

Patchset description:
Shrink big fdtable on criu restore

This patchset avoids the memory overuse introduced by service fds on criu
restore.
The solution is simple: check the number of the fd being closed, and shrink the
fdtable if that is possible. The checks happen only in is_pseudosuper mode, so
performance in normal work mode is not affected.

The problem is that we can't solve this in 100% of cases in userspace.
Doing it in the kernel allows us to fix it completely.

https://jira.sw.ru/browse/PSBM-78827

Eric Dumazet (1):
      ms/fs/file.c: don't acquire files->file_lock in fd_install()

Kirill Tkhai (3):
      files: Add new argument to expand_files()
      files: Add fdtable_align() helper
      files: Shrink big fdtable on close in is_pseudosuper mode

Mateusz Guzik (1):
      ms/vfs: grab the lock instead of blocking in __fd_install during resizing

============================================================
This patch description:

This trick is going to be used for criu restore, to release excess memory
occupied by service files:
we check the fd being closed, and if it is at least half of the maximum fd
number available in the fdtable, we try to shrink the fdtable and decrease the
amount of memory needed to store the task's fds.

Currently is_pseudosuper state is used to detect restore, but it can be changed
later if needed.

Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
Reviewed-by: Cyrill Gorcunov <gorcunov at openvz.org>

Rebase to vz8:
 - Used the commit rebased to RH7.9, 4b024fd120c5cfc3775387e2ed2e29d389a42849,
which handles the new copy_fd_bitmaps helper function.

(cherry picked from e4a319f998910317ce1559acebecca365f85d8ba)
Signed-off-by: Andrey Zhadchenko <andrey.zhadchenko at virtuozzo.com>
---
 fs/file.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 48 insertions(+), 15 deletions(-)

diff --git a/fs/file.c b/fs/file.c
index 4f68ef0f..0ed1c4c 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -18,6 +18,7 @@
 #include <linux/bitops.h>
 #include <linux/spinlock.h>
 #include <linux/rcupdate.h>
+#include <linux/ve.h>
 
 unsigned int sysctl_nr_open __read_mostly = 1024*1024;
 unsigned int sysctl_nr_open_min = BITS_PER_LONG;
@@ -47,21 +48,25 @@ static void free_fdtable_rcu(struct rcu_head *rcu)
  * spinlock held for write.
  */
 static void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt,
-			    unsigned int count)
+			    unsigned int count, bool shrink)
 {
 	unsigned int cpy, set;
 
-	cpy = count / BITS_PER_BYTE;
+	cpy = min(count, nfdt->max_fds) / BITS_PER_BYTE;
 	set = (nfdt->max_fds - count) / BITS_PER_BYTE;
 	memcpy(nfdt->open_fds, ofdt->open_fds, cpy);
-	memset((char *)nfdt->open_fds + cpy, 0, set);
+	if (!shrink)
+		memset((char *)nfdt->open_fds + cpy, 0, set);
+
 	memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy);
-	memset((char *)nfdt->close_on_exec + cpy, 0, set);
+	if (!shrink)
+		memset((char *)nfdt->close_on_exec + cpy, 0, set);
 
-	cpy = BITBIT_SIZE(count);
+	cpy = BITBIT_SIZE(min(count, nfdt->max_fds));
 	set = BITBIT_SIZE(nfdt->max_fds) - cpy;
 	memcpy(nfdt->full_fds_bits, ofdt->full_fds_bits, cpy);
-	memset((char *)nfdt->full_fds_bits + cpy, 0, set);
+	if (!shrink)
+		memset((char *)nfdt->full_fds_bits + cpy, 0, set);
 }
 
 /*
@@ -72,14 +77,15 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt, bool shrink
 {
 	unsigned int cpy, set;
 
-	BUG_ON(nfdt->max_fds < ofdt->max_fds);
+	BUG_ON((nfdt->max_fds < ofdt->max_fds) != shrink);
 
-	cpy = ofdt->max_fds * sizeof(struct file *);
+	cpy = min(ofdt->max_fds, nfdt->max_fds) * sizeof(struct file *);
 	set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *);
 	memcpy(nfdt->fd, ofdt->fd, cpy);
-	memset((char *)nfdt->fd + cpy, 0, set);
+	if (!shrink)
+		memset((char *)nfdt->fd + cpy, 0, set);
 
-	copy_fd_bitmaps(nfdt, ofdt, ofdt->max_fds);
+	copy_fd_bitmaps(nfdt, ofdt, ofdt->max_fds, shrink);
 }
 
 static unsigned int fdtable_align(unsigned int nr)
@@ -170,16 +176,26 @@ static int expand_fdtable(struct files_struct *files, unsigned int nr, bool shri
 	spin_lock(&files->file_lock);
 	if (!new_fdt)
 		return -ENOMEM;
+	cur_fdt = files_fdtable(files);
 	/*
 	 * extremely unlikely race - sysctl_nr_open decreased between the check in
 	 * caller and alloc_fdtable().  Cheaper to catch it here...
 	 */
-	if (unlikely(new_fdt->max_fds <= nr)) {
+	if (unlikely((new_fdt->max_fds <= nr && !shrink) ||
+		     (shrink && new_fdt->max_fds >= cur_fdt->max_fds))) {
 		__free_fdtable(new_fdt);
 		return -EMFILE;
 	}
-	cur_fdt = files_fdtable(files);
-	BUG_ON(nr < cur_fdt->max_fds);
+	if (unlikely(shrink)) {
+		int i;
+		i = find_last_bit(cur_fdt->open_fds, cur_fdt->max_fds);
+		i = fdtable_align(i);
+		if (i == cur_fdt->max_fds) {
+			__free_fdtable(new_fdt);
+			return 1;
+		}
+	}
+	BUG_ON((nr < cur_fdt->max_fds) != shrink);
 	copy_fdtable(new_fdt, cur_fdt, shrink);
 	rcu_assign_pointer(files->fdt, new_fdt);
 	if (cur_fdt != &files->fdtab)
@@ -208,7 +224,7 @@ static int expand_files(struct files_struct *files, unsigned int nr, bool shrink
 	fdt = files_fdtable(files);
 
 	/* Do we need to expand? */
-	if (nr < fdt->max_fds)
+	if (nr < fdt->max_fds && !shrink)
 		return expanded;
 
 	/* Can we expand? */
@@ -223,6 +239,15 @@ static int expand_files(struct files_struct *files, unsigned int nr, bool shrink
 		goto repeat;
 	}
 
+	if (unlikely(shrink)) {
+		unsigned int i;
+		i = find_last_bit(fdt->open_fds, fdt->max_fds);
+		nr = i;
+		i = fdtable_align(i);
+		if (i >= fdt->max_fds)
+			return expanded;
+	}
+
 	/* All good, so we try */
 	files->resize_in_progress = true;
 	expanded = expand_fdtable(files, nr, shrink);
@@ -337,7 +362,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
 		open_files = count_open_files(old_fdt);
 	}
 
-	copy_fd_bitmaps(new_fdt, old_fdt, open_files);
+	copy_fd_bitmaps(new_fdt, old_fdt, open_files, false);
 
 	old_fds = old_fdt->fd;
 	new_fds = new_fdt->fd;
@@ -643,6 +668,14 @@ int __close_fd(struct files_struct *files, unsigned fd)
 		goto out_unlock;
 	rcu_assign_pointer(fdt->fd[fd], NULL);
 	__put_unused_fd(files, fd);
+
+	/* Try to shrink fdt and to free memory */
+	if (unlikely(fd * 2 >= fdt->max_fds &&
+		     fd > (1024 / sizeof(struct file *))) &&
+		     get_exec_env() != get_ve0() &&
+		     get_exec_env()->is_pseudosuper)
+		expand_files(files, fd, true);
+
 	spin_unlock(&files->file_lock);
 	return filp_close(file, files);
 
-- 
1.8.3.1



More information about the Devel mailing list