[Devel] [PATCH vz9 1/2] prctl: add option to manage memory allocation scopes

Alexander Atanasov alexander.atanasov at virtuozzo.com
Tue Apr 4 20:56:57 MSK 2023


Currently there is no way to hint the kernel to avoid triggering
page reclaim. Such a hint is useful for networked file systems,
which can deadlock in the synchronous reclaim path, and for reducing
the jitter that synchronous reclaim can induce when streaming.

To aid userspace, add an interface to manage the PF_MEMALLOC, PF_MEMALLOC_NOIO,
PF_MEMALLOC_NOFS and PF_MEMALLOC_PIN flags via prctl.

The interface is defined via the PR_MEMALLOC_FLAGS option and its respective
sub-operations PR_MEMALLOC_GET_FLAGS, PR_MEMALLOC_SET_FLAGS and
PR_MEMALLOC_CLEAR_FLAGS. The flag values used are those defined in the kernel
header include/linux/sched.h.

https://jira.sw.ru/browse/PSBM-141577
Signed-off-by: Alexander Atanasov <alexander.atanasov at virtuozzo.com>
---
 include/uapi/linux/prctl.h |  6 ++++++
 kernel/sys.c               | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+)

diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 4baf1c5b0be7..409bba71a92b 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -277,4 +277,10 @@ struct prctl_task_ct_fields {
 	__s64 start_boottime;
 };
 
+/* Get, set or clear the calling task's memalloc flags (operation in arg2, flags in arg3) */
+#define PR_MEMALLOC_FLAGS			1001
+#define PR_MEMALLOC_GET_FLAGS			1
+#define PR_MEMALLOC_SET_FLAGS			2
+#define PR_MEMALLOC_CLEAR_FLAGS			3
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index 54d7bc990e8f..170f179fa4e5 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2313,6 +2313,36 @@ int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which,
 
 #define PR_IO_FLUSHER (PF_MEMALLOC_NOIO | PF_LOCAL_THROTTLE)
 
+#define MEMALLOC_FLAGS_MASK (PF_MEMALLOC | PF_MEMALLOC_NOFS | \
+                             PF_MEMALLOC_NOIO | PF_MEMALLOC_PIN) /* prctl-controllable bits */
+
+/* GET, SET (replace) or CLEAR current's memalloc flags; returns them or -errno */
+static int prctl_memalloc_flags(int opt, unsigned long flags)
+{
+#ifdef CONFIG_VE
+	if (!ve_is_super(get_exec_env()))
+		return -ENOSYS;
+#endif
+	switch (opt) {
+	case PR_MEMALLOC_GET_FLAGS:
+		break;
+	case PR_MEMALLOC_SET_FLAGS:
+		if (flags & ~MEMALLOC_FLAGS_MASK)
+			return -EINVAL;
+		current->flags = (current->flags & ~MEMALLOC_FLAGS_MASK) | flags;
+		break;
+	case PR_MEMALLOC_CLEAR_FLAGS:
+		if (flags & ~MEMALLOC_FLAGS_MASK)
+			return -EINVAL;
+		current->flags &= ~flags;
+		break;
+	default:
+		return -EINVAL;
+	}
+	/* Expose only the memalloc bits, never the rest of current->flags */
+	return current->flags & MEMALLOC_FLAGS_MASK;
+}
+
 SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 		unsigned long, arg4, unsigned long, arg5)
 {
@@ -2585,6 +2615,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 	case PR_SET_TASK_CT_FIELDS:
 		error = prctl_set_task_ct_fields(me, arg2, arg3);
 		break;
+	case PR_MEMALLOC_FLAGS:
+		error = prctl_memalloc_flags(arg2, arg3);
+		break;
 	default:
 		error = -EINVAL;
 		break;
-- 
2.39.1



More information about the Devel mailing list