[Devel] [PATCH RHEL9 COMMIT] prctl: Add option to manage memory allocation scopes
Konstantin Khorenko
khorenko at virtuozzo.com
Thu Apr 20 16:55:29 MSK 2023
The commit is pushed to "branch-rh9-5.14.0-162.18.1.vz9.19.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh9-5.14.0-162.18.1.vz9.19.6
------>
commit fd6b3e293f84f3b4a3112ca835e34f40d955d5ee
Author: Alexander Atanasov <alexander.atanasov at virtuozzo.com>
Date: Wed Apr 12 20:08:54 2023 +0300
prctl: Add option to manage memory allocation scopes
Currently there is no way to hint the kernel to avoid triggering
page reclaim. Such a hint is useful for networked file systems,
which can deadlock in the synchronous reclaim path, and for reducing
the streaming jitter that synchronous reclaim can induce.
To aid userspace, add an interface to manage the PF_MEMALLOC, PF_MEMALLOC_NOIO,
PF_MEMALLOC_NOFS and PF_MEMALLOC_PIN flags via prctl.
The interface is defined via the PR_MEMALLOC_FLAGS option and its respective
operations PR_MEMALLOC_GET_FLAGS, PR_MEMALLOC_SET_FLAGS and PR_MEMALLOC_CLEAR_FLAGS.
The option and operation values are defined in the kernel header include/uapi/linux/prctl.h.
- PR_MEMALLOC_GET_FLAGS: returns flags within MEMALLOC_FLAGS_MASK mask
- PR_MEMALLOC_CLEAR_FLAGS: clears flags provided in the argument
(within MEMALLOC_FLAGS_MASK mask), leaving other flags (within
MEMALLOC_FLAGS_MASK mask, but not present in the provided arg) AS IS
- PR_MEMALLOC_SET_FLAGS: clears ALL flags within MEMALLOC_FLAGS_MASK
mask and sets only those flags provided by the prctl argument
The prctl is permitted for Host init userns only.
https://jira.sw.ru/browse/PSBM-141577
Signed-off-by: Alexander Atanasov <alexander.atanasov at virtuozzo.com>
Feature: vStorage
---
include/uapi/linux/prctl.h | 6 ++++++
kernel/sys.c | 36 ++++++++++++++++++++++++++++++++++++
2 files changed, 42 insertions(+)
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 4baf1c5b0be7..409bba71a92b 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -277,4 +277,10 @@ struct prctl_task_ct_fields {
__s64 start_boottime;
};
+/* Get, set or clear task memalloc flags: prctl(PR_MEMALLOC_FLAGS, op, flags) */
+#define PR_MEMALLOC_FLAGS 1001 /* prctl option; arg2 = op below, arg3 = flags */
+#define PR_MEMALLOC_GET_FLAGS 1 /* op: return the task's memalloc flags */
+#define PR_MEMALLOC_SET_FLAGS 2 /* op: replace all memalloc flags with arg3 */
+#define PR_MEMALLOC_CLEAR_FLAGS 3 /* op: clear only the memalloc flags in arg3 */
+
#endif /* _LINUX_PRCTL_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index 54d7bc990e8f..16895f921266 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2313,6 +2313,39 @@ int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which,
#define PR_IO_FLUSHER (PF_MEMALLOC_NOIO | PF_LOCAL_THROTTLE)
+#define MEMALLOC_FLAGS_MASK	(PF_MEMALLOC | PF_MEMALLOC_NOFS | \
+				 PF_MEMALLOC_NOIO | PF_MEMALLOC_PIN)
+
+/*
+ * prctl_memalloc_flags - handle prctl(PR_MEMALLOC_FLAGS, opt, flags)
+ * @opt:   PR_MEMALLOC_GET_FLAGS, PR_MEMALLOC_SET_FLAGS or
+ *         PR_MEMALLOC_CLEAR_FLAGS
+ * @flags: bits within MEMALLOC_FLAGS_MASK to operate on; unused for GET
+ *
+ * Only the MEMALLOC_FLAGS_MASK bits of current->flags are read or modified:
+ * SET replaces all of them with @flags, CLEAR drops just the bits in @flags.
+ *
+ * Return: the resulting flags within MEMALLOC_FLAGS_MASK on success;
+ * -ENOSYS when ve_is_super() rejects the caller's exec env (CONFIG_VE only),
+ * -EPERM without CAP_SYS_ADMIN, -EINVAL for an unknown @opt or for @flags
+ * bits outside MEMALLOC_FLAGS_MASK.
+ */
+static int prctl_memalloc_flags(int opt, unsigned long flags)
+{
+#ifdef CONFIG_VE
+	if (!ve_is_super(get_exec_env()))
+		return -ENOSYS;
+#endif
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	switch (opt) {
+	case PR_MEMALLOC_GET_FLAGS:
+		break;
+	case PR_MEMALLOC_SET_FLAGS:
+		if (flags & ~MEMALLOC_FLAGS_MASK)
+			return -EINVAL;
+		/* Replace the whole memalloc set, keep every other PF_* bit. */
+		current->flags = (current->flags & ~MEMALLOC_FLAGS_MASK) | flags;
+		break;
+	case PR_MEMALLOC_CLEAR_FLAGS:
+		if (flags & ~MEMALLOC_FLAGS_MASK)
+			return -EINVAL;
+		current->flags &= ~flags;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/*
+	 * Report only the memalloc bits, for every operation. Returning the
+	 * raw current->flags would expose unrelated task flags to userspace,
+	 * and a set top bit would turn this int result negative so that the
+	 * caller sees a bogus error.
+	 */
+	return current->flags & MEMALLOC_FLAGS_MASK;
+}
+
SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
unsigned long, arg4, unsigned long, arg5)
{
@@ -2585,6 +2618,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
case PR_SET_TASK_CT_FIELDS:
error = prctl_set_task_ct_fields(me, arg2, arg3);
break;
+ case PR_MEMALLOC_FLAGS:
+ error = prctl_memalloc_flags(arg2, arg3);
+ break;
default:
error = -EINVAL;
break;
More information about the Devel
mailing list