[Devel] [PATCH vz7] /proc/self/memflags: allow userspace to set kernel memory allocation flags
Konstantin Khorenko
khorenko at virtuozzo.com
Mon Mar 20 13:06:10 MSK 2023
Nikolay, please review the patch.
--
Best regards,
Konstantin Khorenko,
Virtuozzo Linux Kernel Team
On 20.03.2023 10:20, Alexander Atanasov wrote:
> Currently there is no way for userspace to control whether page reclaim
> can be done. Such control can be useful for networked file systems, which
> can deadlock in the synchronous reclaim path, and for streaming, which can
> get unexpected jitter when a synchronous reclaim is done.
>
> To improve this, add an interface to set the PF_MEMALLOC and
> PF_MEMALLOC_NOIO flags on the current process.
>
> Reading from /proc/self/memalloc_flags returns the current flags as a
> decimal number.
> Writing to /proc/self/memalloc_flags sets the flags.
> Flag values used are defined in the kernel header include/linux/sched.h.
>
> https://jira.sw.ru/browse/PSBM-141577
> Signed-off-by: Alexander Atanasov <alexander.atanasov at virtuozzo.com>
> ---
> fs/proc/base.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 77 insertions(+)
>
> diff --git a/fs/proc/base.c b/fs/proc/base.c
> index eafb5574c543..c776dd8b3f33 100644
> --- a/fs/proc/base.c
> +++ b/fs/proc/base.c
> @@ -1425,6 +1425,81 @@ static const struct file_operations proc_oom_score_adj_operations = {
> .llseek = default_llseek,
> };
>
> +#define MEMALLOC_FLAGS_MASK (PF_MEMALLOC | PF_MEMALLOC_NOIO)
> +
> +static ssize_t memalloc_flags_read(struct file *file, char __user *buf,
> + size_t count, loff_t *ppos)
> +{
> + struct task_struct *task = get_proc_task(file_inode(file));
> + char buffer[PROC_NUMBUF];
> + unsigned int pflags;
> + size_t len;
> +
> + if (!task)
> + return -ESRCH;
> +
> + pflags = READ_ONCE(task->flags) & MEMALLOC_FLAGS_MASK;
> + put_task_struct(task);
> + len = snprintf(buffer, sizeof(buffer), "%d\n", pflags);
> + return simple_read_from_buffer(buf, count, ppos, buffer, len);
> +}
> +static ssize_t memalloc_flags_write(struct file *file, const char __user *buf,
> + size_t count, loff_t *ppos)
> +{
> + struct task_struct *task = get_proc_task(file_inode(file));
> + char buffer[PROC_NUMBUF];
> + int memalloc_flags;
> + int err = -ESRCH;
> + unsigned int pflags;
> +
> + if (!task)
> + goto out;
> +
> + if (get_exec_env() != get_ve0()) {
> + err = -EPERM;
> + goto out;
> + }
> +
> + /*
> + * Potential issue here if task != current
> + * concurrent setting of flags need R/W_ONCE
> + * but flags are expected to change only from current
> + */
> + if (task != current) {
> + err = -EINVAL;
> + goto out;
> + }
> +
> + memset(buffer, 0, sizeof(buffer));
> + if (count > sizeof(buffer) - 1)
> + count = sizeof(buffer) - 1;
> + if (copy_from_user(buffer, buf, count)) {
> + err = -EFAULT;
> + goto out;
> + }
> +
> + err = kstrtoint(strstrip(buffer), 0, &memalloc_flags);
> + if (err)
> + goto out;
> + if (memalloc_flags & ~MEMALLOC_FLAGS_MASK) {
> + err = -EINVAL;
> + goto out;
> + }
> +
> + pflags = READ_ONCE(task->flags) & ~MEMALLOC_FLAGS_MASK;
> + WRITE_ONCE(task->flags, pflags | memalloc_flags);
> +out:
> + if (task)
> + put_task_struct(task);
> + return err < 0 ? err : count;
> +}
> +
> +static const struct file_operations proc_memalloc_flags_operations = {
> + .read = memalloc_flags_read,
> + .write = memalloc_flags_write,
> + .llseek = default_llseek,
> +};
> +
> #ifdef CONFIG_AUDITSYSCALL
> #define TMPBUFLEN 21
> static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
> @@ -3132,6 +3207,7 @@ static const struct pid_entry tgid_base_stuff[] = {
> INF("oom_score", S_IRUGO, proc_oom_score),
> REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
> REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
> + REG("memalloc_flags", S_IRUGO|S_IWUSR, proc_memalloc_flags_operations),
> #ifdef CONFIG_AUDITSYSCALL
> REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
> REG("sessionid", S_IRUGO, proc_sessionid_operations),
> @@ -3499,6 +3575,7 @@ static const struct pid_entry tid_base_stuff[] = {
> INF("oom_score", S_IRUGO, proc_oom_score),
> REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
> REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
> + REG("memalloc_flags", S_IRUGO|S_IWUSR, proc_memalloc_flags_operations),
> #ifdef CONFIG_AUDITSYSCALL
> REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
> REG("sessionid", S_IRUGO, proc_sessionid_operations),
More information about the Devel
mailing list