[Devel] [PATCH rhel7] procfs: always expose /proc/<pid>/map_files/ and make it readable

Konstantin Khorenko khorenko at virtuozzo.com
Fri May 20 07:19:27 PDT 2016


This patch is a step for CRIU to work under non-root user.
The idea is good, but there is a long-long way ahead and it even can finally possible only after next "Virtuozzo 8" kernel appearance.

=> let's not include preparation patches until feature of running CRIU under non-root work on mainstream kernel as least.

Surely this is valid until such patches are not required due to some other reasons as well.

--
Best regards,

Konstantin Khorenko,
Virtuozzo Linux Kernel Team

On 05/16/2016 10:42 PM, Andrey Vagin wrote:
> Acked-by: Andrey Vagin <avagin at virtuozzo.com>
>
> On Mon, May 16, 2016 at 11:28:51AM +0300, Cyrill Gorcunov wrote:
>> This is a backport of commit
>>
>> ML: bdb4d100afe9818aebd1d98ced575c5ef143456c
>>
>> From: Calvin Owens <calvinowens at fb.com>
>>
>> Currently, /proc/<pid>/map_files/ is restricted to CAP_SYS_ADMIN, and is
>> only exposed if CONFIG_CHECKPOINT_RESTORE is set.
>>
>> Each mapped file region gets a symlink in /proc/<pid>/map_files/
>> corresponding to the virtual address range at which it is mapped.  The
>> symlinks work like the symlinks in /proc/<pid>/fd/, so you can follow them
>> to the backing file even if that backing file has been unlinked.
>>
>> Currently, files which are mapped, unlinked, and closed are impossible to
>> stat() from userspace.  Exposing /proc/<pid>/map_files/ closes this
>> functionality "hole".
>>
>> Not being able to stat() such files makes noticing and explicitly
>> accounting for the space they use on the filesystem impossible.  You can
>> work around this by summing up the space used by every file in the
>> filesystem and subtracting that total from what statfs() tells you, but
>> that obviously isn't great, and it becomes unworkable once your filesystem
>> becomes large enough.
>>
>> This patch moves map_files/ out from behind CONFIG_CHECKPOINT_RESTORE, and
>> adjusts the permissions enforced on it as follows:
>>
>> * proc_map_files_lookup()
>> * proc_map_files_readdir()
>> * map_files_d_revalidate()
>>
>> 	Remove the CAP_SYS_ADMIN restriction, leaving only the current
>> 	restriction requiring PTRACE_MODE_READ. The information made
>> 	available to userspace by these three functions is already
>> 	available in /proc/PID/maps with MODE_READ, so I don't see any
>> 	reason to limit them any further (see below for more detail).
>>
>> * proc_map_files_follow_link()
>>
>> 	This stub has been added, and requires that the user have
>> 	CAP_SYS_ADMIN in order to follow the links in map_files/,
>> 	since there was concern on LKML both about the potential for
>> 	bypassing permissions on ancestor directories in the path to
>> 	files pointed to, and about what happens with more exotic
>> 	memory mappings created by some drivers (ie dma-buf).
>>
>> In older versions of this patch, I changed every permission check in
>> the four functions above to enforce MODE_ATTACH instead of MODE_READ.
>> This was an oversight on my part, and after revisiting the discussion
>> it seems that nobody was concerned about anything outside of what is
>> made possible by ->follow_link(). So in this version, I've left the
>> checks for PTRACE_MODE_READ as-is.
>>
>> [akpm at linux-foundation.org: catch up with concurrent proc_pid_follow_link() changes]
>> Signed-off-by: Calvin Owens <calvinowens at fb.com>
>> Reviewed-by: Kees Cook <keescook at chromium.org>
>> Cc: Andy Lutomirski <luto at amacapital.net>
>> Cc: Cyrill Gorcunov <gorcunov at openvz.org>
>> Cc: Joe Perches <joe at perches.com>
>> Cc: Kirill A. Shutemov <kirill at shutemov.name>
>> Signed-off-by: Andrew Morton <akpm at linux-foundation.org>
>> Signed-off-by: Linus Torvalds <torvalds at linux-foundation.org>
>> Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
>> ---
>>
>> Kostya, please wait for Ack from Andrew. The patch on its own is not
>> bound to some of the bug we're working on now but usefull in general
>> and probably will help us with renaming of memfd restored memory
>> in criu (we use memfd to be able to restore anonymous shared memory
>> in userns case but memfd mangles the backend name, we didn't find
>> any problem with it yet, but been talking to Andrew and he agreed
>> that we might need to do something with this problem, and this patch
>> is first step).
>>
>>   fs/proc/base.c |   44 +++++++++++++++++++++++---------------------
>>   1 file changed, 23 insertions(+), 21 deletions(-)
>>
>> Index: linux-pcs7.git/fs/proc/base.c
>> ===================================================================
>> --- linux-pcs7.git.orig/fs/proc/base.c
>> +++ linux-pcs7.git/fs/proc/base.c
>> @@ -1925,8 +1925,6 @@ end_instantiate:
>>   	return filldir(dirent, name, len, filp->f_pos, ino, type);
>>   }
>>
>> -#ifdef CONFIG_CHECKPOINT_RESTORE
>> -
>>   /*
>>    * dname_to_vma_addr - maps a dentry name into two unsigned longs
>>    * which represent vma start and end addresses.
>> @@ -1953,11 +1951,6 @@ static int map_files_d_revalidate(struct
>>   	if (flags & LOOKUP_RCU)
>>   		return -ECHILD;
>>
>> -	if (!capable(CAP_SYS_ADMIN)) {
>> -		status = -EPERM;
>> -		goto out_notask;
>> -	}
>> -
>>   	inode = dentry->d_inode;
>>   	task = get_proc_task(inode);
>>   	if (!task)
>> @@ -2048,6 +2041,28 @@ struct map_files_info {
>>   	unsigned char	name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */
>>   };
>>
>> +/*
>> + * Only allow CAP_SYS_ADMIN to follow the links, due to concerns about how the
>> + * symlinks may be used to bypass permissions on ancestor directories in the
>> + * path to the file in question.
>> + */
>> +static void *proc_map_files_follow_link(struct dentry *dentry, struct nameidata *nd)
>> +{
>> +	if (!capable(CAP_SYS_ADMIN))
>> +		return ERR_PTR(-EPERM);
>> +
>> +	return proc_pid_follow_link(dentry, nd);
>> +}
>> +
>> +/*
>> + * Identical to proc_pid_link_inode_operations except for follow_link()
>> + */
>> +static const struct inode_operations proc_map_files_link_inode_operations = {
>> +	.readlink	= proc_pid_readlink,
>> +	.follow_link	= proc_map_files_follow_link,
>> +	.setattr	= proc_setattr,
>> +};
>> +
>>   static struct dentry *
>>   proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
>>   			   struct task_struct *task, const void *ptr)
>> @@ -2063,7 +2078,7 @@ proc_map_files_instantiate(struct inode
>>   	ei = PROC_I(inode);
>>   	ei->op.proc_get_link = proc_map_files_get_link;
>>
>> -	inode->i_op = &proc_pid_link_inode_operations;
>> +	inode->i_op = &proc_map_files_link_inode_operations;
>>   	inode->i_size = 64;
>>   	inode->i_mode = S_IFLNK;
>>
>> @@ -2087,10 +2102,6 @@ static struct dentry *proc_map_files_loo
>>   	struct dentry *result;
>>   	struct mm_struct *mm;
>>
>> -	result = ERR_PTR(-EPERM);
>> -	if (!capable(CAP_SYS_ADMIN))
>> -		goto out;
>> -
>>   	result = ERR_PTR(-ENOENT);
>>   	task = get_proc_task(dir);
>>   	if (!task)
>> @@ -2143,10 +2154,6 @@ proc_map_files_readdir(struct file *filp
>>   	ino_t ino;
>>   	int ret;
>>
>> -	ret = -EPERM;
>> -	if (!capable(CAP_SYS_ADMIN))
>> -		goto out;
>> -
>>   	ret = -ENOENT;
>>   	task = get_proc_task(inode);
>>   	if (!task)
>> @@ -2353,9 +2360,6 @@ static const struct file_operations proc
>>   	.release	= seq_release_private,
>>   };
>>
>> -
>> -#endif /* CONFIG_CHECKPOINT_RESTORE */
>> -
>>   #ifdef CONFIG_VE
>>   static long proc_aio_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
>>   {
>> @@ -2911,9 +2915,7 @@ static const struct inode_operations pro
>>   static const struct pid_entry tgid_base_stuff[] = {
>>   	DIR("task",       S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
>>   	DIR("fd",         S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
>> -#ifdef CONFIG_CHECKPOINT_RESTORE
>>   	DIR("map_files",  S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations),
>> -#endif
>>   	DIR("fdinfo",     S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
>>   	DIR("ns",	  S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
>>   #ifdef CONFIG_NET
> .
>


More information about the Devel mailing list