[Devel] [PATCH rhel7] procfs: always expose /proc/<pid>/map_files/ and make it readable

Cyrill Gorcunov gorcunov at virtuozzo.com
Mon May 16 01:28:51 PDT 2016


This is a backport of commit

ML: bdb4d100afe9818aebd1d98ced575c5ef143456c

From: Calvin Owens <calvinowens at fb.com>

Currently, /proc/<pid>/map_files/ is restricted to CAP_SYS_ADMIN, and is
only exposed if CONFIG_CHECKPOINT_RESTORE is set.

Each mapped file region gets a symlink in /proc/<pid>/map_files/
corresponding to the virtual address range at which it is mapped.  The
symlinks work like the symlinks in /proc/<pid>/fd/, so you can follow them
to the backing file even if that backing file has been unlinked.

Currently, files which are mapped, unlinked, and closed are impossible to
stat() from userspace.  Exposing /proc/<pid>/map_files/ closes this
functionality "hole".

Not being able to stat() such files makes noticing and explicitly
accounting for the space they use on the filesystem impossible.  You can
work around this by summing up the space used by every file in the
filesystem and subtracting that total from what statfs() tells you, but
that obviously isn't great, and it becomes unworkable once your filesystem
becomes large enough.

This patch moves map_files/ out from behind CONFIG_CHECKPOINT_RESTORE, and
adjusts the permissions enforced on it as follows:

* proc_map_files_lookup()
* proc_map_files_readdir()
* map_files_d_revalidate()

	Remove the CAP_SYS_ADMIN restriction, leaving only the current
	restriction requiring PTRACE_MODE_READ. The information made
	available to userspace by these three functions is already
	available in /proc/PID/maps with MODE_READ, so I don't see any
	reason to limit them any further (see below for more detail).

* proc_map_files_follow_link()

	This stub has been added, and requires that the user have
	CAP_SYS_ADMIN in order to follow the links in map_files/,
	since there was concern on LKML both about the potential for
	bypassing permissions on ancestor directories in the path to
	files pointed to, and about what happens with more exotic
	memory mappings created by some drivers (ie dma-buf).

In older versions of this patch, I changed every permission check in
the four functions above to enforce MODE_ATTACH instead of MODE_READ.
This was an oversight on my part, and after revisiting the discussion
it seems that nobody was concerned about anything outside of what is
made possible by ->follow_link(). So in this version, I've left the
checks for PTRACE_MODE_READ as-is.

[akpm at linux-foundation.org: catch up with concurrent proc_pid_follow_link() changes]
Signed-off-by: Calvin Owens <calvinowens at fb.com>
Reviewed-by: Kees Cook <keescook at chromium.org>
Cc: Andy Lutomirski <luto at amacapital.net>
Cc: Cyrill Gorcunov <gorcunov at openvz.org>
Cc: Joe Perches <joe at perches.com>
Cc: Kirill A. Shutemov <kirill at shutemov.name>
Signed-off-by: Andrew Morton <akpm at linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds at linux-foundation.org>
Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---

Kostya, please wait for Ack from Andrew. The patch on its own is not
bound to some of the bug we're working on now but usefull in general
and probably will help us with renaming of memfd restored memory
in criu (we use memfd to be able to restore anonymous shared memory
in userns case but memfd mangles the backend name, we didn't find
any problem with it yet, but been talking to Andrew and he agreed
that we might need to do something with this problem, and this patch
is first step).

 fs/proc/base.c |   44 +++++++++++++++++++++++---------------------
 1 file changed, 23 insertions(+), 21 deletions(-)

Index: linux-pcs7.git/fs/proc/base.c
===================================================================
--- linux-pcs7.git.orig/fs/proc/base.c
+++ linux-pcs7.git/fs/proc/base.c
@@ -1925,8 +1925,6 @@ end_instantiate:
 	return filldir(dirent, name, len, filp->f_pos, ino, type);
 }
 
-#ifdef CONFIG_CHECKPOINT_RESTORE
-
 /*
  * dname_to_vma_addr - maps a dentry name into two unsigned longs
  * which represent vma start and end addresses.
@@ -1953,11 +1951,6 @@ static int map_files_d_revalidate(struct
 	if (flags & LOOKUP_RCU)
 		return -ECHILD;
 
-	if (!capable(CAP_SYS_ADMIN)) {
-		status = -EPERM;
-		goto out_notask;
-	}
-
 	inode = dentry->d_inode;
 	task = get_proc_task(inode);
 	if (!task)
@@ -2048,6 +2041,28 @@ struct map_files_info {
 	unsigned char	name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */
 };
 
+/*
+ * Only allow CAP_SYS_ADMIN to follow the links, due to concerns about how the
+ * symlinks may be used to bypass permissions on ancestor directories in the
+ * path to the file in question.
+ */
+static void *proc_map_files_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+	if (!capable(CAP_SYS_ADMIN))
+		return ERR_PTR(-EPERM);
+
+	return proc_pid_follow_link(dentry, nd);
+}
+
+/*
+ * Identical to proc_pid_link_inode_operations except for follow_link()
+ */
+static const struct inode_operations proc_map_files_link_inode_operations = {
+	.readlink	= proc_pid_readlink,
+	.follow_link	= proc_map_files_follow_link,
+	.setattr	= proc_setattr,
+};
+
 static struct dentry *
 proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
 			   struct task_struct *task, const void *ptr)
@@ -2063,7 +2078,7 @@ proc_map_files_instantiate(struct inode
 	ei = PROC_I(inode);
 	ei->op.proc_get_link = proc_map_files_get_link;
 
-	inode->i_op = &proc_pid_link_inode_operations;
+	inode->i_op = &proc_map_files_link_inode_operations;
 	inode->i_size = 64;
 	inode->i_mode = S_IFLNK;
 
@@ -2087,10 +2102,6 @@ static struct dentry *proc_map_files_loo
 	struct dentry *result;
 	struct mm_struct *mm;
 
-	result = ERR_PTR(-EPERM);
-	if (!capable(CAP_SYS_ADMIN))
-		goto out;
-
 	result = ERR_PTR(-ENOENT);
 	task = get_proc_task(dir);
 	if (!task)
@@ -2143,10 +2154,6 @@ proc_map_files_readdir(struct file *filp
 	ino_t ino;
 	int ret;
 
-	ret = -EPERM;
-	if (!capable(CAP_SYS_ADMIN))
-		goto out;
-
 	ret = -ENOENT;
 	task = get_proc_task(inode);
 	if (!task)
@@ -2353,9 +2360,6 @@ static const struct file_operations proc
 	.release	= seq_release_private,
 };
 
-
-#endif /* CONFIG_CHECKPOINT_RESTORE */
-
 #ifdef CONFIG_VE
 static long proc_aio_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
@@ -2911,9 +2915,7 @@ static const struct inode_operations pro
 static const struct pid_entry tgid_base_stuff[] = {
 	DIR("task",       S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
 	DIR("fd",         S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
-#ifdef CONFIG_CHECKPOINT_RESTORE
 	DIR("map_files",  S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations),
-#endif
 	DIR("fdinfo",     S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
 	DIR("ns",	  S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
 #ifdef CONFIG_NET


More information about the Devel mailing list