[Devel] [RFC][PATCH 2/4] Provide a new procfs interface to set next upid nr(s)

Nadia.Derbey at bull.net Nadia.Derbey at bull.net
Mon Mar 10 06:50:56 PDT 2008


[PATCH 02/04]

This patch proposes the procfs facilities needed to feed the id(s) for the
next task to be forked.

Say n is the number of pids to be provided through procfs:

if an
echo "n X0 X1 ... X<n-1>" > /proc/self/next_pids
is issued, the next task to be forked will have its upid nrs set as follows
(say it is forked in a pid ns of level L):

level         upid nr
L ----------> X0
..
L - i ------> Xi
..
L - n + 1 --> X<n-1>

Then, for levels L-n down to level 0, the pids are left to the kernel's
choice.

Signed-off-by: Nadia Derbey <Nadia.Derbey at bull.net>

---
 fs/proc/base.c         |   74 ++++++++++++++++++++++++
 include/linux/sysids.h |   36 +++++++++++-
 kernel/set_nextid.c    |  147 ++++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 254 insertions(+), 3 deletions(-)

Index: linux-2.6.25-rc3-mm1/include/linux/sysids.h
===================================================================
--- linux-2.6.25-rc3-mm1.orig/include/linux/sysids.h	2008-03-10 11:39:10.000000000 +0100
+++ linux-2.6.25-rc3-mm1/include/linux/sysids.h	2008-03-10 12:49:27.000000000 +0100
@@ -9,12 +9,46 @@
 #define _LINUX_SYSIDS_H
 
 #define SYS_ID_IPC 1
+#define SYS_ID_PID 2
+
+#define NPIDS_SMALL       32
+#define NPIDS_PER_BLOCK   ((unsigned int)(PAGE_SIZE / sizeof(pid_t)))
+
+/* access the pids "array" with this macro */
+#define PID_AT(pi, i)	\
+	((pi)->blocks[(i) / NPIDS_PER_BLOCK][(i) % NPIDS_PER_BLOCK])
+
+
+/*
+ * The next process to be created is associated with a set of upid nrs: one
+ * for each pid namespace level that process belongs to. upid nrs from level 0
+ * up to level <npids - 1> will be automatically allocated; the upid nr for
+ * level <npids + i> will be set to PID_AT(pids, i), i.e. blocks[0][0] for i=0.
+ * NOTE(review): the patch description assigns X0 to the deepest level L and
+ * leaves levels L-n..0 to the kernel - confirm which ordering is intended.
+ */
+struct pid_list {
+	int npids;			/* number of pids held in the list */
+	pid_t small_block[NPIDS_SMALL];	/* inline storage for short lists */
+	int nblocks;			/* number of entries in blocks[] */
+	pid_t *blocks[0];		/* small_block or page-sized arrays */
+};
+
 
 struct sys_id {
 	int flag;	/* which id should be set */
-	int ipc;
+	struct {
+		int ipc;
+		struct pid_list *pids;
+	} ids;
 };
 
+#define ipc_id ids.ipc
+#define pid_ids ids.pids
+
+extern void pids_free(struct pid_list *);
 extern int ipc_set_nextid(struct task_struct *, int id);
+extern ssize_t pid_get_nextids(struct task_struct *, char *);
+extern ssize_t pid_set_nextids(struct task_struct *, char *);
 
 #endif /* _LINUX_SYSIDS_H */
Index: linux-2.6.25-rc3-mm1/fs/proc/base.c
===================================================================
--- linux-2.6.25-rc3-mm1.orig/fs/proc/base.c	2008-03-10 11:22:20.000000000 +0100
+++ linux-2.6.25-rc3-mm1/fs/proc/base.c	2008-03-10 12:27:34.000000000 +0100
@@ -1095,7 +1095,7 @@ static ssize_t next_ipcid_read(struct fi
 		return -ESRCH;
 
 	sid = task->next_id;
-	next_ipcid = (sid) ? ((sid->flag & SYS_ID_IPC) ? sid->ipc : -1)
+	next_ipcid = (sid) ? ((sid->flag & SYS_ID_IPC) ? sid->ipc_id : -1)
 			: -1;
 
 	put_task_struct(task);
@@ -1144,6 +1144,76 @@ static const struct file_operations proc
 };
 
 
+/* read() handler for next_pids: report the task's pending upid nrs */
+static ssize_t next_pids_read(struct file *file, char __user *buf,
+				size_t count, loff_t *ppos)
+{
+	struct task_struct *tsk = get_proc_task(file->f_path.dentry->d_inode);
+	ssize_t ret = -ENOMEM;
+	char *kbuf;
+
+	if (!tsk)
+		return -ESRCH;
+
+	/* never hand back more than one proc block per call */
+	if (count > PROC_BLOCK_SIZE)
+		count = PROC_BLOCK_SIZE;
+
+	kbuf = (char *)__get_free_page(GFP_TEMPORARY);
+	if (kbuf) {
+		/* format into the scratch page, then copy out the request */
+		ret = pid_get_nextids(tsk, kbuf);
+		if (ret >= 0)
+			ret = simple_read_from_buffer(buf, count, ppos,
+							kbuf, ret);
+		free_page((unsigned long)kbuf);
+	}
+
+	/* drop the reference taken by get_proc_task() */
+	put_task_struct(tsk);
+	return ret;
+}
+
+/* write() handler for next_pids; only the task itself may write its file */
+static ssize_t next_pids_write(struct file *file, const char __user *buf,
+				size_t count, loff_t *ppos)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+	ssize_t ret;
+	char *kbuf;
+
+	if (pid_task(proc_pid(inode), PIDTYPE_PID) != current)
+		return -EPERM;
+
+	/* No partial writes. */
+	if (*ppos != 0)
+		return -EINVAL;
+
+	/* keep one byte of the page for the terminating NUL */
+	if (count >= PAGE_SIZE)
+		count = PAGE_SIZE - 1;
+	kbuf = (char *)__get_free_page(GFP_TEMPORARY);
+	if (!kbuf)
+		return -ENOMEM;
+
+	if (copy_from_user(kbuf, buf, count)) {
+		ret = -EFAULT;
+	} else {
+		kbuf[count] = '\0';
+		ret = pid_set_nextids(current, kbuf);
+		if (!ret)
+			ret = count;
+	}
+	free_page((unsigned long)kbuf);
+	return ret;
+}
+
+static const struct file_operations proc_next_pids_operations = {
+	.read		= next_pids_read,	/* report the pending upid nrs */
+	.write		= next_pids_write,	/* parse and store a new pid list */
+};
+
+
 #ifdef CONFIG_SCHED_DEBUG
 /*
  * Print out various scheduling related per-task fields:
@@ -2456,6 +2526,7 @@ static const struct pid_entry tgid_base_
 	INF("io",	S_IRUGO, pid_io_accounting),
 #endif
 	REG("next_ipcid", S_IRUGO|S_IWUSR, next_ipcid),
+	REG("next_pids",  S_IRUGO|S_IWUSR, next_pids),
 };
 
 static int proc_tgid_base_readdir(struct file * filp,
@@ -2782,6 +2853,7 @@ static const struct pid_entry tid_base_s
 	REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject),
 #endif
 	REG("next_ipcid", S_IRUGO|S_IWUSR, next_ipcid),
+	REG("next_pids",  S_IRUGO|S_IWUSR, next_pids),
 };
 
 static int proc_tid_base_readdir(struct file * filp,
Index: linux-2.6.25-rc3-mm1/kernel/set_nextid.c
===================================================================
--- linux-2.6.25-rc3-mm1.orig/kernel/set_nextid.c	2008-03-10 10:09:47.000000000 +0100
+++ linux-2.6.25-rc3-mm1/kernel/set_nextid.c	2008-03-10 12:47:30.000000000 +0100
@@ -8,8 +8,59 @@
  */
 
 #include <linux/sched.h>
+#include <linux/string.h>
 
 
+extern int pid_max;
+
+
+
+/* Allocate a pid_list for @idsetsize pids: up to NPIDS_SMALL fit in the inline
+ * small_block, larger lists get one page per NPIDS_PER_BLOCK pids.
+ * Returns NULL on allocation failure. */
+static struct pid_list *pids_alloc(int idsetsize)
+{
+	int nr = (idsetsize + NPIDS_PER_BLOCK - 1) / NPIDS_PER_BLOCK;
+	struct pid_list *list;
+	int n;
+
+	BUG_ON(nr < 1);
+
+	list = kmalloc(sizeof(*list) + nr * sizeof(pid_t *), GFP_KERNEL);
+	if (!list)
+		return NULL;
+	list->npids = idsetsize;
+	list->nblocks = nr;
+
+	if (idsetsize <= NPIDS_SMALL) {
+		list->blocks[0] = list->small_block;
+		return list;
+	}
+
+	for (n = 0; n < nr; n++) {
+		list->blocks[n] = (void *)__get_free_page(GFP_KERNEL);
+		if (!list->blocks[n])
+			goto undo_pages;
+	}
+	return list;
+
+undo_pages:
+	while (--n >= 0)
+		free_page((unsigned long)list->blocks[n]);
+	kfree(list);
+	return NULL;
+}
+
+/* Free @pids and, unless it uses the inline small_block, its page blocks. */
+void pids_free(struct pid_list *pids)
+{
+	int n = 0;
+
+	if (pids->blocks[0] != pids->small_block)
+		while (n < pids->nblocks)
+			free_page((unsigned long)pids->blocks[n++]);
+	kfree(pids);
+}
 
 int ipc_set_nextid(struct task_struct *task, int id)
 {
@@ -23,9 +74,103 @@ int ipc_set_nextid(struct task_struct *t
 		task->next_id = sid;
 	}
 
-	sid->ipc = id;
+	sid->ipc_id = id;
 	sid->flag |= SYS_ID_IPC;
 
 	return 0;
 }
 
+/*
+ * Format @task's pending upid nrs as "npids pid0 ... pid<npids-1>", or "-1"
+ * if none are set, into @buffer, which must be PAGE_SIZE bytes long. Output
+ * is truncated rather than written past the page. Returns the string length.
+ */
+ssize_t pid_get_nextids(struct task_struct *task, char *buffer)
+{
+	struct sys_id *sid = task->next_id;
+	struct pid_list *pids;
+	ssize_t count;
+	int i;
+
+	if (!sid || !(sid->flag & SYS_ID_PID))
+		return sprintf(buffer, "-1");
+
+	pids = sid->pid_ids;
+	count = scnprintf(buffer, PAGE_SIZE, "%d", pids->npids);
+
+	/* scnprintf() returns what it stored, so count stays < PAGE_SIZE */
+	for (i = 0; i < pids->npids; i++)
+		count += scnprintf(buffer + count, PAGE_SIZE - count, " %d",
+				   PID_AT(pids, i));
+	return count;
+}
+
+/*
+ * Parse a line written to /proc/self/next_pids, formatted "npids pid0 ... pidx"
+ * with x = npids - 1, and store the resulting pid list in @task->next_id,
+ * releasing any list stored earlier. @buffer is split in place by strsep().
+ * Returns 0 on success, -EINVAL on a malformed line, -ENOMEM on alloc failure.
+ */
+ssize_t pid_set_nextids(struct task_struct *task, char *buffer)
+{
+	char *token, *end, *out = buffer;
+	struct sys_id *sid;
+	struct pid_list *pids;
+	int npids, i;
+	ssize_t rc;
+
+	rc = -EINVAL;
+	token = strsep(&out, " ");
+	if (!token)
+		goto out;
+
+	npids = simple_strtol(token, &end, 0);
+	if (*end)
+		goto out;
+
+	if (npids <= 0 || npids > pid_max)
+		goto out;
+
+	rc = -ENOMEM;
+	pids = pids_alloc(npids);
+	if (!pids)
+		goto out;
+
+	rc = -EINVAL;
+	i = 0;
+	while ((token = strsep(&out, " ")) != NULL && i < npids) {
+		pid_t pid;
+
+		if (!*token)
+			goto out_free;
+		pid = simple_strtol(token, &end, 0);
+		if ((*end && *end != '\n') || end == token || pid < 0)
+			goto out_free;
+		PID_AT(pids, i) = pid;
+		i++;
+	}
+
+	if (i != npids)
+		/* Not enough pids compared to npids */
+		goto out_free;
+
+	sid = task->next_id;
+	if (!sid) {
+		rc = -ENOMEM;
+		sid = kzalloc(sizeof(*sid), GFP_KERNEL);
+		if (!sid)
+			goto out_free;
+		task->next_id = sid;
+	} else if (sid->flag & SYS_ID_PID)
+		/* pids_free(), not kfree(): large lists own whole pages */
+		pids_free(sid->pid_ids);
+
+	rc = 0;
+	sid->pid_ids = pids;
+	sid->flag |= SYS_ID_PID;
+out:
+	return rc;
+out_free:
+	pids_free(pids);
+	return rc;
+}

--
_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers




More information about the Devel mailing list