[Devel] [RFC][PATCH 3/3] Set si_pid to 0 for signals from ancestor namespace
Sukadev Bhattiprolu
sukadev at linux.vnet.ibm.com
Tue Nov 11 22:48:19 PST 2008
From: Sukadev Bhattiprolu <sukadev at linux.vnet.ibm.com>
Date: Tue, 11 Nov 2008 17:00:36 -0800
Subject: [PATCH 3/3] sig: Handle pid namespace crossing when sending signals.
Setting si_pid correctly in the context of pid namespaces is tricky.
Currently, with the special cases in do_notify_parent and
do_notify_parent_cldstop, we handle all of the day-to-day cases
properly except sending a signal to a task in a child pid namespace.
For that case we need to pretend to be the kernel and set si_pid to 0.
There are also a few theoretical cases where we can trigger sending a
signal from a task in one pid namespace to a task in another pid
namespace, with no necessary correlation between the two.
In those cases when the source pid namespace is a child of the
destination pid namespace we actually have a valid pid value
we can and should report to user space.
This patch modifies the code to handle the full general case for
setting si_pid. The code is a little longer but occurs only once,
making it somewhat easier to understand and verify that it is correct.
I add a struct pid sender parameter to __group_send_sig_info, as that is
the only function called with si_pid != task_tgid_vnr(current). So we can
correctly handle the sending of a signal to the parent of an arbitrary
task.
Changelog[v2]:
- Port to 2.6.28-rc3
- Add in_interrupt() check in set_sigqueue_pid() (for SIGIO)
Signed-off-by: Sukadev Bhattiprolu <sukadev at linux.vnet.ibm.com>
Signed-off-by: Eric W. Biederman <ebiederm at xmission.com>
---
drivers/char/tty_io.c | 4 +-
include/linux/signal.h | 3 +-
ipc/mqueue.c | 2 +-
kernel/posix-cpu-timers.c | 12 +++---
kernel/signal.c | 88 +++++++++++++++++++++++++++-----------------
5 files changed, 65 insertions(+), 44 deletions(-)
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 59f4721..51ab382 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -615,8 +615,8 @@ static void do_tty_hangup(struct work_struct *work)
spin_unlock_irq(&p->sighand->siglock);
continue;
}
- __group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p);
- __group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p);
+ __group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p, NULL);
+ __group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p, NULL);
put_pid(p->signal->tty_old_pgrp); /* A noop */
spin_lock_irqsave(&tty->ctrl_lock, flags);
if (tty->pgrp)
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 84f997f..a576de8 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -234,7 +234,8 @@ static inline int valid_signal(unsigned long sig)
extern int next_signal(struct sigpending *pending, sigset_t *mask);
extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p);
-extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *);
+extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *,
+ struct pid *sender);
extern long do_sigpending(void __user *, unsigned long);
extern int sigprocmask(int, sigset_t *, sigset_t *);
extern int show_unhandled_signals;
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 68eb857..8968a4c 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -506,7 +506,7 @@ static void __do_notify(struct mqueue_inode_info *info)
sig_i.si_errno = 0;
sig_i.si_code = SI_MESGQ;
sig_i.si_value = info->notify.sigev_value;
- sig_i.si_pid = task_tgid_vnr(current);
+ sig_i.si_pid = 0; /* Uses default current tgid */
sig_i.si_uid = current->uid;
kill_pid_info(info->notify.sigev_signo,
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 153dcb2..c72efb1 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -1014,7 +1014,7 @@ static void check_thread_timers(struct task_struct *tsk,
* At the hard limit, we just die.
* No need to calculate anything else now.
*/
- __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
+ __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk, NULL);
return;
}
if (tsk->rt.timeout > DIV_ROUND_UP(*soft, USEC_PER_SEC/HZ)) {
@@ -1029,7 +1029,7 @@ static void check_thread_timers(struct task_struct *tsk,
printk(KERN_INFO
"RT Watchdog Timeout: %s[%d]\n",
tsk->comm, task_pid_nr(tsk));
- __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
+ __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk, NULL);
}
}
}
@@ -1122,7 +1122,7 @@ static void check_process_timers(struct task_struct *tsk,
sig->it_prof_expires = cputime_add(
sig->it_prof_expires, ptime);
}
- __group_send_sig_info(SIGPROF, SEND_SIG_PRIV, tsk);
+ __group_send_sig_info(SIGPROF, SEND_SIG_PRIV, tsk, NULL);
}
if (!cputime_eq(sig->it_prof_expires, cputime_zero) &&
(cputime_eq(prof_expires, cputime_zero) ||
@@ -1138,7 +1138,7 @@ static void check_process_timers(struct task_struct *tsk,
sig->it_virt_expires = cputime_add(
sig->it_virt_expires, utime);
}
- __group_send_sig_info(SIGVTALRM, SEND_SIG_PRIV, tsk);
+ __group_send_sig_info(SIGVTALRM, SEND_SIG_PRIV, tsk, NULL);
}
if (!cputime_eq(sig->it_virt_expires, cputime_zero) &&
(cputime_eq(virt_expires, cputime_zero) ||
@@ -1154,14 +1154,14 @@ static void check_process_timers(struct task_struct *tsk,
* At the hard limit, we just die.
* No need to calculate anything else now.
*/
- __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
+ __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk, NULL);
return;
}
if (psecs >= sig->rlim[RLIMIT_CPU].rlim_cur) {
/*
* At the soft limit, send a SIGXCPU every second.
*/
- __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
+ __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk, NULL);
if (sig->rlim[RLIMIT_CPU].rlim_cur
< sig->rlim[RLIMIT_CPU].rlim_max) {
sig->rlim[RLIMIT_CPU].rlim_cur++;
diff --git a/kernel/signal.c b/kernel/signal.c
index 4530fc6..28a48ee 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -28,6 +28,7 @@
#include <linux/pid_namespace.h>
#include <linux/nsproxy.h>
#include <trace/sched.h>
+#include <linux/hardirq.h>
#include <asm/param.h>
#include <asm/uaccess.h>
@@ -798,8 +799,40 @@ static inline int legacy_queue(struct sigpending *signals, int sig)
return (sig < SIGRTMIN) && sigismember(&signals->signal, sig);
}
+static void set_sigqueue_pid(struct sigqueue *q, struct task_struct *t,
+ struct pid *sender)
+{
+ struct pid_namespace *ns;
+
+ /* Set si_pid to the pid number of sender in the pid namespace of
+ * our destination task for all siginfo types that support it.
+ */
+ switch(q->info.si_code & __SI_MASK) {
+ /* siginfo without si_pid */
+ case __SI_TIMER:
+ case __SI_POLL:
+ case __SI_FAULT:
+ break;
+ /* siginfo with si_pid */
+ case __SI_KILL:
+ case __SI_CHLD:
+ case __SI_RT:
+ case __SI_MESGQ:
+ default:
+ /* si_pid for SI_KERNEL is always 0 */
+ if (q->info.si_code == SI_KERNEL || in_interrupt())
+ break;
+ /* Is current not the sending task? */
+ if (!sender)
+ sender = task_tgid(current);
+ ns = task_active_pid_ns(t);
+ q->info.si_pid = pid_nr_ns(sender, ns);
+ break;
+ }
+}
+
static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
- int group)
+ int group, struct pid *sender)
{
struct sigpending *pending;
struct sigqueue *q;
@@ -843,7 +876,7 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
q->info.si_signo = sig;
q->info.si_errno = 0;
q->info.si_code = SI_USER;
- q->info.si_pid = task_pid_vnr(current);
+ q->info.si_pid = 0; /* Uses current tgid */
q->info.si_uid = current->uid;
break;
case (unsigned long) SEND_SIG_PRIV:
@@ -857,6 +890,7 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
copy_siginfo(&q->info, info);
break;
}
+ set_sigqueue_pid(q, t, sender);
} else if (!is_si_special(info)) {
if (sig >= SIGRTMIN && info->si_code != SI_USER)
/*
@@ -906,15 +940,16 @@ static int __init setup_print_fatal_signals(char *str)
__setup("print-fatal-signals=", setup_print_fatal_signals);
int
-__group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
+__group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p,
+ struct pid *sender)
{
- return send_signal(sig, info, p, 1);
+ return send_signal(sig, info, p, 1, sender);
}
static int
specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t)
{
- return send_signal(sig, info, t, 0);
+ return send_signal(sig, info, t, 0, NULL);
}
/*
@@ -1018,7 +1053,7 @@ int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
if (!ret && sig) {
ret = -ESRCH;
if (lock_task_sighand(p, &flags)) {
- ret = __group_send_sig_info(sig, info, p);
+ ret = __group_send_sig_info(sig, info, p, NULL);
unlock_task_sighand(p, &flags);
}
}
@@ -1108,7 +1143,7 @@ int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid,
if (sig && p->sighand) {
unsigned long flags;
spin_lock_irqsave(&p->sighand->siglock, flags);
- ret = __group_send_sig_info(sig, info, p);
+ ret = __group_send_sig_info(sig, info, p, NULL);
spin_unlock_irqrestore(&p->sighand->siglock, flags);
}
out_unlock:
@@ -1344,6 +1379,7 @@ int do_notify_parent(struct task_struct *tsk, int sig)
struct sighand_struct *psig;
struct task_cputime cputime;
int ret = sig;
+ struct pid *sender;
BUG_ON(sig == -1);
@@ -1353,24 +1389,11 @@ int do_notify_parent(struct task_struct *tsk, int sig)
BUG_ON(!tsk->ptrace &&
(tsk->group_leader != tsk || !thread_group_empty(tsk)));
+ /* We are under tasklist_lock so no need to call get_pid */
+ sender = task_pid(tsk);
info.si_signo = sig;
info.si_errno = 0;
- /*
- * we are under tasklist_lock here so our parent is tied to
- * us and cannot exit and release its namespace.
- *
- * the only it can is to switch its nsproxy with sys_unshare,
- * bu uncharing pid namespaces is not allowed, so we'll always
- * see relevant namespace
- *
- * write_lock() currently calls preempt_disable() which is the
- * same as rcu_read_lock(), but according to Oleg, this is not
- * correct to rely on this
- */
- rcu_read_lock();
- info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns);
- rcu_read_unlock();
-
+ info.si_pid = 0; /* Filled in later from sender */
info.si_uid = tsk->uid;
thread_group_cputime(tsk, &cputime);
@@ -1412,7 +1435,7 @@ int do_notify_parent(struct task_struct *tsk, int sig)
sig = -1;
}
if (valid_signal(sig) && sig > 0)
- __group_send_sig_info(sig, &info, tsk->parent);
+ __group_send_sig_info(sig, &info, tsk->parent, sender);
__wake_up_parent(tsk, tsk->parent);
spin_unlock_irqrestore(&psig->siglock, flags);
@@ -1425,6 +1448,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why)
unsigned long flags;
struct task_struct *parent;
struct sighand_struct *sighand;
+ struct pid *sender;
if (tsk->ptrace & PT_PTRACED)
parent = tsk->parent;
@@ -1433,15 +1457,11 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why)
parent = tsk->real_parent;
}
+ /* We are under tasklist_lock so no need to call get_pid */
+ sender = task_pid(tsk);
info.si_signo = SIGCHLD;
info.si_errno = 0;
- /*
- * see comment in do_notify_parent() abot the following 3 lines
- */
- rcu_read_lock();
- info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns);
- rcu_read_unlock();
-
+ info.si_pid = 0; /* Filled in later from sender */
info.si_uid = tsk->uid;
info.si_utime = cputime_to_clock_t(tsk->utime);
@@ -1466,7 +1486,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why)
spin_lock_irqsave(&sighand->siglock, flags);
if (sighand->action[SIGCHLD-1].sa.sa_handler != SIG_IGN &&
!(sighand->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP))
- __group_send_sig_info(SIGCHLD, &info, parent);
+ __group_send_sig_info(SIGCHLD, &info, parent, sender);
/*
* Even if SIGCHLD is not generated, we must wake up wait4 calls.
*/
@@ -2210,7 +2230,7 @@ sys_kill(pid_t pid, int sig)
info.si_signo = sig;
info.si_errno = 0;
info.si_code = SI_USER;
- info.si_pid = task_tgid_vnr(current);
+ info.si_pid = 0; /* Uses default current tgid */
info.si_uid = current->uid;
return kill_something_info(sig, &info, pid);
@@ -2227,7 +2247,7 @@ static int do_tkill(pid_t tgid, pid_t pid, int sig)
info.si_signo = sig;
info.si_errno = 0;
info.si_code = SI_TKILL;
- info.si_pid = task_tgid_vnr(current);
+ info.si_pid = 0; /* Uses default current tgid */
info.si_uid = current->uid;
rcu_read_lock();
--
1.5.2.5
_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
More information about the Devel
mailing list