[Devel] Re: [PATCH 1/2] signal checkpoint: define /proc/pid/sig/
Cedric Le Goater
clg at fr.ibm.com
Mon Jun 11 07:43:12 PDT 2007
Serge E. Hallyn wrote:
> As I mentioned earlier, I don't know what sort of approach we want
> to take to guide checkpoint and restart. I.e. do we want it to be
> a mostly userspace-orchestrated affair, or entirely done in the
> kernel using the freezer or some other mechanism in response to a
> single syscall or containerfs file write?
>
> If we wanted to do a lot of the work in userspace, here is a pair of
> patches to read and restore signal information. It's entirely unsafe
> wrt locking, bc i would assume that if we did in fact do c/r from
> userspace, we would have some way of entirely pulling the task off
> the runqueue while doing our thing...
>
> Anyway, this is purely to start discussion.
>
> thanks,
> -serge
>
>>From 30bbe322942e5ed86bad63861dad80595cd04063 Mon Sep 17 00:00:00 2001
> From: Serge E. Hallyn <serue at us.ibm.com>
> Date: Mon, 30 Apr 2007 16:22:44 -0400
> Subject: [PATCH 1/2] signal checkpoint: define /proc/pid/sig/
>
> Define /proc/<pid>/sig/ directory containing files to report
> on a process' signal info.
>
> Files defined:
> action: list signal action
> altstack: print sigaltstack location and size
> blocked: print blocked signal mask
> pending: print pending signals and siginfo
> shared_pending: print shared pending signals and siginfo
> waiters: list tasks wait4()ing on task PID.
some of these are already in /proc/<pid>/status and /proc/<pid>/stat
should we continue to use /proc ? or switch to some other mechanisms
like getnetlink (taskstats) to map kernel structures.
[ ... ]
> +/*
> + * print a sigset_t to a buffer. Return # characters printed,
> + * not including the final ending '\0'.
> + */
> +static int print_sigset(char *buf, sigset_t *sig)
> +{
> + int i;
> +
> + for (i=0; i<_NSIG; i++) {
> + if (sigismember(sig, i))
> + buf[i] = '1';
> + else
> + buf[i] = '0';
> + }
> + buf[_NSIG] = '\0';
> +
> + return _NSIG;
> +}
you might want to use render_sigset_t() in fs/proc array.C
> +static int print_sigset_alloc(char **bufp, sigset_t *sig)
> +{
> + char *buf;
> +
> + *bufp = buf = kmalloc(_NSIG+1, GFP_KERNEL);
> + if (!buf)
> + return -ENOMEM;
> +
> + return print_sigset(buf, sig);
> +}
> +
> +static int print_wait_info(char **bufp, struct signal_struct *signal)
> +{
> + char *buf;
> + wait_queue_t *wait;
> +
> + *bufp = buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
> + if (!buf)
> + return -ENOMEM;
> +
> + spin_lock(&signal->wait_chldexit.lock);
> +
> + list_for_each_entry(wait, &signal->wait_chldexit.task_list, task_list) {
> + struct task_struct *tsk = wait->private;
> +
> + if (buf - *bufp +50 > PAGE_SIZE) {
> + spin_unlock(&signal->wait_chldexit.lock);
> + kfree(buf);
> + *bufp = NULL;
> + return -ENOMEM;
> + }
> + WARN_ON(wait->func != default_wake_function);
> + buf += sprintf(buf, "%u %d\n", wait->flags, tsk->pid);
> + }
> +
> + spin_unlock(&signal->wait_chldexit.lock);
> +
> + return buf-*bufp;
> +}
> +
> +static int print_sigpending_alloc(char **bufp, struct sigpending *pending)
> +{
> + int alloced=0;
> + char *buf, *p;
> + struct sigqueue *q;
> + struct siginfo *info;
> +
> + alloced = PAGE_SIZE;
> + p = buf = kmalloc(alloced, GFP_KERNEL);
> + if (!buf)
> + return -ENOMEM;
> +
> + p += print_sigset(buf, &pending->signal);
> + p += sprintf(p, "\n");
> +
> + list_for_each_entry(q, &pending->list, list) {
> + info = &q->info;
> + if (p-buf+215 > alloced) {
> + int len=p-buf;
> + char *buf2;
> + alloced += PAGE_SIZE;
> + buf2 = kmalloc(alloced, GFP_KERNEL);
> + if (!buf2) {
> + kfree(buf);
> + return -ENOMEM;
> + }
> + memcpy(buf2, buf, alloced - PAGE_SIZE);
> + kfree(buf);
> + buf = buf2;
> + p = buf+len;
> + }
> +
> + p += sprintf(p, "sig %d: user %d flags %d",
> + info->si_signo, (int)q->user->uid, q->flags);
> + p += sprintf(p, " errno %d code %d\n",
> + info->si_errno, info->si_code);
> +
> + switch(info->si_signo) {
> + case SIGKILL:
> + p += sprintf(p, " spid %d suid %d\n",
> + info->_sifields._kill._pid,
> + info->_sifields._kill._uid);
> + break;
> + /* XXX skipping posix1b timers and signals for now */
> + case SIGCHLD:
> + p += sprintf(p, " pid %d uid %d status %d utime %lu stime %lu\n",
> + info->_sifields._sigchld._pid,
> + info->_sifields._sigchld._uid,
> + info->_sifields._sigchld._status,
> + info->_sifields._sigchld._utime,
> + info->_sifields._sigchld._stime);
> + break;
> + case SIGILL:
> + case SIGFPE:
> + case SIGSEGV:
> + case SIGBUS:
> +#ifdef __ARCH_SI_TRAPNO
> + p += sprintf(p, " addr %lu trapno %d\n",
> + (unsigned long)info->_sifields._sigfault._addr,
> + info->_sifields._sigfault._trapno);
> +#else
> + p += sprintf(p, " addr %lu\n",
> + (unsigned long)info->_sifields._sigfault._addr);
> +#endif
> + break;
> + case SIGPOLL:
> + p += sprintf(p, " band %ld fd %d\n",
> + (long)info->_sifields._sigpoll._band,
> + info->_sifields._sigpoll._fd);
> + break;
> + default:
> + p += sprintf(p, " Unsupported siginfo for signal %d\n",
> + info->si_signo);
> + break;
> + }
> + }
> + *bufp = buf;
> + return p-buf;
> +}
I think we are reaching the limit of /proc when we expose the pending siginfos.
> +static int print_sigaction_list(char **bufp, struct sighand_struct *sighand)
> +{
> + struct k_sigaction *action;
> + int maxlen;
> + int i;
> + char *buf;
> +
> + /* two unsigned longs (20 chars), one int (10 chars), a sigset_t, 3 spaces, plus a newline */
> + maxlen = 10 + 20*2 + _NSIG + 4;
> + /* and we have _NSIG of those entries, plus an ending \0 */
> + maxlen *= _NSIG+1;
> +
> + *bufp = buf = kmalloc(maxlen, GFP_KERNEL);
> + if (!buf)
> + return -ENOMEM;
> +
> + spin_lock(&sighand->siglock);
> + for (i=0; i<_NSIG; i++) {
> + action = &sighand->action[i];
> + buf += sprintf(buf, "%10d %20lu ", i, action->sa.sa_flags);
> + buf += print_sigset(buf, &action->sa.sa_mask);
> + buf += sprintf(buf, " %20lu\n", (unsigned long)action->sa.sa_handler);
> + BUG_ON(buf-*bufp > maxlen);
> + }
> + spin_unlock(&sighand->siglock);
> + return buf-*bufp;
> +}
> +
> +int task_read_procsig(struct task_struct *p, char *name, char **value)
> +{
> + int ret;
> +
> + if (current != p) {
> + ret = security_ptrace(current, p);
> + if (ret)
> + return ret;
> + }
> +
> + ret = -EINVAL;
> +
> + if (strcmp(name, "pending") == 0) {
> + /* not masking out blocked signals yet */
> + ret = print_sigpending_alloc(value, &p->pending);
> + } else if (strcmp(name, "blocked") == 0) {
> + /* not masking out blocked signals yet */
> + ret = print_sigset_alloc(value, &p->blocked);
> + } else if (strcmp(name, "shared_pending") == 0) {
> + ret = print_sigpending_alloc(value, &p->signal->shared_pending);
> + } else if (strcmp(name, "waiters") == 0) {
> + ret = print_wait_info(value, p->signal);
> + } else if (strcmp(name, "action") == 0) {
> + ret = print_sigaction_list(value, p->sighand);
> + } else if (strcmp(name, "altstack") == 0) {
> + *value = kmalloc(40, GFP_KERNEL);
> + ret = -ENOMEM;
> + if (*value) {
> + ret = sprintf(*value, "%lu %zd", p->sas_ss_sp, p->sas_ss_size);
> + }
> + }
> +
> + return ret;
> +}
_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
More information about the Devel
mailing list