[Devel] Re: [PATCH 1/2] signal checkpoint: define /proc/pid/sig/

Cedric Le Goater clg at fr.ibm.com
Mon Jun 11 07:43:12 PDT 2007


Serge E. Hallyn wrote:
> As I mentioned earlier, I don't know what sort of approach we want
> to take to guide checkpoint and restart.  I.e. do we want it to be
> a mostly userspace-orchestrated affair, or entirely done in the
> kernel using the freezer or some other mechanism in response to a
> single syscall or containerfs file write?
> 
> If we wanted to do a lot of the work in userspace, here is a pair of
> patches to read and restore signal information.  It's entirely unsafe
> wrt locking, bc i would assume that if we did in fact do c/r from
> userspace, we would have some way of entirely pulling the task off
> the runqueue while doing our thing...
> 
> Anyway, this is purely to start discussion.
> 
> thanks,
> -serge
> 
>>From 30bbe322942e5ed86bad63861dad80595cd04063 Mon Sep 17 00:00:00 2001
> From: Serge E. Hallyn <serue at us.ibm.com>
> Date: Mon, 30 Apr 2007 16:22:44 -0400
> Subject: [PATCH 1/2] signal checkpoint: define /proc/pid/sig/
> 
> Define /proc/<pid>/sig/ directory containing files to report
> on a process' signal info.
> 
> Files defined:
> 	action: list signal action
> 	altstack: print sigaltstack location and size
> 	blocked: print blocked signal mask
> 	pending: print pending signals and siginfo
> 	shared_pending: print shared pending signals and siginfo
> 	waiters: list tasks wait4()ing on task PID.

Some of these are already available in /proc/<pid>/status and /proc/<pid>/stat.

Should we continue to use /proc, or switch to some other mechanism,
such as genetlink (as taskstats does), to map kernel structures?

[ ... ]

> +/*
> + * print a sigset_t to a buffer.  Return # characters printed,
> + * not including the final ending '\0'.
> + */
> +static int print_sigset(char *buf, sigset_t *sig)
> +{
> +	int i;
> +
> +	for (i=0; i<_NSIG; i++) {
> +		if (sigismember(sig, i))
> +			buf[i] = '1';
> +		else
> +			buf[i] = '0';
> +	}
> +	buf[_NSIG] = '\0';
> +
> +	return _NSIG;
> +}

You might want to use render_sigset_t() in fs/proc/array.c.

> +static int print_sigset_alloc(char **bufp, sigset_t *sig)
> +{
> +	char *buf;
> +
> +	*bufp = buf = kmalloc(_NSIG+1, GFP_KERNEL);
> +	if (!buf)
> +		return -ENOMEM;
> +
> +	return print_sigset(buf, sig);
> +}
> +
> +static int print_wait_info(char **bufp, struct signal_struct *signal)
> +{
> +	char *buf;
> +	wait_queue_t *wait;
> +
> +	*bufp = buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
> +	if (!buf)
> +		return -ENOMEM;
> +
> +	spin_lock(&signal->wait_chldexit.lock);
> +
> +	list_for_each_entry(wait, &signal->wait_chldexit.task_list, task_list) {
> +		struct task_struct *tsk = wait->private;
> +
> +		if (buf - *bufp +50 > PAGE_SIZE) {
> +			spin_unlock(&signal->wait_chldexit.lock);
> +			kfree(buf);
> +			*bufp = NULL;
> +			return -ENOMEM;
> +		}
> +		WARN_ON(wait->func != default_wake_function);
> +		buf += sprintf(buf, "%u %d\n", wait->flags, tsk->pid);
> +	}
> +
> +	spin_unlock(&signal->wait_chldexit.lock);
> +
> +	return buf-*bufp;
> +}
> +
> +static int print_sigpending_alloc(char **bufp, struct sigpending *pending)
> +{
> +	int alloced=0;
> +	char *buf, *p;
> +	struct sigqueue *q;
> +	struct siginfo *info;
> +
> +	alloced = PAGE_SIZE;
> +	p = buf = kmalloc(alloced, GFP_KERNEL);
> +	if (!buf)
> +		return -ENOMEM;
> +	
> +	p += print_sigset(buf, &pending->signal);
> +	p += sprintf(p, "\n");
> +
> +	list_for_each_entry(q, &pending->list, list) {
> +		info = &q->info;
> +		if (p-buf+215 > alloced) {
> +			int len=p-buf;
> +			char *buf2;
> +			alloced += PAGE_SIZE;
> +			buf2 = kmalloc(alloced, GFP_KERNEL);
> +			if (!buf2) {
> +				kfree(buf);
> +				return -ENOMEM;
> +			}
> +			memcpy(buf2, buf, alloced - PAGE_SIZE);
> +			kfree(buf);
> +			buf = buf2;
> +			p = buf+len;
> +		}
> +
> +		p += sprintf(p, "sig %d: user %d flags %d",
> +			info->si_signo, (int)q->user->uid, q->flags);
> +		p += sprintf(p, " errno %d code %d\n",
> +			info->si_errno, info->si_code);
> +
> +		switch(info->si_signo) {
> +		case SIGKILL:
> +			p += sprintf(p, "  spid %d suid %d\n",
> +				info->_sifields._kill._pid,
> +				info->_sifields._kill._uid);
> +			break;
> +		/* XXX skipping posix1b timers and signals for now */
> +		case SIGCHLD:
> +			p += sprintf(p, "  pid %d uid %d status %d utime %lu stime %lu\n",
> +				info->_sifields._sigchld._pid,
> +				info->_sifields._sigchld._uid,
> +				info->_sifields._sigchld._status,
> +				info->_sifields._sigchld._utime,
> +				info->_sifields._sigchld._stime);
> +			break;
> +		case SIGILL:
> +		case SIGFPE:
> +		case SIGSEGV:
> +		case SIGBUS:
> +#ifdef __ARCH_SI_TRAPNO
> +			p += sprintf(p, "  addr %lu trapno %d\n",
> +				(unsigned long)info->_sifields._sigfault._addr,
> +				info->_sifields._sigfault._trapno);
> +#else
> +			p += sprintf(p, "  addr %lu\n",
> +				(unsigned long)info->_sifields._sigfault._addr);
> +#endif
> +			break;
> +		case SIGPOLL:
> +			p += sprintf(p, "  band %ld fd %d\n",
> +				(long)info->_sifields._sigpoll._band,
> +				info->_sifields._sigpoll._fd);
> +			break;
> +		default:
> +			p += sprintf(p, "  Unsupported siginfo for signal %d\n",
> +				info->si_signo);
> +			break;
> +		}
> +	}
> +	*bufp = buf;
> +	return p-buf;
> +}

I think we are reaching the limit of /proc when we expose the pending siginfos.

> +static int print_sigaction_list(char **bufp, struct sighand_struct *sighand)
> +{
> +	struct k_sigaction *action;
> +	int maxlen;
> +	int i;
> +	char *buf;
> +
> +	/* two unsigned longs (20 chars), one int (10 chars), a sigset_t, 3 spaces, plus a newline */
> +	maxlen = 10 + 20*2 + _NSIG + 4;
> +	/* and we have _NSIG of those entries, plus an ending \0 */
> +	maxlen *= _NSIG+1;
> +
> +	*bufp = buf = kmalloc(maxlen, GFP_KERNEL);
> +	if (!buf)
> +		return -ENOMEM;
> +
> +	spin_lock(&sighand->siglock);
> +	for (i=0; i<_NSIG; i++) {
> +		action = &sighand->action[i];
> +		buf += sprintf(buf, "%10d %20lu ", i, action->sa.sa_flags);
> +		buf += print_sigset(buf, &action->sa.sa_mask);
> +		buf += sprintf(buf, " %20lu\n", (unsigned long)action->sa.sa_handler);
> +		BUG_ON(buf-*bufp > maxlen);
> +	}
> +	spin_unlock(&sighand->siglock);
> +	return buf-*bufp;
> +}
> +
> +int task_read_procsig(struct task_struct *p, char *name, char **value)
> +{
> +	int ret;
> +
> +	if (current != p) {
> +		ret = security_ptrace(current, p);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	ret = -EINVAL;
> +
> +	if (strcmp(name, "pending") == 0) {
> +		/* not masking out blocked signals  yet */
> +		ret = print_sigpending_alloc(value, &p->pending);
> +	} else if (strcmp(name, "blocked") == 0) {
> +		/* not masking out blocked signals  yet */
> +		ret = print_sigset_alloc(value, &p->blocked);
> +	} else if (strcmp(name, "shared_pending") == 0) {
> +		ret = print_sigpending_alloc(value, &p->signal->shared_pending);
> +	} else if (strcmp(name, "waiters") == 0) {
> +		ret = print_wait_info(value, p->signal);
> +	} else if (strcmp(name, "action") == 0) {
> +		ret = print_sigaction_list(value, p->sighand);
> +	}  else if (strcmp(name, "altstack") == 0) {
> +		*value = kmalloc(40, GFP_KERNEL);
> +		ret = -ENOMEM;
> +		if (*value) {
> +			ret = sprintf(*value, "%lu %zd", p->sas_ss_sp, p->sas_ss_size);
> +		}
> +	}
> +
> +	return ret;
> +}

_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers




More information about the Devel mailing list