[CRIU] [PATCH RFC] seize: don't wory if a cgroup contains some extra tasks

Andrew Vagin avagin at odin.com
Thu Oct 29 07:43:04 PDT 2015


On Thu, Oct 29, 2015 at 04:50:49PM +0300, Pavel Emelyanov wrote:
> On 10/27/2015 07:19 PM, Andrey Vagin wrote:
> > From: Andrew Vagin <avagin at openvz.org>
> > 
> > A freezer cgroup can contain tasks which will be not dumped,
> > criu unfreezes the group, so we need to freeze all extra
> > task with ptrace like we do for target tasks.
> > 
> > Currently we attache and send an interrupt signals to these tasks,
> > but we don't call waitpid() for them, so then waitpid(-1, ...)
> > returns these tasks where we don't expect to see them.
> > 
> > Signed-off-by: Andrew Vagin <avagin at openvz.org>
> > ---
> >  include/ptrace.h |  2 ++
> >  ptrace.c         |  2 ++
> >  seize.c          | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 68 insertions(+)
> > 
> > diff --git a/include/ptrace.h b/include/ptrace.h
> > index 079ad63..a3852ce 100644
> > --- a/include/ptrace.h
> > +++ b/include/ptrace.h
> > @@ -67,6 +67,8 @@ struct ptrace_peeksiginfo_args {
> >  
> >  #define SI_EVENT(_si_code)	(((_si_code) & 0xFFFF) >> 8)
> >  
> > +extern int frozen_processes;
> > +
> >  extern int seize_catch_task(pid_t pid);
> >  extern int seize_wait_task(pid_t pid, pid_t ppid, struct proc_status_creds **creds);
> >  extern int suspend_seccomp(pid_t pid);
> > diff --git a/ptrace.c b/ptrace.c
> > index b28824b..922fc87 100644
> > --- a/ptrace.c
> > +++ b/ptrace.c
> > @@ -120,6 +120,8 @@ int seize_wait_task(pid_t pid, pid_t ppid, struct proc_status_creds **creds)
> >  try_again:
> >  
> >  	ret = wait4(pid, &status, __WALL, NULL);
> > +	if (ret > 0)
> > +		frozen_processes--;
> 
> What if we goto try_again() below and wait4 the same task twice?

I will check. Thanks.

> 
> >  	wait_errno = errno;
> >  
> >  	ret2 = parse_pid_status(pid, &cr);
> > diff --git a/seize.c b/seize.c
> > index 20c0349..e3cd2b4 100644
> > --- a/seize.c
> > +++ b/seize.c
> > @@ -84,6 +84,63 @@ int freeze_cgroup()
> >  	return 0;
> >  }
> >  
> > +/* A number of tasks in a freezer cgroup which are not going to be dumped */
> > +int frozen_processes;
> > +
> > +/*
> > + * A freezer cgroup can contain tasks which will not be dumped
> > + * and we need to wait them, because the are interupted them by ptrace.
> > + */
> > +static int freezer_wait_processes()
> > +{
> > +	int i;
> > +
> > +	for (i = 0; i < frozen_processes; i++) {
> > +		int status;
> > +		pid_t pid;
> > +
> > +		/*
> > +		 * Here we are going to skip tasks which are already traced.
> > +		 * Ptraced tasks looks like children for us, so if
> > +		 * a task isn't ptraced yet, waitpid() will return a error.
> > +		 */
> > +		pid = waitpid(-1, &status, 0);
> > +		if (pid < 0) {
> > +			pr_perror("Unable to wait processes");
> > +			return -1;
> > +		}
> > +		pr_warn("Unexpected process %d in the freezer cgroup (status 0x%x)\n", pid, status);
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static int freezer_detach(void)
> > +{
> > +	char path[PATH_MAX];
> > +	FILE *f;
> > +
> > +	if (!frozen_processes)
> > +		return 0;
> > +
> > +	snprintf(path, sizeof(path), "%s/tasks", opts.freeze_cgroup);
> > +	f = fopen(path, "r");
> > +	if (f == NULL) {
> > +		pr_perror("Unable to open %s", path);
> > +		return -1;
> > +	}
> > +	while (fgets(path, sizeof(path), f)) {
> > +		pid_t pid;
> > +
> > +		pid = atoi(path);
> > +
> > +		if (ptrace(PTRACE_DETACH, pid, NULL, NULL))
> > +			pr_perror("Unable to detach from %d\n", pid);
> 
> Why do we need to manually detach from everybody?

We do this for other tasks. We want to check that we collect all
children.

> 
> > +	}
> > +	fclose(f);
> > +	return 0;
> > +}
> > +
> >  static int freeze_processes(void)
> >  {
> >  	int i, ret, fd, exit_code = -1;
> > @@ -159,6 +216,7 @@ static int freeze_processes(void)
> >  				fclose(f);
> >  				goto err;
> >  			}
> > +			frozen_processes++;
> 
> Why? Why does the ptrace attach failure result in frozen_processes++?

I don't notice that the previous if checks an error code and state. I
will fix. Thanks.

> 
> >  		}
> >  		fclose(f);
> >  
> > @@ -324,6 +382,9 @@ static void pstree_wait(struct pstree_item *root_item)
> >  			}
> >  		}
> >  	}
> > +
> > +	if (!opts.freeze_cgroup)
> > +		freezer_detach();
> >  	pid = wait4(-1, &status, __WALL, NULL);
> >  	if (pid > 0) {
> >  		pr_err("Unexpected child %d\n", pid);
> > @@ -536,6 +597,9 @@ int collect_pstree(pid_t pid)
> >  	if (ret < 0)
> >  		goto err;
> >  
> > +	if (opts.freeze_cgroup && freezer_wait_processes())
> > +		return -1;
> > +
> >  	timing_stop(TIME_FREEZING);
> >  	timing_start(TIME_FROZEN);
> >  
> > 
> 


More information about the CRIU mailing list