[CRIU] [PATCH] dump: check for conflicts with the dead processes
Pavel Emelyanov
xemul at virtuozzo.com
Thu Mar 3 06:12:29 PST 2016
On 03/03/2016 04:17 PM, Evgenii Shatokhin wrote:
> It may happen that a process has completed but its files in /proc/PID/
> are still open by another process (see remap_dead_pid test from zdtm
> suite, for example).
>
> If the PID number has been given to some newer thread since then, this
> can be problematic. If that thread is the main thread of some process,
> it seems to be handled OK on restore. However, if it is a secondary
> thread, restore fails with an error like:
>
> pie: Error (pie/restorer.c:439): Thread pid mismatch 4404/4403
>
> This is because open_remap_dead_process() adds a helper with PID 4403 to
> restore /proc/4403/* and that clashes with the thread's PID.
>
> It seems reasonable to detect such things at the checkpoint stage and
> refuse to dump, rather than to fail during restore.
>
> https://jira.sw.ru/browse/PSBM-44217
>
> Signed-off-by: Evgenii Shatokhin <eshatokhin at virtuozzo.com>
> ---
> criu/cr-dump.c | 29 +++++++++++++++++++++++++++++
> criu/files-reg.c | 16 ++++++++++++----
> criu/include/files-reg.h | 1 +
> 3 files changed, 42 insertions(+), 4 deletions(-)
>
> diff --git a/criu/cr-dump.c b/criu/cr-dump.c
> index 4ec2a55..586201e 100644
> --- a/criu/cr-dump.c
> +++ b/criu/cr-dump.c
> @@ -1170,6 +1170,26 @@ err_cure:
> goto err_free;
> }
>
> +static int dead_pid_conflict(struct pstree_item *item)
> +{
> + int i;
> +
> + for (i = 0; i < item->nr_threads; i++) {
> + /*
> + * If the dead PID was given to a main thread of another
> + * process, this is handled during restore.
> + */
> + if (item->pid.real == item->threads[i].real ||
> + !dead_pid_exists(item->threads[i].virt))
> + continue;
> +
> + pr_err("Conflict with a dead task with the same PID as of this thread (virt %d, real %d).\n",
> + item->threads[i].virt, item->threads[i].real);
> + return -1;
> + }
> + return 0;
> +}
> +
> static int dump_one_task(struct pstree_item *item)
> {
> pid_t pid = item->pid.real;
> @@ -1348,6 +1368,15 @@ static int dump_one_task(struct pstree_item *item)
> goto err;
> }
>
> + /*
> + * It may happen that a process has completed but its files in
> + * /proc/PID/ are still open by another process. If the PID has been
> + * given to some newer thread since then, we may be unable to dump
> + * all this.
> + */
> + if (dead_pid_conflict(item))
The dead_pid_conflict() checks for dead_pids array which, in turn, gets filled
eventually by calling dump_one_task() on different tasks in a list. Thus at
any time you may pass this check for some pid, that would be added there later.
-- Pavel
> + goto err_cure;
> +
> ret = parasite_cure_seized(parasite_ctl);
> if (ret) {
> pr_err("Can't cure (pid: %d) from parasite\n", pid);
> diff --git a/criu/files-reg.c b/criu/files-reg.c
> index 511abfe..3bf1d9e 100644
> --- a/criu/files-reg.c
> +++ b/criu/files-reg.c
> @@ -805,16 +805,24 @@ static int dump_linked_remap(char *path, int len, const struct stat *ost,
> &rpe, PB_REMAP_FPATH);
> }
>
> -static int have_seen_dead_pid(pid_t pid)
> +pid_t *dead_pids;
> +int n_dead_pids;
> +
> +int dead_pid_exists(pid_t pid)
> {
> - static pid_t *dead_pids = NULL;
> - static int n_dead_pids = 0;
> - size_t i;
> + int i;
>
> for (i = 0; i < n_dead_pids; i++) {
> if (dead_pids[i] == pid)
> return 1;
> }
> + return 0;
> +}
> +
> +static int have_seen_dead_pid(pid_t pid)
> +{
> + if (dead_pid_exists(pid))
> + return 1;
>
> if (xrealloc_safe(&dead_pids, sizeof(*dead_pids) * (n_dead_pids + 1)))
> return -1;
> diff --git a/criu/include/files-reg.h b/criu/include/files-reg.h
> index 1cbefb0..0740fe4 100644
> --- a/criu/include/files-reg.h
> +++ b/criu/include/files-reg.h
> @@ -55,5 +55,6 @@ extern void try_clean_remaps(int ns_fd);
> extern int strip_deleted(struct fd_link *link);
>
> extern int prepare_procfs_remaps(void);
> +extern int dead_pid_exists(pid_t pid);
>
> #endif /* __CR_FILES_REG_H__ */
>
More information about the CRIU
mailing list