[CRIU] [PATCH v5 4/8] memory: don't use parent memdump if detected possible pid reuse

Pavel Tikhomirov ptikhomirov at virtuozzo.com
Fri Feb 16 11:16:15 MSK 2018


Please drop v5.

On 02/16/2018 10:55 AM, Pavel Tikhomirov wrote:
> From: ptikhomirov <ptikhomirov at virtuozzo.com>
> 
> We have a problem when a pid is reused between consequent dumps we can't
> understand if pagemap and pages from images of parent dump are invalid
> to restore these pid already. That can lead even to wrong memory
> restored for these pid, see the test in last patch.
> 
> So these is a try do separate processes with (likely) invalid previous
> memory dump from processes with 100% valid previous dump.
> 
> For that we use the value of /proc/<pid>/stat's start_time and also the
> timestamp of each (pre)dump. If the start time is strictly less than the
> timestamp, that means that the pagemap for these pid from previous dump
> is valid - was done for exactly the same process.
> 
> Creation time is in centiseconds by default so if predump is really fast
> (<1csec) we can have false negative decisions for some processes, but in
> case of long running processes we are fine.
> 
> https://jira.sw.ru/browse/PSBM-67502
> 
> v2: remove __maybe_unused for get_parent_stats; fix get_parent_stats to
> have static typing; print warning only if unsure; check has_dump_uptime
> v3: read parent stats from image only once; reuse stat from previous
> parse_pid_stat call on dump
> v4: move code to function; use unsigned long long for ticks; put
> proc_pid_stat on mem_dump_ctl; print warning on all pid-reuse cases
> v5: free parent's stats entry properly, pass it in arguments to
> (pre_)dump_one_task
> 
> Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
> ---
>   criu/cr-dump.c     | 26 +++++++++++++++++++++----
>   criu/include/mem.h |  9 +++++++--
>   criu/mem.c         | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
>   criu/stats.c       |  2 +-
>   4 files changed, 86 insertions(+), 8 deletions(-)
> 
> diff --git a/criu/cr-dump.c b/criu/cr-dump.c
> index 9e8425504..354818fb4 100644
> --- a/criu/cr-dump.c
> +++ b/criu/cr-dump.c
> @@ -1229,7 +1229,7 @@ static int assign_parasite_pids(struct pstree_item *item, struct parasite_dump_m
>   	return 0;
>   }
>   
> -static int pre_dump_one_task(struct pstree_item *item)
> +static int pre_dump_one_task(struct pstree_item *item, StatsEntry *parent_se)
>   {
>   	pid_t pid = item->pid->real;
>   	struct vm_area_list vmas;
> @@ -1289,6 +1289,8 @@ static int pre_dump_one_task(struct pstree_item *item)
>   
>   	mdc.pre_dump = true;
>   	mdc.lazy = false;
> +	mdc.stat = NULL;
> +	mdc.parent_se = parent_se;
>   
>   	ret = parasite_dump_pages_seized(item, &vmas, &mdc, parasite_ctl);
>   	if (ret)
> @@ -1307,7 +1309,7 @@ static int pre_dump_one_task(struct pstree_item *item)
>   	goto err_free;
>   }
>   
> -static int dump_one_task(struct pstree_item *item)
> +static int dump_one_task(struct pstree_item *item, StatsEntry *parent_se)
>   {
>   	pid_t pid = item->pid->real;
>   	struct vm_area_list vmas;
> @@ -1448,6 +1450,8 @@ static int dump_one_task(struct pstree_item *item)
>   
>   	mdc.pre_dump = false;
>   	mdc.lazy = opts.lazy_pages;
> +	mdc.stat = &pps_buf;
> +	mdc.parent_se = parent_se;
>   
>   	ret = parasite_dump_pages_seized(item, &vmas, &mdc, parasite_ctl);
>   	if (ret)
> @@ -1635,6 +1639,7 @@ static int cr_pre_dump_finish(int ret)
>   int cr_pre_dump_tasks(pid_t pid)
>   {
>   	struct pstree_item *item;
> +	StatsEntry *parent_se;
>   	int ret = -1;
>   
>   	if (opts.remote && push_snapshot_id() < 0) {
> @@ -1698,10 +1703,16 @@ int cr_pre_dump_tasks(pid_t pid)
>   	if (collect_namespaces(false) < 0)
>   		goto err;
>   
> +	/* Errors handled later in detect_pid_reuse */
> +	parent_se = get_parent_stats();
> +
>   	for_each_pstree_item(item)
> -		if (pre_dump_one_task(item))
> +		if (pre_dump_one_task(item, parent_se))
>   			goto err;
>   
> +	if (parent_se)
> +		stats_entry__free_unpacked(parent_se, NULL);
> +
>   	ret = cr_dump_shmem();
>   	if (ret)
>   		goto err;
> @@ -1832,6 +1843,7 @@ int cr_dump_tasks(pid_t pid)
>   {
>   	InventoryEntry he = INVENTORY_ENTRY__INIT;
>   	struct pstree_item *item;
> +	StatsEntry *parent_se;
>   	int pre_dump_ret = 0;
>   	int ret = -1;
>   
> @@ -1925,11 +1937,17 @@ int cr_dump_tasks(pid_t pid)
>   	if (collect_seccomp_filters() < 0)
>   		goto err;
>   
> +	/* Errors handled later in detect_pid_reuse */
> +	parent_se = get_parent_stats();
> +
>   	for_each_pstree_item(item) {
> -		if (dump_one_task(item))
> +		if (dump_one_task(item, parent_se))
>   			goto err;
>   	}
>   
> +	if (parent_se)
> +		stats_entry__free_unpacked(parent_se, NULL);
> +
>   	/*
>   	 * It may happen that a process has completed but its files in
>   	 * /proc/PID/ are still open by another process. If the PID has been
> diff --git a/criu/include/mem.h b/criu/include/mem.h
> index bb897c599..e61e175a8 100644
> --- a/criu/include/mem.h
> +++ b/criu/include/mem.h
> @@ -4,6 +4,9 @@
>   #include <stdbool.h>
>   #include "int.h"
>   #include "vma.pb-c.h"
> +#include "pid.h"
> +#include "proc_parse.h"
> +#include "stats.pb-c.h"
>   
>   struct parasite_ctl;
>   struct vm_area_list;
> @@ -12,8 +15,10 @@ struct pstree_item;
>   struct vma_area;
>   
>   struct mem_dump_ctl {
> -	bool	pre_dump;
> -	bool	lazy;
> +	bool			pre_dump;
> +	bool			lazy;
> +	struct proc_pid_stat	*stat;
> +	StatsEntry		*parent_se;
>   };
>   
>   extern bool vma_has_guard_gap_hidden(struct vma_area *vma);
> diff --git a/criu/mem.c b/criu/mem.c
> index 4c6942a11..63df25d54 100644
> --- a/criu/mem.c
> +++ b/criu/mem.c
> @@ -30,9 +30,11 @@
>   #include "fault-injection.h"
>   #include "prctl.h"
>   #include <compel/compel.h>
> +#include "proc_parse.h"
>   
>   #include "protobuf.h"
>   #include "images/pagemap.pb-c.h"
> +#include "images/stats.pb-c.h"
>   
>   static int task_reset_dirty_track(int pid)
>   {
> @@ -290,6 +292,50 @@ static int xfer_pages(struct page_pipe *pp, struct page_xfer *xfer)
>   	return ret;
>   }
>   
> +static int detect_pid_reuse(struct pstree_item *item,
> +			    struct proc_pid_stat* pps,
> +			    StatsEntry *parent_se)
> +{
> +	struct proc_pid_stat pps_buf;
> +	unsigned long long tps; /* ticks per second */
> +	int ret;
> +
> +	tps = sysconf(_SC_CLK_TCK);
> +	if (tps == -1) {
> +		pr_perror("Failed to get clock ticks via sysconf");
> +		return -1;
> +	}
> +
> +	if (!pps) {
> +		pps = &pps_buf;
> +		ret = parse_pid_stat(item->pid->real, pps);
> +		if (ret < 0)
> +			return -1;
> +	}
> +
> +	if (!parent_se) {
> +		pr_perror("No parent stats, see errors in get_parent_stats");
> +		return -1;
> +	}
> +
> +	if (parent_se->dump->has_dump_uptime) {
> +		unsigned long long dump_ticks;
> +
> +		dump_ticks = parent_se->dump->dump_uptime/(USEC_PER_SEC/tps);
> +
> +		if (pps->start_time >= dump_ticks) {
> +			/* Print "*" if unsure */
> +			pr_warn("Pid reuse%s detected for pid %d\n",
> +				pps_buf.start_time == dump_ticks ? "*" : "",
> +				item->pid->real);
> +			return 1;
> +		}
> +	} else
> +		pr_warn_once("Parent image has no dump timestamp, " \
> +			     "pid reuse detection is OFF!\n");
> +	return 0;
> +}
> +
>   static int __parasite_dump_pages_seized(struct pstree_item *item,
>   		struct parasite_dump_pages_args *args,
>   		struct vm_area_list *vma_area_list,
> @@ -303,6 +349,7 @@ static int __parasite_dump_pages_seized(struct pstree_item *item,
>   	int ret = -1;
>   	unsigned cpp_flags = 0;
>   	unsigned long pmc_size;
> +	int possible_pid_reuse = 0;
>   
>   	if (opts.check_only)
>   		return 0;
> @@ -360,6 +407,14 @@ static int __parasite_dump_pages_seized(struct pstree_item *item,
>   			xfer.parent = NULL + 1;
>   	}
>   
> +	if (xfer.parent) {
> +		possible_pid_reuse = detect_pid_reuse(item, mdc->stat,
> +						      mdc->parent_se);
> +		if (possible_pid_reuse == -1)
> +			goto out_xfer;
> +	}
> +
> +
>   	/*
>   	 * Step 1 -- generate the pagemap
>   	 */
> @@ -386,7 +441,7 @@ static int __parasite_dump_pages_seized(struct pstree_item *item,
>   		else {
>   again:
>   			ret = generate_iovs(vma_area, pp, map, &off,
> -				has_parent);
> +				has_parent && !possible_pid_reuse);
>   			if (ret == -EAGAIN) {
>   				BUG_ON(!(pp->flags & PP_CHUNK_MODE));
>   
> diff --git a/criu/stats.c b/criu/stats.c
> index 4823e48de..4d646c104 100644
> --- a/criu/stats.c
> +++ b/criu/stats.c
> @@ -213,7 +213,7 @@ void write_stats(int what)
>   		display_stats(what, &stats);
>   }
>   
> -__maybe_unused StatsEntry *get_parent_stats(void)
> +StatsEntry *get_parent_stats(void)
>   {
>   	struct cr_img *img;
>   	StatsEntry *se;
> 

-- 
Best regards, Tikhomirov Pavel
Software Developer, Virtuozzo.


More information about the CRIU mailing list