[CRIU] [PATCH 3/3] dump: use freezer cgroup to seize processes (v2)
Christopher Covington
cov at codeaurora.org
Tue Aug 4 04:50:46 PDT 2015
On 08/03/2015 10:18 PM, Andrey Vagin wrote:
> Without using a freezer cgroup, we need to do a few iterations to catch
> all tasks, because a new tasks can be born. If new tasks appear faster
> than criu collects them, criu fails. The freezer cgroup allows to
> solve this problem.
>
> We freeze the freezer group, then attaches to tasks with ptrace and thaw
> the freezer cgroup. We suppose that all tasks which are going to be
> dumped in a specified freezer group.
>
> v2: fix comments from Christopher
>
> Cc: Christopher Covington <cov at codeaurora.org>
> Signed-off-by: Andrey Vagin <avagin at openvz.org>
> ---
> cr-exec.c | 2 +-
> crtools.c | 6 ++
> include/cr_options.h | 1 +
> include/ptrace.h | 5 +-
> proc_parse.c | 1 +
> ptrace.c | 54 +++++++++------
> seize.c | 185 +++++++++++++++++++++++++++++++++++++++++++++++++--
> 7 files changed, 226 insertions(+), 28 deletions(-)
>
> diff --git a/cr-exec.c b/cr-exec.c
> index f3d55f6..b149c38 100644
> --- a/cr-exec.c
> +++ b/cr-exec.c
> @@ -130,7 +130,7 @@ int cr_exec(int pid, char **opt)
> goto out;
> }
>
> - prev_state = ret = seize_task(pid, -1, &creds);
> + prev_state = ret = seize_task(pid, -1, &creds, PTRACE_FREEZE);
> if (ret < 0) {
> pr_err("Can't seize task %d\n", pid);
> goto out;
> diff --git a/crtools.c b/crtools.c
> index 6af6080..9096420 100644
> --- a/crtools.c
> +++ b/crtools.c
> @@ -235,6 +235,7 @@ int main(int argc, char *argv[], char *envp[])
> { "enable-fs", required_argument, 0, 1065 },
> { "enable-external-sharing", no_argument, 0, 1066 },
> { "enable-external-masters", no_argument, 0, 1067 },
> + { "freeze-cgroup", required_argument, 0, 1068 },
> { },
> };
>
> @@ -465,6 +466,9 @@ int main(int argc, char *argv[], char *envp[])
> case 1067:
> opts.enable_external_masters = true;
> break;
> + case 1068:
> + opts.freeze_cgroup = optarg;
> + break;
> case 'M':
> {
> char *aux;
> @@ -676,6 +680,8 @@ usage:
> " 'cpu','fpu','all','ins','none'. To disable capability, prefix it with '^'.\n"
> " --exec-cmd execute the command specified after '--' on successful\n"
> " restore making it the parent of the restored process\n"
> +" --freeze-cgroup\n"
> +" use cgroup freezer to collect processes\n"
> "\n"
> "* Special resources support:\n"
> " -x|--" USK_EXT_PARAM "inode,.." " allow external unix connections (optionally can be assign socket's inode that allows one-sided dump)\n"
> diff --git a/include/cr_options.h b/include/cr_options.h
> index 19c2f77..f981806 100644
> --- a/include/cr_options.h
> +++ b/include/cr_options.h
> @@ -57,6 +57,7 @@ struct cr_options {
> char *output;
> char *root;
> char *pidfile;
> + char *freeze_cgroup;
> struct list_head veth_pairs;
> struct list_head scripts;
> struct list_head ext_mounts;
> diff --git a/include/ptrace.h b/include/ptrace.h
> index 44b66c9..ce91a45 100644
> --- a/include/ptrace.h
> +++ b/include/ptrace.h
> @@ -67,7 +67,10 @@ struct ptrace_peeksiginfo_args {
>
> #define SI_EVENT(_si_code) (((_si_code) & 0xFFFF) >> 8)
>
> -extern int seize_task(pid_t pid, pid_t ppid, struct proc_status_creds **creds);
> +#define PTRACE_FREEZE_PRE 0 /* Attach to the process (PTRACE_SEIZE and PTRACE_INTERRUPT) */
> +#define PTRACE_FREEZE_POST 1 /* Wait when the process will be stopped */
> +#define PTRACE_FREEZE 2 /* Do both stages together */
> +extern int seize_task(pid_t pid, pid_t ppid, struct proc_status_creds **creds, int freeze);
> extern int suspend_seccomp(pid_t pid);
> extern int unseize_task(pid_t pid, int orig_state, int state);
> extern int ptrace_peek_area(pid_t pid, void *dst, void *addr, long bytes);
> diff --git a/proc_parse.c b/proc_parse.c
> index 95d6505..ea98a97 100644
> --- a/proc_parse.c
> +++ b/proc_parse.c
> @@ -1135,6 +1135,7 @@ struct mount_info *parse_mountinfo(pid_t pid, struct ns_id *nsid, bool for_dump)
>
> new->nsid = nsid;
>
> + pr_debug("%s\n", str);
> ret = parse_mountinfo_ent(str, new, &fsname);
> if (ret < 0) {
> pr_err("Bad format in %d mountinfo: '%s'\n", pid, str);
> diff --git a/ptrace.c b/ptrace.c
> index 905eaec..e614be4 100644
> --- a/ptrace.c
> +++ b/ptrace.c
> @@ -60,11 +60,11 @@ int suspend_seccomp(pid_t pid)
> * up with someone else.
> */
>
> -int seize_task(pid_t pid, pid_t ppid, struct proc_status_creds **creds)
> +int seize_task(pid_t pid, pid_t ppid, struct proc_status_creds **creds, int freeze)
> {
> siginfo_t si;
> int status;
> - int ret, ret2, ptrace_errno, wait_errno = 0;
> + int ret = 0, ret2, ptrace_errno = 0, wait_errno = 0;
> struct proc_status_creds cr;
>
> /*
> @@ -72,23 +72,25 @@ int seize_task(pid_t pid, pid_t ppid, struct proc_status_creds **creds)
> */
> memzero(&cr, sizeof(struct proc_status_creds));
>
> - ret = ptrace(PTRACE_SEIZE, pid, NULL, 0);
> - ptrace_errno = errno;
> - if (ret == 0) {
> - /*
> - * If we SEIZE-d the task stop it before going
> - * and reading its stat from proc. Otherwise task
> - * may die _while_ we're doing it and we'll have
> - * inconsistent seize/state pair.
> - *
> - * If task dies after we seize it but before we
> - * do this interrupt, we'll notice it via proc.
> - */
> - ret = ptrace(PTRACE_INTERRUPT, pid, NULL, NULL);
> - if (ret < 0) {
> - pr_perror("SEIZE %d: can't interrupt task", pid);
> - ptrace(PTRACE_DETACH, pid, NULL, NULL);
> - goto err;
> + if (freeze != PTRACE_FREEZE_POST) {
> + ret = ptrace(PTRACE_SEIZE, pid, NULL, 0);
> + ptrace_errno = errno;
> + if (ret == 0) {
> + /*
> + * If we SEIZE-d the task stop it before going
> + * and reading its stat from proc. Otherwise task
> + * may die _while_ we're doing it and we'll have
> + * inconsistent seize/state pair.
> + *
> + * If task dies after we seize it but before we
> + * do this interrupt, we'll notice it via proc.
> + */
> + ret = ptrace(PTRACE_INTERRUPT, pid, NULL, NULL);
> + if (ret < 0) {
> + pr_perror("SEIZE %d: can't interrupt task", pid);
> + ptrace(PTRACE_DETACH, pid, NULL, NULL);
> + goto err;
> + }
> }
> }
>
> @@ -101,8 +103,18 @@ int seize_task(pid_t pid, pid_t ppid, struct proc_status_creds **creds)
>
> try_again:
> if (!ret) {
> - ret = wait4(pid, &status, __WALL, NULL);
> + int flags = 0;
> +
> + if (freeze == PTRACE_FREEZE_PRE)
> + flags |= WNOHANG;
> +
> + ret = wait4(pid, &status, __WALL | flags, NULL);
> wait_errno = errno;
> +
> + if (freeze == PTRACE_FREEZE_PRE && ret > 0) {
> + pr_err("The %d task is not frozen\n", pid);
> + goto err;
> + }
> }
>
> ret2 = parse_pid_status(pid, &cr);
> @@ -126,6 +138,8 @@ try_again:
>
> return TASK_DEAD;
> }
> + if (freeze == PTRACE_FREEZE_PRE)
> + return 0;
>
> if ((ppid != -1) && (cr.ppid != ppid)) {
> pr_err("Task pid reused while suspending (%d: %d -> %d)\n",
> diff --git a/seize.c b/seize.c
> index e9be332..6ed664f 100644
> --- a/seize.c
> +++ b/seize.c
> @@ -18,6 +18,163 @@
>
> #define NR_ATTEMPTS 5
There appears to be an identical definition in cr-dump.c. Perhaps they can be
consolidated into a single definition in a common header?
> +const char frozen[] = "FROZEN";
> +const char freezing[] = "FREEZING";
> +const char thawed[] = "THAWED";
> +
> +static const char *get_freezer_state(int fd)
> +{
> + int ret;
> + char path[PATH_MAX];
> +
> + lseek(fd, 0, SEEK_SET);
> + ret = read(fd, path, sizeof(path) - 1);
> + if (ret <= 0) {
I don't know if this could happen on cgroupsfs:
should you loop if 0 < ret < sizeof(path) - 1?
> + pr_perror("Unable to get a current state");
> + goto err;
> + }
> + if (path[ret - 1] == '\n')
> + path[ret - 1] = 0;
> + else
> + path[ret] = 0;
> +
> + pr_debug("freezer.state=%s\n", path);
> + if (strcmp(path, frozen) == 0)
> + return frozen;
> + if (strcmp(path, freezing) == 0)
> + return freezing;
> + if (strcmp(path, thawed) == 0)
> + return thawed;
> +
> + pr_err("Unknown freezer state: %s", path);
> +err:
> + return NULL;
> +}
> +
> +static bool freezer_thawed;
> +
> +static int freezer_restore_state(void)
> +{
> + int fd;
> + char path[PATH_MAX];
> +
> + if (!opts.freeze_cgroup || freezer_thawed)
> + return 0;
> +
> + snprintf(path, sizeof(path), "%s/freezer.state", opts.freeze_cgroup);
> + fd = open(path, O_RDWR);
> + if (fd < 0) {
> + pr_perror("Unable to open %s", path);
> + return -1;
> + }
> +
> + if (write(fd, frozen, sizeof(frozen)) != sizeof(frozen)) {
> + pr_perror("Unable to freeze tasks");
> + close(fd);
> + return -1;
> + }
> + close(fd);
> + return 0;
> +}
> +
> +static int freeze_processes(void)
> +{
> + int i, ret, fd, exit_code = -1;
> + char path[PATH_MAX];
> + const char *state = thawed;
> + FILE *f;
> +
> + snprintf(path, sizeof(path), "%s/freezer.state", opts.freeze_cgroup);
> + fd = open(path, O_RDWR);
> + if (fd < 0) {
> + pr_perror("Unable to open %s", path);
> + return -1;
> + }
> + state = get_freezer_state(fd);
> + if (!state) {
> + close(fd);
> + return -1;
> + }
> + if (state == thawed)
> + freezer_thawed = true;
> +
> + lseek(fd, 0, SEEK_SET);
> + if (write(fd, frozen, sizeof(frozen)) != sizeof(frozen)) {
> + pr_perror("Unable to freeze tasks");
> + close(fd);
> + return -1;
> + }
> +
> + /*
> + * There is not way to wait a specified state, so we need to poll the
> + * freezer.state
> + */
> + for (i = 0; i < NR_ATTEMPTS; i++) {
> + struct timespec req = {};
> +
> + snprintf(path, sizeof(path), "%s/tasks", opts.freeze_cgroup);
> + f = fopen(path, "r");
> + if (f == NULL) {
> + pr_perror("Unable to open %s", path);
> + goto err;
> + }
> + while (fgets(path, sizeof(path), f)) {
> + pid_t pid;
> +
> + pid = atoi(path);
> +
> + ret = wait4(pid, NULL, __WALL | WNOHANG, NULL);
> + if (ret == 0) /* skip already seized tasks */
> + continue;
> + if (seize_task(pid, 0, NULL, PTRACE_FREEZE_PRE)) {
> + fclose(f);
> + goto err;
> + }
> + }
> + fclose(f);
> +
> + if (state == frozen)
> + break;
> +
> + state = get_freezer_state(fd);
> + if (!state)
> + goto err;
> +
> + if (state == frozen) {
> + /*
> + * Enumerate all tasks one more time to collect all new
> + * tasks, which can be born while the cgroup is being frozen.
> + */
> +
> + continue;
> + }
> +
> + req.tv_nsec = 10000000 * i;
> + nanosleep(&req, NULL);
> + }
> +
> + if (i == NR_ATTEMPTS) {
> + pr_err("Unable to freeze cgroup %s\n", opts.freeze_cgroup);
> + goto err;
> + }
> +
> + exit_code = 0;
> +err:
> + if (exit_code == 0 || freezer_thawed) {
> + lseek(fd, 0, SEEK_SET);
> + if (write(fd, thawed, sizeof(thawed)) != sizeof(thawed)) {
> + pr_perror("Unable to thaw tasks");
> + exit_code = -1;
> + }
> + }
I guess you don't need to poll freezer.state here, because there aren't
further events that are dependent on the transition.
> + if (close(fd)) {
> + pr_perror("Unable to thaw tasks");
> + return -1;
> + }
> +
> + return exit_code;
> +}
> +
> static inline bool child_collected(struct pstree_item *i, pid_t pid)
> {
> struct pstree_item *c;
> @@ -33,7 +190,9 @@ static int collect_task(struct pstree_item *item);
> static int collect_children(struct pstree_item *item)
> {
> pid_t *ch;
> - int ret, i, nr_children, nr_inprogress;
> + int ret, i, nr_children, nr_inprogress, freeze;
> +
> + freeze = opts.freeze_cgroup ? PTRACE_FREEZE_POST : PTRACE_FREEZE;
>
> ret = parse_children(item->pid.real, &ch, &nr_children);
> if (ret < 0)
> @@ -58,7 +217,7 @@ static int collect_children(struct pstree_item *item)
> goto free;
> }
>
> - ret = seize_task(pid, item->pid.real, &dmpi(c)->pi_creds);
> + ret = seize_task(pid, item->pid.real, &dmpi(c)->pi_creds, freeze);
> if (ret < 0) {
> /*
> * Here is a race window between parse_children() and seize(),
> @@ -143,6 +302,9 @@ void pstree_switch_state(struct pstree_item *root_item, int st)
> {
> struct pstree_item *item = root_item;
>
> + if (st != TASK_DEAD)
> + freezer_restore_state();
> +
> pr_info("Unfreezing tasks into %d\n", st);
> for_each_pstree_item(item)
> unseize_task_and_threads(item, st);
> @@ -174,7 +336,9 @@ static inline bool thread_collected(struct pstree_item *i, pid_t tid)
> static int collect_threads(struct pstree_item *item)
> {
> struct pid *threads = NULL;
> - int nr_threads = 0, i = 0, ret, nr_inprogress, nr_stopped = 0;
> + int nr_threads = 0, i = 0, ret, nr_inprogress, nr_stopped = 0, freeze;
> +
> + freeze = opts.freeze_cgroup ? PTRACE_FREEZE_POST : PTRACE_FREEZE;
>
> ret = parse_threads(item->pid.real, &threads, &nr_threads);
> if (ret < 0)
> @@ -207,7 +371,7 @@ static int collect_threads(struct pstree_item *item)
> pr_info("\tSeizing %d's %d thread\n",
> item->pid.real, pid);
>
> - ret = seize_task(pid, item_ppid(item), &dmpi(item)->pi_creds);
> + ret = seize_task(pid, item_ppid(item), &dmpi(item)->pi_creds, freeze);
> if (ret < 0) {
> /*
> * Here is a race window between parse_threads() and seize(),
> @@ -251,6 +415,9 @@ static int collect_loop(struct pstree_item *item,
> {
> int attempts = NR_ATTEMPTS, nr_inprogress = 1;
>
> + if (opts.freeze_cgroup)
> + attempts = 2; /* double check that we skip nothing */
attempts was 5. You decrease it to 2. How does that provide additional checking?
> /*
> * While we scan the proc and seize the children/threads
> * new ones can appear (with clone(CLONE_PARENT) or with
> @@ -307,7 +474,13 @@ err_close:
>
> int collect_pstree(pid_t pid)
> {
> - int ret;
> + int ret, freeze = PTRACE_FREEZE;
> +
> + if (opts.freeze_cgroup) {
> + if (freeze_processes())
> + return -1;
> + freeze = PTRACE_FREEZE_POST;
> + }
>
> timing_start(TIME_FREEZING);
>
> @@ -316,7 +489,7 @@ int collect_pstree(pid_t pid)
> return -1;
>
> root_item->pid.real = pid;
> - ret = seize_task(pid, -1, &dmpi(root_item)->pi_creds);
> + ret = seize_task(pid, -1, &dmpi(root_item)->pi_creds, freeze);
> if (ret < 0)
> goto err;
> pr_info("Seized task %d, state %d\n", pid, ret);
>
--
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
More information about the CRIU
mailing list