[CRIU] [PATCH v8 09/10] restore: print error messages on all error-paths in restore_root_task
Pavel Tikhomirov
ptikhomirov at virtuozzo.com
Thu Feb 22 14:49:27 MSK 2018
On restore we once had:
Run criu restore
Fail RESTORE failed: Error(0): Unknown
=[log]=> dump/zdtm/static/env00/85/1/restore.log
------------------------ grep Error ------------------------
(00.122659) Running setup-namespaces scripts
(00.122748) 1: cg: Cgroups 1 inherited from parent
(00.122765) 1: uns: calling usernsd_recv_transport (0, 0)
(00.123140) uns: daemon calls 0x48d8b0 (106, 1, 0)
(00.145095) Error (criu/mount.c:3175): mnt: Can't remove the directory
/tmp/.criu.mntns.SIOGLA: No such file or directory
(00.145124) uns: calling exit_usernsd (-1, 1)
(00.145174) uns: daemon calls 0x48d880 (104, -1, 1)
(00.145181) uns: `- daemon exits w/ 0
(00.149591) uns: daemon stopped
(00.149607) Error (criu/cr-restore.c:2498): Restoring FAILED.
------------------------ ERROR OVER ------------------------
Only "Restoring FAILED." error, so no clue to what's really going on.
Printing errors for these "silent" errorpaths can help resolve issues in
future:
restore_root_task
-> fork_with_pid -> open_core -> pb_read_one -> do_pb_read_one
-> xmalloc
-> prepare_userns -> write_id_map -> get_service_fd
-> restore_wait_inprogress_tasks
->__restore_wait_inprogress_tasks -> futex_get
-> restore_switch_stage -> restore_wait_other_tasks
-> __restore_wait_inprogress_tasks -> futex_get
-> move_veth_to_bridge -> external_for_each_type -> move_to_bridge
-> external_val
-> fault_injected -> __fault_injected
-> depopulate_roots_yard -> try_clean_remaps -> clean_one_remap
-> rst_get_mnt_root -> lookup_mnt_id -> __lookup_mnt_id
Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
---
criu/cr-restore.c | 42 ++++++++++++++++++++++++++++++++----------
1 file changed, 32 insertions(+), 10 deletions(-)
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index a7316f79c..8c5d067f0 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -2244,14 +2244,18 @@ static int restore_root_task(struct pstree_item *init)
if (prepare_userns_hook())
return -1;
- if (prepare_namespace_before_tasks())
+ if (prepare_namespace_before_tasks()) {
+ pr_err("Failed to prepare namespace before tasks\n");
return -1;
+ }
__restore_switch_stage_nw(CR_STATE_ROOT_TASK);
ret = fork_with_pid(init);
- if (ret < 0)
+ if (ret < 0) {
+ pr_err("Failed to fork init task\n");
goto out;
+ }
restore_origin_ns_hook();
@@ -2285,21 +2289,28 @@ static int restore_root_task(struct pstree_item *init)
* uid_map and gid_map must be filled from a parent user namespace.
* prepare_userns_creds() must be called after filling mappings.
*/
- if ((root_ns_mask & CLONE_NEWUSER) && prepare_userns(init->pid->real, userns_entry))
+ if ((root_ns_mask & CLONE_NEWUSER) && prepare_userns(init->pid->real, userns_entry)) {
+ pr_err("Failed to prepare userns\n");
goto out_destroy;
+ }
pr_info("Wait until namespaces are created\n");
ret = restore_wait_inprogress_tasks();
- if (ret)
+ if (ret) {
+ pr_err("Failed to wait inprogress tasks\n");
goto out_destroy;
+ }
ret = run_scripts(ACT_SETUP_NS);
if (ret)
goto out_destroy;
ret = restore_switch_stage(CR_STATE_PREPARE_NAMESPACES);
- if (ret)
+ if (ret) {
+ pr_err("Failed to switch restore stage to " \
+ "CR_STATE_PREPARE_NAMESPACES\n");
goto out_destroy;
+ }
if (root_ns_mask & CLONE_NEWNS) {
mnt_ns_fd = open_proc(init->pid->real, "ns/mnt");
@@ -2329,8 +2340,10 @@ static int restore_root_task(struct pstree_item *init)
skip_ns_bouncing:
ret = restore_wait_inprogress_tasks();
- if (ret < 0)
+ if (ret < 0) {
+ pr_err("Failed to wait inprogress tasks\n");
goto out_destroy;
+ }
/*
* Zombies die after CR_STATE_RESTORE which is switched
@@ -2347,23 +2360,30 @@ static int restore_root_task(struct pstree_item *init)
goto out_kill;
ret = restore_switch_stage(CR_STATE_RESTORE_SIGCHLD);
- if (ret < 0)
+ if (ret < 0) {
+ pr_err("Failed to switch restore stage to " \
+ "CR_STATE_RESTORE_SIGCHLD\n");
goto out_kill;
+ }
ret = stop_usernsd();
if (ret < 0)
goto out_kill;
ret = move_veth_to_bridge();
- if (ret < 0)
+ if (ret < 0) {
+ pr_err("Failed to move veth to bridge\n");
goto out_kill;
+ }
ret = prepare_cgroup_properties();
if (ret < 0)
goto out_kill;
- if (fault_injected(FI_POST_RESTORE))
+ if (fault_injected(FI_POST_RESTORE)) {
+ pr_err("Post restore fault injected\n");
goto out_kill;
+ }
ret = run_scripts(ACT_POST_RESTORE);
if (ret != 0) {
@@ -2377,8 +2397,10 @@ static int restore_root_task(struct pstree_item *init)
* There is no need to call try_clean_remaps() after this point,
* as restore went OK and all ghosts were removed by the openers.
*/
- if (depopulate_roots_yard(mnt_ns_fd, false))
+ if (depopulate_roots_yard(mnt_ns_fd, false)) {
+ pr_err("Failed to depopulate roots yard\n");
goto out_kill;
+ }
close_safe(&mnt_ns_fd);
--
2.14.3
More information about the CRIU
mailing list