[CRIU] [PATCH] Add docker phaul driver
Pavel Emelyanov
xemul at parallels.com
Thu Oct 8 02:45:52 PDT 2015
Thanks, Hui! See my comments inline.
> See the instruction at test/docker/HOWTO
>
> Signed-off-by: Hui Kang <hkang.sunysb at gmail.com>
> ---
> .gitignore | 1 +
> p.haul | 2 +-
> phaul/images.py | 5 ++
> phaul/p_haul_docker.py | 141 ++++++++++++++++++++++++++++++++++++++++++++++++
> phaul/p_haul_iters.py | 86 +++++++++++++++++++----------
> phaul/p_haul_pid.py | 3 ++
> phaul/p_haul_service.py | 10 ++--
> phaul/p_haul_type.py | 2 +
> phaul/xem_rpc.py | 1 -
> test/docker/HOWTO | 77 ++++++++++++++++++++++++++
> 10 files changed, 296 insertions(+), 32 deletions(-)
> create mode 100644 phaul/p_haul_docker.py
> create mode 100644 test/docker/HOWTO
>
> diff --git a/.gitignore b/.gitignore
> index 739cb45..1ab6601 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -2,3 +2,4 @@ build/
> *.pyc
> rpc_pb2.py
> stats_pb2.py
> +*~
> \ No newline at end of file
> diff --git a/p.haul b/p.haul
> index aea7971..739ea95 100755
> --- a/p.haul
> +++ b/p.haul
> @@ -21,7 +21,7 @@ import phaul.xem_rpc as ph_xem_rpc
> #
>
> parser = argparse.ArgumentParser("Process HAULer")
> -parser.add_argument("type", help="Type of hat to haul, e.g. vz or lxc")
> +parser.add_argument("type", help="Type of hat to haul, e.g. vz, lxc, or docker")
> parser.add_argument("id", help="ID of what to haul")
> parser.add_argument("to", help="IP where to haul")
> parser.add_argument("-v", help="Verbosity level", default=ph_criu_api.def_verb, type=int, dest="verbose")
> diff --git a/phaul/images.py b/phaul/images.py
> index b9326ce..223bc6b 100644
> --- a/phaul/images.py
> +++ b/phaul/images.py
> @@ -137,6 +137,11 @@ class phaul_images:
> logging.info("Sending images to target")
>
> start = time.time()
> +
> + if htype.get_driver_name() == "docker" :
> + htype.send_criu_images()
> + return
> +
Can you describe why existing p.haul images sending code doesn't work for Docker?
We're currently improving the p.haul code to be more generic, so your comments
on existing shortcomings will be very valuable.
> cdir = self.image_dir()
>
> th.start_accept_images(phaul_images.IMGDIR)
> diff --git a/phaul/p_haul_iters.py b/phaul/p_haul_iters.py
> index f23e799..3061c47 100644
> --- a/phaul/p_haul_iters.py
> +++ b/phaul/p_haul_iters.py
> @@ -34,8 +34,14 @@ class phaul_iter_worker:
>
> logging.info("Setting up local")
> self.img = images.phaul_images("dmp")
> - self.criu = criu_api.criu_conn(self.data_sk)
> self.htype = p_haul_type.get_src(p_type)
> +
> + if self.htype.get_driver_name() != "docker" :
> + # docker will talk to swrk
Why isn't "just criu" not enough?
> + self.criu = criu_api.criu_conn(self.data_sk)
> + else:
> + self.criu = ""
> +
> if not self.htype:
> raise Exception("No htype driver found")
>
> @@ -45,13 +51,15 @@ class phaul_iter_worker:
>
> self.pid = self.htype.root_task_pid()
> self.fs.set_target_host(host[0])
> + self.target_host = host[0]
>
> logging.info("Setting up remote")
> self.th.setup(p_type)
>
> def set_options(self, opts):
> self.th.set_options(opts)
> - self.criu.verbose(opts["verbose"])
> + if self.htype.get_driver_name() != "docker" :
> + self.criu.verbose(opts["verbose"])
Why do you need to decrease the criu verbosity for Docker?
> self.img.set_options(opts)
> self.htype.set_options(opts)
> self.__force = opts["force"]
> @@ -75,8 +83,10 @@ class phaul_iter_worker:
> def start_migration(self):
> self._mstat.start()
>
> - if not self.__force:
> - self.validate_cpu()
> + # TODO fix it
> + if self.htype.get_driver_name() != "docker" :
> + if not self.__force:
> + self.validate_cpu()
CPU validation should be performed before migration, why do you
turn it off for Docker?
>
> logging.info("Preliminary FS migration")
> self.fs.set_work_dir(self.img.work_dir())
> @@ -84,7 +94,30 @@ class phaul_iter_worker:
>
> logging.info("Starting iterations")
>
> - while True:
> + # For Docker, we take a different path
Please, explain what the path is :)
> + if self.htype.get_driver_name() == "docker" :
> + logging.info("Take a special path for Docker")
> +
> + self.htype.dump()
> + logging.info("\tDocker dump succeeded")
> +
> + logging.info("FS and images sync")
> + # sync the aufs filesystem again
> + self.fs.stop_migration()
> +
> + # send the docker criu image to host
> + self.htype.send_criu_images(self.target_host)
> +
> + logging.info("Asking target host to restore")
> + self.th.restore_from_images()
> +
> + return
> +
> + # TODO: Do not do predump for docker right now. Add page-server
> + # to docker C/R API, then we can enable
> + # the pre-dump
> +
> + while True :
> logging.info("* Iteration %d", self.iteration)
>
> self.th.start_iter()
> @@ -141,31 +174,30 @@ class phaul_iter_worker:
>
> logging.info("Final dump and restore")
>
> - self.th.start_iter()
> - self.img.new_image_dir()
> + self.th.start_iter()
> + self.img.new_image_dir()
>
> logging.info("\tIssuing dump command to service")
>
> - req = criu_req.make_dump_req(
> - self.pid, self.htype, self.img, self.criu, self.fs)
> - resp = self.criu.send_req(req)
> - while True:
> - if resp.type != cr_rpc.NOTIFY:
> - raise Exception("Dump failed")
> -
> - if resp.notify.script == "post-dump":
> - #
> - # Dump is effectively over. Now CRIU
> - # waits for us to do whatever we want
> - # and keeps the tasks frozen.
> - #
> - break
> -
> - elif resp.notify.script == "network-lock":
> - self.htype.net_lock()
> - elif resp.notify.script == "network-unlock":
> - self.htype.net_unlock()
> -
> + req = criu_req.make_dump_req(
Indentation was tuned, please, use tabs for indents, otherwise it's
not clear what has changed.
> + self.pid, self.htype, self.img, self.criu, self.fs)
> + resp = self.criu.send_req(req)
> + while True:
> + if resp.type != cr_rpc.NOTIFY:
> + raise Exception("Dump failed")
> +
> + if resp.notify.script == "post-dump":
> + #
> + # Dump is effectively over. Now CRIU
> + # waits for us to do whatever we want
> + # and keeps the tasks frozen.
> + #
> + break
> +
> + elif resp.notify.script == "network-lock":
> + self.htype.net_lock()
> + elif resp.notify.script == "network-unlock":
> + self.htype.net_unlock()
> logging.info("\t\tNotify (%s)", resp.notify.script)
> resp = self.criu.ack_notify()
>
> diff --git a/phaul/p_haul_pid.py b/phaul/p_haul_pid.py
> index 47cf651..20433a1 100644
> --- a/phaul/p_haul_pid.py
> +++ b/phaul/p_haul_pid.py
> @@ -13,6 +13,9 @@ class p_haul_type:
> self.pid = int(id)
> self._pidfile = None
>
> + def get_driver_name(self):
> + return name
> +
> #
> # Initialize itself for source node or destination one
> #
> diff --git a/phaul/p_haul_service.py b/phaul/p_haul_service.py
> index f2cadb9..8963c2c 100644
> --- a/phaul/p_haul_service.py
> +++ b/phaul/p_haul_service.py
> @@ -44,11 +44,13 @@ class phaul_service:
> def rpc_setup(self, htype_id):
> logging.info("Setting up service side %s", htype_id)
> self.img = images.phaul_images("rst")
> - self.criu = criu_api.criu_conn(self.data_sk)
> self.htype = p_haul_type.get_dst(htype_id)
> + if self.htype.get_driver_name() != "docker" :
> + self.criu = criu_api.criu_conn(self.data_sk)
Am I right that at restore side you just want to call "docker restore"?
>
> def rpc_set_options(self, opts):
> - self.criu.verbose(opts["verbose"])
> + if self.htype.get_driver_name() != "docker" :
> + self.criu.verbose(opts["verbose"])
> self.img.set_options(opts)
> self.htype.set_options(opts)
>
> @@ -86,7 +88,9 @@ class phaul_service:
>
> def rpc_restore_from_images(self):
> logging.info("Restoring from images")
> - self.htype.put_meta_images(self.img.image_dir())
> + if self.htype.get_driver_name() != "docker" :
> + self.htype.put_meta_images(self.img.image_dir())
> +
> self.htype.final_restore(self.img, self.criu)
> logging.info("Restore succeeded")
> self.restored = True
> diff --git a/phaul/p_haul_type.py b/phaul/p_haul_type.py
> index 4fa2833..61e6810 100644
> --- a/phaul/p_haul_type.py
> +++ b/phaul/p_haul_type.py
> @@ -8,11 +8,13 @@ import logging
> import p_haul_vz
> import p_haul_pid
> import p_haul_lxc
> +import p_haul_docker
>
> haul_types = {
> p_haul_vz.name: p_haul_vz,
> p_haul_pid.name: p_haul_pid,
> p_haul_lxc.name: p_haul_lxc,
> + p_haul_docker.name: p_haul_docker,
> }
>
> def __get(id):
> diff --git a/phaul/xem_rpc.py b/phaul/xem_rpc.py
> index d320013..1662967 100644
> --- a/phaul/xem_rpc.py
> +++ b/phaul/xem_rpc.py
> @@ -95,7 +95,6 @@ class _rpc_server_sk:
> if data[0] == RPC_CALL:
> if not self._master:
> raise Exception("Proto seq error")
> -
Not needed hunk.
> res = getattr(self._master, "rpc_" + data[1])(*data[2])
> elif data[0] == RPC_CMD:
> res = getattr(self, data[1])(mgr, *data[2])
> diff --git a/test/docker/HOWTO b/test/docker/HOWTO
> new file mode 100644
> index 0000000..0b777c2
-- Pavel
More information about the CRIU
mailing list