[CRIU] [PATCH v5] Add docker phaul driver

Pavel Emelyanov xemul at parallels.com
Mon Oct 26 02:06:46 PDT 2015


Please find my comments inline.

> diff --git a/p.haul b/p.haul
> index 11460b4..478e1c7 100755
> --- a/p.haul
> +++ b/p.haul
> @@ -25,7 +25,7 @@ import phaul.htype
>  
>  parser = argparse.ArgumentParser("Process HAULer")
>  parser.add_argument("type", choices=phaul.htype.get_haul_names(),
> -	help="Type of hat to haul, e.g. vz or lxc")
> +	help="Type of hat to haul, e.g. vz, lxc, or docker")
>  parser.add_argument("id", help="ID of what to haul")
>  parser.add_argument("--to", help="IP where to haul")
>  parser.add_argument("--fdrpc", help="File descriptor of rpc socket", type=int, required=True)
> diff --git a/phaul/fs_haul_subtree.py b/phaul/fs_haul_subtree.py
> index a9bd559..e20ad1b 100644
> --- a/phaul/fs_haul_subtree.py
> +++ b/phaul/fs_haul_subtree.py
> @@ -11,29 +11,38 @@ import logging
>  rsync_log_file = "rsync.log"
>  
>  class p_haul_fs:
> -	def __init__(self, subtree_path):
> -		logging.info("Initialized subtree FS hauler (%s)", subtree_path)
> -		self.__root = subtree_path
> +	def __init__(self, subtree_paths):
> +		self.__roots = []
> +		for path in subtree_paths:
> +			logging.info("Initialized subtree FS hauler (%s)", path)
> +			self.__roots.append(path)
> +
>  		self.__thost = None
>  
>  	def set_options(self, opts):
>  		self.__thost = opts["to"]
>  
> +	def set_target_host(self, thost):
> +		self.__thost = thost
> +
>  	def set_work_dir(self, wdir):
>  		self.__wdir = wdir
>  
>  	def __run_rsync(self):
>  		logf = open(os.path.join(self.__wdir, rsync_log_file), "w+")
> -		dst = "%s:%s" % (self.__thost, os.path.dirname(self.__root))
>  
> -		# First rsync might be very long. Wait for it not
> -		# to produce big pause between the 1st pre-dump and
> -		# .stop_migration
> +		for dir_name in self.__roots:
> +
> +			dst = "%s:%s" % (self.__thost, os.path.dirname(dir_name))
> +
> +			# First rsync might be very long. Wait for it not
> +			# to produce big pause between the 1st pre-dump and
> +			# .stop_migration
>  
> -		ret = sp.call(["rsync", "-a", self.__root, dst],
> +			ret = sp.call(["rsync", "-a", dir_name, dst],
>  				stdout = logf, stderr = logf)
> -		if ret != 0:
> -			raise Exception("Rsync failed")
> +			if ret != 0:
> +				raise Exception("Rsync failed")
>  
>  	def start_migration(self):
>  		logging.info("Starting FS migration")
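
The docker driver below constructs this hauler with two roots (the
container rootfs and its config directory), so the rsync loop runs once
per directory:

	fs_haul_subtree.p_haul_fs([self._ct_rootfs, self._ct_config_dir])
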
> diff --git a/phaul/htype.py b/phaul/htype.py
> index f3a4774..bee49b4 100644
> --- a/phaul/htype.py
> +++ b/phaul/htype.py
> @@ -11,6 +11,7 @@ __haul_modules = {
>  	"vz": "p_haul_vz",
>  	"pid": "p_haul_pid",
>  	"lxc": "p_haul_lxc",
> +	"docker": "p_haul_docker",
>  }
>  
>  def __get(id):
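
For readers following along: the key is the type name passed to p.haul
on the command line, and the value is the module that __get() loads for
it, presumably something like:

	mod = importlib.import_module("phaul." + __haul_modules[id])
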
> diff --git a/phaul/images.py b/phaul/images.py
> index 57dca59..e48f96f 100644
> --- a/phaul/images.py
> +++ b/phaul/images.py
> @@ -145,7 +145,8 @@ class phaul_images:
>  		tf = img_tar(sk, cdir)
>  
>  		logging.info("\tPack")
> -		for img in filter(lambda x: x.endswith(".img"), os.listdir(cdir)):
> +		# .json files are needed for docker, e.g. descriptor.json
> +		for img in filter(lambda x: x.endswith(('.img', '.json')), os.listdir(cdir)):

Why do we include .json files from the images dir? Where do they come from?
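
Note that endswith() with a tuple matches any of the listed suffixes,
so this filter picks up every *.json in the images dir, not only
descriptor.json; a quick interpreter check:

	>>> "descriptor.json".endswith(('.img', '.json'))
	True
	>>> "anything-else.json".endswith(('.img', '.json'))
	True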

>  			tf.add(img)
>  
>  		logging.info("\tAdd htype images")
> diff --git a/phaul/iters.py b/phaul/iters.py
> index 9be4e97..00b2a02 100644
> --- a/phaul/iters.py
> +++ b/phaul/iters.py
> @@ -113,7 +113,7 @@ class phaul_iter_worker:
>  		if self.pre_dump == PRE_DUMP_AUTO_DETECT:
>  			# pre-dump auto-detection
>  			try:
> -				self.pre_dump = self.pre_dump_check()
> +				self.pre_dump = (self.pre_dump_check() and self.htype.can_pre_dump())
>  				logging.info("\t`- Auto %s" % (self.pre_dump and 'enabled' or 'disabled'))
>  			except:
>  				# The available criu seems to not
> @@ -121,6 +121,11 @@ class phaul_iter_worker:
>  				self.pre_dump = PRE_DUMP_DISABLE
>  				logging.info("\t`- Auto detection not possible "
>  						"- Disabled")
> +		elif self.pre_dump == PRE_DUMP_DISABLE:
> +			logging.info("\t`- Command-line disabled")
> +		elif self.htype.can_pre_dump() == False:
> +			self.pre_dump = False
> +			logging.info("\t`- Type does not support pre-dump")

Why are these two elif branches required?
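
If the intent is only to veto pre-dump for types that cannot do it,
that check could live in one place instead. A minimal sketch, assuming
every htype implements can_pre_dump():

	if not self.htype.can_pre_dump():
		self.pre_dump = PRE_DUMP_DISABLE
		logging.info("\t`- Type does not support pre-dump")
	elif self.pre_dump == PRE_DUMP_AUTO_DETECT:
		self.pre_dump = self.pre_dump_check()
		...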

>  		else:
>  			logging.info("\t`- Command-line %s" % (self.pre_dump and 'enabled' or 'disabled'))
>  
> diff --git a/phaul/p_haul_docker.py b/phaul/p_haul_docker.py
> new file mode 100644
> index 0000000..d2bb1e3
> --- /dev/null
> +++ b/phaul/p_haul_docker.py
> @@ -0,0 +1,158 @@
> +#
> +# Docker container hauler
> +#
> +
> +import os
> +import logging
> +import time
> +import fs_haul_subtree
> +import pycriu.rpc
> +import json
> +import signal
> +import subprocess as sp
> +from subprocess import PIPE
> +
> +# TODO use docker-py
> +# import docker
> +
> +# Some constants for docker
> +name = "docker"
> +docker_exec = "/usr/bin/docker-1.9.0-dev"
> +docker_dir = "/var/lib/docker/"
> +docker_run_meta_dir = "/var/run/docker/execdriver/native"
> +
> +class p_haul_type:
> +	def __init__(self, ctid):
> +
> +		# TODO ctid must be at least 3 characters; with docker-py, we can
> +		#	  also resolve the container name
> +		if len(ctid) < 3:
> +			raise Exception("Docker container ID must be at least 3 characters")
> +
> +		self._ctid = ctid
> +		self._ct_rootfs = ""
> +
> +	def get_driver_name(self):
> +		return name
> +
> +	def init_src(self):
> +		self.full_ctid = self.get_full_ctid()
> +		self.__load_ct_config(docker_dir)
> +
> +	def init_dst(self):
> +		pass
> +
> +	def adjust_criu_req(self, req):
> +		"""Add module-specific options to criu request"""
> +		pass
> +
> +	def root_task_pid(self):
> +		# Do we need this for Docker?
> +		return self.full_ctid
> +
> +	def __load_ct_config(self, path):
> +
> +		# Each docker container has 3 directories that need to be
> +		# migrated: (1) root filesystem, (2) container configuration,
> +		# (3) runtime meta state
> +		self._ct_rootfs = os.path.join(docker_dir, "aufs/mnt", self.full_ctid)
> +		self._ct_config_dir = os.path.join(docker_dir, "containers", self.full_ctid)
> +		self._ct_run_meta_dir = os.path.join(docker_run_meta_dir, self.full_ctid)
> +		logging.info("Container rootfs: %s", self._ct_rootfs)
> +		logging.info("Container config: %s", self._ct_config_dir)
> +		logging.info("Container meta: %s", self._ct_run_meta_dir)
> +
> +	def set_options(self, opts):
> +		pass
> +
> +	# Remove any specific FS setup
> +	def umount(self):
> +		pass
> +
> +	def get_fs(self, fs_sk=None):
> +		# use rsync for rootfs and configuration directories
> +		return fs_haul_subtree.p_haul_fs([self._ct_rootfs, self._ct_config_dir])
> +
> +	def get_fs_receiver(self, fs_sk=None):
> +		return None
> +
> +	def get_full_ctid(self):
> +		dir_name_list = os.listdir(os.path.join(docker_dir, "containers"))
> +
> +		full_id = ""
> +		for name in dir_name_list:
> +			# directory names are full container IDs; match by prefix
> +			if name.startswith(self._ctid):
> +				full_id = name
> +				break
> +
> +		if full_id != "":
> +			return full_id
> +		else:
> +			raise Exception("Cannot find container directory for %s" % self._ctid)
> +
> +	def final_dump(self, pid, img, ccon, fs):
> +		logging.info("Dump docker container %s", pid)
> +
> +		# TODO: docker API does not have checkpoint right now
> +		# cli.checkpoint() so we have to use the command line
> +		# cli = docker.Client(base_url='unix://var/run/docker.sock')
> +		# output = cli.info()
> +		# call docker API
> +
> +		logf = open("/tmp/docker_checkpoint.log", "w+")
> +		image_path_opt = "--image-dir=" + img.image_dir()
> +		ret = sp.call([docker_exec, "checkpoint", image_path_opt, self._ctid],
> +			stdout = logf, stderr = logf)
> +		if ret != 0:
> +			raise Exception("docker checkpoint failed")
> +	#
> +	# Meta-images for docker -- /var/run/docker
> +	#
> +	def get_meta_images(self, path):
> +		# Send the meta state file with criu images
> +		return [(os.path.join(self._ct_run_meta_dir, "state.json"), "state.json")]
> +
> +	def put_meta_images(self, dir):
> +		# Create docker runtime meta dir on dst side
> +		with open(os.path.join(dir, "state.json")) as data_file:
> +			data = json.load(data_file)
> +		self.full_ctid = data["id"]
> +
> +		self.__load_ct_config(docker_dir)
> +		os.makedirs(self._ct_run_meta_dir)
> +		pd = sp.Popen(["cp", os.path.join(dir, "state.json"), self._ct_run_meta_dir], stdout = PIPE)
> +		status = pd.wait()
> +
> +	def kill_last_docker_daemon(self):
> +		p = sp.Popen(['pgrep', '-l', '-f', docker_exec], stdout=sp.PIPE)
> +		out, err = p.communicate()
> +
> +		for line in out.splitlines():
> +			line = bytes.decode(line)
> +			pid = int(line.split(None, 1)[0])
> +			os.kill(pid, signal.SIGKILL)
> +
> +	def final_restore(self, img, criu):
> +		logf = open("/tmp/docker_restore.log", "w+")
> +
> +		# Kill any previous docker daemon in order to reload the
> +		# status of the migrated container
> +		self.kill_last_docker_daemon()
> +
> +		# start docker daemon in background
> +		daemon = sp.Popen([docker_exec, "daemon", "-s", "aufs"],
> +				 stdout = logf, stderr = logf)
> +		# TODO: cannot daemon.wait() here, the docker daemon does not return
> +		time.sleep(2)
> +
> +		image_path_opt = "--image-dir=" + img.image_dir()
> +		ret = sp.call([docker_exec, "restore", image_path_opt, self._ctid],
> +						stdout = logf, stderr = logf)
> +		if ret != 0:
> +			raise Exception("docker restore failed")
> +
> +	def can_pre_dump(self):
> +		# XXX: No pre-dump for docker right now. Once page-server support
> +		#	is added to the docker C/R API, we can enable pre-dump.
> +		return False
> diff --git a/phaul/p_haul_pid.py b/phaul/p_haul_pid.py
> index e354dff..43af654 100644
> --- a/phaul/p_haul_pid.py
> +++ b/phaul/p_haul_pid.py
> @@ -94,3 +94,6 @@ class p_haul_type:
>  	# Get list of veth pairs if any
>  	def veths(self):
>  		return []
> +
> +	def can_pre_dump(self):
> +		return True
> diff --git a/phaul/p_haul_vz.py b/phaul/p_haul_vz.py
> index 903006d..e346bdf 100644
> --- a/phaul/p_haul_vz.py
> +++ b/phaul/p_haul_vz.py
> @@ -244,6 +244,9 @@ class p_haul_type:
>  		return True
>  
>  
> +	def can_pre_dump(self):
> +		return True
> +
>  def parse_vz_config(body):
>  	"""Parse shell-like virtuozzo config file"""
>  
> diff --git a/phaul/service.py b/phaul/service.py
> index a03b43e..507d171 100644
> --- a/phaul/service.py
> +++ b/phaul/service.py
> @@ -72,7 +72,8 @@ class phaul_service:
>  	def rpc_start_iter(self):
>  		self.dump_iter_index += 1
>  		self.img.new_image_dir()
> -		self.start_page_server()
> +		if self.htype.get_driver_name() != "docker":
> +			self.start_page_server()

Explicit docker check again. What goes wrong if you let the page server
spawn?
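
If docker really cannot use it, a per-type capability method would
avoid hard-coding driver names in service.py. A minimal sketch, where
dump_need_page_server() is a hypothetical method each htype would
implement:

	def rpc_start_iter(self):
		self.dump_iter_index += 1
		self.img.new_image_dir()
		if self.htype.dump_need_page_server():
			self.start_page_server()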

BTW, a question for Nikita -- I don't see in p_haul_vz.py that the
final dump goes via the page server. Does it?

-- Pavel

