[CRIU] Process Migration using Sockets v3 - Patch 1/2

Pavel Emelyanov xemul at parallels.com
Thu Nov 19 05:30:45 PST 2015


Please, find the comments inline. These are mostly (but not all) cosmetic,
so fixing them should relatively easy.

I've also checked the patch with Linux's checkpatch.pl scrip, it reported
quite a lot of 'space indent', 'no space before (' and alike. Please, fix.

Also, we're doing the feature freeze today, so proxy-cache thing will go
into the 1.9 after 7 Dec.

I would also ask you to write tests for proxy-cache thing, otherwise
chances that it will be occasionally broken increase drastically.


> diff --git a/crtools.c b/crtools.c
> index ea8b889..eb19ade 100644
> --- a/crtools.c
> +++ b/crtools.c
> @@ -43,6 +43,8 @@
>  
>  #include "setproctitle.h"
>  
> +#include "img-remote.h"
> +
>  struct cr_options opts;
>  
>  void init_opts(void)
> @@ -62,6 +64,8 @@ void init_opts(void)
>  	opts.cpu_cap = CPU_CAP_DEFAULT;
>  	opts.manage_cgroups = CG_MODE_DEFAULT;
>  	opts.ps_socket = -1;
> +	opts.addr = PROXY_FWD_HOST;
> +	opts.ps_port = CACHE_WRITE_PORT;

These fields are used by page-server and dump/pre-dump, so would you pick some
more generic names for these macros?

>  	opts.ghost_limit = DEFAULT_GHOST_LIMIT;
>  }
>  
> @@ -680,6 +696,8 @@ usage:
>  "  dedup          remove duplicates in memory dump\n"
>  "  cpuinfo dump   writes cpu information into image file\n"
>  "  cpuinfo check  validates cpu information read from image file\n"
> +"  image-cache    launch image-cache, used for process live migration\n"
> +"  image-proxy    launch image-proxy, used for process live migration\n"

Plz, add few words about on which side of the process each is used. E.g.

image-cache    launch destination-side cache for images
image-proxy    launch source-side proxy to send images to destination mode

>  	);
>  
>  	if (usage_error) {
> @@ -706,6 +724,7 @@ usage:
>  "                        restore making it the parent of the restored process\n"
>  "  --freeze-cgroup\n"
>  "                        use cgroup freezer to collect processes\n"
> +"  --remote              dump images directly to remote node\n"

Not directly, but via image-proxy, right?

>  "\n"
>  "* Special resources support:\n"
>  "  -x|--" USK_EXT_PARAM "inode,.." "      allow external unix connections (optionally can be assign socket's inode that allows one-sided dump)\n"
> diff --git a/image.c b/image.c
> index dc9d6a1..a519788 100644
> --- a/image.c
> +++ b/image.c
> @@ -306,18 +307,50 @@ static int do_open_image(struct cr_img *img, int dfd, int type, unsigned long of
>  {
>  	int ret, flags;
>  
> -	flags = oflags & ~(O_NOBUF | O_SERVICE);
> +	flags = oflags & ~(O_NOBUF | O_SERVICE | O_FORCE_LOCAL);
>  
> -	ret = openat(dfd, path, flags, CR_FD_PERM);
> -	if (ret < 0) {
> -		if (!(flags & O_CREAT) && (errno == ENOENT)) {
> -			pr_info("No %s image\n", path);
> +	if(opts.remote && !(oflags & O_FORCE_LOCAL)) {
> +		char* snapshot_id = NULL;
> +
> +		/* Note: if dfd is the service fd then we need the current
> +                 * snapshot_id. Else we need a parent snapshot_id. */
> +		if (dfd == get_service_fd(IMG_FD_OFF))
> +			snapshot_id = get_curr_snapshot_id();
> +		else
> +			snapshot_id = get_snapshot_id_from_idx(dfd);

Can you move this if () into get_snapshot_id_from_idx()? So that here
we just have

snapshot_id = get_snapshot_id_from_idx(dfd)

?

> +
> +		if(snapshot_id == NULL) {
> +			ret = -1;
> +		}

Braces around one-line bodies (especially that simple ones) are not welcome.

> +		else if (flags == O_RDONLY) {
> +			pr_info("do_open_remote_image RDONLY path=%s snapshot_id=%s\n",
> +			  path, snapshot_id);
> +			ret = read_remote_image_connection(snapshot_id, path);
> +		}
> +		else {
> +			pr_info("do_open_remote_image WDONLY path=%s snapshot_id=%s\n",
> +			  path, snapshot_id);
> +			ret = write_remote_image_connection(snapshot_id, path, O_WRONLY);
> +		}
> +
> +		if (ret < 0) {

This is not correct. Empty image should be set only if the error is ENOENT,
but any other open/setup error should be reported back.

> +			pr_info("No %s (dfd=%d) image\n", path, dfd);
>  			img->_x.fd = EMPTY_IMG_FD;
>  			goto skip_magic;
>  		}
> -
> -		pr_perror("Unable to open %s", path);
> -		goto err;
> +	}
> +	else {
> +		ret = openat(dfd, path, flags, CR_FD_PERM);
> +		if (ret < 0) {
> +			if (!(flags & O_CREAT) && (errno == ENOENT)) {
> +				pr_info("No %s image\n", path);
> +				img->_x.fd = EMPTY_IMG_FD;
> +				goto skip_magic;
> +			}
> +
> +			pr_perror("Unable to open %s", path);
> +			goto err;
> +		}
>  	}
>  
>  	img->_x.fd = ret;
> diff --git a/img-remote.c b/img-remote.c
> new file mode 100644
> index 0000000..3c0affe
> --- /dev/null
> +++ b/img-remote.c
> @@ -0,0 +1,245 @@
> +#include <unistd.h>
> +#include <stdlib.h>
> +#include <sys/types.h>
> +#include <sys/socket.h>
> +#include <netinet/in.h>
> +#include <netdb.h>
> +#include "xmalloc.h"
> +#include "criu-log.h"
> +#include "img-remote.h"
> +#include "img-remote-proto.h"
> +#include "protobuf/remote-image.pb-c.h"
> +#include "protobuf-desc.h"
> +#include <fcntl.h>
> +
> +#define PB_LOCAL_IMAGE_SIZE PATHLEN
> +
> +static char* snapshot_id = NULL;
> +
> +LIST_HEAD(snapshot_head);
> +
> +/* A snapshot is a dump or pre-dump operation. Each snapshot is identified by an
> + * ID which corresponds to the working directory specefied by the user. */
> +struct snapshot {
> +	char snapshot_id[PATHLEN];
> +	struct list_head l;
> +};
> +
> +struct snapshot* new_snapshot(char* snapshot_id)
> +{
> +	struct snapshot* s = malloc(sizeof(struct snapshot));
> +	if (!s) {
> +		pr_perror("Failed to allocate snapshot structure");
> +	}
> +	strncpy(s->snapshot_id, snapshot_id, PATHLEN);
> +	return s;
> +}
> +
> +void add_snapshot(struct snapshot* snapshot)
> +{
> +	list_add_tail(&(snapshot->l), &snapshot_head);
> +}
> +
> +int read_remote_image_connection(char* snapshot_id, char* path)
> +{
> +	int error;
> +	int sockfd = setup_UNIX_client_socket(READ_IMG_PATH);

Here (and below) the paths to proxy-cache sockets are hard-coded. Is it going
to stay forever?

> +	if (sockfd < 0) {
> +		return -1;
> +	}
> +
> +	if (write_header(sockfd, snapshot_id, path, O_RDONLY) < 0) {
> +		pr_perror("Error writing header for %s:%s", path, snapshot_id);
> +		return -1;
> +	}
> +
> +	if (read_reply_header(sockfd, &error) < 0) {
> +		pr_perror("Error reading reply header for %s:%s", path, snapshot_id);
> +		return -1;
> +	}
> +
> +	if (!error) {
> +		pr_info("Image does exist (%s:%s)\n", path, snapshot_id);
> +		return sockfd;
> +	}
> +	else if (error == ENOENT) {
> +		pr_info("Image does not exist (%s:%s)\n", path, snapshot_id);
> +		close(sockfd);
> +		return -1;
> +	}
> +	else {
> +		pr_perror("Unexpected error returned: %d (%s:%s)\n", error, path, snapshot_id);
> +		close(sockfd);
> +		return -1;
> +	}
> +}
> +
> diff --git a/include/img-remote.h b/include/img-remote.h
> new file mode 100644
> index 0000000..f3bb810
> --- /dev/null
> +++ b/include/img-remote.h
> @@ -0,0 +1,95 @@
> +#include <limits.h>
> +
> +#ifndef IMAGE_REMOTE_H
> +#define	IMAGE_REMOTE_H
> +
> +#define PATHLEN PATH_MAX
> +#define DUMP_FINISH "DUMP_FINISH"
> +#define PARENT_IMG "parent"
> +#define NULL_SNAPSHOT_ID "null"
> +
> +/* This flag is used to enable local debugging (dump + proxy + cache + restore)
> + * on the same machine. The idea is that both dump and restore processes are
> + * orthogonal to this. */
> +#define LOCAL_DEVEL 1
> +
> +#if LOCAL_DEVEL
> +/* In local Devel, the image-proxy will open a local socket on WRITE_IMG_PATH and
> + * the image-cache will open a local socket on READ_IMG_PATH. */
> +#define PROXY_IMG_PATH "/tmp/criu-remote-proxy.sock"
> +#define CACHE_IMG_PATH "/tmp/criu-remote-cache.sock"
> +#define READ_IMG_PATH CACHE_IMG_PATH
> +#define WRITE_IMG_PATH PROXY_IMG_PATH
> +#else
> +#define IMG_PATH "/tmp/criu-remote.sock"
> +#define READ_IMG_PATH IMG_PATH
> +#define WRITE_IMG_PATH IMG_PATH
> +#define PROXY_IMG_PATH IMG_PATH
> +#define CACHE_IMG_PATH IMG_PATH
> +#endif
> +
> +/* Only the this channel is TCP.*/
> +#define CACHE_WRITE_PORT 9996
> +#define PROXY_FWD_PORT CACHE_WRITE_PORT
> +#define PROXY_FWD_HOST "localhost"
> +
> +/* Warning: This may be problematic because of double evaluation... */
> +#define MIN(x, y) (((x) < (y)) ? (x) : (y))

We have min() and min_t macros in compiler.h

> +
> +/* Called by restore to get the fd correspondent to a particular path. This call
> + * will block until the connection is received. */
> +int read_remote_image_connection(char* snapshot_id, char* path);
> +

> diff --git a/include/protobuf-desc.h b/include/protobuf-desc.h
> index ab7e4f2..ef7544d 100644
> --- a/include/protobuf-desc.h
> +++ b/include/protobuf-desc.h
> @@ -55,16 +55,20 @@ enum {
>  	PB_CPUINFO,
>  	PB_USERNS,
>  	PB_NETNS,
> +	PB_REMOTE_IMAGE,
> +	PB_LOCAL_IMAGE,         /* 50 */
> +	PB_LOCAL_IMAGE_REPLY,
> +	PB_SNAPSHOT_ID,

Plz, describe these images with comments in this enum.

>  
>  	/* PB_AUTOGEN_STOP */
>  
>  	PB_PAGEMAP_HEAD,
> -	PB_IDS,		/* 50 */
> +	PB_IDS,
>  	PB_SIGACT,
>  	PB_NETDEV,
>  	PB_REMAP_FPATH,
>  	PB_SK_QUEUES,
> -	PB_IPCNS_MSG,
> +	PB_IPCNS_MSG,		/* 60 */
>  	PB_IPCNS_MSG_ENT,
>  
>  	PB_MAX,

> diff --git a/page-read.c b/page-read.c
> index 832c057..40c38bc 100644
> --- a/page-read.c
> +++ b/page-read.c
> @@ -195,9 +204,24 @@ static int try_open_parent(int dfd, int pid, struct page_read *pr, int pr_flags)
>  	int pfd, ret;
>  	struct page_read *parent = NULL;
>  
> -	pfd = openat(dfd, CR_PARENT_LINK, O_RDONLY);
> -	if (pfd < 0 && errno == ENOENT)
> -		goto out;
> +	if(opts.remote) {
> +		/* Note: we are replacing a real directory FD for a snapshot_id
> +                 * index. Since we need the parent of the current snapshot_id,
> +                 * we want the current snapshot_id index minus one.

Is it 100% guaranteed that parent image is -1 from the current one?

> +                 * possible that dfd is already a snapshot_id index. We test it
> +                 * by comparing it to the service FD. When opening an image (see
> +                 * do_open_image) we convert the snapshot_id index into a real
> +                 * snapshot_id. */
> +		pfd = dfd == get_service_fd(IMG_FD_OFF) ?
> +                    get_curr_snapshot_id_idx() - 1 : dfd - 1;
> +		if(pfd < 0)
> +			goto out;
> +	}
> +	else {
> +		pfd = openat(dfd, CR_PARENT_LINK, O_RDONLY);
> +		if (pfd < 0 && errno == ENOENT)
> +			goto out;
> +	}
>  
>  	parent = xmalloc(sizeof(*parent));
>  	if (!parent)

-- Pavel



More information about the CRIU mailing list