[CRIU] Re: [PATCH 4/4] inotify: Add checkpoint/restore
Pavel Emelyanov
xemul at parallels.com
Tue Apr 10 09:30:38 EDT 2012
> @@ -337,8 +342,10 @@ static int dump_one_fdinfo(struct fd_parms *p, int lfd,
> p->id = MAKE_FD_GENID(p->stat.st_dev, p->stat.st_ino, p->pos);
> if (S_ISFIFO(p->stat.st_mode))
> p->type = FDINFO_PIPE;
> - else
> + else if (S_ISREG(p->stat.st_mode))
This else covered not only S_ISREG-s, but also S_ISDIR and S_ISCHRDEV :\
> p->type = FDINFO_REG;
> + else
> + p->type = FDINFO_INOTIFY;
>
> return do_dump_one_fdinfo(p, lfd, cr_fdset);
> }
> @@ -429,7 +436,8 @@ static int dump_one_fd(pid_t pid, int fd, int lfd,
>
> if (S_ISREG(p.stat.st_mode) ||
> S_ISDIR(p.stat.st_mode) ||
> - S_ISFIFO(p.stat.st_mode))
> + S_ISFIFO(p.stat.st_mode) ||
> + is_inotify(lfd))
> return dump_one_fdinfo(&p, lfd, cr_fdset);
>
> return dump_unsupp_fd(&p);
<and>
> +/* Checks if file desciptor @fd is inotify */
> +int is_inotify(int fd)
> +{
> + char link[PATH_MAX], path[32];
> + ssize_t ret;
> +
> + snprintf(path, sizeof(path), "/proc/self/fd/%d", fd);
> + ret = readlink(path, link, sizeof(link));
> + if (ret < 0) {
> + pr_perror("Can't read link of fd %d\n", fd);
> + return -1;
> + }
> + link[ret] = 0;
> + if (!strcmp(link, "anon_inode:inotify"))
> + return 1;
> +
> + return 0;
> +}
This is wrong check. You should call statfs on fd and check that the fs magic
is anon-inode-fs one. Then (only then) parse the symlink. This is not only
the correct check, but will also facilitate signalfd, eventfd and other anonfd-s
dumping in the future.
> +struct inotify_file_entry {
> + u32 id;
> + u64 i_ino;
> + u32 mask;
> + u32 s_dev;
> + u32 r_dev;
> + u32 wd;
> + fh_t f_handle;
> +} __packed;
> +
Just putting watches in a raw is not enough. You should have an entry describing
inotify fd itself. The thing is that it has flags (O_NONBLOCK is only used now, but
still), fowner and we'll have to dump them there.
> +/* Returns path for mount device @s_dev */
> +static char *inotify_get_mnt_root(unsigned int s_dev)
> +{
> + static int last = 0;
> + int i;
> +
> + /* Cache hit rate is big */
> +again:
> + for (i = last; i < nr_mntinfo; i++) {
> + if (s_dev == mntinfo[i].s_dev) {
> + last = i;
> + return mntinfo[i].mnt_root;
> + }
> + }
> +
> + if (last) {
> + last = 0;
> + goto again;
> + }
> +
> + return NULL;
> +}
This is wrong, but enough for the first time (please, put this comment into the code).
Besides, this has to be in some generic .c file.
> + snprintf(path, sizeof(path), "/proc/self/fd/%d", wd);
> + ret = readlink(path, link, sizeof(link));
> + close(wd);
> + close(mntfd);
> +
> + if (ret < 1) {
> + pr_perror("Can't read self-link for %d\n", wd);
> + return -1;
> + }
Check if we can just push the /proc/self/fd/xxx path into inotify_add_watch.
> + attempt = 10, wd = 1;
This means that you only can restore wd-s less than 10 (or 11, I don't try hard
on corner case). This is wrong.
> + while (wd >= 0 && attempt--) {
> + wd = inotify_add_watch(i_fd, link, fe->mask);
> + if (wd < 0) {
> + pr_err("Can't add watch for %d with %d\n", i_fd, fe->wd);
> + break;
> + } else if (wd == fe->wd) {
> + ret = 0;
> + break;
> + }
> +
> + pr_debug("\t\tWatch got %d but %d expected\n", wd, fe->wd);
> + inotify_rm_watch(i_fd, wd);
Plus, you have to put a comment here stating that "kernel allocates wd-s sequentially,
this is suboptimal, but the kernel doesn't provide and API for this yet :(" and put
the respective entry in your long-term TODO list.
> + }
> + pr_info("Collected inotify: id %8x wd %8x s_dev %8x i_ino %16lx mask %8x\n",
> + info->ife.id, info->ife.wd, info->ife.s_dev, info->ife.i_ino, info->ife.mask);
> + pr_info("\t[fhandle] bytes %8x type %8x __handle %016lx:%016lx\n",
> + info->ife.f_handle.bytes, info->ife.f_handle.type,
> + info->ife.f_handle.__handle[0], info->ife.f_handle.__handle[1]);
> +
> + list_add(&info->list, &inotify_head);
Taking into account the way you restore them, this list should be sorted by wd
in an ascending order.
More information about the CRIU
mailing list