[CRIU] Re: [RFC] lock: Introduce futex_t and appropriate helpers
Pavel Emelyanov
xemul at parallels.com
Mon Mar 26 11:30:11 EDT 2012
On 03/26/2012 07:05 PM, Cyrill Gorcunov wrote:
> Pavel, I guess you meant something like below, right?
> (please don't apply it yet, I'll say when it'll be ready)
Yes. Plz, test and re-send, I'll apply.
> Cyrill
> ---
> From: Cyrill Gorcunov <gorcunov at openvz.org>
> Date: Mon, 26 Mar 2012 19:03:08 +0400
> Subject: [PATCH] lock: Introduce futex_t and appropriate helpers
>
> Instead of open-coded u32 variables poking lets use
> futex_t type and appropriate helpers where needed.
>
> This should increase readability.
>
> Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
> ---
> cr-restore.c | 57 +++++++++++++------------
> files.c | 29 +++++++------
> include/files.h | 7 ++-
> include/lock.h | 115 +++++++++++++++++++++++-----------------------------
> include/restorer.h | 6 +-
> restorer.c | 18 ++++----
> 6 files changed, 112 insertions(+), 120 deletions(-)
>
> diff --git a/cr-restore.c b/cr-restore.c
> index 3c29fcc..44d7cdc 100644
> --- a/cr-restore.c
> +++ b/cr-restore.c
> @@ -58,11 +58,11 @@
> struct pipe_info {
> unsigned int pipeid;
> int pid;
> - u32 real_pid; /* futex */
> int read_fd;
> int write_fd;
> int status;
> - u32 users; /* futex */
> + futex_t real_pid;
> + futex_t users;
> };
>
> struct shmem_id {
> @@ -126,7 +126,7 @@ static void show_saved_pipes(void)
> for (i = 0; i < nr_pipes; i++)
> pr_info("\t\tpipeid %x pid %d users %d status %d\n",
> pipes[i].pipeid, pipes[i].pid,
> - pipes[i].users, pipes[i].status);
> + futex_get(&pipes[i].users), pipes[i].status);
> }
>
> static struct pipe_info *find_pipe(unsigned int pipeid)
> @@ -153,7 +153,7 @@ static int shmem_wait_and_open(int pid, struct shmem_info *si)
> si->pid, si->start, si->end);
>
> pr_info("%d: Waiting for [%s] to appear\n", pid, path);
> - cr_wait_until(&si->lock, 1);
> + futex_wait_until(&si->lock, 1);
>
> pr_info("%d: Opening shmem [%s] \n", pid, path);
> ret = open(path, O_RDWR);
> @@ -212,7 +212,7 @@ static int collect_shmem(int pid, struct vma_entry *vi)
> si->size = size;
> si->fd = -1;
>
> - cr_wait_init(&si->lock);
> + futex_init(&si->lock);
>
> return 0;
> }
> @@ -290,7 +290,7 @@ static int collect_pipe(int pid, struct pipe_entry *e, int p_fd)
> break;
> }
> } else
> - pipes[i].users++;
> + futex_inc(&pipes[i].users);
>
> return 0;
> }
> @@ -304,10 +304,11 @@ static int collect_pipe(int pid, struct pipe_entry *e, int p_fd)
>
> pipes[nr_pipes].pipeid = e->pipeid;
> pipes[nr_pipes].pid = pid;
> - pipes[nr_pipes].users = 0;
> pipes[nr_pipes].read_fd = -1;
> pipes[nr_pipes].write_fd = -1;
>
> + futex_init(&pipes[nr_pipes].users);
> +
> switch (e->flags & O_ACCMODE) {
> case O_RDONLY:
> pipes[nr_pipes].status = PIPE_RDONLY;
> @@ -399,7 +400,7 @@ static int prepare_shared(int ps_fd)
> return -1;
> }
> task_entries->nr = 0;
> - task_entries->start = CR_STATE_RESTORE;
> + futex_set(&task_entries->start, CR_STATE_RESTORE);
>
> pipes = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, 0, 0);
> if (pipes == MAP_FAILED) {
> @@ -437,7 +438,7 @@ static int prepare_shared(int ps_fd)
> }
>
> if (!ret) {
> - task_entries->nr_in_progress = task_entries->nr;
> + futex_set(&task_entries->nr_in_progress, task_entries->nr);
>
> lseek(ps_fd, sizeof(u32), SEEK_SET);
>
> @@ -664,18 +665,18 @@ static int create_pipe(int pid, struct pipe_entry *e, struct pipe_info *pi, int
> if (reopen_pipe(pfd[1], &pi->write_fd, &pi->read_fd, pipes_fd))
> return -1;
>
> - cr_wait_set(&pi->real_pid, pid);
> + futex_set_and_wake(&pi->real_pid, pid);
>
> pi->status |= PIPE_CREATED;
>
> pr_info("\t%d: Done, waiting for others (users %d) on %d pid with r:%d w:%d\n",
> - pid, pi->users, pi->real_pid, pi->read_fd, pi->write_fd);
> + pid, futex_get(&pi->users), futex_get(&pi->real_pid), pi->read_fd, pi->write_fd);
>
> if (!pipe_is_rw(pi)) {
> pr_info("\t%d: Waiting for %x pipe to attach (%d users left)\n",
> - pid, e->pipeid, pi->users);
> + pid, e->pipeid, futex_get(&pi->users));
>
> - cr_wait_until(&pi->users, 0);
> + futex_wait_until(&pi->users, 0);
>
> if ((e->flags & O_ACCMODE) == O_WRONLY)
> close_safe(&pi->read_fd);
> @@ -717,7 +718,7 @@ static int attach_pipe(int pid, struct pipe_entry *e, struct pipe_info *pi, int
> pr_info("\t%d: Waiting for pipe %x to appear\n",
> pid, e->pipeid);
>
> - cr_wait_while(&pi->real_pid, 0);
> + futex_wait_while(&pi->real_pid, 0);
>
> if (move_img_fd(pipes_fd, e->fd))
> return -1;
> @@ -740,9 +741,9 @@ static int attach_pipe(int pid, struct pipe_entry *e, struct pipe_info *pi, int
> goto out;
> }
>
> - sprintf(path, "/proc/%d/fd/%d", pi->real_pid, tmp);
> + sprintf(path, "/proc/%d/fd/%d", futex_get(&pi->real_pid), tmp);
> pr_info("\t%d: Attaching pipe %s (%d users left)\n",
> - pid, path, pi->users - 1);
> + pid, path, futex_get(&pi->users) - 1);
>
> fd = open(path, e->flags);
> if (fd < 0) {
> @@ -754,7 +755,7 @@ static int attach_pipe(int pid, struct pipe_entry *e, struct pipe_info *pi, int
> if (reopen_fd_as(e->fd, fd))
> return -1;
>
> - cr_wait_dec(&pi->users);
> + futex_dec_and_wake(&pi->users);
> out:
> tmp = set_fd_flags(e->fd, e->flags);
> if (tmp < 0)
> @@ -973,13 +974,13 @@ static int restore_one_zombie(int pid, int exit_code)
> pr_info("Restoring zombie with %d code\n", exit_code);
>
> if (task_entries != NULL) {
> - cr_wait_dec(&task_entries->nr_in_progress);
> - cr_wait_while(&task_entries->start, CR_STATE_RESTORE);
> + futex_dec_and_wake(&task_entries->nr_in_progress);
> + futex_wait_while(&task_entries->start, CR_STATE_RESTORE);
>
> zombie_prepare_signals();
>
> - cr_wait_dec(&task_entries->nr_in_progress);
> - cr_wait_while(&task_entries->start, CR_STATE_RESTORE_SIGCHLD);
> + futex_dec_and_wake(&task_entries->nr_in_progress);
> + futex_wait_while(&task_entries->start, CR_STATE_RESTORE_SIGCHLD);
> }
>
> if (exit_code & 0x7f) {
> @@ -1124,7 +1125,7 @@ static void sigchld_handler(int signal, siginfo_t *siginfo, void *data)
> pr_err("%d killed by signal %d\n",
> siginfo->si_pid, siginfo->si_status);
>
> - cr_wait_set(&task_entries->nr_in_progress, -1);
> + futex_set_and_wake(&task_entries->nr_in_progress, -1);
> }
>
> static int restore_task_with_children(void *_arg)
> @@ -1259,7 +1260,9 @@ static int restore_root_task(int fd, struct cr_options *opts)
> return -1;
>
> pr_info("Wait until all tasks are restored\n");
> - ret = cr_wait_until_greater(&task_entries->nr_in_progress, 0);
> + futex_wait_while_gt(&task_entries->nr_in_progress, 0);
> + ret = (int)futex_get(&task_entries->nr_in_progress);
> +
> out:
> if (ret < 0) {
> pr_err("Someone can't be restored\n");
> @@ -1268,9 +1271,9 @@ out:
> return 1;
> }
>
> - cr_wait_set(&task_entries->nr_in_progress, task_entries->nr);
> - cr_wait_set(&task_entries->start, CR_STATE_RESTORE_SIGCHLD);
> - cr_wait_until(&task_entries->nr_in_progress, 0);
> + futex_set_and_wake(&task_entries->nr_in_progress, task_entries->nr);
> + futex_set_and_wake(&task_entries->start, CR_STATE_RESTORE_SIGCHLD);
> + futex_wait_until(&task_entries->nr_in_progress, 0);
>
> ret = sigaction(SIGCHLD, &old_act, NULL);
> if (ret < 0) {
> @@ -1279,7 +1282,7 @@ out:
> }
>
> pr_info("Go on!!!\n");
> - cr_wait_set(&task_entries->start, CR_STATE_COMPLETE);
> + futex_set_and_wake(&task_entries->start, CR_STATE_COMPLETE);
>
> if (!opts->restore_detach)
> wait(NULL);
> diff --git a/files.c b/files.c
> index 7055771..1c6fc11 100644
> --- a/files.c
> +++ b/files.c
> @@ -75,7 +75,8 @@ static int collect_fd(int pid, struct fdinfo_entry *e)
>
> le->pid = pid;
> le->fd = e->addr;
> - le->real_pid = 0;
> +
> + futex_init(&le->real_pid);
>
> for (i = 0; i < nr_fdinfo_descs; i++) {
> desc = &fdinfo_descs[i];
> @@ -83,7 +84,7 @@ static int collect_fd(int pid, struct fdinfo_entry *e)
> if (desc->id != e->id)
> continue;
>
> - fdinfo_descs[i].users++;
> + futex_inc(&fdinfo_descs[i].users);
> list_add(&le->list, &desc->list);
>
> if (fdinfo_descs[i].pid < pid)
> @@ -106,8 +107,8 @@ static int collect_fd(int pid, struct fdinfo_entry *e)
> desc->id = e->id;
> desc->addr = e->addr;
> desc->pid = pid;
> - desc->users = 1;
> INIT_LIST_HEAD(&desc->list);
> + futex_set(&desc->users, 1);
>
> list_add(&le->list, &desc->list);
> nr_fdinfo_descs++;
> @@ -275,7 +276,7 @@ static int open_transport_fd(int pid, struct fdinfo_entry *fe,
> transport_name_gen(&saddr, &sun_len, getpid(), fe->addr);
>
> pr_info("\t%d: Create transport fd for %lx users %d\n", pid,
> - fe->addr, fi->users);
> + fe->addr, futex_get(&fi->users));
>
> fle = find_fdinfo_list_entry(pid, fe->addr, fi);
>
> @@ -295,7 +296,7 @@ static int open_transport_fd(int pid, struct fdinfo_entry *fe,
> return -1;
>
> pr_info("Wake up fdinfo pid=%d fd=%d\n", fle->pid, fle->fd);
> - cr_wait_set(&fle->real_pid, getpid());
> + futex_set_and_wake(&fle->real_pid, getpid());
>
> return 0;
> }
> @@ -318,7 +319,7 @@ static int open_fd(int pid, struct fdinfo_entry *fe,
> if (reopen_fd_as((int)fe->addr, tmp))
> return -1;
>
> - if (fi->users == 1)
> + if (futex_get(&fi->users) == 1)
> goto out;
>
> sock = socket(PF_UNIX, SOCK_DGRAM, 0);
> @@ -327,24 +328,24 @@ static int open_fd(int pid, struct fdinfo_entry *fe,
> return -1;
> }
>
> - cr_wait_set(&fi->real_pid, getpid());
> + futex_set_and_wake(&fi->real_pid, getpid());
>
> pr_info("\t%d: Create fd for %lx users %d\n", pid,
> - fe->addr, fi->users);
> + fe->addr, futex_get(&fi->users));
>
> list_for_each_entry(fle, &fi->list, list) {
> int len;
>
> - fi->users--;
> + futex_dec(&fi->users);
>
> if (pid == fle->pid)
> continue;
>
> pr_info("Wait fdinfo pid=%d fd=%d\n", fle->pid, fle->fd);
> - cr_wait_while(&fle->real_pid, 0);
> + futex_wait_while(&fle->real_pid, 0);
>
> pr_info("Send fd %d to %s\n", (int)fe->addr, saddr.sun_path + 1);
> - transport_name_gen(&saddr, &len, fle->real_pid, fle->fd);
> + transport_name_gen(&saddr, &len, futex_get(&fle->real_pid), fle->fd);
>
> if (send_fd(sock, &saddr, len, fe->addr) < 0) {
> pr_perror("Can't send file descriptor");
> @@ -352,7 +353,7 @@ static int open_fd(int pid, struct fdinfo_entry *fe,
> }
> }
>
> - BUG_ON(fi->users);
> + BUG_ON(futex_get(&fi->users));
> close(sock);
> out:
> return 0;
> @@ -376,7 +377,7 @@ static int receive_fd(int pid, struct fdinfo_entry *fe, struct fdinfo_desc *fi)
> }
>
> pr_info("\t%d: Receive fd for %lx users %d\n", pid,
> - fe->addr, fi->users);
> + fe->addr, futex_get(&fi->users));
>
> tmp = recv_fd(fe->addr);
> if (tmp < 0) {
> @@ -426,7 +427,7 @@ static int open_fdinfo(int pid, struct fdinfo_entry *fe, int *fdinfo_fd, int sta
> return -1;
>
> pr_info("\t%d: Got fd for %lx users %d\n", pid,
> - fe->addr, fi->users);
> + fe->addr, futex_get(&fi->users));
>
> BUG_ON(fe->type != FDINFO_REG);
>
> diff --git a/include/files.h b/include/files.h
> index e040380..fb8d952 100644
> --- a/include/files.h
> +++ b/include/files.h
> @@ -3,6 +3,7 @@
>
> #include "compiler.h"
> #include "types.h"
> +#include "lock.h"
> #include "list.h"
> #include "image.h"
>
> @@ -25,8 +26,8 @@ struct fdinfo_desc {
> u64 id;
> u64 addr;
> int pid;
> - u32 real_pid; /* futex */
> - u32 users; /* futex */
> + futex_t real_pid;
> + futex_t users;
> struct list_head list;
> };
>
> @@ -34,7 +35,7 @@ struct fdinfo_list_entry {
> struct list_head list;
> int fd;
> int pid;
> - u32 real_pid;
> + futex_t real_pid;
> };
>
> extern int prepare_fds(int pid);
> diff --git a/include/lock.h b/include/lock.h
> index 157e2f3..1a5eb6b 100644
> --- a/include/lock.h
> +++ b/include/lock.h
> @@ -11,86 +11,73 @@
> #include "syscall.h"
> #include "util.h"
>
> -/*
> - * Init futex @v value
> - */
> -static always_inline void cr_wait_init(u32 *v)
> +typedef struct {
> + u32 raw;
> +} futex_t;
> +
> +/* Get current futex @f value */
> +static inline u32 futex_get(futex_t *f)
> {
> - u32 val = 0;
> - atomic_set(v, val);
> + return atomic_get(&f->raw);
> }
>
> -/*
> - * Set futex @v value to @val and wake up all waiters
> - */
> -static always_inline void cr_wait_set(u32 *v, u32 val)
> +/* Set futex @f value to @v */
> +static inline void futex_set(futex_t *f, u32 v)
> {
> - int ret;
> -
> - atomic_set(v, val);
> -
> - ret = sys_futex(v, FUTEX_WAKE, INT_MAX, NULL, NULL, 0);
> - BUG_ON(ret < 0);
> + atomic_set(&f->raw, v);
> }
>
> -/*
> - * Decrement futex @v value to @val and wake up all waiters
> - */
> -static always_inline void cr_wait_dec(u32 *v)
> +#define futex_init(f) futex_set(f, 0)
> +
> +/* Wait on futex @__f value @__v become in condition @__c */
> +#define futex_wait_if_cond(__f, __v, __cond) \
> + do { \
> + int ret; \
> + u32 tmp; \
> + \
> + while (1) { \
> + tmp = (__f)->raw; \
> + if (tmp __cond (__v)) \
> + break; \
> + ret = sys_futex(&(__f)->raw, FUTEX_WAIT,\
> + tmp, NULL, NULL, 0); \
> + BUG_ON(ret < 0 && ret != -EWOULDBLOCK); \
> + } \
> + } while (0)
> +
> +/* Set futex @f to @v and wake up all waiters */
> +static inline void futex_set_and_wake(futex_t *f, u32 v)
> {
> - int ret;
> -
> - atomic_dec(v);
> -
> - ret = sys_futex(v, FUTEX_WAKE, INT_MAX, NULL, NULL, 0);
> - BUG_ON(ret < 0);
> + atomic_set(&f->raw, v);
> + BUG_ON(sys_futex(&f->raw, FUTEX_WAKE, INT_MAX, NULL, NULL, 0) < 0);
> }
>
> -/*
> - * Wait until futex @v value become @val
> - */
> -static always_inline void cr_wait_until(u32 *v, u32 val)
> +/* Decrement futex @f value and wake up all waiters */
> +static inline void futex_dec_and_wake(futex_t *f)
> {
> - int ret;
> - u32 tmp;
> -
> - while (1) {
> - tmp = *v;
> - if (tmp == val)
> - break;
> - ret = sys_futex(v, FUTEX_WAIT, tmp, NULL, NULL, 0);
> - BUG_ON(ret < 0 && ret != -EWOULDBLOCK);
> - }
> + atomic_dec(&f->raw);
> + BUG_ON(sys_futex(&f->raw, FUTEX_WAKE, INT_MAX, NULL, NULL, 0) < 0);
> }
>
> -/*
> - * Wait until futex @v value greater than @val
> - */
> -static always_inline s32 cr_wait_until_greater(u32 *v, s32 val)
> -{
> - int ret;
> - s32 tmp;
> +/* Plain increment futex @f value */
> +static inline void futex_inc(futex_t *f) { f->raw++; }
>
> - while (1) {
> - tmp = *v;
> - if (tmp <= val)
> - break;
> - ret = sys_futex(v, FUTEX_WAIT, tmp, NULL, NULL, 0);
> - BUG_ON(ret < 0 && ret != -EWOULDBLOCK);
> - }
> +/* Plain decrement futex @f value */
> +static inline void futex_dec(futex_t *f) { f->raw--; }
>
> - return tmp;
> -}
> +/* Wait until futex @f value become @v */
> +static inline void futex_wait_until(futex_t *f, u32 v)
> +{ futex_wait_if_cond(f, v, ==); }
>
> -/*
> - * Wait while futex @v value is @val
> - */
> -static always_inline void cr_wait_while(u32 *v, u32 val)
> -{
> - int ret;
> +/* Wait while futex @f value is greater than @v */
> +static inline void futex_wait_while_gt(futex_t *f, u32 v)
> +{ futex_wait_if_cond(f, v, <=); }
>
> - while (*v == val) {
> - ret = sys_futex(v, FUTEX_WAIT, val, NULL, NULL, 0);
> +/* Wait while futex @f value is @v */
> +static inline void futex_wait_while(futex_t *f, u32 v)
> +{
> + while (f->raw == v) {
> + int ret = sys_futex(&f->raw, FUTEX_WAIT, v, NULL, NULL, 0);
> BUG_ON(ret < 0 && ret != -EWOULDBLOCK);
> }
> }
> diff --git a/include/restorer.h b/include/restorer.h
> index 5801792..639e9f9 100644
> --- a/include/restorer.h
> +++ b/include/restorer.h
> @@ -198,7 +198,7 @@ struct shmem_info {
> unsigned long size;
> int pid;
> int fd;
> - u32 lock; /* futex */
> + futex_t lock;
> };
>
> struct shmems {
> @@ -216,8 +216,8 @@ enum {
>
> struct task_entries {
> int nr;
> - u32 nr_in_progress;
> - u32 start; //futex
> + futex_t nr_in_progress;
> + futex_t start;
> };
>
> static always_inline struct shmem_info *
> diff --git a/restorer.c b/restorer.c
> index 44233e6..889cd2f 100644
> --- a/restorer.c
> +++ b/restorer.c
> @@ -46,7 +46,7 @@ static void sigchld_handler(int signal, siginfo_t *siginfo, void *data)
> write_string(" killed by signal ");
> write_num_n(siginfo->si_status);
>
> - cr_wait_set(&task_entries->nr_in_progress, -1);
> + futex_set_and_wake(&task_entries->nr_in_progress, -1);
> /* sa_restorer may be unmaped, so we can't go back to userspace*/
> sys_kill(sys_getpid(), SIGSTOP);
> sys_exit(1);
> @@ -209,13 +209,13 @@ long restore_thread(struct thread_restore_args *args)
> */
>
> restore_creds(NULL);
> - cr_wait_dec(&task_entries->nr_in_progress);
> + futex_dec_and_wake(&task_entries->nr_in_progress);
>
> write_num(sys_gettid());
> write_string_n(": Restored");
>
> - cr_wait_while(&task_entries->start, CR_STATE_RESTORE);
> - cr_wait_dec(&task_entries->nr_in_progress);
> + futex_wait_while(&task_entries->start, CR_STATE_RESTORE);
> + futex_dec_and_wake(&task_entries->nr_in_progress);
>
> new_sp = (long)rt_sigframe + 8;
> asm volatile(
> @@ -423,7 +423,7 @@ long restore_task(struct task_restore_core_args *args)
> vma_entry->shmid);
> if (entry && entry->pid == my_pid &&
> entry->start == vma_entry->start)
> - cr_wait_set(&entry->lock, 1);
> + futex_set_and_wake(&entry->lock, 1);
> }
>
> if (vma_entry->prot & PROT_WRITE)
> @@ -652,20 +652,20 @@ long restore_task(struct task_restore_core_args *args)
>
> restore_creds(&args->creds);
>
> - cr_wait_dec(&args->task_entries->nr_in_progress);
> + futex_dec_and_wake(&args->task_entries->nr_in_progress);
>
> write_num(sys_getpid());
> write_string_n(": Restored");
>
> - cr_wait_while(&args->task_entries->start, CR_STATE_RESTORE);
> + futex_wait_while(&args->task_entries->start, CR_STATE_RESTORE);
>
> sys_sigaction(SIGCHLD, &args->sigchld_act, NULL);
>
> - cr_wait_dec(&args->task_entries->nr_in_progress);
> + futex_dec_and_wake(&args->task_entries->nr_in_progress);
>
> sys_close(args->logfd);
>
> - cr_wait_while(&args->task_entries->start, CR_STATE_RESTORE_SIGCHLD);
> + futex_wait_while(&args->task_entries->start, CR_STATE_RESTORE_SIGCHLD);
>
> /*
> * The code that prepared the itimers makes shure the
> --
> 1.7.7.6
>
> .
>
More information about the CRIU
mailing list