[CRIU] [PATCH] rst-malloc: introduce rst_mem_align

Tycho Andersen tycho.andersen at canonical.com
Wed Feb 10 08:54:06 PST 2016


On Wed, Feb 10, 2016 at 08:48:47AM -0800, Andrew Vagin wrote:
> On Wed, Feb 10, 2016 at 09:43:23AM -0700, Tycho Andersen wrote:
> > On Wed, Feb 10, 2016 at 08:38:24AM -0800, Andrew Vagin wrote:
> > > On Wed, Feb 10, 2016 at 09:06:36AM -0700, Tycho Andersen wrote:
> > > > Since we need to align some allocations (but not most of them), let's
> > > > switch to a helper called rst_mem_align, which aligns the free list pointer
> > > > for the next allocation.
> > > 
> > > When should we call rst_mem_align()?
> > 
> > I'm not sure, actually. I guess whenever there is going to be an
> > argument that will be passed to futex or atomic?
> > 
> > > Why do we not call it before each call of rst_mem_cpos()
> > 
> > I just translated all calls:
> > 
> > rst_mem_alloc() => rst_mem_align(); rst_mem_alloc()
> > rst_mem_alloc_cont() => rst_mem_alloc()
> > 
> > so in principle it should be the same as before (except the cpos is
> > always reported correctly).
> 
> It's wrong. I think we need to align a start address of an array too.
> 
> I mean that rst_mem_alloc() should return alligned address after
> rst_mem_cpos().
> 
> Initially I aligned an addess which is returned from rst_mem_cpos(), but
> then we decided that it isn't required. Now we see that it was a wrong
> idea.

rst_mem_cpos() and rst_mem_alloc() should always return the same
thing, otherwise it's a bug.

(leaving the discussion of when to align to the other thread)

> > 
> > Tycho
> > 
> > > > 
> > > > Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
> > > > CC: Cyrill Gorcunov <gorcunov at gmail.com>
> > > > CC: Andrey Vagin <avagin at openvz.org>
> > > > ---
> > > >  cr-restore.c         | 21 ++++++++++++---------
> > > >  include/rst-malloc.h | 11 ++++++++---
> > > >  rst-malloc.c         | 28 ++++++++--------------------
> > > >  sk-tcp.c             |  2 +-
> > > >  timerfd.c            |  2 +-
> > > >  5 files changed, 30 insertions(+), 34 deletions(-)
> > > > 
> > > > diff --git a/cr-restore.c b/cr-restore.c
> > > > index 2155dbc..46b300b 100644
> > > > --- a/cr-restore.c
> > > > +++ b/cr-restore.c
> > > > @@ -775,7 +775,7 @@ static int collect_child_pids(int state, int *n)
> > > >  		if (pi->state != state)
> > > >  			continue;
> > > >  
> > > > -		child = rst_mem_alloc_cont(sizeof(*child), RM_PRIVATE);
> > > > +		child = rst_mem_alloc(sizeof(*child), RM_PRIVATE);
> > > >  		if (!child)
> > > >  			return -1;
> > > >  
> > > > @@ -2133,6 +2133,7 @@ out:
> > > >  
> > > >  static int prepare_task_entries(void)
> > > >  {
> > > > +	rst_mem_align(RM_SHREMAP);
> > > >  	task_entries_pos = rst_mem_cpos(RM_SHREMAP);
> > > >  	task_entries = rst_mem_alloc(sizeof(*task_entries), RM_SHREMAP);
> > > >  	if (!task_entries) {
> > > > @@ -2432,7 +2433,7 @@ static int prepare_posix_timers_from_fd(int pid)
> > > >  		if (ret <= 0)
> > > >  			break;
> > > >  
> > > > -		t = rst_mem_alloc_cont(sizeof(struct restore_posix_timer), RM_PRIVATE);
> > > > +		t = rst_mem_alloc(sizeof(struct restore_posix_timer), RM_PRIVATE);
> > > >  		if (!t)
> > > >  			break;
> > > >  
> > > > @@ -2464,7 +2465,7 @@ static int prepare_posix_timers(int pid, CoreEntry *core)
> > > >  
> > > >  	posix_timers_nr = tte->n_posix;
> > > >  	for (i = 0; i < posix_timers_nr; i++) {
> > > > -		t = rst_mem_alloc_cont(sizeof(struct restore_posix_timer), RM_PRIVATE);
> > > > +		t = rst_mem_alloc(sizeof(struct restore_posix_timer), RM_PRIVATE);
> > > >  		if (!t)
> > > >  			goto out;
> > > >  
> > > > @@ -2623,7 +2624,7 @@ static int prepare_rlimits_from_fd(int pid)
> > > >  		if (ret <= 0)
> > > >  			break;
> > > >  
> > > > -		r = rst_mem_alloc_cont(sizeof(*r), RM_PRIVATE);
> > > > +		r = rst_mem_alloc(sizeof(*r), RM_PRIVATE);
> > > >  		if (!r) {
> > > >  			pr_err("Can't allocate memory for resource %d\n",
> > > >  			       rlims_nr);
> > > > @@ -2660,7 +2661,7 @@ static int prepare_rlimits(int pid, CoreEntry *core)
> > > >  		return prepare_rlimits_from_fd(pid);
> > > >  
> > > >  	for (i = 0; i < rls->n_rlimits; i++) {
> > > > -		r = rst_mem_alloc_cont(sizeof(*r), RM_PRIVATE);
> > > > +		r = rst_mem_alloc(sizeof(*r), RM_PRIVATE);
> > > >  		if (!r) {
> > > >  			pr_err("Can't allocate memory for resource %d\n", i);
> > > >  			return -1;
> > > > @@ -2684,7 +2685,7 @@ static int signal_to_mem(SiginfoEntry *sie)
> > > >  	siginfo_t *info, *t;
> > > >  
> > > >  	info = (siginfo_t *) sie->siginfo.data;
> > > > -	t = rst_mem_alloc_cont(sizeof(siginfo_t), RM_PRIVATE);
> > > > +	t = rst_mem_alloc(sizeof(siginfo_t), RM_PRIVATE);
> > > >  	if (!t)
> > > >  		return -1;
> > > >  
> > > > @@ -2813,7 +2814,7 @@ rst_prep_creds_args(CredsEntry *ce, unsigned long *prev_pos)
> > > >  	}
> > > >  
> > > >  
> > > > -	args = rst_mem_alloc_cont(sizeof(*args), RM_PRIVATE);
> > > > +	args = rst_mem_alloc(sizeof(*args), RM_PRIVATE);
> > > >  	if (!args)
> > > >  		return ERR_PTR(-ENOMEM);
> > > >  
> > > > @@ -2838,6 +2839,7 @@ rst_prep_creds_args(CredsEntry *ce, unsigned long *prev_pos)
> > > >  			size_t lsm_profile_len;
> > > >  			char *lsm_profile;
> > > >  
> > > > +			rst_mem_align(RM_PRIVATE);
> > > >  			args->mem_lsm_profile_pos = rst_mem_cpos(RM_PRIVATE);
> > > >  			lsm_profile_len = strlen(rendered);
> > > >  			lsm_profile = rst_mem_alloc(lsm_profile_len + 1, RM_PRIVATE);
> > > > @@ -2874,6 +2876,7 @@ rst_prep_creds_args(CredsEntry *ce, unsigned long *prev_pos)
> > > >  	if (ce->n_groups) {
> > > >  		unsigned int *groups;
> > > >  
> > > > +		rst_mem_align(RM_PRIVATE);
> > > >  		args->mem_groups_pos = rst_mem_cpos(RM_PRIVATE);
> > > >  		groups = rst_mem_alloc(ce->n_groups * sizeof(u32), RM_PRIVATE);
> > > >  		if (!groups)
> > > > @@ -3020,7 +3023,7 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
> > > >  	list_for_each_entry(vma, &vmas->h, list) {
> > > >  		VmaEntry *vme;
> > > >  
> > > > -		vme = rst_mem_alloc_cont(sizeof(*vme), RM_PRIVATE);
> > > > +		vme = rst_mem_alloc(sizeof(*vme), RM_PRIVATE);
> > > >  		if (!vme)
> > > >  			goto err_nv;
> > > >  
> > > > @@ -3038,7 +3041,7 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
> > > >  	for (i = 0; i < mm->n_aios; i++) {
> > > >  		struct rst_aio_ring *raio;
> > > >  
> > > > -		raio = rst_mem_alloc_cont(sizeof(*raio), RM_PRIVATE);
> > > > +		raio = rst_mem_alloc(sizeof(*raio), RM_PRIVATE);
> > > >  		if (!raio)
> > > >  			goto err_nv;
> > > >  
> > > > diff --git a/include/rst-malloc.h b/include/rst-malloc.h
> > > > index f205622..9b400bb 100644
> > > > --- a/include/rst-malloc.h
> > > > +++ b/include/rst-malloc.h
> > > > @@ -47,8 +47,8 @@ enum {
> > > >  extern void rst_mem_switch_to_private(void);
> > > >  /*
> > > >   * Reports a cookie of a current shared buffer position, that
> > > > - * can later be used in rst_mem_cpos() to find out the object
> > > > - * pointer.
> > > > + * can later be used in rst_mem_remap_ptr() to find out the object
> > > > + * pointer in the restorer blob.
> > > >   */
> > > >  extern unsigned long rst_mem_cpos(int type);
> > > >  extern void *rst_mem_remap_ptr(unsigned long pos, int type);
> > > > @@ -58,8 +58,13 @@ extern void *rst_mem_remap_ptr(unsigned long pos, int type);
> > > >   * last object can be freed (pop-ed from buffer).
> > > >   */
> > > >  extern void *rst_mem_alloc(unsigned long size, int type);
> > > > -extern void *rst_mem_alloc_cont(unsigned long size, int type);
> > > >  extern void rst_mem_free_last(int type);
> > > > +
> > > > +/* Word-align the current freelist pointer for the next allocation. If we don't
> > > > + * align pointers, some futex and atomic operations can fail.
> > > > + */
> > > > +extern void rst_mem_align(int type);
> > > > +
> > > >  /*
> > > >   * Routines to remap SHREMAP and PRIVATE into restorer address space
> > > >   */
> > > > diff --git a/rst-malloc.c b/rst-malloc.c
> > > > index 8e17255..f4afcaf 100644
> > > > --- a/rst-malloc.c
> > > > +++ b/rst-malloc.c
> > > > @@ -128,7 +128,7 @@ unsigned long rst_mem_cpos(int type)
> > > >  {
> > > >  	struct rst_mem_type_s *t = &rst_mems[type];
> > > >  	BUG_ON(!t->remapable || !t->enabled);
> > > > -	return ((void*) round_up((unsigned long)t->free_mem, sizeof(void *))) - t->buf;
> > > > +	return t->free_mem - t->buf;
> > > >  }
> > > >  
> > > >  void *rst_mem_remap_ptr(unsigned long pos, int type)
> > > > @@ -138,7 +138,7 @@ void *rst_mem_remap_ptr(unsigned long pos, int type)
> > > >  	return t->buf + pos;
> > > >  }
> > > >  
> > > > -static void *__rst_mem_alloc(unsigned long size, int type)
> > > > +void *rst_mem_alloc(unsigned long size, int type)
> > > >  {
> > > >  	struct rst_mem_type_s *t = &rst_mems[type];
> > > >  	void *ret;
> > > > @@ -158,24 +158,6 @@ static void *__rst_mem_alloc(unsigned long size, int type)
> > > >  	return ret;
> > > >  }
> > > >  
> > > > -void *rst_mem_alloc(unsigned long size, int type)
> > > > -{
> > > > -	struct rst_mem_type_s *t = &rst_mems[type];
> > > > -
> > > > -	t->free_mem = (void *) round_up((unsigned long)t->free_mem, sizeof(void *));
> > > > -
> > > > -	return __rst_mem_alloc(size, type);
> > > > -}
> > > > -
> > > > -/* Allocate memory without gaps with a previous slice */
> > > > -void *rst_mem_alloc_cont(unsigned long size, int type)
> > > > -{
> > > > -	struct rst_mem_type_s *t = &rst_mems[type];
> > > > -	BUG_ON(!t->remapable);
> > > > -
> > > > -	return __rst_mem_alloc(size, type);
> > > > -}
> > > > -
> > > >  void rst_mem_free_last(int type)
> > > >  {
> > > >  	struct rst_mem_type_s *t = &rst_mems[type];
> > > > @@ -187,6 +169,12 @@ void rst_mem_free_last(int type)
> > > >  	t->last = 0; /* next free_last would be no-op */
> > > >  }
> > > >  
> > > > +void rst_mem_align(int type)
> > > > +{
> > > > +	struct rst_mem_type_s *t = &rst_mems[type];
> > > > +	t->free_mem = (void *) round_up((unsigned long)t->free_mem, sizeof(void *));
> > > > +}
> > > > +
> > > >  unsigned long rst_mem_lock(void)
> > > >  {
> > > >  	/*
> > > > diff --git a/sk-tcp.c b/sk-tcp.c
> > > > index 8e17024..dc95d3d 100644
> > > > --- a/sk-tcp.c
> > > > +++ b/sk-tcp.c
> > > > @@ -689,7 +689,7 @@ int rst_tcp_socks_prep(void)
> > > >  		if (ii->sk_fd == -1)
> > > >  			continue;
> > > >  
> > > > -		rs = rst_mem_alloc_cont(sizeof(*rs), RM_PRIVATE);
> > > > +		rs = rst_mem_alloc(sizeof(*rs), RM_PRIVATE);
> > > >  		if (!rs)
> > > >  			return -1;
> > > >  
> > > > diff --git a/timerfd.c b/timerfd.c
> > > > index b91e047..940190f 100644
> > > > --- a/timerfd.c
> > > > +++ b/timerfd.c
> > > > @@ -119,7 +119,7 @@ int rst_timerfd_prep(void)
> > > >  	list_for_each_entry(ti, &rst_timerfds, rlist) {
> > > >  		TimerfdEntry *tfe = ti->tfe;
> > > >  
> > > > -		t = rst_mem_alloc_cont(sizeof(*t), RM_PRIVATE);
> > > > +		t = rst_mem_alloc(sizeof(*t), RM_PRIVATE);
> > > >  		if (!t)
> > > >  			return -1;
> > > >  
> > > > -- 
> > > > 2.5.0
> > > > 


More information about the CRIU mailing list