[CRIU] [PATCH] rst-malloc: introduce rst_mem_align

Tycho Andersen tycho.andersen at canonical.com
Wed Feb 10 14:20:17 PST 2016


On Wed, Feb 10, 2016 at 01:45:56PM -0800, Andrew Vagin wrote:
> On Wed, Feb 10, 2016 at 11:49:10AM -0700, Tycho Andersen wrote:
> > Since we need to align some allocations (but not most of them), let's
> > switch to a helper called rst_mem_align, which aligns the free list pointer
> > for the next allocation.
> > 
> > v2: always rst_mem_align() before the beginning of each "set" of
> >     allocations; add a note about recommended usage of rst_mem_align().
> >
> 
> Acked-by: Andrew Vagin <avagin at virtuozzo.com>
> 
> I think we need to merge rst_mem_align and rst_mem_cpos. We can rename
> rst_mem_cpos to rst_mem_align_cpos() and do everything in this function.

I thought about this, but then I talked myself out of it thinking that
there may be a case where you want to check the position but don't
want to change alignment. I'm not sure why you'd ever want to do this
though, and it does make the API more painful to use, and since
alignment is idempotent anyway perhaps we should just do that. I'll
send another version.

Tycho

> Thanks,
> Andrew
>  
> > Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
> > CC: Cyrill Gorcunov <gorcunov at gmail.com>
> > CC: Andrey Vagin <avagin at openvz.org>
> > ---
> >  cr-restore.c         | 31 +++++++++++++++++++++----------
> >  include/rst-malloc.h | 13 ++++++++++---
> >  rst-malloc.c         | 28 ++++++++--------------------
> >  sk-tcp.c             |  3 ++-
> >  timerfd.c            |  3 ++-
> >  5 files changed, 43 insertions(+), 35 deletions(-)
> > 
> > diff --git a/cr-restore.c b/cr-restore.c
> > index 2155dbc..ec16626 100644
> > --- a/cr-restore.c
> > +++ b/cr-restore.c
> > @@ -775,7 +775,7 @@ static int collect_child_pids(int state, int *n)
> >  		if (pi->state != state)
> >  			continue;
> >  
> > -		child = rst_mem_alloc_cont(sizeof(*child), RM_PRIVATE);
> > +		child = rst_mem_alloc(sizeof(*child), RM_PRIVATE);
> >  		if (!child)
> >  			return -1;
> >  
> > @@ -788,12 +788,14 @@ static int collect_child_pids(int state, int *n)
> >  
> >  static int collect_helper_pids()
> >  {
> > +	rst_mem_align(RM_PRIVATE);
> >  	helpers_pos = rst_mem_cpos(RM_PRIVATE);
> >  	return collect_child_pids(TASK_HELPER, &n_helpers);
> >  }
> >  
> >  static int collect_zombie_pids()
> >  {
> > +	rst_mem_align(RM_PRIVATE);
> >  	zombies_pos = rst_mem_cpos(RM_PRIVATE);
> >  	return collect_child_pids(TASK_DEAD, &n_zombies);
> >  }
> > @@ -2133,6 +2135,7 @@ out:
> >  
> >  static int prepare_task_entries(void)
> >  {
> > +	rst_mem_align(RM_SHREMAP);
> >  	task_entries_pos = rst_mem_cpos(RM_SHREMAP);
> >  	task_entries = rst_mem_alloc(sizeof(*task_entries), RM_SHREMAP);
> >  	if (!task_entries) {
> > @@ -2432,7 +2435,7 @@ static int prepare_posix_timers_from_fd(int pid)
> >  		if (ret <= 0)
> >  			break;
> >  
> > -		t = rst_mem_alloc_cont(sizeof(struct restore_posix_timer), RM_PRIVATE);
> > +		t = rst_mem_alloc(sizeof(struct restore_posix_timer), RM_PRIVATE);
> >  		if (!t)
> >  			break;
> >  
> > @@ -2457,6 +2460,7 @@ static int prepare_posix_timers(int pid, CoreEntry *core)
> >  	TaskTimersEntry *tte = core->tc->timers;
> >  	struct restore_posix_timer *t;
> >  
> > +	rst_mem_align(RM_PRIVATE);
> >  	posix_timers_cpos = rst_mem_cpos(RM_PRIVATE);
> >  
> >  	if (!tte)
> > @@ -2464,7 +2468,7 @@ static int prepare_posix_timers(int pid, CoreEntry *core)
> >  
> >  	posix_timers_nr = tte->n_posix;
> >  	for (i = 0; i < posix_timers_nr; i++) {
> > -		t = rst_mem_alloc_cont(sizeof(struct restore_posix_timer), RM_PRIVATE);
> > +		t = rst_mem_alloc(sizeof(struct restore_posix_timer), RM_PRIVATE);
> >  		if (!t)
> >  			goto out;
> >  
> > @@ -2623,7 +2627,7 @@ static int prepare_rlimits_from_fd(int pid)
> >  		if (ret <= 0)
> >  			break;
> >  
> > -		r = rst_mem_alloc_cont(sizeof(*r), RM_PRIVATE);
> > +		r = rst_mem_alloc(sizeof(*r), RM_PRIVATE);
> >  		if (!r) {
> >  			pr_err("Can't allocate memory for resource %d\n",
> >  			       rlims_nr);
> > @@ -2654,13 +2658,14 @@ static int prepare_rlimits(int pid, CoreEntry *core)
> >  	TaskRlimitsEntry *rls = core->tc->rlimits;
> >  	struct rlimit *r;
> >  
> > +	rst_mem_align(RM_PRIVATE);
> >  	rlims_cpos = rst_mem_cpos(RM_PRIVATE);
> >  
> >  	if (!rls)
> >  		return prepare_rlimits_from_fd(pid);
> >  
> >  	for (i = 0; i < rls->n_rlimits; i++) {
> > -		r = rst_mem_alloc_cont(sizeof(*r), RM_PRIVATE);
> > +		r = rst_mem_alloc(sizeof(*r), RM_PRIVATE);
> >  		if (!r) {
> >  			pr_err("Can't allocate memory for resource %d\n", i);
> >  			return -1;
> > @@ -2684,7 +2689,7 @@ static int signal_to_mem(SiginfoEntry *sie)
> >  	siginfo_t *info, *t;
> >  
> >  	info = (siginfo_t *) sie->siginfo.data;
> > -	t = rst_mem_alloc_cont(sizeof(siginfo_t), RM_PRIVATE);
> > +	t = rst_mem_alloc(sizeof(siginfo_t), RM_PRIVATE);
> >  	if (!t)
> >  		return -1;
> >  
> > @@ -2802,7 +2807,7 @@ static void rst_reloc_creds(struct thread_restore_args *thread_args,
> >  static struct thread_creds_args *
> >  rst_prep_creds_args(CredsEntry *ce, unsigned long *prev_pos)
> >  {
> > -	unsigned long this_pos = rst_mem_cpos(RM_PRIVATE);
> > +	unsigned long this_pos;
> >  	struct thread_creds_args *args;
> >  
> >  	if (!verify_cap_size(ce)) {
> > @@ -2812,8 +2817,10 @@ rst_prep_creds_args(CredsEntry *ce, unsigned long *prev_pos)
> >  		return ERR_PTR(-EINVAL);
> >  	}
> >  
> > +	rst_mem_align(RM_PRIVATE);
> > +	this_pos = rst_mem_cpos(RM_PRIVATE);
> >  
> > -	args = rst_mem_alloc_cont(sizeof(*args), RM_PRIVATE);
> > +	args = rst_mem_alloc(sizeof(*args), RM_PRIVATE);
> >  	if (!args)
> >  		return ERR_PTR(-ENOMEM);
> >  
> > @@ -2838,6 +2845,7 @@ rst_prep_creds_args(CredsEntry *ce, unsigned long *prev_pos)
> >  			size_t lsm_profile_len;
> >  			char *lsm_profile;
> >  
> > +			rst_mem_align(RM_PRIVATE);
> >  			args->mem_lsm_profile_pos = rst_mem_cpos(RM_PRIVATE);
> >  			lsm_profile_len = strlen(rendered);
> >  			lsm_profile = rst_mem_alloc(lsm_profile_len + 1, RM_PRIVATE);
> > @@ -2874,6 +2882,7 @@ rst_prep_creds_args(CredsEntry *ce, unsigned long *prev_pos)
> >  	if (ce->n_groups) {
> >  		unsigned int *groups;
> >  
> > +		rst_mem_align(RM_PRIVATE);
> >  		args->mem_groups_pos = rst_mem_cpos(RM_PRIVATE);
> >  		groups = rst_mem_alloc(ce->n_groups * sizeof(u32), RM_PRIVATE);
> >  		if (!groups)
> > @@ -3016,11 +3025,12 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
> >  	 * walk them and m(un|re)map.
> >  	 */
> >  
> > +	rst_mem_align(RM_PRIVATE);
> >  	tgt_vmas = rst_mem_cpos(RM_PRIVATE);
> >  	list_for_each_entry(vma, &vmas->h, list) {
> >  		VmaEntry *vme;
> >  
> > -		vme = rst_mem_alloc_cont(sizeof(*vme), RM_PRIVATE);
> > +		vme = rst_mem_alloc(sizeof(*vme), RM_PRIVATE);
> >  		if (!vme)
> >  			goto err_nv;
> >  
> > @@ -3034,11 +3044,12 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
> >  	 * Put info about AIO rings, they will get remapped
> >  	 */
> >  
> > +	rst_mem_align(RM_PRIVATE);
> >  	aio_rings = rst_mem_cpos(RM_PRIVATE);
> >  	for (i = 0; i < mm->n_aios; i++) {
> >  		struct rst_aio_ring *raio;
> >  
> > -		raio = rst_mem_alloc_cont(sizeof(*raio), RM_PRIVATE);
> > +		raio = rst_mem_alloc(sizeof(*raio), RM_PRIVATE);
> >  		if (!raio)
> >  			goto err_nv;
> >  
> > diff --git a/include/rst-malloc.h b/include/rst-malloc.h
> > index f205622..9a47b33 100644
> > --- a/include/rst-malloc.h
> > +++ b/include/rst-malloc.h
> > @@ -47,8 +47,8 @@ enum {
> >  extern void rst_mem_switch_to_private(void);
> >  /*
> >   * Reports a cookie of a current shared buffer position, that
> > - * can later be used in rst_mem_cpos() to find out the object
> > - * pointer.
> > + * can later be used in rst_mem_remap_ptr() to find out the object
> > + * pointer in the restorer blob.
> >   */
> >  extern unsigned long rst_mem_cpos(int type);
> >  extern void *rst_mem_remap_ptr(unsigned long pos, int type);
> > @@ -58,8 +58,15 @@ extern void *rst_mem_remap_ptr(unsigned long pos, int type);
> >   * last object can be freed (pop-ed from buffer).
> >   */
> >  extern void *rst_mem_alloc(unsigned long size, int type);
> > -extern void *rst_mem_alloc_cont(unsigned long size, int type);
> >  extern void rst_mem_free_last(int type);
> > +
> > +/* Word-align the current freelist pointer for the next allocation. If we don't
> > + * align pointers, some futex and atomic operations can fail. Additionally,
> > + * there are performance benefits to aligning arrays, so it is recommended to
> > + * align before calling rst_mem_cpos() in most cases.
> > + */
> > +extern void rst_mem_align(int type);
> > +
> >  /*
> >   * Routines to remap SHREMAP and PRIVATE into restorer address space
> >   */
> > diff --git a/rst-malloc.c b/rst-malloc.c
> > index 8e17255..f4afcaf 100644
> > --- a/rst-malloc.c
> > +++ b/rst-malloc.c
> > @@ -128,7 +128,7 @@ unsigned long rst_mem_cpos(int type)
> >  {
> >  	struct rst_mem_type_s *t = &rst_mems[type];
> >  	BUG_ON(!t->remapable || !t->enabled);
> > -	return ((void*) round_up((unsigned long)t->free_mem, sizeof(void *))) - t->buf;
> > +	return t->free_mem - t->buf;
> >  }
> >  
> >  void *rst_mem_remap_ptr(unsigned long pos, int type)
> > @@ -138,7 +138,7 @@ void *rst_mem_remap_ptr(unsigned long pos, int type)
> >  	return t->buf + pos;
> >  }
> >  
> > -static void *__rst_mem_alloc(unsigned long size, int type)
> > +void *rst_mem_alloc(unsigned long size, int type)
> >  {
> >  	struct rst_mem_type_s *t = &rst_mems[type];
> >  	void *ret;
> > @@ -158,24 +158,6 @@ static void *__rst_mem_alloc(unsigned long size, int type)
> >  	return ret;
> >  }
> >  
> > -void *rst_mem_alloc(unsigned long size, int type)
> > -{
> > -	struct rst_mem_type_s *t = &rst_mems[type];
> > -
> > -	t->free_mem = (void *) round_up((unsigned long)t->free_mem, sizeof(void *));
> > -
> > -	return __rst_mem_alloc(size, type);
> > -}
> > -
> > -/* Allocate memory without gaps with a previous slice */
> > -void *rst_mem_alloc_cont(unsigned long size, int type)
> > -{
> > -	struct rst_mem_type_s *t = &rst_mems[type];
> > -	BUG_ON(!t->remapable);
> > -
> > -	return __rst_mem_alloc(size, type);
> > -}
> > -
> >  void rst_mem_free_last(int type)
> >  {
> >  	struct rst_mem_type_s *t = &rst_mems[type];
> > @@ -187,6 +169,12 @@ void rst_mem_free_last(int type)
> >  	t->last = 0; /* next free_last would be no-op */
> >  }
> >  
> > +void rst_mem_align(int type)
> > +{
> > +	struct rst_mem_type_s *t = &rst_mems[type];
> > +	t->free_mem = (void *) round_up((unsigned long)t->free_mem, sizeof(void *));
> > +}
> > +
> >  unsigned long rst_mem_lock(void)
> >  {
> >  	/*
> > diff --git a/sk-tcp.c b/sk-tcp.c
> > index 8e17024..4d4cf83 100644
> > --- a/sk-tcp.c
> > +++ b/sk-tcp.c
> > @@ -678,6 +678,7 @@ int rst_tcp_socks_prep(void)
> >  {
> >  	struct inet_sk_info *ii;
> >  
> > +	rst_mem_align(RM_PRIVATE);
> >  	rst_tcp_socks_cpos = rst_mem_cpos(RM_PRIVATE);
> >  	list_for_each_entry(ii, &rst_tcp_repair_sockets, rlist) {
> >  		struct rst_tcp_sock *rs;
> > @@ -689,7 +690,7 @@ int rst_tcp_socks_prep(void)
> >  		if (ii->sk_fd == -1)
> >  			continue;
> >  
> > -		rs = rst_mem_alloc_cont(sizeof(*rs), RM_PRIVATE);
> > +		rs = rst_mem_alloc(sizeof(*rs), RM_PRIVATE);
> >  		if (!rs)
> >  			return -1;
> >  
> > diff --git a/timerfd.c b/timerfd.c
> > index b91e047..6ca1229 100644
> > --- a/timerfd.c
> > +++ b/timerfd.c
> > @@ -115,11 +115,12 @@ int rst_timerfd_prep(void)
> >  	struct timerfd_info *ti;
> >  	struct restore_timerfd *t;
> >  
> > +	rst_mem_align(RM_PRIVATE);
> >  	rst_timerfd_cpos = rst_mem_cpos(RM_PRIVATE);
> >  	list_for_each_entry(ti, &rst_timerfds, rlist) {
> >  		TimerfdEntry *tfe = ti->tfe;
> >  
> > -		t = rst_mem_alloc_cont(sizeof(*t), RM_PRIVATE);
> > +		t = rst_mem_alloc(sizeof(*t), RM_PRIVATE);
> >  		if (!t)
> >  			return -1;
> >  
> > -- 
> > 2.5.0
> > 


More information about the CRIU mailing list