[CRIU] [PATCH] rst-malloc: introduce rst_mem_align

Andrew Vagin avagin at virtuozzo.com
Wed Feb 10 13:45:56 PST 2016


On Wed, Feb 10, 2016 at 11:49:10AM -0700, Tycho Andersen wrote:
> Since we need to align some allocations (but not most of them), let's
> switch to a helper called rst_mem_align, which aligns the free list pointer
> for the next allocation.
> 
> v2: always rst_mem_align() before the beginning of each "set" of
>     allocations; add a note about recommended usage of rst_mem_align().
>

Acked-by: Andrew Vagin <avagin at virtuozzo.com>

I think we need to merge rst_mem_align and rst_mem_cpos. We can rename
rst_mem_cpos to rst_mem_align_cpos() and do everything in this function.

Thanks,
Andrew
 
> Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
> CC: Cyrill Gorcunov <gorcunov at gmail.com>
> CC: Andrey Vagin <avagin at openvz.org>
> ---
>  cr-restore.c         | 31 +++++++++++++++++++++----------
>  include/rst-malloc.h | 13 ++++++++++---
>  rst-malloc.c         | 28 ++++++++--------------------
>  sk-tcp.c             |  3 ++-
>  timerfd.c            |  3 ++-
>  5 files changed, 43 insertions(+), 35 deletions(-)
> 
> diff --git a/cr-restore.c b/cr-restore.c
> index 2155dbc..ec16626 100644
> --- a/cr-restore.c
> +++ b/cr-restore.c
> @@ -775,7 +775,7 @@ static int collect_child_pids(int state, int *n)
>  		if (pi->state != state)
>  			continue;
>  
> -		child = rst_mem_alloc_cont(sizeof(*child), RM_PRIVATE);
> +		child = rst_mem_alloc(sizeof(*child), RM_PRIVATE);
>  		if (!child)
>  			return -1;
>  
> @@ -788,12 +788,14 @@ static int collect_child_pids(int state, int *n)
>  
>  static int collect_helper_pids()
>  {
> +	rst_mem_align(RM_PRIVATE);
>  	helpers_pos = rst_mem_cpos(RM_PRIVATE);
>  	return collect_child_pids(TASK_HELPER, &n_helpers);
>  }
>  
>  static int collect_zombie_pids()
>  {
> +	rst_mem_align(RM_PRIVATE);
>  	zombies_pos = rst_mem_cpos(RM_PRIVATE);
>  	return collect_child_pids(TASK_DEAD, &n_zombies);
>  }
> @@ -2133,6 +2135,7 @@ out:
>  
>  static int prepare_task_entries(void)
>  {
> +	rst_mem_align(RM_SHREMAP);
>  	task_entries_pos = rst_mem_cpos(RM_SHREMAP);
>  	task_entries = rst_mem_alloc(sizeof(*task_entries), RM_SHREMAP);
>  	if (!task_entries) {
> @@ -2432,7 +2435,7 @@ static int prepare_posix_timers_from_fd(int pid)
>  		if (ret <= 0)
>  			break;
>  
> -		t = rst_mem_alloc_cont(sizeof(struct restore_posix_timer), RM_PRIVATE);
> +		t = rst_mem_alloc(sizeof(struct restore_posix_timer), RM_PRIVATE);
>  		if (!t)
>  			break;
>  
> @@ -2457,6 +2460,7 @@ static int prepare_posix_timers(int pid, CoreEntry *core)
>  	TaskTimersEntry *tte = core->tc->timers;
>  	struct restore_posix_timer *t;
>  
> +	rst_mem_align(RM_PRIVATE);
>  	posix_timers_cpos = rst_mem_cpos(RM_PRIVATE);
>  
>  	if (!tte)
> @@ -2464,7 +2468,7 @@ static int prepare_posix_timers(int pid, CoreEntry *core)
>  
>  	posix_timers_nr = tte->n_posix;
>  	for (i = 0; i < posix_timers_nr; i++) {
> -		t = rst_mem_alloc_cont(sizeof(struct restore_posix_timer), RM_PRIVATE);
> +		t = rst_mem_alloc(sizeof(struct restore_posix_timer), RM_PRIVATE);
>  		if (!t)
>  			goto out;
>  
> @@ -2623,7 +2627,7 @@ static int prepare_rlimits_from_fd(int pid)
>  		if (ret <= 0)
>  			break;
>  
> -		r = rst_mem_alloc_cont(sizeof(*r), RM_PRIVATE);
> +		r = rst_mem_alloc(sizeof(*r), RM_PRIVATE);
>  		if (!r) {
>  			pr_err("Can't allocate memory for resource %d\n",
>  			       rlims_nr);
> @@ -2654,13 +2658,14 @@ static int prepare_rlimits(int pid, CoreEntry *core)
>  	TaskRlimitsEntry *rls = core->tc->rlimits;
>  	struct rlimit *r;
>  
> +	rst_mem_align(RM_PRIVATE);
>  	rlims_cpos = rst_mem_cpos(RM_PRIVATE);
>  
>  	if (!rls)
>  		return prepare_rlimits_from_fd(pid);
>  
>  	for (i = 0; i < rls->n_rlimits; i++) {
> -		r = rst_mem_alloc_cont(sizeof(*r), RM_PRIVATE);
> +		r = rst_mem_alloc(sizeof(*r), RM_PRIVATE);
>  		if (!r) {
>  			pr_err("Can't allocate memory for resource %d\n", i);
>  			return -1;
> @@ -2684,7 +2689,7 @@ static int signal_to_mem(SiginfoEntry *sie)
>  	siginfo_t *info, *t;
>  
>  	info = (siginfo_t *) sie->siginfo.data;
> -	t = rst_mem_alloc_cont(sizeof(siginfo_t), RM_PRIVATE);
> +	t = rst_mem_alloc(sizeof(siginfo_t), RM_PRIVATE);
>  	if (!t)
>  		return -1;
>  
> @@ -2802,7 +2807,7 @@ static void rst_reloc_creds(struct thread_restore_args *thread_args,
>  static struct thread_creds_args *
>  rst_prep_creds_args(CredsEntry *ce, unsigned long *prev_pos)
>  {
> -	unsigned long this_pos = rst_mem_cpos(RM_PRIVATE);
> +	unsigned long this_pos;
>  	struct thread_creds_args *args;
>  
>  	if (!verify_cap_size(ce)) {
> @@ -2812,8 +2817,10 @@ rst_prep_creds_args(CredsEntry *ce, unsigned long *prev_pos)
>  		return ERR_PTR(-EINVAL);
>  	}
>  
> +	rst_mem_align(RM_PRIVATE);
> +	this_pos = rst_mem_cpos(RM_PRIVATE);
>  
> -	args = rst_mem_alloc_cont(sizeof(*args), RM_PRIVATE);
> +	args = rst_mem_alloc(sizeof(*args), RM_PRIVATE);
>  	if (!args)
>  		return ERR_PTR(-ENOMEM);
>  
> @@ -2838,6 +2845,7 @@ rst_prep_creds_args(CredsEntry *ce, unsigned long *prev_pos)
>  			size_t lsm_profile_len;
>  			char *lsm_profile;
>  
> +			rst_mem_align(RM_PRIVATE);
>  			args->mem_lsm_profile_pos = rst_mem_cpos(RM_PRIVATE);
>  			lsm_profile_len = strlen(rendered);
>  			lsm_profile = rst_mem_alloc(lsm_profile_len + 1, RM_PRIVATE);
> @@ -2874,6 +2882,7 @@ rst_prep_creds_args(CredsEntry *ce, unsigned long *prev_pos)
>  	if (ce->n_groups) {
>  		unsigned int *groups;
>  
> +		rst_mem_align(RM_PRIVATE);
>  		args->mem_groups_pos = rst_mem_cpos(RM_PRIVATE);
>  		groups = rst_mem_alloc(ce->n_groups * sizeof(u32), RM_PRIVATE);
>  		if (!groups)
> @@ -3016,11 +3025,12 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
>  	 * walk them and m(un|re)map.
>  	 */
>  
> +	rst_mem_align(RM_PRIVATE);
>  	tgt_vmas = rst_mem_cpos(RM_PRIVATE);
>  	list_for_each_entry(vma, &vmas->h, list) {
>  		VmaEntry *vme;
>  
> -		vme = rst_mem_alloc_cont(sizeof(*vme), RM_PRIVATE);
> +		vme = rst_mem_alloc(sizeof(*vme), RM_PRIVATE);
>  		if (!vme)
>  			goto err_nv;
>  
> @@ -3034,11 +3044,12 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
>  	 * Put info about AIO rings, they will get remapped
>  	 */
>  
> +	rst_mem_align(RM_PRIVATE);
>  	aio_rings = rst_mem_cpos(RM_PRIVATE);
>  	for (i = 0; i < mm->n_aios; i++) {
>  		struct rst_aio_ring *raio;
>  
> -		raio = rst_mem_alloc_cont(sizeof(*raio), RM_PRIVATE);
> +		raio = rst_mem_alloc(sizeof(*raio), RM_PRIVATE);
>  		if (!raio)
>  			goto err_nv;
>  
> diff --git a/include/rst-malloc.h b/include/rst-malloc.h
> index f205622..9a47b33 100644
> --- a/include/rst-malloc.h
> +++ b/include/rst-malloc.h
> @@ -47,8 +47,8 @@ enum {
>  extern void rst_mem_switch_to_private(void);
>  /*
>   * Reports a cookie of a current shared buffer position, that
> - * can later be used in rst_mem_cpos() to find out the object
> - * pointer.
> + * can later be used in rst_mem_remap_ptr() to find out the object
> + * pointer in the restorer blob.
>   */
>  extern unsigned long rst_mem_cpos(int type);
>  extern void *rst_mem_remap_ptr(unsigned long pos, int type);
> @@ -58,8 +58,15 @@ extern void *rst_mem_remap_ptr(unsigned long pos, int type);
>   * last object can be freed (pop-ed from buffer).
>   */
>  extern void *rst_mem_alloc(unsigned long size, int type);
> -extern void *rst_mem_alloc_cont(unsigned long size, int type);
>  extern void rst_mem_free_last(int type);
> +
> +/* Word-align the current freelist pointer for the next allocation. If we don't
> + * align pointers, some futex and atomic operations can fail. Additionally,
> + * there are performance benefits to aligning arrays, so it is recommended to
> + * align before calling rst_mem_cpos() in most cases.
> + */
> +extern void rst_mem_align(int type);
> +
>  /*
>   * Routines to remap SHREMAP and PRIVATE into restorer address space
>   */
> diff --git a/rst-malloc.c b/rst-malloc.c
> index 8e17255..f4afcaf 100644
> --- a/rst-malloc.c
> +++ b/rst-malloc.c
> @@ -128,7 +128,7 @@ unsigned long rst_mem_cpos(int type)
>  {
>  	struct rst_mem_type_s *t = &rst_mems[type];
>  	BUG_ON(!t->remapable || !t->enabled);
> -	return ((void*) round_up((unsigned long)t->free_mem, sizeof(void *))) - t->buf;
> +	return t->free_mem - t->buf;
>  }
>  
>  void *rst_mem_remap_ptr(unsigned long pos, int type)
> @@ -138,7 +138,7 @@ void *rst_mem_remap_ptr(unsigned long pos, int type)
>  	return t->buf + pos;
>  }
>  
> -static void *__rst_mem_alloc(unsigned long size, int type)
> +void *rst_mem_alloc(unsigned long size, int type)
>  {
>  	struct rst_mem_type_s *t = &rst_mems[type];
>  	void *ret;
> @@ -158,24 +158,6 @@ static void *__rst_mem_alloc(unsigned long size, int type)
>  	return ret;
>  }
>  
> -void *rst_mem_alloc(unsigned long size, int type)
> -{
> -	struct rst_mem_type_s *t = &rst_mems[type];
> -
> -	t->free_mem = (void *) round_up((unsigned long)t->free_mem, sizeof(void *));
> -
> -	return __rst_mem_alloc(size, type);
> -}
> -
> -/* Allocate memory without gaps with a previous slice */
> -void *rst_mem_alloc_cont(unsigned long size, int type)
> -{
> -	struct rst_mem_type_s *t = &rst_mems[type];
> -	BUG_ON(!t->remapable);
> -
> -	return __rst_mem_alloc(size, type);
> -}
> -
>  void rst_mem_free_last(int type)
>  {
>  	struct rst_mem_type_s *t = &rst_mems[type];
> @@ -187,6 +169,12 @@ void rst_mem_free_last(int type)
>  	t->last = 0; /* next free_last would be no-op */
>  }
>  
> +void rst_mem_align(int type)
> +{
> +	struct rst_mem_type_s *t = &rst_mems[type];
> +	t->free_mem = (void *) round_up((unsigned long)t->free_mem, sizeof(void *));
> +}
> +
>  unsigned long rst_mem_lock(void)
>  {
>  	/*
> diff --git a/sk-tcp.c b/sk-tcp.c
> index 8e17024..4d4cf83 100644
> --- a/sk-tcp.c
> +++ b/sk-tcp.c
> @@ -678,6 +678,7 @@ int rst_tcp_socks_prep(void)
>  {
>  	struct inet_sk_info *ii;
>  
> +	rst_mem_align(RM_PRIVATE);
>  	rst_tcp_socks_cpos = rst_mem_cpos(RM_PRIVATE);
>  	list_for_each_entry(ii, &rst_tcp_repair_sockets, rlist) {
>  		struct rst_tcp_sock *rs;
> @@ -689,7 +690,7 @@ int rst_tcp_socks_prep(void)
>  		if (ii->sk_fd == -1)
>  			continue;
>  
> -		rs = rst_mem_alloc_cont(sizeof(*rs), RM_PRIVATE);
> +		rs = rst_mem_alloc(sizeof(*rs), RM_PRIVATE);
>  		if (!rs)
>  			return -1;
>  
> diff --git a/timerfd.c b/timerfd.c
> index b91e047..6ca1229 100644
> --- a/timerfd.c
> +++ b/timerfd.c
> @@ -115,11 +115,12 @@ int rst_timerfd_prep(void)
>  	struct timerfd_info *ti;
>  	struct restore_timerfd *t;
>  
> +	rst_mem_align(RM_PRIVATE);
>  	rst_timerfd_cpos = rst_mem_cpos(RM_PRIVATE);
>  	list_for_each_entry(ti, &rst_timerfds, rlist) {
>  		TimerfdEntry *tfe = ti->tfe;
>  
> -		t = rst_mem_alloc_cont(sizeof(*t), RM_PRIVATE);
> +		t = rst_mem_alloc(sizeof(*t), RM_PRIVATE);
>  		if (!t)
>  			return -1;
>  
> -- 
> 2.5.0
> 


More information about the CRIU mailing list