[CRIU] Re: [RFC PATCH] ipc: use kernel buffer for peeking messages

Pavel Emelyanov xemul at parallels.com
Tue Apr 17 10:06:48 EDT 2012


On 04/17/2012 05:51 PM, Stanislav Kinsbursky wrote:
> Copying data to user-space (which may sleep) is not allowed since queue is
> traversed under spin_lock(). This patch introduces temporary kernel buffer for
> collecting the whole queue, which then then copied to user space. Also,
> introduced peek_msg() helper, which is almost the same as store_msg() except it
> works with kernel space addresses.

No, I think that duplicating the memory used for storing a messages is a bad idea :(
Three ways this can be solved (don't know which one is better, please, read the code)

1. change the locking from spinlocks into mutexes
2. find some way for pinning the messages data into memory, then
   drop a lock, then copy_to_user, then unpin
3. rework the API completely to work more like MSG_PEEK + peek_off for sockets

> ---
>  ipc/compat.c  |    9 +++------
>  ipc/msg.c     |   29 ++++++++++++++++++-----------
>  ipc/msgutil.c |   24 ++++++++++++++++++++++++
>  ipc/util.h    |    1 +
>  4 files changed, 46 insertions(+), 17 deletions(-)
> 
> diff --git a/ipc/compat.c b/ipc/compat.c
> index bf31af7..0ae6ba7 100644
> --- a/ipc/compat.c
> +++ b/ipc/compat.c
> @@ -346,12 +346,9 @@ static long compat_do_msg_peek_all(void __user *dest, struct msg_msg *msg, size_
>  	if (bufsz < msgsz)
>  		return -E2BIG;
>  
> -	if (put_user(msg->m_type, &msgp->mtype))
> -		return -EFAULT;
> -	if (put_user(msg->m_ts, &msgp->msize))
> -		return -EFAULT;
> -	if (store_msg(msgp->mtext, msg, msg->m_ts))
> -		return -EFAULT;
> +	msgp->mtype = msg->m_type;
> +	msgp->msize = msg->m_ts;
> +	peek_msg(msgp->mtext, msg, msg->m_ts);
>  	return msgsz;
>  }
>  #else
> diff --git a/ipc/msg.c b/ipc/msg.c
> index e7d07c9..107e386 100644
> --- a/ipc/msg.c
> +++ b/ipc/msg.c
> @@ -763,9 +763,9 @@ static inline int convert_mode(long *msgtyp, int msgflg)
>  }
>  
>  #ifdef CONFIG_CHECKPOINT_RESTORE
> -static long do_msg_peek_all(void __user *dest, struct msg_msg *msg, size_t bufsz)
> +static long do_msg_peek_all(void *dest, struct msg_msg *msg, size_t bufsz)
>  {
> -	struct msgbuf_a __user *msgp = dest;
> +	struct msgbuf_a *msgp = dest;
>  	size_t msgsz;
>  
>  	/*
> @@ -781,12 +781,9 @@ static long do_msg_peek_all(void __user *dest, struct msg_msg *msg, size_t bufsz
>  	if (bufsz < msgsz)
>  		return -E2BIG;
>  
> -	if (put_user(msg->m_type, &msgp->mtype))
> -		return -EFAULT;
> -	if (put_user(msg->m_ts, &msgp->msize))
> -		return -EFAULT;
> -	if (store_msg(msgp->mtext, msg, msg->m_ts))
> -		return -EFAULT;
> +	msgp->mtype = msg->m_type;
> +	msgp->msize = msg->m_ts;
> +	peek_msg(msgp->mtext, msg, msg->m_ts);
>  	return msgsz;
>  }
>  #else
> @@ -820,6 +817,11 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
>  	struct ipc_namespace *ns;
>  #ifdef CONFIG_CHECKPOINT_RESTORE
>  	size_t arrsz = bufsz;
> +	void *kbuf, *kptr;
> +
> +	kbuf = kptr = kmalloc(bufsz, GFP_KERNEL);
> +	if (!kbuf)
> +		return -ENOMEM;
>  #endif
>  
>  	if (msqid < 0 || (long) bufsz < 0)
> @@ -862,12 +864,13 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
>  				} else if (msgflg & MSG_PEEK_ALL) {
>  					long ret;
>  
> -					ret = msg_fill(buf, msg, arrsz);
> +					ret = msg_fill(kptr, msg, arrsz);
>  					if (ret < 0) {
> +						kfree(kbuf);
>  						msg = ERR_PTR(ret);
>  						goto out_unlock;
>  					}
> -					buf += ret;
> +					kptr += ret;
>  					arrsz -= ret;
>  #endif
>  				} else {
> @@ -977,8 +980,12 @@ out_unlock:
>  		return PTR_ERR(msg);
>  
>  #ifdef CONFIG_CHECKPOINT_RESTORE
> -	if (msgflg & MSG_PEEK_ALL)
> +	if (msgflg & MSG_PEEK_ALL) {
> +		if (copy_to_user(buf, kbuf, bufsz - arrsz))
> +			return -EFAULT;
> +		kfree(kbuf);
>  		return bufsz - arrsz;
> +	}
>  #endif
>  
>  	bufsz = msg_fill(buf, msg, bufsz);
> diff --git a/ipc/msgutil.c b/ipc/msgutil.c
> index 5652101..0b57e75 100644
> --- a/ipc/msgutil.c
> +++ b/ipc/msgutil.c
> @@ -126,6 +126,30 @@ int store_msg(void __user *dest, struct msg_msg *msg, int len)
>  	return 0;
>  }
>  
> +void peek_msg(void *dest, struct msg_msg *msg, int len)
> +{
> +	int alen;
> +	struct msg_msgseg *seg;
> +
> +	alen = len;
> +	if (alen > DATALEN_MSG)
> +		alen = DATALEN_MSG;
> +	memcpy(dest, msg + 1, alen);
> +
> +	len -= alen;
> +	dest = ((char __user *)dest) + alen;
> +	seg = msg->next;
> +	while (len > 0) {
> +		alen = len;
> +		if (alen > DATALEN_SEG)
> +			alen = DATALEN_SEG;
> +		memcpy(dest, seg + 1, alen);
> +		len -= alen;
> +		dest = ((char *)dest) + alen;
> +		seg = seg->next;
> +	}
> +}
> +
>  void free_msg(struct msg_msg *msg)
>  {
>  	struct msg_msgseg *seg;
> diff --git a/ipc/util.h b/ipc/util.h
> index 2bc6a9a..78c888c 100644
> --- a/ipc/util.h
> +++ b/ipc/util.h
> @@ -143,6 +143,7 @@ int ipc_parse_version (int *cmd);
>  extern void free_msg(struct msg_msg *msg);
>  extern struct msg_msg *load_msg(const void __user *src, int len);
>  extern int store_msg(void __user *dest, struct msg_msg *msg, int len);
> +extern void peek_msg(void *dest, struct msg_msg *msg, int len);
>  
>  extern void recompute_msgmni(struct ipc_namespace *);
>  
> 



More information about the CRIU mailing list