[CRIU] [PATCH 1/2] IPC: message queue copy feature introduced
Pavel Emelyanov
xemul at parallels.com
Thu Aug 9 05:53:55 EDT 2012
On 08/09/2012 01:25 PM, Stanislav Kinsbursky wrote:
> 09.08.2012 12:47, Pavel Emelyanov пишет:
>> On 08/09/2012 12:40 PM, Stanislav Kinsbursky wrote:
>>> This patch is required for checkpoint/restore in userspace.
>>> IOW, c/r requires some way to get all pending IPC messages without deleting
>>> them from the queue (checkpoint can fail and in this case tasks will be resumed,
>>> so the queue has to remain valid).
>>> To achieve this, a new operation flag, MSG_COPY, was introduced for the
>>> sys_msgrcv() system call. Also, a copy counter was added to the msg_queue
>>> structure. It is set to zero by default, increases by one on each copy
>>> operation, and decreases by one on each receive operation until it reaches zero.
>>
>> I think we should specify the copy_cnt semantics more strictly. How does
>> it correlate with message types?
>>
>
> What do you mean?
> It doesn't correlate at all - syscall logic remains the same.
> I.e. if you specify a message type, then you'll copy the n-th message of the desired type.
That's exactly what I asked -- this number is the n-th message of desired type,
thanks.
Next issue: I'm bothered by the number of #ifdef-s scattered throughout the code.
>>> If MSG_COPY is set, then the kernel will allocate a dummy message of the passed
>>> size, and then use the new copy_msg() helper function to copy the desired message
>>> (instead of unlinking it from the queue).
>>>
>>> Signed-off-by: Stanislav Kinsbursky <skinsbursky at parallels.com>
>>> ---
>>> include/linux/msg.h | 7 +++++++
>>> ipc/msg.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++---
>>> ipc/msgutil.c | 38 ++++++++++++++++++++++++++++++++++++++
>>> ipc/util.h | 1 +
>>> 4 files changed, 94 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/include/linux/msg.h b/include/linux/msg.h
>>> index 9411b76..de739d1 100644
>>> --- a/include/linux/msg.h
>>> +++ b/include/linux/msg.h
>>> @@ -12,6 +12,10 @@
>>> #define MSG_NOERROR 010000 /* no error if message is too big */
>>> #define MSG_EXCEPT 020000 /* recv any msg except of specified type.*/
>>>
>>> +#ifdef CONFIG_CHECKPOINT_RESTORE
>>> +#define MSG_COPY 040000 /* copy (not remove) all queue messages */
>>> +#endif
>>> +
>>> /* Obsolete, used only for backwards compatibility and libc5 compiles */
>>> struct msqid_ds {
>>> struct ipc_perm msg_perm;
>>> @@ -96,6 +100,9 @@ struct msg_queue {
>>> unsigned long q_qbytes; /* max number of bytes on queue */
>>> pid_t q_lspid; /* pid of last msgsnd */
>>> pid_t q_lrpid; /* last receive pid */
>>> +#ifdef CONFIG_CHECKPOINT_RESTORE
>>> + unsigned int q_copy_cnt; /* message number for copy operations */
>>> +#endif
>>>
>>> struct list_head q_messages;
>>> struct list_head q_receivers;
>>> diff --git a/ipc/msg.c b/ipc/msg.c
>>> index 08009f5..d4ca06f 100644
>>> --- a/ipc/msg.c
>>> +++ b/ipc/msg.c
>>> @@ -214,6 +214,9 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
>>> msq->q_cbytes = msq->q_qnum = 0;
>>> msq->q_qbytes = ns->msg_ctlmnb;
>>> msq->q_lspid = msq->q_lrpid = 0;
>>> +#ifdef CONFIG_CHECKPOINT_RESTORE
>>> + msq->q_copy_cnt = 0;
>>> +#endif
>>> INIT_LIST_HEAD(&msq->q_messages);
>>> INIT_LIST_HEAD(&msq->q_receivers);
>>> INIT_LIST_HEAD(&msq->q_senders);
>>> @@ -784,19 +787,38 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
>>> struct msg_msg *msg;
>>> int mode;
>>> struct ipc_namespace *ns;
>>> +#ifdef CONFIG_CHECKPOINT_RESTORE
>>> + struct msg_msg *copy = NULL;
>>> +#endif
>>>
>>> if (msqid < 0 || (long) bufsz < 0)
>>> return -EINVAL;
>>> mode = convert_mode(&msgtyp, msgflg);
>>> ns = current->nsproxy->ipc_ns;
>>> -
>>> +#ifdef CONFIG_CHECKPOINT_RESTORE
>>> + if (msgflg & MSG_COPY) {
>>> + /*
>>> + * Create dummy message to copy real message to.
>>> + */
>>> + copy = load_msg(buf, bufsz);
>>> + if (IS_ERR(copy))
>>> + return PTR_ERR(copy);
>>> + copy->m_ts = bufsz;
>>> + }
>>> +#endif
>>> msq = msg_lock_check(ns, msqid);
>>> - if (IS_ERR(msq))
>>> + if (IS_ERR(msq)) {
>>> +#ifdef CONFIG_CHECKPOINT_RESTORE
>>> + if (msgflg & MSG_COPY)
>>> + free_msg(copy);
>>> +#endif
>>> return PTR_ERR(msq);
>>> + }
>>>
>>> for (;;) {
>>> struct msg_receiver msr_d;
>>> struct list_head *tmp;
>>> + int msg_counter = 0;
>>>
>>> msg = ERR_PTR(-EACCES);
>>> if (ipcperms(ns, &msq->q_perm, S_IRUGO))
>>> @@ -816,12 +838,20 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
>>> walk_msg->m_type != 1) {
>>> msg = walk_msg;
>>> msgtyp = walk_msg->m_type - 1;
>>> +#ifdef CONFIG_CHECKPOINT_RESTORE
>>> + } else if (msgflg & MSG_COPY) {
>>> + if (msq->q_copy_cnt == msg_counter) {
>>> + msg = copy_msg(walk_msg, copy);
>>> + break;
>>> + }
>>> +#endif
>>> } else {
>>> msg = walk_msg;
>>> break;
>>> }
>>> }
>>> tmp = tmp->next;
>>> + msg_counter++;
>>> }
>>> if (!IS_ERR(msg)) {
>>> /*
>>> @@ -832,11 +862,21 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
>>> msg = ERR_PTR(-E2BIG);
>>> goto out_unlock;
>>> }
>>> +#ifdef CONFIG_CHECKPOINT_RESTORE
>>> + if (msgflg & MSG_COPY) {
>>> + msq->q_copy_cnt++;
>>> + goto out_unlock;
>>> + }
>>> +#endif
>>> list_del(&msg->m_list);
>>> msq->q_qnum--;
>>> msq->q_rtime = get_seconds();
>>> msq->q_lrpid = task_tgid_vnr(current);
>>> msq->q_cbytes -= msg->m_ts;
>>> +#ifdef CONFIG_CHECKPOINT_RESTORE
>>> + if (msq->q_copy_cnt)
>>> + msq->q_copy_cnt--;
>>> +#endif
>>> atomic_sub(msg->m_ts, &ns->msg_bytes);
>>> atomic_dec(&ns->msg_hdrs);
>>> ss_wakeup(&msq->q_senders, 0);
>>> @@ -915,8 +955,13 @@ out_unlock:
>>> break;
>>> }
>>> }
>>> - if (IS_ERR(msg))
>>> + if (IS_ERR(msg)) {
>>> +#ifdef CONFIG_CHECKPOINT_RESTORE
>>> + if (msgflg & MSG_COPY)
>>> + free_msg(copy);
>>> +#endif
>>> return PTR_ERR(msg);
>>> + }
>>>
>>> bufsz = msg_handler(buf, msg, bufsz);
>>> free_msg(msg);
>>> diff --git a/ipc/msgutil.c b/ipc/msgutil.c
>>> index 26143d3..b281f5c 100644
>>> --- a/ipc/msgutil.c
>>> +++ b/ipc/msgutil.c
>>> @@ -100,7 +100,45 @@ out_err:
>>> free_msg(msg);
>>> return ERR_PTR(err);
>>> }
>>> +#ifdef CONFIG_CHECKPOINT_RESTORE
>>> +struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst)
>>> +{
>>> + struct msg_msgseg *dst_pseg, *src_pseg;
>>> + int len = src->m_ts;
>>> + int alen;
>>> +
>>> + BUG_ON(dst == NULL);
>>> + if (src->m_ts > dst->m_ts)
>>> + return ERR_PTR(-EINVAL);
>>> +
>>> + alen = len;
>>> + if (alen > DATALEN_MSG)
>>> + alen = DATALEN_MSG;
>>> +
>>> + dst->next = NULL;
>>> + dst->security = NULL;
>>>
>>> + memcpy(dst + 1, src + 1, alen);
>>> +
>>> + len -= alen;
>>> + dst_pseg = dst->next;
>>> + src_pseg = src->next;
>>> + while (len > 0) {
>>> + alen = len;
>>> + if (alen > DATALEN_SEG)
>>> + alen = DATALEN_SEG;
>>> + memcpy(dst_pseg + 1, src_pseg + 1, alen);
>>> + dst_pseg = dst_pseg->next;
>>> + len -= alen;
>>> + src_pseg = src_pseg->next;
>>> + }
>>> +
>>> + dst->m_type = src->m_type;
>>> + dst->m_ts = src->m_ts;
>>> +
>>> + return dst;
>>> +}
>>> +#endif
>>> int store_msg(void __user *dest, struct msg_msg *msg, int len)
>>> {
>>> int alen;
>>> diff --git a/ipc/util.h b/ipc/util.h
>>> index 2bc6a9a..c1e1d5c 100644
>>> --- a/ipc/util.h
>>> +++ b/ipc/util.h
>>> @@ -142,6 +142,7 @@ int ipc_parse_version (int *cmd);
>>>
>>> extern void free_msg(struct msg_msg *msg);
>>> extern struct msg_msg *load_msg(const void __user *src, int len);
>>> +extern struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst);
>>> extern int store_msg(void __user *dest, struct msg_msg *msg, int len);
>>>
>>> extern void recompute_msgmni(struct ipc_namespace *);
>>>
>>> _______________________________________________
>>> CRIU mailing list
>>> CRIU at openvz.org
>>> https://openvz.org/mailman/listinfo/criu
>>
>
>
More information about the CRIU
mailing list