[CRIU] [PATCH 18/18] SCM: Dump and restore SCM_RIGHTs
Pavel Emelyanov
xemul at virtuozzo.com
Tue Jul 11 18:49:35 MSK 2017
>> diff --git a/criu/cr-restore.c b/criu/cr-restore.c
>> index e14fa06..b9ef49c 100644
>> --- a/criu/cr-restore.c
>> +++ b/criu/cr-restore.c
>> @@ -364,6 +364,14 @@ static int root_prepare_shared(void)
>> if (ret)
>> goto err;
>>
>> + /*
>> + * This should be called with all packets collected AND all
>> + * fdescs and fles prepared BUT post-prep-s not run.
>> + */
>> + ret = prepare_scms();
>> + if (ret)
>> + goto err;
>
> This should be called before add_fake_fds_masters():
>
> 1)It may add a master file there, and this master
> must be resolved in add_fake_fds_masters(). Otherwise
> the task, which is the owner of this just added master,
> may not have rights to create the master (imagine,
> scm file is a socket of a net_ns, which can't be
> assigned by the task);
>
> 2)Another case -- there was not a task, which has
> permittions to create a socket, and you added it
> in prepare_scms(). In this case, we mustn't add
> one more fle in add_fake_fds_masters() -- and
> if this function is called after prepare_scms(),
> it won't add anything. This will reduce number
> of fake files, we add.
OK, will move and add this text as code comment.
>> +
>> ret = run_post_prepare();
>> if (ret)
>> goto err;
>> diff --git a/criu/include/sockets.h b/criu/include/sockets.h
>> index 3fa8017..1bd5c67 100644
>> --- a/criu/include/sockets.h
>> +++ b/criu/include/sockets.h
>> @@ -38,6 +38,8 @@ extern int collect_sockets(struct ns_id *);
>> extern struct collect_image_info inet_sk_cinfo;
>> extern struct collect_image_info unix_sk_cinfo;
>> extern int fix_external_unix_sockets(void);
>> +extern int prepare_scms(void);
>> +extern int unix_note_scm_rights(int id_for, uint32_t *file_ids, int *fds, int n_ids);
>>
>> extern struct collect_image_info netlink_sk_cinfo;
>>
>> diff --git a/criu/sk-queue.c b/criu/sk-queue.c
>> index 77e203e..953db66 100644
>> --- a/criu/sk-queue.c
>> +++ b/criu/sk-queue.c
>> @@ -18,9 +18,9 @@
>> #include "util.h"
>> #include "util-pie.h"
>> #include "sockets.h"
>> -
>> +#include "xmalloc.h"
>> #include "sk-queue.h"
>> -
>> +#include "files.h"
>> #include "protobuf.h"
>> #include "images/sk-packet.pb-c.h"
>>
>> @@ -28,6 +28,8 @@ struct sk_packet {
>> struct list_head list;
>> SkPacketEntry *entry;
>> char *data;
>> + unsigned scm_len;
>> + int *scm;
>> };
>>
>> static LIST_HEAD(packets_list);
>> @@ -37,12 +39,22 @@ static int collect_one_packet(void *obj, ProtobufCMessage *msg, struct cr_img *i
>> struct sk_packet *pkt = obj;
>>
>> pkt->entry = pb_msg(msg, SkPacketEntry);
>> -
>> + pkt->scm = NULL;
>> pkt->data = xmalloc(pkt->entry->length);
>> if (pkt->data ==NULL)
>> return -1;
>>
>> /*
>> + * See dump_packet_cmsg() -- only SCM_RIGHTS are supported and
>> + * only 1 of that kind is possible, thus not more than 1 SCMs
>> + * on a packet.
>> + */
>> + if (pkt->entry->n_scm > 1) {
>> + pr_err("More than 1 SCM is not possible\n");
>> + return -1;
>> + }
>> +
>> + /*
>> * NOTE: packet must be added to the tail. Otherwise sequence
>> * will be broken.
>> */
>> @@ -64,6 +76,50 @@ struct collect_image_info sk_queues_cinfo = {
>> .collect = collect_one_packet,
>> };
>>
>> +static int dump_scm_rights(struct cmsghdr *ch, SkPacketEntry *pe)
>> +{
>> + int nr_fds, *fds, i;
>> + void *buf;
>> + ScmEntry *scme;
>> +
>> + nr_fds = (ch->cmsg_len - sizeof(*ch)) / sizeof(int);
>> + fds = (int *)CMSG_DATA(ch);
>> +
>> + buf = xmalloc(sizeof(ScmEntry) + nr_fds * sizeof(uint32_t));
>> + if (!buf)
>> + return -1;
>> +
>> + scme = xptr_pull(&buf, ScmEntry);
>> + scm_entry__init(scme);
>> + scme->type = SCM_RIGHTS;
>> + scme->n_rights = nr_fds;
>> + scme->rights = xptr_pull_s(&buf, nr_fds * sizeof(uint32_t));
>> +
>> + for (i = 0; i < nr_fds; i++) {
>> + int ftyp;
>> +
>> + if (dump_my_file(fds[i], &scme->rights[i], &ftyp))
>> + return -1;
>> +
>> + /*
>> + * Unix sent over Unix or Epoll with some other sh*t
>> + * sent over unix (maybe with this very unix polled)
>> + * are tricky and not supported for now. (XXX -- todo)
>> + */
>> + if (ftyp == FD_TYPES__UNIXSK || ftyp == FD_TYPES__EVENTPOLL) {
>> + pr_err("Can't dump send %d (unix/epoll) fd\n", ftyp);
>> + return -1;
>> + }
>> + }
>> +
>> + i = pe->n_scm++;
>> + if (xrealloc_safe(&pe->scm, pe->n_scm * sizeof(ScmEntry*)))
>> + return -1;
>> +
>> + pe->scm[i] = scme;
>> + return 0;
>> +}
>> +
>> /*
>> * Maximum size of the control messages. XXX -- is there any
>> * way to get this value out of the kernel?
>> @@ -73,8 +129,26 @@ struct collect_image_info sk_queues_cinfo = {
>> static int dump_packet_cmsg(struct msghdr *mh, SkPacketEntry *pe)
>> {
>> struct cmsghdr *ch;
>> + int n_rights = 0;
>>
>> for (ch = CMSG_FIRSTHDR(mh); ch; ch = CMSG_NXTHDR(mh, ch)) {
>> + if (ch->cmsg_type == SCM_RIGHTS) {
>> + if (n_rights) {
>> + /*
>> + * Even if user is sending more than one cmsg with
>> + * rights, kernel merges them alltogether on recv.
>> + */
>> + pr_err("Unexpected 2nd SCM_RIGHTS from the kernel\n");
>> + return -1;
>> + }
>> +
>> + if (dump_scm_rights(ch, pe))
>> + return -1;
>> +
>> + n_rights++;
>> + continue;
>> + }
>> +
>> pr_err("Control messages in queue, not supported\n");
>> return -1;
>> }
>> @@ -82,6 +156,18 @@ static int dump_packet_cmsg(struct msghdr *mh, SkPacketEntry *pe)
>> return 0;
>> }
>>
>> +static void release_cmsg(SkPacketEntry *pe)
>> +{
>> + int i;
>> +
>> + for (i = 0; i < pe->n_scm; i++)
>> + xfree(pe->scm[i]);
>> + xfree(pe->scm);
>> +
>> + pe->n_scm = 0;
>> + pe->scm = NULL;
>> +}
>> +
>> int dump_sk_queue(int sock_fd, int sock_id)
>> {
>> SkPacketEntry pe = SK_PACKET_ENTRY__INIT;
>> @@ -181,6 +267,9 @@ int dump_sk_queue(int sock_fd, int sock_id)
>> ret = -EIO;
>> goto err_set_sock;
>> }
>> +
>> + if (pe.scm)
>> + release_cmsg(&pe);
>> }
>> ret = 0;
>>
>> @@ -197,6 +286,20 @@ err_brk:
>> return ret;
>> }
>>
>> +static void close_scm_fds(struct sk_packet *pkt)
>> +{
>> + int i, *fds, n_fds;
>> + struct cmsghdr *ch = (struct cmsghdr *)pkt->scm;
>> +
>> + fds = (int *)CMSG_DATA(ch);
>> + n_fds = (ch->cmsg_len - sizeof(struct cmsghdr)) / sizeof(int);
>> +
>> + for (i = 0; i < n_fds; i++) {
>> + if (close(fds[i]))
>> + pr_warn("scm: Error closing sent fd\n");
>> + }
>> +}
>> +
>> static int send_one_pkt(int fd, struct sk_packet *pkt)
>> {
>> int ret;
>> @@ -209,6 +312,11 @@ static int send_one_pkt(int fd, struct sk_packet *pkt)
>> iov.iov_base = pkt->data;
>> iov.iov_len = entry->length;
>>
>> + if (pkt->scm != NULL) {
>> + mh.msg_controllen = pkt->scm_len;
>> + mh.msg_control = pkt->scm;
>> + }
>> +
>> /*
>> * Don't try to use sendfile here, because it use sendpage() and
>> * all data are split on pages and a new skb is allocated for
>> @@ -229,6 +337,9 @@ static int send_one_pkt(int fd, struct sk_packet *pkt)
>> return -1;
>> }
>>
>> + if (pkt->scm != NULL)
>> + close_scm_fds(pkt);
>> +
>> return 0;
>> }
>>
>> @@ -264,3 +375,43 @@ int restore_sk_queue(int fd, unsigned int peer_id)
>> out:
>> return ret;
>> }
>
> The previous patches don't make restore_sk_queue() find a fle,
> which fd is used to set next packet in queue. This series does
> not support multi-queuer queues?
No. You're talking about "restore sender address" patches :) I remember that,
and that's a separate issue. Right now if we have several sockets sending
data to one, on restore we pick the first peer and make it restore the whole
queue.
> It seems, we don't need "unix: Split resolv and interconnect (v2)",
> it's just need to select a correct fd from task fles, and to use
> it in restore_sk_queue().
We do. Inside unix_note_scm_rights() I need to find a queuer for a queue
and this lookup is only possible after the resolve and interconnect split
I've made before.
>> +
>> +int prepare_scms(void)
>> +{
>> + struct sk_packet *pkt;
>> +
>> + pr_info("Preparing SCMs\n");
>> + list_for_each_entry(pkt, &packets_list, list) {
>> + SkPacketEntry *pe = pkt->entry;
>> + ScmEntry *se;
>> + struct cmsghdr *ch;
>> +
>> + if (!pe->n_scm)
>> + continue;
>> +
>> + se = pe->scm[0]; /* Only 1 SCM is possible */
>> +
>> + if (se->type == SCM_RIGHTS) {
>> + pkt->scm_len = CMSG_SPACE(se->n_rights * sizeof(int));
>> + pkt->scm = xmalloc(pkt->scm_len);
>> + if (!pkt->scm)
>> + return -1;
>> +
>> + ch = (struct cmsghdr *)pkt->scm; /* FIXME -- via msghdr */
>> + ch->cmsg_level = SOL_SOCKET;
>> + ch->cmsg_type = SCM_RIGHTS;
>> + ch->cmsg_len = CMSG_LEN(se->n_rights * sizeof(int));
>> +
>> + if (unix_note_scm_rights(pe->id_for, se->rights,
>> + (int *)CMSG_DATA(ch), se->n_rights))
>> + return -1;
>> +
>> + continue;
>> + }
>> +
>> + pr_err("Unsupported scm %d in image\n", se->type);
>> + return -1;
>> + }
>> +
>> + return 0;
>> +}
>> diff --git a/criu/sk-unix.c b/criu/sk-unix.c
>> index 42ce1bb..165adc9 100644
>> --- a/criu/sk-unix.c
>> +++ b/criu/sk-unix.c
>> @@ -798,6 +798,7 @@ struct unix_sk_info {
>> struct file_desc d;
>> struct list_head connected; /* List of sockets, connected to me */
>> struct list_head node; /* To link in peer's connected list */
>> + struct list_head scm_fles;
>>
>> /*
>> * For DGRAM sockets with queues, we should only restore the queue
>> @@ -809,6 +810,11 @@ struct unix_sk_info {
>> u8 listen:1;
>> };
>>
>> +struct scm_fle {
>> + struct list_head l;
>> + struct fdinfo_list_entry *fle;
>> +};
>> +
>> #define USK_PAIR_MASTER 0x1
>> #define USK_PAIR_SLAVE 0x2
>>
>> @@ -824,6 +830,116 @@ static struct unix_sk_info *find_unix_sk_by_ino(int ino)
>> return NULL;
>> }
>>
>> +static struct unix_sk_info *find_queuer_for(int id)
>> +{
>> + struct unix_sk_info *ui;
>> +
>> + list_for_each_entry(ui, &unix_sockets, list) {
>> + if (ui->queuer == id)
>> + return ui;
>> + }
>> +
>> + return NULL;
>> +}
>> +
>> +int unix_note_scm_rights(int id_for, uint32_t *file_ids, int *fds, int n_ids)
>> +{
>> + struct unix_sk_info *ui;
>> + struct pstree_item *owner;
>> + int i;
>> +
>> + ui = find_queuer_for(id_for);
>> + if (!ui) {
>> + pr_err("Can't find sender for %d\n", id_for);
>> + return -1;
>> + }
>> +
>> + pr_info("Found queuer for %d -> %d\n", id_for, ui->ue->id);
>> + /*
>> + * This is the task that will restore this socket
>> + */
>> + owner = file_master(&ui->d)->task;
>> +
>> + pr_info("-> will set up deps\n");
>> + /*
>> + * The ui will send data to the rights receiver. Add a fake fle
>> + * for the file and a dependency.
>> + */
>> + for (i = 0; i < n_ids; i++) {
>> + struct file_desc *tgt;
>> + struct fdinfo_list_entry *fle;
>> + struct scm_fle *sfle;
>> + FdinfoEntry *e;
>> + int fd;
>> +
>> + tgt = find_file_desc_raw(FD_TYPES__UND, file_ids[i]);
>> + if (!tgt) {
>> + pr_err("Can't find fdesc to send\n");
>> + return -1;
>> + }
>> +
>> + pr_info("`- Found %d file\n", file_ids[i]);
>> + fd = find_unused_fd(owner, -1);
>
> If I haven't missed something, I don't see, where you try
> to found the fle in task's file descriptor to use it for
> queuing. It seems, we add a fake fle for every packet in
> a queue. If it's so, it's not performance-good, and firstly
> we should try to check, that task already owns the fle instead
> of that.
Yes, I made this deliberately. Adding one more descriptor for a file
will take us several syscalls more. Fortunately queues with scms will
not be a frequent case.
>> +
>> + fle = try_file_master(tgt);
>> + if (fle) {
>> + e = dup_fdinfo(fle->fe, fd, 0);
>> + if (!e) {
>> + pr_err("Can't duplicate fdinfo for scm\n");
>> + return -1;
>> + }
>> + } else {
>> + /*
>> + * This can happen if the file in question is
>> + * sent over the socket and closed. In this case
>> + * we need to ... invent a new one!
>> + */
>> +
>> + e = xmalloc(sizeof(*e));
>> + if (!e)
>> + return -1;
>> +
>> + fdinfo_entry__init(e);
>> + e->id = tgt->id;
>> + e->type = tgt->ops->type;
>> + e->fd = fd;
>> + e->flags = 0;
>
> I'd add the code around fdinfo_entry__init() in separate function
> as we already have 3 places like it.
OK, will do. What are the others?
>> + }
>> +
>> + pr_info("scm: add %d -> %d.fd[%d]\n", tgt->id, vpid(owner), fd);
>> + sfle = xmalloc(sizeof(*sfle));
>> + if (!sfle)
>> + return -1;
>> +
>> + sfle->fle = collect_fd_to(vpid(owner), e, rsti(owner), tgt, false);
>> + if (!sfle->fle) {
>> + pr_err("Can't request new fle for scm\n");
>> + return -1;
>> + }
>> +
>> + list_add_tail(&sfle->l, &ui->scm_fles);
>> + fds[i] = fd;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +static int chk_restored_scms(struct unix_sk_info *ui)
>> +{
>> + struct scm_fle *sf, *n;
>> +
>> + list_for_each_entry_safe(sf, n, &ui->scm_fles, l) {
>> + if (sf->fle->stage != FLE_RESTORED)
>> + return 1;
>
> It's need to wait for sf->fle->bound here instead.
Why?
> I suppose, the above introduces some circular dependencies for cases
> with several sockets having epoll, which is listening them.
Nope :) On dump I explicitly ban unix sockets and epolls sent via unix
sockets. So we can only wait here for some other files, that cannot wait us.
>> +
>> + /* Optimization for the next pass */
>> + list_del(&sf->l);
>> + xfree(sf);
>> + }
>> +
>> + return 0;
>> +}
>> +
>> static int wake_connected_sockets(struct unix_sk_info *ui)
>> {
>> struct fdinfo_list_entry *fle;
>> @@ -1322,12 +1438,18 @@ static int open_unix_sk(struct file_desc *d, int *new_fd)
>> struct unix_sk_info *ui;
>> int ret;
>>
>> + ui = container_of(d, struct unix_sk_info, d);
>> +
>> + /* FIXME -- only queue restore may be postponed */
>> + if (chk_restored_scms(ui)) {
>> + pr_info("scm: Wait for tgt to restore\n");
>> + return 1;
>> + }
>
> It's too strong limitation, we delay another tasks here.
Another tasks? No, we just ask files restore loop to go and try to open
some other files.
> We may open the socket and to serve it out, and we do
> not need wait queuers for these actions.
Not always. There are several cases when we do socketpair() then restore
a queue then close one of the ends immediately.
> It's better to wait for the master fle of packet right
> before we restore the queue, and to use it from our
> task fle list.
Master may sit in some other's list.
>> +
>> fle = file_master(d);
>> if (fle->stage >= FLE_OPEN)
>> return post_open_unix_sk(d, fle->fe->fd);
>>
>> - ui = container_of(d, struct unix_sk_info, d);
>> -
>> if (inherited_fd(d, new_fd)) {
>> ui->ue->uflags |= USK_INHERIT;
>> ret = *new_fd >= 0 ? 0 : -1;
>> @@ -1440,6 +1562,7 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i)
>> ui->listen = 0;
>> INIT_LIST_HEAD(&ui->connected);
>> INIT_LIST_HEAD(&ui->node);
>> + INIT_LIST_HEAD(&ui->scm_fles);
>> ui->flags = 0;
>> fixup_sock_net_ns_id(&ui->ue->ns_id, &ui->ue->has_ns_id);
>>
>> diff --git a/images/sk-packet.proto b/images/sk-packet.proto
>> index 27b48e4..009b461 100644
>> --- a/images/sk-packet.proto
>> +++ b/images/sk-packet.proto
>> @@ -1,8 +1,14 @@
>> syntax = "proto2";
>>
>> +message scm_entry {
>> + required uint32 type = 1;
>> + repeated uint32 rights = 2;
>> +}
>> +
>> message sk_packet_entry {
>> required uint32 id_for = 1;
>> required uint32 length = 2;
>> // optional bytes addr = 3;
>> // optional sk_ucred_entry ucred = 128;
>> + repeated scm_entry scm = 4;
>> }
>> --
>> 2.1.4
>>
>> _______________________________________________
>> CRIU mailing list
>> CRIU at openvz.org
>> https://lists.openvz.org/mailman/listinfo/criu
> .
>
More information about the CRIU
mailing list