[CRIU] [PATCH 07/11] lazy-pages: make handle_requests also poll for listening socket
Pavel Emelyanov
xemul at virtuozzo.com
Tue Apr 12 05:08:14 PDT 2016
On 04/12/2016 02:27 PM, Mike Rapoport wrote:
> On Tue, Apr 12, 2016 at 02:06:22PM +0300, Pavel Emelyanov wrote:
>> On 04/11/2016 09:19 AM, Mike Rapoport wrote:
>>> Signed-off-by: Mike Rapoport <rppt at linux.vnet.ibm.com>
>>> ---
>>> criu/uffd.c | 119 +++++++++++++++++++++++++++++++++++++++---------------------
>>> 1 file changed, 77 insertions(+), 42 deletions(-)
>>>
>>> diff --git a/criu/uffd.c b/criu/uffd.c
>>> index 87c2602..fece56e 100644
>>> --- a/criu/uffd.c
>>> +++ b/criu/uffd.c
>>> @@ -545,32 +545,56 @@ static int handle_user_fault(int fd, struct list_head *uffd_list, void *dest)
>>> return 0;
>>> }
>>>
>>> +static int handle_socket_conn(int listen, struct sockaddr_un *saddr,
>>> + struct list_head *uffd_list)
>>> +{
>>> + int uffd;
>>> + int uffd_flags;
>>> +
>>> + uffd = ud_open(listen, saddr);
>>> + if (uffd < 0) {
>>> + pr_perror("uffd open error");
>>> + return -1;
>>> + }
>>> +
>>> + pr_debug("uffd is 0x%d\n", uffd);
>>> + uffd_flags = fcntl(uffd, F_GETFD, NULL);
>>> + pr_debug("uffd_flags are 0x%x\n", uffd_flags);
>>> + if (fcntl(uffd, F_SETFD, uffd_flags | O_NONBLOCK))
>>> + return -1;
>>> +
>>> + /*
>>> + * Find the memory pages belonging to the restored process so
>>> + * that it is trackable when all pages have been transferred.
>>> + */
>>> + if ((total_pages = find_vmas(uffd_list)) == -1)
>>> + return -1;
>>> +
>>> + return uffd;
>>> +}
>>> +
>>> #define POLL_TIMEOUT 5000
>>>
>>> -static int handle_requests(int fd)
>>> +static int handle_requests(int listen, struct sockaddr_un *saddr)
>>> {
>>> struct pollfd *pollfd;
>>> - int nr_pollfd;
>>> + int nr_pollfd = 1;
>>> + int max_pollfd;
>>> + int timeout = -1;
>>> int ret = -1;
>>> unsigned long ps;
>>> void *dest;
>>> + int i;
>>>
>>> LIST_HEAD(uffd_list);
>>>
>>> - /*
>>> - * Find the memory pages belonging to the restored process
>>> - * so that it is trackable when all pages have been transferred.
>>> - */
>>> - if ((total_pages = find_vmas(&uffd_list)) == -1)
>>> - return -1;
>>> -
>>> /* All operations will be done on page size */
>>> ps = page_size();
>>> dest = malloc(ps);
>>>
>>> /* allocate pollfd per task + pollfd for listen */
>>> - nr_pollfd = task_entries->nr_tasks + 1;
>>> - pollfd = malloc(sizeof(*pollfd) * nr_pollfd);
>>> + max_pollfd = task_entries->nr_tasks + 1;
>>> + pollfd = malloc(sizeof(*pollfd) * max_pollfd);
>>> if (!pollfd || !dest) {
>>> pr_err("memory allocation failed\n");
>>> ret = -1;
>>> @@ -578,12 +602,10 @@ static int handle_requests(int fd)
>>> }
>>>
>>> /*
>>> - * use nr_pollfd to set number of pollfd's being polled
>>> - * FIXME: eventually nr_pollfd should be nr_tasks + 1, and the
>>> - * first fd to poll on should that of listen
>>> + * use nr_pollfd to set number of pollfd's being polled, the
>>> + * first pollfd is always for the listening socket
>>> */
>>> - nr_pollfd = 1;
>>> - pollfd[0].fd = fd;
>>> + pollfd[0].fd = listen;
>>> pollfd[0].events = POLLIN;
>>>
>>> while (1) {
>>> @@ -592,7 +614,7 @@ static int handle_requests(int fd)
>>> * no uffd pages are requested the code switches to
>>> * copying the remaining pages.
>>> */
>>> - ret = poll(pollfd, nr_pollfd, POLL_TIMEOUT);
>>> + ret = poll(pollfd, nr_pollfd, timeout);
>>
>> But there should be strict split between the time we expect for new connections
>> on this socket versus when pagefaults start arriving. Because first we restore
>> the tree and send uffd sockets to this daemon and _then_ we resume the tasks and
>> from now on no new connections may appear, but PF-s start popping up from uffds.
>>
>> Since we know this, why do we need to complicate this and poll for all the
>> sockets together?
>
> I had an impression that the uffd is sent to the daemon at the stage when
> tasks are already running concurrently. If I was wrong here and the tasks
> are resumed after all the uffd's are sent to the daemon, then we indeed
> don't need to poll for uffd's and sockets together.
Yes, we first fork() all the stuff and send the uffds out, then tasks are resumed
and start generating PF-s. The problem here is that this "resume" point is not
reported to uffd :) so he can only guess this moment.
-- Pavel
>>> pr_debug("poll() rc: 0x%x\n", ret);
>>> if (ret < 0) {
>>> pr_perror("polling failed");
>>> @@ -603,16 +625,42 @@ static int handle_requests(int fd)
>>> break;
>>> }
>>>
>>> - ret = handle_user_fault(fd, &uffd_list, dest);
>>> - if (ret < 0)
>>> - goto out;
>>> + if (pollfd[0].revents & POLLIN) {
>>> + int fd = handle_socket_conn(listen, saddr, &uffd_list);
>>> + if (fd < 0)
>>> + goto out;
>>> + pollfd[nr_pollfd].fd = fd;
>>> + pollfd[nr_pollfd].events = POLLIN;
>>> + nr_pollfd++;
>>> + BUG_ON(nr_pollfd > max_pollfd);
>>> +
>>> + /*
>>> + * once the first uffd is register wait POLL_TIMEOUT
>>> + * before starting bulk page trasfer
>>> + */
>>> + timeout = POLL_TIMEOUT;
>>> +
>>> + continue;
>>> + }
>>> +
>>> + for (i = 1; i < nr_pollfd; i++) {
>>> + if (pollfd[i].revents & POLLIN) {
>>> + ret = handle_user_fault(pollfd[i].fd,
>>> + &uffd_list, dest);
>>> + if (ret < 0)
>>> + goto out;
>>> + }
>>> + }
>>> }
>>> - pr_debug("Handle remaining pages\n");
>>> - ret = handle_remaining_pages(fd, &uffd_list, dest);
>>> - if (ret < 0) {
>>> - pr_err("Error during remaining page copy\n");
>>> - ret = 1;
>>> - goto out;
>>> +
>>> + for (i = 1; i < nr_pollfd; i++) {
>>> + pr_debug("Handle remaining pages\n");
>>> + ret = handle_remaining_pages(pollfd[i].fd, &uffd_list, dest);
>>> + if (ret < 0) {
>>> + pr_err("Error during remaining page copy\n");
>>> + ret = 1;
>>> + goto out;
>>> + }
>>> }
>>>
>>> pr_debug("With UFFD transferred pages: (%ld/%ld)\n", uffd_copied_pages, total_pages);
>>> @@ -626,9 +674,10 @@ static int handle_requests(int fd)
>>> ret = 0;
>>>
>>> out:
>>> + for (i = nr_pollfd - 1; i > 0; i--)
>>> + close(pollfd[i].fd);
>>> free(pollfd);
>>> free(dest);
>>> - close(fd);
>>> return ret;
>>>
>>> }
>>> @@ -657,8 +706,6 @@ static int lazy_pages_prepare_pstree(void)
>>> int cr_lazy_pages()
>>> {
>>> int listen;
>>> - int uffd;
>>> - int uffd_flags;
>>> int ret;
>>> struct sockaddr_un saddr;
>>>
>>> @@ -679,19 +726,7 @@ int cr_lazy_pages()
>>> return -1;
>>> }
>>>
>>> - uffd = ud_open(listen, &saddr);
>>> - if (uffd < 0) {
>>> - pr_perror("uffd open error");
>>> - return -1;
>>> - }
>>> -
>>> - pr_debug("uffd is 0x%d\n", uffd);
>>> - uffd_flags = fcntl(uffd, F_GETFD, NULL);
>>> - pr_debug("uffd_flags are 0x%x\n", uffd_flags);
>>> - if (fcntl(uffd, F_SETFD, uffd_flags | O_NONBLOCK))
>>> - return -1;
>>> -
>>> - ret = handle_requests(uffd);
>>> + ret = handle_requests(listen, &saddr);
>>> close(listen);
>>>
>>> return ret;
>>>
>>
>
> .
>
More information about the CRIU
mailing list