[CRIU] [PATCH 11/11] lazy-pages: kill POLL_TIMEOUT

Mike Rapoport rppt at linux.vnet.ibm.com
Sun Mar 25 16:27:31 MSK 2018


In the current model we do not start the background page transfer until
POLL_TIMEOUT has elapsed since the last uffd or socket event. If the
restored process accesses memory once every (POLL_TIMEOUT - epsilon), the
filling of its memory can take ages.

This patch changes the model in the following way:
* poll for the events indefinitely until the restore is complete
* the restore completion event resets the poll timeout to zero and
  starts the background transfers
* after each transfer we return to check if there are any uffd events to
handle

Signed-off-by: Mike Rapoport <rppt at linux.vnet.ibm.com>
---
 criu/uffd.c | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

diff --git a/criu/uffd.c b/criu/uffd.c
index 85c8a5c..7a9e97e 100644
--- a/criu/uffd.c
+++ b/criu/uffd.c
@@ -1124,13 +1124,10 @@ static void lazy_pages_summary(struct lazy_pages_info *lpi)
 #endif
 }
 
-#define POLL_TIMEOUT 1000
-
 static int handle_requests(int epollfd, struct epoll_event *events, int nr_fds)
 {
 	struct lazy_pages_info *lpi, *n;
-	/* FIXME -- timeout should decrease over time...  */
-	int poll_timeout = POLL_TIMEOUT;
+	int poll_timeout = -1;
 	int ret;
 
 	for (;;) {
@@ -1140,19 +1137,13 @@ static int handle_requests(int epollfd, struct epoll_event *events, int nr_fds)
 		if (ret > 0) {
 			if (complete_forks(epollfd, &events, &nr_fds))
 				return -1;
-			continue;
+			if (!restore_finished)
+				continue;
 		}
 
-		/* don't start backround fetch before restore is finished */
-		if (!restore_finished)
-			continue;
-
-		if (poll_timeout)
-			pr_debug("Start handling remaining pages\n");
-
 		poll_timeout = 0;
 		list_for_each_entry_safe(lpi, n, &lpis, l) {
-			if (!list_empty(&lpi->iovs)) {
+			if (!list_empty(&lpi->iovs) && list_empty(&lpi->reqs)) {
 				ret = xfer_pages(lpi);
 				if (ret < 0)
 					goto out;
@@ -1240,7 +1231,7 @@ static int lazy_sk_read_event(struct epoll_rfd *rfd)
 
 	restore_finished = true;
 
-	return 0;
+	return 1;
 }
 
 static int lazy_sk_hangup_event(struct epoll_rfd *rfd)
-- 
2.7.4



More information about the CRIU mailing list