[CRIU] [RFC PATCH 11/20] criu/plugin: Support larger memory footprints
Felix Kuehling
Felix.Kuehling at amd.com
Sat May 1 04:58:36 MSK 2021
From: David Yat Sin <david.yatsin at amd.com>
Use fopen/fwrite/fread to be able to handle bigger files.
Try to open /dev/kfd earlier during restore, so we can fail earlier if
amdgpu driver is not loaded.
Memcopy data directly into bo_info_test structure during dump to reduce
number of memcpy.
Moved some variable declarations so that the variables are only valid
during the scope where they are used.
Signed-off-by: David Yat Sin <david.yatsin at amd.com>
---
test/others/ext-kfd/kfd_plugin.c | 168 ++++++++++++++++---------------
1 file changed, 89 insertions(+), 79 deletions(-)
diff --git a/test/others/ext-kfd/kfd_plugin.c b/test/others/ext-kfd/kfd_plugin.c
index 1059daac7..5c4809649 100644
--- a/test/others/ext-kfd/kfd_plugin.c
+++ b/test/others/ext-kfd/kfd_plugin.c
@@ -77,6 +77,69 @@ int open_drm_render_device(int minor)
return fd;
}
+int write_file(const char *file_path, const void *buf, const size_t buf_len)
+{
+ int fd;
+ FILE *fp;
+ size_t len_wrote;
+
+ fd = openat(criu_get_image_dir(), file_path, O_WRONLY | O_CREAT, 0600);
+ if (fd < 0) {
+ pr_perror("Cannot open %s", file_path);
+ return -EPERM;
+ }
+
+ fp = fdopen(fd, "w");
+ if (!fp) {
+ pr_perror("Cannot fdopen %s", file_path);
+ return -EPERM;
+ }
+
+ len_wrote = fwrite(buf, 1, buf_len, fp);
+ if (len_wrote != buf_len) {
+ pr_perror("Unable to write %s (wrote:%ld buf_len:%ld)\n", file_path, len_wrote, buf_len);
+ fclose(fp);
+ return -EIO;
+ }
+
+ pr_info("Wrote file:%s (%ld bytes)\n", file_path, buf_len);
+ /* this will also close fd */
+ fclose(fp);
+ return 0;
+}
+
+int read_file(const char *file_path, void *buf, const size_t buf_len)
+{
+ int fd;
+ FILE *fp;
+ size_t len_read;
+
+ fd = openat(criu_get_image_dir(), file_path, O_RDONLY);
+ if (fd < 0) {
+ pr_perror("Cannot open %s", file_path);
+ return -ENOENT;
+ }
+
+ fp = fdopen(fd, "r");
+ if (!fp) {
+ pr_perror("Cannot fdopen %s", file_path);
+ return -EPERM;
+ }
+
+ len_read = fread(buf, 1, buf_len, fp);
+ if (len_read != buf_len) {
+ pr_perror("Unable to read %s\n", file_path);
+ fclose(fp);
+ return -EIO;
+ }
+
+ pr_info("Read file:%s (%ld bytes)\n", file_path, buf_len);
+
+ /* this will also close fd */
+ fclose(fp);
+ return 0;
+}
+
/* Call ioctl, restarting if it is interrupted */
int kmtIoctl(int fd, unsigned long request, void *arg)
{
@@ -218,14 +281,13 @@ int kfd_plugin_dump_file(int fd, int id)
struct kfd_ioctl_criu_dumper_args args = {0};
struct kfd_criu_bo_buckets *bo_bucket_ptr;
struct kfd_criu_q_bucket *q_bucket_ptr;
- int img_fd, ret, len, mem_fd, drm_fd;
+ int ret, drm_fd;
char img_path[PATH_MAX];
struct stat st, st_kfd;
unsigned char *buf;
char fd_path[128];
- uint8_t *local_buf;
- char *fname;
void *addr;
+ size_t len;
printf("kfd_plugin: Enter cr_plugin_dump_file()- ID = 0x%x\n", id);
ret = 0;
@@ -256,26 +318,17 @@ int kfd_plugin_dump_file(int fd, int id)
rd.minor_number = minor(st.st_rdev);
snprintf(img_path, sizeof(img_path), "renderDXXX.%d.img", id);
- img_fd = openat(criu_get_image_dir(), img_path, O_WRONLY | O_CREAT, 0600);
- if (img_fd < 0) {
- pr_perror("Can't open %s", img_path);
- return -1;
- }
-
len = criu_render_node__get_packed_size(&rd);
buf = xmalloc(len);
if (!buf)
return -ENOMEM;
criu_render_node__pack(&rd, buf);
- ret = write(img_fd, buf, len);
-
- if (ret != len) {
- pr_perror("Unable to write in %s", img_path);
+ ret = write_file(img_path, buf, len);
+ if (ret)
ret = -1;
- }
+
xfree(buf);
- close(img_fd);
/* Need to return success here so that criu can call plugins for
* renderD nodes */
@@ -399,13 +452,6 @@ int kfd_plugin_dump_file(int fd, int id)
(e->bo_info_test[i])->idr_handle = (bo_bucket_ptr)[i].idr_handle;
(e->bo_info_test[i])->user_addr = (bo_bucket_ptr)[i].user_addr;
- local_buf = xmalloc((bo_bucket_ptr)[i].bo_size);
- if (!local_buf) {
- pr_err("failed to allocate memory for BO rawdata\n");
- ret = -1;
- goto failed;
- }
-
if ((bo_bucket_ptr)[i].bo_alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
pr_info("VRAM BO Found\n");
}
@@ -418,6 +464,9 @@ int kfd_plugin_dump_file(int fd, int id)
KFD_IOC_ALLOC_MEM_FLAGS_VRAM ||
(bo_bucket_ptr)[i].bo_alloc_flags &
KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
+ char *fname;
+ int mem_fd;
+
if ((e->bo_info_test[i])->bo_alloc_flags &
KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) {
pr_info("kfd_plugin: large bar read possible\n");
@@ -464,7 +513,7 @@ int kfd_plugin_dump_file(int fd, int id)
}
pr_info("Try to read file now\n");
- if (read(mem_fd, local_buf,
+ if (read(mem_fd, e->bo_info_test[i]->bo_rawdata.data,
(e->bo_info_test[i])->bo_size) !=
(e->bo_info_test[i])->bo_size) {
pr_perror("Can't read buffer\n");
@@ -472,17 +521,7 @@ int kfd_plugin_dump_file(int fd, int id)
goto failed;
}
- pr_info("log initial few bytes of the raw data for this BO\n");
- for (int i = 0; i < 10; i ++)
- {
- plugin_log_msg("0x%llx\n",((__u64*)local_buf)[i]);
- }
-
close(mem_fd);
- memcpy((e->bo_info_test[i])->bo_rawdata.data,
- (uint8_t*)local_buf,
- (e->bo_info_test[i])->bo_size);
- xfree(local_buf);
} /* PROCPIDMEM read done */
}
}
@@ -565,36 +604,25 @@ int kfd_plugin_dump_file(int fd, int id)
snprintf(img_path, sizeof(img_path), "kfd.%d.img", id);
pr_info("kfd_plugin: img_path = %s", img_path);
- img_fd = openat(criu_get_image_dir(), img_path, O_WRONLY | O_CREAT, 0600);
- if (img_fd < 0) {
- pr_perror("Can't open %s", img_path);
- ret = -1;
- goto failed;
- }
len = criu_kfd__get_packed_size(e);
- pr_info("kfd_plugin: Len = %d\n", len);
+ pr_info("kfd_plugin: Len = %ld\n", len);
buf = xmalloc(len);
if (!buf) {
pr_perror("failed to allocate memory\n");
- close(img_fd);
ret = -ENOMEM;
goto failed;
}
criu_kfd__pack(e, buf);
- ret = write(img_fd, buf, len);
- if (ret != len) {
- pr_perror("Unable to write in %s", img_path);
+ ret = write_file(img_path, buf, len);
+ if (ret != len)
ret = -1;
- goto exit;
- }
-exit:
+
xfree(buf);
- close(img_fd);
failed:
xfree(devinfo_bucket_ptr);
xfree(bo_bucket_ptr);
@@ -609,7 +637,7 @@ CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__DUMP_EXT_FILE, kfd_plugin_dump_file)
int kfd_plugin_restore_file(int id)
{
struct kfd_criu_devinfo_bucket *devinfo_bucket_ptr = NULL;
- int img_fd, len, fd, mem_fd;
+ int fd;
struct kfd_ioctl_criu_restorer_args args = {0};
struct kfd_criu_bo_buckets *bo_bucket_ptr;
struct kfd_criu_q_bucket *q_bucket_ptr;
@@ -625,8 +653,8 @@ int kfd_plugin_restore_file(int id)
pr_info("kfd_plugin: Initialized kfd plugin restorer with ID = %d\n", id);
snprintf(img_path, sizeof(img_path), "kfd.%d.img", id);
- img_fd = openat(criu_get_image_dir(), img_path, O_RDONLY, 0600);
- if (img_fd < 0) {
+
+ if (stat(img_path, &filestat) == -1) {
pr_perror("open(%s)", img_path);
/* This is restorer plugin for renderD nodes. Since criu doesn't
@@ -637,11 +665,6 @@ int kfd_plugin_restore_file(int id)
* restore both, the kfd plugin gets called first.
*/
snprintf(img_path, sizeof(img_path), "renderDXXX.%d.img", id);
- img_fd = openat(criu_get_image_dir(), img_path, O_RDONLY, 0600);
- if (img_fd < 0) {
- pr_perror("open(%s)", img_path);
- return -ENOTSUP;
- }
if (stat(img_path, &filestat) == -1)
{
@@ -656,16 +679,13 @@ int kfd_plugin_restore_file(int id)
return -ENOMEM;
}
- len = read(img_fd, buf, filestat.st_size);
- if (len <= 0) {
+ if (read_file(img_path, buf, filestat.st_size)) {
pr_perror("Unable to read from %s", img_path);
xfree(buf);
- close(img_fd);
return -1;
}
- close(img_fd);
- rd = criu_render_node__unpack(NULL, len, buf);
+ rd = criu_render_node__unpack(NULL, filestat.st_size, buf);
if (rd == NULL) {
pr_perror("Unable to parse the KFD message %d", id);
xfree(buf);
@@ -684,32 +704,21 @@ int kfd_plugin_restore_file(int id)
pr_perror("failed to open kfd in plugin");
return -1;
}
-
pr_info("kfd_plugin: Opened kfd, fd = %d\n", fd);
-
-
- if (stat(img_path, &filestat) == -1)
- {
- pr_perror("Failed to read file stats\n");
- return -1;
- }
pr_info("kfd img file size on disk = %ld\n", filestat.st_size);
buf = xmalloc(filestat.st_size);
if (!buf) {
pr_perror("Failed to allocate memory\n");
- close(img_fd);
return -ENOMEM;
}
- len = read(img_fd, buf, filestat.st_size);
- if (len <= 0) {
- pr_perror("Unable to read from %s", img_path);
+
+ if (read_file(img_path, buf, filestat.st_size)) {
xfree(buf);
- close(img_fd);
return -1;
}
- close(img_fd);
- e = criu_kfd__unpack(NULL, len, buf);
+
+ e = criu_kfd__unpack(NULL, filestat.st_size, buf);
if (e == NULL) {
pr_err("Unable to parse the KFD message %#x", id);
xfree(buf);
@@ -922,11 +931,12 @@ int kfd_plugin_restore_file(int id)
(e->bo_info_test[i])->bo_size);
munmap(addr, e->bo_info_test[i]->bo_size);
} else {
+ int mem_fd;
/* Use indirect host data path via /proc/pid/mem
* on small pci bar GPUs or for Buffer Objects
* that don't have HostAccess permissions.
*/
- pr_info("kfd_plugin: using PROCPIDMEM to restore BO contents\n");
+ plugin_log_msg("kfd_plugin: using PROCPIDMEM to restore BO contents\n");
addr = mmap(NULL,
(e->bo_info_test[i])->bo_size,
PROT_NONE,
@@ -955,7 +965,7 @@ int kfd_plugin_restore_file(int id)
goto clean;
}
- pr_perror("Opened %s file for pid = %d\n", fname, e->pid);
+ plugin_log_msg("Opened %s file for pid = %d\n", fname, e->pid);
free (fname);
if (lseek (mem_fd, (off_t) addr, SEEK_SET) == -1) {
@@ -965,7 +975,7 @@ int kfd_plugin_restore_file(int id)
goto clean;
}
- pr_perror("Attempt writting now\n");
+ plugin_log_msg("Attempt writting now\n");
if (write(mem_fd, e->bo_info_test[i]->bo_rawdata.data,
(e->bo_info_test[i])->bo_size) !=
(e->bo_info_test[i])->bo_size) {
--
2.17.1
More information about the CRIU
mailing list