[CRIU] [PATCH] Support dmabuf with amdgpu
David Francis
David.Francis at amd.com
Fri Jan 31 22:00:36 MSK 2025
This patch, in combination with an accompanying kernel patch,
adds support for amdgpu dmabuf IPC with CRIU.
It includes:
- Updates to the amdgpu_drm.h header file to match the kernel.
- Inclusion of the kfd_ioctl.h header file to allow the plugin
  to call the new amdgpu CRIU ioctl.
- Plugin file restore can now retry; new files-ext.c code
  supports this case.
- The amdgpu plugin now checks for shared bos during checkpoint
  by finding the gem handle for each bo; this introduces a
  dependency on a three-year-old libdrm change.
- The unpause step is now its own callback (DUMP_DEVICE_LATE)
  instead of being triggered by counting drm files.
- The amdgpu plugin restores bos by designating one process as the
  exporter for each bo; that process acquires a dmabuf fd for the
  bo and sends it to the other processes to be imported (see the
  sketches below).
- The amdgpu plugin tracks which bos it has restored; it signals
  a retry if it needs to restore an imported bo but the
  corresponding exported bo has not been restored yet.
- New service_fd for transferring dmabuf fds. There is a function
  that plugins can call to send out a dmabuf fd, and a callback
  that notifies plugins when a dmabuf fd has been received over
  the socket.
- New mechanism for finding fds to dup the received dmabuf fds
  onto that won't conflict with other restore fds. It would be
  good to unify this with find_unused_fd_pid.
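
For reviewers, here is a minimal restore-side sketch of the new
plugin-facing contract. It assumes the CR_PLUGIN_REGISTER_HOOK
registration style used by the in-tree amdgpu plugin; the helpers
(import_waiting_on_export, restore_bo_from_image,
adopt_received_dmabuf) are hypothetical stand-ins for plugin-internal
logic and are not part of this patch.

#include <stdbool.h>

#include "criu-plugin.h"

/* Hypothetical plugin-internal helpers, not part of this patch. */
extern bool import_waiting_on_export(int id);
extern int restore_bo_from_image(int id);
extern int adopt_received_dmabuf(int handle, int fd);

/*
 * RESTORE_EXT_FILE now takes a bool * that the plugin sets when it
 * cannot finish yet (e.g. the exporting process has not restored the
 * shared bo); files-ext.c then reports a retry to the fd engine
 * instead of failing the restore.
 */
int my_restore_file(int id, bool *retry_needed)
{
	if (import_waiting_on_export(id)) {
		*retry_needed = true;
		return 0; /* non-negative, so the core does not treat it as an error */
	}

	*retry_needed = false;
	return restore_bo_from_image(id); /* returns the restored fd */
}
CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__RESTORE_EXT_FILE, my_restore_file)

/*
 * DMABUF_FD fires when a dmabuf fd arrives on the new DMABUF_FD_OFF
 * service socket; the core has already dup'ed it to an unused high fd
 * via get_unused_high_fd().
 */
int my_dmabuf_fd(int handle, int fd)
{
	return adopt_received_dmabuf(handle, fd);
}
CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__DMABUF_FD, my_dmabuf_fd)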
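
The exporter side, again with hypothetical helpers, showing the new
DUMP_DEVICE_LATE hook and how the designated exporter pushes a dmabuf
fd to its peers with serve_out_dmabuf_fd():

#include "criu-plugin.h"
#include "files.h" /* serve_out_dmabuf_fd() */

/* Hypothetical plugin-internal helpers, not part of this patch. */
extern int unpause_device(int pid);
extern int nr_exported_bos(void);
extern int exported_bo_handle(int i);
extern int export_bo_as_dmabuf(int handle); /* returns a dmabuf fd */

/*
 * DUMP_DEVICE_LATE runs late in cr_dump_tasks(), so the plugin no
 * longer needs to count drm files to decide when to unpause the
 * device.
 */
int my_dump_device_late(int pid)
{
	return unpause_device(pid);
}
CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__DUMP_DEVICE_LATE, my_dump_device_late)

/*
 * On restore, the process designated as exporter hands each shared
 * bo's dmabuf fd to the core; files.c forwards it over the dmabuf
 * service socket to every process holding a plugin-managed file, and
 * each of those processes receives it through the DMABUF_FD hook.
 */
int publish_exported_bos(void)
{
	int i, fd;

	for (i = 0; i < nr_exported_bos(); i++) {
		fd = export_bo_as_dmabuf(exported_bo_handle(i));
		if (fd < 0)
			return -1;
		if (serve_out_dmabuf_fd(exported_bo_handle(i), fd))
			return -1;
	}
	return 0;
}
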
Signed-off-by: David Francis <David.Francis at amd.com>
---
criu/cr-dump.c | 3 +
criu/cr-restore.c | 1 +
criu/files-ext.c | 10 +-
criu/files.c | 113 ++
criu/include/criu-plugin.h | 8 +-
criu/include/files.h | 2 +
criu/include/servicefd.h | 2 +
criu/pie/restorer.c | 4 +
criu/plugin.c | 1 +
criu/servicefd.c | 12 +
plugins/amdgpu/amdgpu_drm.h | 1638 +++++++++++++++++++++++++++
plugins/amdgpu/amdgpu_plugin.c | 238 ++--
plugins/amdgpu/amdgpu_plugin_drm.c | 368 +++++-
plugins/amdgpu/amdgpu_plugin_drm.h | 9 +
plugins/amdgpu/amdgpu_plugin_util.c | 157 ++-
plugins/amdgpu/amdgpu_plugin_util.h | 49 +-
plugins/amdgpu/criu-amdgpu.proto | 18 +
plugins/amdgpu/kfd_ioctl.h | 1455 +++++++++++++++++++-----
18 files changed, 3700 insertions(+), 388 deletions(-)
create mode 100644 plugins/amdgpu/amdgpu_drm.h
diff --git a/criu/cr-dump.c b/criu/cr-dump.c
index 1bc5d934f..34e756c7f 100644
--- a/criu/cr-dump.c
+++ b/criu/cr-dump.c
@@ -2225,6 +2225,9 @@ int cr_dump_tasks(pid_t pid)
goto err;
}
+ if (run_plugins(DUMP_DEVICE_LATE, pid))
+ goto err;
+
if (parent_ie) {
inventory_entry__free_unpacked(parent_ie, NULL);
parent_ie = NULL;
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index ddca6b8ec..c7c74fc29 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -3470,6 +3470,7 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
close_image_dir();
close_proc();
close_service_fd(TRANSPORT_FD_OFF);
+ close_service_fd(DMABUF_FD_OFF);
close_service_fd(CR_PROC_FD_OFF);
close_service_fd(ROOT_FD_OFF);
close_service_fd(USERNSD_SK);
diff --git a/criu/files-ext.c b/criu/files-ext.c
index 95ec8e37c..4cc99d921 100644
--- a/criu/files-ext.c
+++ b/criu/files-ext.c
@@ -45,10 +45,11 @@ static int open_fd(struct file_desc *d, int *new_fd)
{
struct ext_file_info *xfi;
int fd;
+ bool retry_needed = false;
xfi = container_of(d, struct ext_file_info, d);
- fd = run_plugins(RESTORE_EXT_FILE, xfi->xfe->id);
+ fd = run_plugins(RESTORE_EXT_FILE, xfi->xfe->id, &retry_needed);
if (fd < 0) {
pr_err("Unable to restore %#x\n", xfi->xfe->id);
return -1;
@@ -57,8 +58,11 @@ static int open_fd(struct file_desc *d, int *new_fd)
if (restore_fown(fd, xfi->xfe->fown))
return -1;
- *new_fd = fd;
- return 0;
+ if (!retry_needed)
+ *new_fd = fd;
+ else
+ *new_fd = -1;
+ return retry_needed;
}
static struct file_desc_ops ext_desc_ops = {
diff --git a/criu/files.c b/criu/files.c
index 31e705bcc..5ca41e8c9 100644
--- a/criu/files.c
+++ b/criu/files.c
@@ -62,6 +62,8 @@
static struct hlist_head file_desc_hash[FDESC_HASH_SIZE];
/* file_desc's, which fle is not owned by a process, that is able to open them */
static LIST_HEAD(fake_master_head);
+/* processes that have a file from a plugin that can use shared dmabuf_fds */
+static LIST_HEAD(dmabuf_processes);
static u32 max_file_desc_id = 0;
@@ -831,10 +833,35 @@ static void collect_desc_fle(struct fdinfo_list_entry *new_le, struct file_desc
}
}
+static int add_pid_to_dmabuf_list(int pid)
+{
+ struct fdinfo_list_entry *le;
+
+ list_for_each_entry(le, &dmabuf_processes, desc_list)
+ if (le->pid == pid)
+ return 0;
+
+ le = alloc_fle(pid, NULL);
+
+ if (!le)
+ return -ENOMEM;
+
+ list_add(&le->desc_list, &dmabuf_processes);
+
+ return 0;
+}
+
struct fdinfo_list_entry *collect_fd_to(int pid, FdinfoEntry *e, struct rst_info *rst_info, struct file_desc *fdesc,
bool fake, bool force_master)
{
struct fdinfo_list_entry *new_le;
+ int ret;
+
+ if (fdesc->ops->type == FD_TYPES__EXT) {
+ ret = add_pid_to_dmabuf_list(pid);
+ if (ret)
+ return NULL;
+ }
new_le = alloc_fle(pid, e);
if (new_le) {
@@ -983,6 +1010,14 @@ static void transport_name_gen(struct sockaddr_un *addr, int *len, int pid)
*addr->sun_path = '\0';
}
+static void dmabuf_socket_name_gen(struct sockaddr_un *addr, int *len, int pid)
+{
+ addr->sun_family = AF_UNIX;
+ snprintf(addr->sun_path, UNIX_PATH_MAX, "x/crtools-dmabuf-%d-%" PRIx64, pid, criu_run_id);
+ *len = SUN_LEN(addr);
+ *addr->sun_path = '\0';
+}
+
static bool task_fle(struct pstree_item *task, struct fdinfo_list_entry *fle)
{
struct fdinfo_list_entry *tmp;
@@ -1028,6 +1063,45 @@ static int recv_fd_from_peer(struct fdinfo_list_entry *fle)
return 0;
}
+static int recv_dmabuf_fds(void)
+{
+ int fd, newfd, ret, tsock, handle;
+
+ tsock = get_service_fd(DMABUF_FD_OFF);
+
+ while (true) {
+ ret = __recv_fds(tsock, &fd, 1, (void *)&handle, sizeof(handle), MSG_DONTWAIT);
+
+ if (ret == -EAGAIN || ret == -EWOULDBLOCK)
+ return 1;
+ else if (ret)
+ return -1;
+
+ newfd = get_unused_high_fd();
+
+ reopen_fd_as(newfd, fd);
+
+ run_plugins(DMABUF_FD, handle, newfd);
+ }
+
+ return 0;
+}
+
+static int send_dmabuf_fd_to_peer(int handle, int fd, int pid)
+{
+ struct sockaddr_un saddr;
+ int len, sock, ret;
+
+ sock = get_service_fd(DMABUF_FD_OFF);
+
+ dmabuf_socket_name_gen(&saddr, &len, pid);
+ pr_info("\t\tSend dmabuf fd %d for handle %d to %s\n", fd, handle, saddr.sun_path + 1);
+ ret = send_fds(sock, &saddr, len, &fd, 1, (void *)&handle, sizeof(handle));
+ if (ret < 0)
+ return -1;
+ return set_fds_event(pid);
+}
+
static int send_fd_to_peer(int fd, struct fdinfo_list_entry *fle)
{
struct sockaddr_un saddr;
@@ -1132,6 +1206,25 @@ int setup_and_serve_out(struct fdinfo_list_entry *fle, int new_fd)
return 0;
}
+int serve_out_dmabuf_fd(int handle, int fd)
+{
+ int ret;
+ struct fdinfo_list_entry *fle;
+
+ list_for_each_entry(fle, &dmabuf_processes, desc_list) {
+ ret = send_dmabuf_fd_to_peer(handle, fd, fle->pid);
+
+ if (ret) {
+ pr_err("Can't sent fd %d to %d\n", fd, fle->pid);
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
static int open_fd(struct fdinfo_list_entry *fle)
{
struct file_desc *d = fle->desc;
@@ -1212,6 +1305,7 @@ static int open_fdinfos(struct pstree_item *me)
do {
progress = again = false;
clear_fds_event();
+ recv_dmabuf_fds();
list_for_each_entry_safe(fle, tmp, list, ps_list) {
st = fle->stage;
@@ -1707,6 +1801,25 @@ int open_transport_socket(void)
if (install_service_fd(TRANSPORT_FD_OFF, sock) < 0)
goto out;
+
+
+ sock = socket(PF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0);
+ if (sock < 0) {
+ pr_perror("Can't create socket");
+ goto out;
+ }
+
+ dmabuf_socket_name_gen(&saddr, &slen, pid);
+ if (bind(sock, (struct sockaddr *)&saddr, slen) < 0) {
+ pr_perror("Can't bind dmabuf socket %s", saddr.sun_path + 1);
+ close(sock);
+ goto out;
+ }
+
+ if (install_service_fd(DMABUF_FD_OFF, sock) < 0)
+ goto out;
+
+
ret = 0;
out:
return ret;
diff --git a/criu/include/criu-plugin.h b/criu/include/criu-plugin.h
index 392ea9f53..7a6adb928 100644
--- a/criu/include/criu-plugin.h
+++ b/criu/include/criu-plugin.h
@@ -60,6 +60,10 @@ enum {
CR_PLUGIN_HOOK__CHECKPOINT_DEVICES = 11,
+ CR_PLUGIN_HOOK__DUMP_DEVICE_LATE = 12,
+
+ CR_PLUGIN_HOOK__DMABUF_FD = 13,
+
CR_PLUGIN_HOOK__MAX
};
@@ -68,7 +72,7 @@ enum {
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_UNIX_SK, int fd, int id);
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_UNIX_SK, int id);
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_EXT_FILE, int fd, int id);
-DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_EXT_FILE, int id);
+DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_EXT_FILE, int id, bool *retry_needed);
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_EXT_MOUNT, char *mountpoint, int id);
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_EXT_MOUNT, int id, char *mountpoint, char *old_root, int *is_file);
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_EXT_LINK, int index, int type, char *kind);
@@ -78,6 +82,8 @@ DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__UPDATE_VMA_MAP, const char *path, const
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESUME_DEVICES_LATE, int pid);
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__PAUSE_DEVICES, int pid);
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__CHECKPOINT_DEVICES, int pid);
+DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_DEVICE_LATE, int id);
+DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DMABUF_FD, int handle, int fd);
enum {
CR_PLUGIN_STAGE__DUMP,
diff --git a/criu/include/files.h b/criu/include/files.h
index 31ebb0ca0..c3d8439df 100644
--- a/criu/include/files.h
+++ b/criu/include/files.h
@@ -195,5 +195,7 @@ extern int open_transport_socket(void);
extern int set_fds_event(pid_t virt);
extern void wait_fds_event(void);
+int serve_out_dmabuf_fd(int handle, int fd);
+
int find_unused_fd_pid(pid_t pid);
#endif /* __CR_FILES_H__ */
diff --git a/criu/include/servicefd.h b/criu/include/servicefd.h
index 4265d94ed..780767013 100644
--- a/criu/include/servicefd.h
+++ b/criu/include/servicefd.h
@@ -28,6 +28,7 @@ enum sfd_type {
USERNSD_SK, /* Socket for usernsd */
NS_FD_OFF, /* Node's net namespace fd */
TRANSPORT_FD_OFF, /* to transfer file descriptors */
+ DMABUF_FD_OFF,
RPC_SK_OFF,
FDSTORE_SK_OFF,
@@ -47,5 +48,6 @@ extern int install_service_fd(enum sfd_type type, int fd);
extern int close_service_fd(enum sfd_type type);
extern void __close_service_fd(enum sfd_type type);
extern int clone_service_fd(struct pstree_item *me);
+extern int get_unused_high_fd(void);
#endif /* __CR_SERVICE_FD_H__ */
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
index 6d048c3f1..3a56ed210 100644
--- a/criu/pie/restorer.c
+++ b/criu/pie/restorer.c
@@ -1920,6 +1920,10 @@ __visible long __export_restore_task(struct task_restore_args *args)
for (m = 0; m < sizeof(vma_entry->madv) * 8; m++) {
if (vma_entry->madv & (1ul << m)) {
+
+ if (!(vma_entry_is(vma_entry, VMA_AREA_REGULAR)))
+ continue;
+
ret = sys_madvise(vma_entry->start, vma_entry_len(vma_entry), m);
if (ret) {
pr_err("madvise(%" PRIx64 ", %" PRIu64 ", %ld) "
diff --git a/criu/plugin.c b/criu/plugin.c
index 65e79a069..965be2c11 100644
--- a/criu/plugin.c
+++ b/criu/plugin.c
@@ -59,6 +59,7 @@ static cr_plugin_desc_t *cr_gen_plugin_desc(void *h, char *path)
__assign_hook(RESUME_DEVICES_LATE, "cr_plugin_resume_devices_late");
__assign_hook(PAUSE_DEVICES, "cr_plugin_pause_devices");
__assign_hook(CHECKPOINT_DEVICES, "cr_plugin_checkpoint_devices");
+ __assign_hook(DUMP_DEVICE_LATE, "cr_plugin_dump_device_late");
#undef __assign_hook
diff --git a/criu/servicefd.c b/criu/servicefd.c
index 06a8d3eba..9bd7c76a1 100644
--- a/criu/servicefd.c
+++ b/criu/servicefd.c
@@ -25,6 +25,7 @@ int service_fd_rlim_cur;
/* Base of current process service fds set */
static int service_fd_base;
+static int next_high_fd;
/* Id of current process in shared fdt */
static int service_fd_id = 0;
@@ -53,6 +54,7 @@ const char *sfd_type_name(enum sfd_type type)
[USERNSD_SK] = __stringify_1(USERNSD_SK),
[NS_FD_OFF] = __stringify_1(NS_FD_OFF),
[TRANSPORT_FD_OFF] = __stringify_1(TRANSPORT_FD_OFF),
+ [DMABUF_FD_OFF] = __stringify_1(DMABUF_FD_OFF),
[RPC_SK_OFF] = __stringify_1(RPC_SK_OFF),
[FDSTORE_SK_OFF] = __stringify_1(FDSTORE_SK_OFF),
[SERVICE_FD_MAX] = __stringify_1(SERVICE_FD_MAX),
@@ -312,5 +314,15 @@ int clone_service_fd(struct pstree_item *me)
service_fd_id = id;
ret = 0;
+ next_high_fd = service_fd_base + 1024;
+
return ret;
}
+
+int get_unused_high_fd(void)
+{
+ if (next_high_fd > service_fd_rlim_cur)
+ return -1;
+ next_high_fd += 1;
+ return next_high_fd - 1;
+}
diff --git a/plugins/amdgpu/amdgpu_drm.h b/plugins/amdgpu/amdgpu_drm.h
new file mode 100644
index 000000000..c6766fe5c
--- /dev/null
+++ b/plugins/amdgpu/amdgpu_drm.h
@@ -0,0 +1,1638 @@
+/* amdgpu_drm.h -- Public header for the amdgpu driver -*- linux-c -*-
+ *
+ * Copyright 2000 Precision Insight, Inc., Cedar Park, Texas.
+ * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
+ * Copyright 2002 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Kevin E. Martin <martin at valinux.com>
+ * Gareth Hughes <gareth at valinux.com>
+ * Keith Whitwell <keith at tungstengraphics.com>
+ */
+
+#ifndef __AMDGPU_DRM_H__
+#define __AMDGPU_DRM_H__
+
+#include "drm.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define DRM_AMDGPU_GEM_CREATE 0x00
+#define DRM_AMDGPU_GEM_MMAP 0x01
+#define DRM_AMDGPU_CTX 0x02
+#define DRM_AMDGPU_BO_LIST 0x03
+#define DRM_AMDGPU_CS 0x04
+#define DRM_AMDGPU_INFO 0x05
+#define DRM_AMDGPU_GEM_METADATA 0x06
+#define DRM_AMDGPU_GEM_WAIT_IDLE 0x07
+#define DRM_AMDGPU_GEM_VA 0x08
+#define DRM_AMDGPU_WAIT_CS 0x09
+#define DRM_AMDGPU_GEM_OP 0x10
+#define DRM_AMDGPU_GEM_USERPTR 0x11
+#define DRM_AMDGPU_WAIT_FENCES 0x12
+#define DRM_AMDGPU_VM 0x13
+#define DRM_AMDGPU_FENCE_TO_HANDLE 0x14
+#define DRM_AMDGPU_SCHED 0x15
+#define DRM_AMDGPU_USERQ 0x16
+#define DRM_AMDGPU_USERQ_SIGNAL 0x17
+#define DRM_AMDGPU_USERQ_WAIT 0x18
+#define DRM_AMDGPU_CRIU_OP 0x19
+
+#define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
+#define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
+#define DRM_IOCTL_AMDGPU_CTX DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CTX, union drm_amdgpu_ctx)
+#define DRM_IOCTL_AMDGPU_BO_LIST DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_BO_LIST, union drm_amdgpu_bo_list)
+#define DRM_IOCTL_AMDGPU_CS DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CS, union drm_amdgpu_cs)
+#define DRM_IOCTL_AMDGPU_INFO DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_INFO, struct drm_amdgpu_info)
+#define DRM_IOCTL_AMDGPU_GEM_METADATA DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_METADATA, struct drm_amdgpu_gem_metadata)
+#define DRM_IOCTL_AMDGPU_GEM_WAIT_IDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_WAIT_IDLE, union drm_amdgpu_gem_wait_idle)
+#define DRM_IOCTL_AMDGPU_GEM_VA DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_VA, struct drm_amdgpu_gem_va)
+#define DRM_IOCTL_AMDGPU_WAIT_CS DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_CS, union drm_amdgpu_wait_cs)
+#define DRM_IOCTL_AMDGPU_GEM_OP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op)
+#define DRM_IOCTL_AMDGPU_GEM_USERPTR DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr)
+#define DRM_IOCTL_AMDGPU_WAIT_FENCES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences)
+#define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm)
+#define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
+#define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
+#define DRM_IOCTL_AMDGPU_USERQ DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
+#define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
+#define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
+#define DRM_IOCTL_AMDGPU_CRIU_OP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CRIU_OP, struct drm_amdgpu_criu_args)
+
+/**
+ * DOC: memory domains
+ *
+ * %AMDGPU_GEM_DOMAIN_CPU System memory that is not GPU accessible.
+ * Memory in this pool could be swapped out to disk if there is pressure.
+ *
+ * %AMDGPU_GEM_DOMAIN_GTT GPU accessible system memory, mapped into the
+ * GPU's virtual address space via gart. Gart memory linearizes non-contiguous
+ * pages of system memory, allows GPU access system memory in a linearized
+ * fashion.
+ *
+ * %AMDGPU_GEM_DOMAIN_VRAM Local video memory. For APUs, it is memory
+ * carved out by the BIOS.
+ *
+ * %AMDGPU_GEM_DOMAIN_GDS Global on-chip data storage used to share data
+ * across shader threads.
+ *
+ * %AMDGPU_GEM_DOMAIN_GWS Global wave sync, used to synchronize the
+ * execution of all the waves on a device.
+ *
+ * %AMDGPU_GEM_DOMAIN_OA Ordered append, used by 3D or Compute engines
+ * for appending data.
+ *
+ * %AMDGPU_GEM_DOMAIN_DOORBELL Doorbell. It is an MMIO region for
+ * signalling user mode queues.
+ */
+#define AMDGPU_GEM_DOMAIN_CPU 0x1
+#define AMDGPU_GEM_DOMAIN_GTT 0x2
+#define AMDGPU_GEM_DOMAIN_VRAM 0x4
+#define AMDGPU_GEM_DOMAIN_GDS 0x8
+#define AMDGPU_GEM_DOMAIN_GWS 0x10
+#define AMDGPU_GEM_DOMAIN_OA 0x20
+#define AMDGPU_GEM_DOMAIN_DOORBELL 0x40
+#define AMDGPU_GEM_DOMAIN_MASK (AMDGPU_GEM_DOMAIN_CPU | \
+ AMDGPU_GEM_DOMAIN_GTT | \
+ AMDGPU_GEM_DOMAIN_VRAM | \
+ AMDGPU_GEM_DOMAIN_GDS | \
+ AMDGPU_GEM_DOMAIN_GWS | \
+ AMDGPU_GEM_DOMAIN_OA | \
+ AMDGPU_GEM_DOMAIN_DOORBELL)
+
+/* Flag that CPU access will be required for the case of VRAM domain */
+#define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED (1 << 0)
+/* Flag that CPU access will not work, this VRAM domain is invisible */
+#define AMDGPU_GEM_CREATE_NO_CPU_ACCESS (1 << 1)
+/* Flag that USWC attributes should be used for GTT */
+#define AMDGPU_GEM_CREATE_CPU_GTT_USWC (1 << 2)
+/* Flag that the memory should be in VRAM and cleared */
+#define AMDGPU_GEM_CREATE_VRAM_CLEARED (1 << 3)
+/* Flag that allocating the BO should use linear VRAM */
+#define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS (1 << 5)
+/* Flag that BO is always valid in this VM */
+#define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID (1 << 6)
+/* Flag that BO sharing will be explicitly synchronized */
+#define AMDGPU_GEM_CREATE_EXPLICIT_SYNC (1 << 7)
+/* Flag that indicates allocating MQD gart on GFX9, where the mtype
+ * for the second page onward should be set to NC. It should never
+ * be used by user space applications.
+ */
+#define AMDGPU_GEM_CREATE_CP_MQD_GFX9 (1 << 8)
+/* Flag that BO may contain sensitive data that must be wiped before
+ * releasing the memory
+ */
+#define AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE (1 << 9)
+/* Flag that BO will be encrypted and that the TMZ bit should be
+ * set in the PTEs when mapping this buffer via GPUVM or
+ * accessing it with various hw blocks
+ */
+#define AMDGPU_GEM_CREATE_ENCRYPTED (1 << 10)
+/* Flag that BO will be used only in preemptible context, which does
+ * not require GTT memory accounting
+ */
+#define AMDGPU_GEM_CREATE_PREEMPTIBLE (1 << 11)
+/* Flag that BO can be discarded under memory pressure without keeping the
+ * content.
+ */
+#define AMDGPU_GEM_CREATE_DISCARDABLE (1 << 12)
+/* Flag that BO is shared coherently between multiple devices or CPU threads.
+ * May depend on GPU instructions to flush caches to system scope explicitly.
+ *
+ * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and
+ * may override the MTYPE selected in AMDGPU_VA_OP_MAP.
+ */
+#define AMDGPU_GEM_CREATE_COHERENT (1 << 13)
+/* Flag that BO should not be cached by GPU. Coherent without having to flush
+ * GPU caches explicitly
+ *
+ * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and
+ * may override the MTYPE selected in AMDGPU_VA_OP_MAP.
+ */
+#define AMDGPU_GEM_CREATE_UNCACHED (1 << 14)
+/* Flag that BO should be coherent across devices when using device-level
+ * atomics. May depend on GPU instructions to flush caches to device scope
+ * explicitly, promoting them to system scope automatically.
+ *
+ * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and
+ * may override the MTYPE selected in AMDGPU_VA_OP_MAP.
+ */
+#define AMDGPU_GEM_CREATE_EXT_COHERENT (1 << 15)
+/* Set PTE.D and recompress during GTT->VRAM moves according to TILING flags. */
+#define AMDGPU_GEM_CREATE_GFX12_DCC (1 << 16)
+
+struct drm_amdgpu_gem_create_in {
+ /** the requested memory size */
+ __u64 bo_size;
+ /** physical start_addr alignment in bytes for some HW requirements */
+ __u64 alignment;
+ /** the requested memory domains */
+ __u64 domains;
+ /** allocation flags */
+ __u64 domain_flags;
+};
+
+struct drm_amdgpu_gem_create_out {
+ /** returned GEM object handle */
+ __u32 handle;
+ __u32 _pad;
+};
+
+union drm_amdgpu_gem_create {
+ struct drm_amdgpu_gem_create_in in;
+ struct drm_amdgpu_gem_create_out out;
+};
+
+/** Opcode to create new residency list. */
+#define AMDGPU_BO_LIST_OP_CREATE 0
+/** Opcode to destroy previously created residency list */
+#define AMDGPU_BO_LIST_OP_DESTROY 1
+/** Opcode to update resource information in the list */
+#define AMDGPU_BO_LIST_OP_UPDATE 2
+
+struct drm_amdgpu_bo_list_in {
+ /** Type of operation */
+ __u32 operation;
+ /** Handle of list or 0 if we want to create one */
+ __u32 list_handle;
+ /** Number of BOs in list */
+ __u32 bo_number;
+ /** Size of each element describing BO */
+ __u32 bo_info_size;
+ /** Pointer to array describing BOs */
+ __u64 bo_info_ptr;
+};
+
+struct drm_amdgpu_bo_list_entry {
+ /** Handle of BO */
+ __u32 bo_handle;
+ /** New (if specified) BO priority to be used during migration */
+ __u32 bo_priority;
+};
+
+struct drm_amdgpu_bo_list_out {
+ /** Handle of resource list */
+ __u32 list_handle;
+ __u32 _pad;
+};
+
+union drm_amdgpu_bo_list {
+ struct drm_amdgpu_bo_list_in in;
+ struct drm_amdgpu_bo_list_out out;
+};
+
+/* context related */
+#define AMDGPU_CTX_OP_ALLOC_CTX 1
+#define AMDGPU_CTX_OP_FREE_CTX 2
+#define AMDGPU_CTX_OP_QUERY_STATE 3
+#define AMDGPU_CTX_OP_QUERY_STATE2 4
+#define AMDGPU_CTX_OP_GET_STABLE_PSTATE 5
+#define AMDGPU_CTX_OP_SET_STABLE_PSTATE 6
+
+/* GPU reset status */
+#define AMDGPU_CTX_NO_RESET 0
+/* this the context caused it */
+#define AMDGPU_CTX_GUILTY_RESET 1
+/* some other context caused it */
+#define AMDGPU_CTX_INNOCENT_RESET 2
+/* unknown cause */
+#define AMDGPU_CTX_UNKNOWN_RESET 3
+
+/* indicate gpu reset occurred after ctx created */
+#define AMDGPU_CTX_QUERY2_FLAGS_RESET (1<<0)
+/* indicate vram lost occurred after ctx created */
+#define AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST (1<<1)
+/* indicate some job from this context once cause gpu hang */
+#define AMDGPU_CTX_QUERY2_FLAGS_GUILTY (1<<2)
+/* indicate some errors are detected by RAS */
+#define AMDGPU_CTX_QUERY2_FLAGS_RAS_CE (1<<3)
+#define AMDGPU_CTX_QUERY2_FLAGS_RAS_UE (1<<4)
+/* indicate that the reset hasn't completed yet */
+#define AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS (1<<5)
+
+/* Context priority level */
+#define AMDGPU_CTX_PRIORITY_UNSET -2048
+#define AMDGPU_CTX_PRIORITY_VERY_LOW -1023
+#define AMDGPU_CTX_PRIORITY_LOW -512
+#define AMDGPU_CTX_PRIORITY_NORMAL 0
+/*
+ * When used in struct drm_amdgpu_ctx_in, a priority above NORMAL requires
+ * CAP_SYS_NICE or DRM_MASTER
+*/
+#define AMDGPU_CTX_PRIORITY_HIGH 512
+#define AMDGPU_CTX_PRIORITY_VERY_HIGH 1023
+
+/* select a stable profiling pstate for perfmon tools */
+#define AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK 0xf
+#define AMDGPU_CTX_STABLE_PSTATE_NONE 0
+#define AMDGPU_CTX_STABLE_PSTATE_STANDARD 1
+#define AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK 2
+#define AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK 3
+#define AMDGPU_CTX_STABLE_PSTATE_PEAK 4
+
+struct drm_amdgpu_ctx_in {
+ /** AMDGPU_CTX_OP_* */
+ __u32 op;
+ /** Flags */
+ __u32 flags;
+ __u32 ctx_id;
+ /** AMDGPU_CTX_PRIORITY_* */
+ __s32 priority;
+};
+
+union drm_amdgpu_ctx_out {
+ struct {
+ __u32 ctx_id;
+ __u32 _pad;
+ } alloc;
+
+ struct {
+ /** For future use, no flags defined so far */
+ __u64 flags;
+ /** Number of resets caused by this context so far. */
+ __u32 hangs;
+ /** Reset status since the last call of the ioctl. */
+ __u32 reset_status;
+ } state;
+
+ struct {
+ __u32 flags;
+ __u32 _pad;
+ } pstate;
+};
+
+union drm_amdgpu_ctx {
+ struct drm_amdgpu_ctx_in in;
+ union drm_amdgpu_ctx_out out;
+};
+
+/* user queue IOCTL operations */
+#define AMDGPU_USERQ_OP_CREATE 1
+#define AMDGPU_USERQ_OP_FREE 2
+
+/*
+ * This structure is a container to pass input configuration
+ * info for all supported userqueue related operations.
+ * For operation AMDGPU_USERQ_OP_CREATE: user is expected
+ * to set all fields, except the parameter 'queue_id'.
+ * For operation AMDGPU_USERQ_OP_FREE: the only input parameter expected
+ * to be set is 'queue_id', everything else is ignored.
+ */
+struct drm_amdgpu_userq_in {
+ /** AMDGPU_USERQ_OP_* */
+ __u32 op;
+ /** Queue id passed for operation USERQ_OP_FREE */
+ __u32 queue_id;
+ /** the target GPU engine to execute workload (AMDGPU_HW_IP_*) */
+ __u32 ip_type;
+ /**
+ * @doorbell_handle: the handle of doorbell GEM object
+ * associated with this userqueue client.
+ */
+ __u32 doorbell_handle;
+ /**
+ * @doorbell_offset: 32-bit offset of the doorbell in the doorbell bo.
+ * Kernel will generate absolute doorbell offset using doorbell_handle
+ * and doorbell_offset in the doorbell bo.
+ */
+ __u32 doorbell_offset;
+ __u32 _pad;
+ /**
+ * @queue_va: Virtual address of the GPU memory which holds the queue
+ * object. The queue holds the workload packets.
+ */
+ __u64 queue_va;
+ /**
+ * @queue_size: Size of the queue in bytes, this needs to be 256-byte
+ * aligned.
+ */
+ __u64 queue_size;
+ /**
+ * @rptr_va : Virtual address of the GPU memory which holds the ring RPTR.
+ * This object must be at least 8 byte in size and aligned to 8-byte offset.
+ */
+ __u64 rptr_va;
+ /**
+ * @wptr_va : Virtual address of the GPU memory which holds the ring WPTR.
+ * This object must be at least 8 byte in size and aligned to 8-byte offset.
+ *
+ * Queue, RPTR and WPTR can come from the same object, as long as the size
+ * and alignment related requirements are met.
+ */
+ __u64 wptr_va;
+ /**
+ * @mqd: MQD (memory queue descriptor) is a set of parameters which allow
+ * the GPU to uniquely define and identify a usermode queue.
+ *
+ * MQD data can be of different size for different GPU IP/engine and
+ * their respective versions/revisions, so this points to a __u64 *
+ * which holds IP specific MQD of this usermode queue.
+ */
+ __u64 mqd;
+ /**
+ * @size: size of MQD data in bytes, it must match the MQD structure
+ * size of the respective engine/revision defined in UAPI for ex, for
+ * gfx11 workloads, size = sizeof(drm_amdgpu_userq_mqd_gfx11).
+ */
+ __u64 mqd_size;
+};
+
+/* The structure to carry output of userqueue ops */
+struct drm_amdgpu_userq_out {
+ /**
+ * For operation AMDGPU_USERQ_OP_CREATE: This field contains a unique
+ * queue ID to represent the newly created userqueue in the system, otherwise
+ * it should be ignored.
+ */
+ __u32 queue_id;
+ __u32 _pad;
+};
+
+union drm_amdgpu_userq {
+ struct drm_amdgpu_userq_in in;
+ struct drm_amdgpu_userq_out out;
+};
+
+/* GFX V11 IP specific MQD parameters */
+struct drm_amdgpu_userq_mqd_gfx11 {
+ /**
+ * @shadow_va: Virtual address of the GPU memory to hold the shadow buffer.
+ * Use AMDGPU_INFO_IOCTL to find the exact size of the object.
+ */
+ __u64 shadow_va;
+ /**
+ * @csa_va: Virtual address of the GPU memory to hold the CSA buffer.
+ * Use AMDGPU_INFO_IOCTL to find the exact size of the object.
+ */
+ __u64 csa_va;
+};
+
+/* GFX V11 SDMA IP specific MQD parameters */
+struct drm_amdgpu_userq_mqd_sdma_gfx11 {
+ /**
+ * @csa_va: Virtual address of the GPU memory to hold the CSA buffer.
+ * This must be a from a separate GPU object, and use AMDGPU_INFO IOCTL
+ * to get the size.
+ */
+ __u64 csa_va;
+};
+
+/* GFX V11 Compute IP specific MQD parameters */
+struct drm_amdgpu_userq_mqd_compute_gfx11 {
+ /**
+ * @eop_va: Virtual address of the GPU memory to hold the EOP buffer.
+ * This must be a from a separate GPU object, and use AMDGPU_INFO IOCTL
+ * to get the size.
+ */
+ __u64 eop_va;
+};
+
+/* userq signal/wait ioctl */
+struct drm_amdgpu_userq_signal {
+ /**
+ * @queue_id: Queue handle used by the userq fence creation function
+ * to retrieve the WPTR.
+ */
+ __u32 queue_id;
+ __u32 pad;
+ /**
+ * @syncobj_handles: The list of syncobj handles submitted by the user queue
+ * job to be signaled.
+ */
+ __u64 syncobj_handles;
+ /**
+ * @num_syncobj_handles: A count that represents the number of syncobj handles in
+ * @syncobj_handles.
+ */
+ __u64 num_syncobj_handles;
+ /**
+ * @bo_read_handles: The list of BO handles that the submitted user queue job
+ * is using for read only. This will update BO fences in the kernel.
+ */
+ __u64 bo_read_handles;
+ /**
+ * @bo_write_handles: The list of BO handles that the submitted user queue job
+ * is using for write only. This will update BO fences in the kernel.
+ */
+ __u64 bo_write_handles;
+ /**
+ * @num_bo_read_handles: A count that represents the number of read BO handles in
+ * @bo_read_handles.
+ */
+ __u32 num_bo_read_handles;
+ /**
+ * @num_bo_write_handles: A count that represents the number of write BO handles in
+ * @bo_write_handles.
+ */
+ __u32 num_bo_write_handles;
+};
+
+struct drm_amdgpu_userq_fence_info {
+ /**
+ * @va: A gpu address allocated for each queue which stores the
+ * read pointer (RPTR) value.
+ */
+ __u64 va;
+ /**
+ * @value: A 64 bit value represents the write pointer (WPTR) of the
+ * queue commands which compared with the RPTR value to signal the
+ * fences.
+ */
+ __u64 value;
+};
+
+struct drm_amdgpu_userq_wait {
+ /**
+ * @syncobj_handles: The list of syncobj handles submitted by the user queue
+ * job to get the va/value pairs.
+ */
+ __u64 syncobj_handles;
+ /**
+ * @syncobj_timeline_handles: The list of timeline syncobj handles submitted by
+ * the user queue job to get the va/value pairs at given @syncobj_timeline_points.
+ */
+ __u64 syncobj_timeline_handles;
+ /**
+ * @syncobj_timeline_points: The list of timeline syncobj points submitted by the
+ * user queue job for the corresponding @syncobj_timeline_handles.
+ */
+ __u64 syncobj_timeline_points;
+ /**
+ * @bo_read_handles: The list of read BO handles submitted by the user queue
+ * job to get the va/value pairs.
+ */
+ __u64 bo_read_handles;
+ /**
+ * @bo_write_handles: The list of write BO handles submitted by the user queue
+ * job to get the va/value pairs.
+ */
+ __u64 bo_write_handles;
+ /**
+ * @num_syncobj_timeline_handles: A count that represents the number of timeline
+ * syncobj handles in @syncobj_timeline_handles.
+ */
+ __u16 num_syncobj_timeline_handles;
+ /**
+ * @num_fences: This field can be used both as input and output. As input it defines
+ * the maximum number of fences that can be returned and as output it will specify
+ * how many fences were actually returned from the ioctl.
+ */
+ __u16 num_fences;
+ /**
+ * @num_syncobj_handles: A count that represents the number of syncobj handles in
+ * @syncobj_handles.
+ */
+ __u32 num_syncobj_handles;
+ /**
+ * @num_bo_read_handles: A count that represents the number of read BO handles in
+ * @bo_read_handles.
+ */
+ __u32 num_bo_read_handles;
+ /**
+ * @num_bo_write_handles: A count that represents the number of write BO handles in
+ * @bo_write_handles.
+ */
+ __u32 num_bo_write_handles;
+ /**
+ * @out_fences: The field is a return value from the ioctl containing the list of
+ * address/value pairs to wait for.
+ */
+ __u64 out_fences;
+};
+
+/* vm ioctl */
+#define AMDGPU_VM_OP_RESERVE_VMID 1
+#define AMDGPU_VM_OP_UNRESERVE_VMID 2
+
+struct drm_amdgpu_vm_in {
+ /** AMDGPU_VM_OP_* */
+ __u32 op;
+ __u32 flags;
+};
+
+struct drm_amdgpu_vm_out {
+ /** For future use, no flags defined so far */
+ __u64 flags;
+};
+
+union drm_amdgpu_vm {
+ struct drm_amdgpu_vm_in in;
+ struct drm_amdgpu_vm_out out;
+};
+
+/* sched ioctl */
+#define AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE 1
+#define AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE 2
+
+struct drm_amdgpu_sched_in {
+ /* AMDGPU_SCHED_OP_* */
+ __u32 op;
+ __u32 fd;
+ /** AMDGPU_CTX_PRIORITY_* */
+ __s32 priority;
+ __u32 ctx_id;
+};
+
+union drm_amdgpu_sched {
+ struct drm_amdgpu_sched_in in;
+};
+
+/*
+ * This is not a reliable API and you should expect it to fail for any
+ * number of reasons and have fallback path that do not use userptr to
+ * perform any operation.
+ */
+#define AMDGPU_GEM_USERPTR_READONLY (1 << 0)
+#define AMDGPU_GEM_USERPTR_ANONONLY (1 << 1)
+#define AMDGPU_GEM_USERPTR_VALIDATE (1 << 2)
+#define AMDGPU_GEM_USERPTR_REGISTER (1 << 3)
+
+struct drm_amdgpu_gem_userptr {
+ __u64 addr;
+ __u64 size;
+ /* AMDGPU_GEM_USERPTR_* */
+ __u32 flags;
+ /* Resulting GEM handle */
+ __u32 handle;
+};
+
+/* SI-CI-VI: */
+/* same meaning as the GB_TILE_MODE and GL_MACRO_TILE_MODE fields */
+#define AMDGPU_TILING_ARRAY_MODE_SHIFT 0
+#define AMDGPU_TILING_ARRAY_MODE_MASK 0xf
+#define AMDGPU_TILING_PIPE_CONFIG_SHIFT 4
+#define AMDGPU_TILING_PIPE_CONFIG_MASK 0x1f
+#define AMDGPU_TILING_TILE_SPLIT_SHIFT 9
+#define AMDGPU_TILING_TILE_SPLIT_MASK 0x7
+#define AMDGPU_TILING_MICRO_TILE_MODE_SHIFT 12
+#define AMDGPU_TILING_MICRO_TILE_MODE_MASK 0x7
+#define AMDGPU_TILING_BANK_WIDTH_SHIFT 15
+#define AMDGPU_TILING_BANK_WIDTH_MASK 0x3
+#define AMDGPU_TILING_BANK_HEIGHT_SHIFT 17
+#define AMDGPU_TILING_BANK_HEIGHT_MASK 0x3
+#define AMDGPU_TILING_MACRO_TILE_ASPECT_SHIFT 19
+#define AMDGPU_TILING_MACRO_TILE_ASPECT_MASK 0x3
+#define AMDGPU_TILING_NUM_BANKS_SHIFT 21
+#define AMDGPU_TILING_NUM_BANKS_MASK 0x3
+
+/* GFX9 - GFX11: */
+#define AMDGPU_TILING_SWIZZLE_MODE_SHIFT 0
+#define AMDGPU_TILING_SWIZZLE_MODE_MASK 0x1f
+#define AMDGPU_TILING_DCC_OFFSET_256B_SHIFT 5
+#define AMDGPU_TILING_DCC_OFFSET_256B_MASK 0xFFFFFF
+#define AMDGPU_TILING_DCC_PITCH_MAX_SHIFT 29
+#define AMDGPU_TILING_DCC_PITCH_MAX_MASK 0x3FFF
+#define AMDGPU_TILING_DCC_INDEPENDENT_64B_SHIFT 43
+#define AMDGPU_TILING_DCC_INDEPENDENT_64B_MASK 0x1
+#define AMDGPU_TILING_DCC_INDEPENDENT_128B_SHIFT 44
+#define AMDGPU_TILING_DCC_INDEPENDENT_128B_MASK 0x1
+#define AMDGPU_TILING_SCANOUT_SHIFT 63
+#define AMDGPU_TILING_SCANOUT_MASK 0x1
+
+/* GFX12 and later: */
+#define AMDGPU_TILING_GFX12_SWIZZLE_MODE_SHIFT 0
+#define AMDGPU_TILING_GFX12_SWIZZLE_MODE_MASK 0x7
+/* These are DCC recompression setting for memory management: */
+#define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_SHIFT 3
+#define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_MASK 0x3 /* 0:64B, 1:128B, 2:256B */
+#define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_SHIFT 5
+#define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_MASK 0x7 /* CB_COLOR0_INFO.NUMBER_TYPE */
+#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_SHIFT 8
+#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_MASK 0x3f /* [0:4]:CB_COLOR0_INFO.FORMAT, [5]:MM */
+
+/* Set/Get helpers for tiling flags. */
+#define AMDGPU_TILING_SET(field, value) \
+ (((__u64)(value) & AMDGPU_TILING_##field##_MASK) << AMDGPU_TILING_##field##_SHIFT)
+#define AMDGPU_TILING_GET(value, field) \
+ (((__u64)(value) >> AMDGPU_TILING_##field##_SHIFT) & AMDGPU_TILING_##field##_MASK)
+
+#define AMDGPU_GEM_METADATA_OP_SET_METADATA 1
+#define AMDGPU_GEM_METADATA_OP_GET_METADATA 2
+
+/** The same structure is shared for input/output */
+struct drm_amdgpu_gem_metadata {
+ /** GEM Object handle */
+ __u32 handle;
+ /** Do we want get or set metadata */
+ __u32 op;
+ struct {
+ /** For future use, no flags defined so far */
+ __u64 flags;
+ /** family specific tiling info */
+ __u64 tiling_info;
+ __u32 data_size_bytes;
+ __u32 data[64];
+ } data;
+};
+
+struct drm_amdgpu_gem_mmap_in {
+ /** the GEM object handle */
+ __u32 handle;
+ __u32 _pad;
+};
+
+struct drm_amdgpu_gem_mmap_out {
+ /** mmap offset from the vma offset manager */
+ __u64 addr_ptr;
+};
+
+union drm_amdgpu_gem_mmap {
+ struct drm_amdgpu_gem_mmap_in in;
+ struct drm_amdgpu_gem_mmap_out out;
+};
+
+struct drm_amdgpu_gem_wait_idle_in {
+ /** GEM object handle */
+ __u32 handle;
+ /** For future use, no flags defined so far */
+ __u32 flags;
+ /** Absolute timeout to wait */
+ __u64 timeout;
+};
+
+struct drm_amdgpu_gem_wait_idle_out {
+ /** BO status: 0 - BO is idle, 1 - BO is busy */
+ __u32 status;
+ /** Returned current memory domain */
+ __u32 domain;
+};
+
+union drm_amdgpu_gem_wait_idle {
+ struct drm_amdgpu_gem_wait_idle_in in;
+ struct drm_amdgpu_gem_wait_idle_out out;
+};
+
+struct drm_amdgpu_wait_cs_in {
+ /* Command submission handle
+ * handle equals 0 means none to wait for
+ * handle equals ~0ull means wait for the latest sequence number
+ */
+ __u64 handle;
+ /** Absolute timeout to wait */
+ __u64 timeout;
+ __u32 ip_type;
+ __u32 ip_instance;
+ __u32 ring;
+ __u32 ctx_id;
+};
+
+struct drm_amdgpu_wait_cs_out {
+ /** CS status: 0 - CS completed, 1 - CS still busy */
+ __u64 status;
+};
+
+union drm_amdgpu_wait_cs {
+ struct drm_amdgpu_wait_cs_in in;
+ struct drm_amdgpu_wait_cs_out out;
+};
+
+struct drm_amdgpu_fence {
+ __u32 ctx_id;
+ __u32 ip_type;
+ __u32 ip_instance;
+ __u32 ring;
+ __u64 seq_no;
+};
+
+struct drm_amdgpu_wait_fences_in {
+ /** This points to uint64_t * which points to fences */
+ __u64 fences;
+ __u32 fence_count;
+ __u32 wait_all;
+ __u64 timeout_ns;
+};
+
+struct drm_amdgpu_wait_fences_out {
+ __u32 status;
+ __u32 first_signaled;
+};
+
+union drm_amdgpu_wait_fences {
+ struct drm_amdgpu_wait_fences_in in;
+ struct drm_amdgpu_wait_fences_out out;
+};
+
+#define AMDGPU_GEM_OP_GET_GEM_CREATE_INFO 0
+#define AMDGPU_GEM_OP_SET_PLACEMENT 1
+
+/* Sets or returns a value associated with a buffer. */
+struct drm_amdgpu_gem_op {
+ /** GEM object handle */
+ __u32 handle;
+ /** AMDGPU_GEM_OP_* */
+ __u32 op;
+ /** Input or return value */
+ __u64 value;
+};
+
+#define AMDGPU_VA_OP_MAP 1
+#define AMDGPU_VA_OP_UNMAP 2
+#define AMDGPU_VA_OP_CLEAR 3
+#define AMDGPU_VA_OP_REPLACE 4
+
+/* Delay the page table update till the next CS */
+#define AMDGPU_VM_DELAY_UPDATE (1 << 0)
+
+/* Mapping flags */
+/* readable mapping */
+#define AMDGPU_VM_PAGE_READABLE (1 << 1)
+/* writable mapping */
+#define AMDGPU_VM_PAGE_WRITEABLE (1 << 2)
+/* executable mapping, new for VI */
+#define AMDGPU_VM_PAGE_EXECUTABLE (1 << 3)
+/* partially resident texture */
+#define AMDGPU_VM_PAGE_PRT (1 << 4)
+/* MTYPE flags use bit 5 to 8 */
+#define AMDGPU_VM_MTYPE_MASK (0xf << 5)
+/* Default MTYPE. Pre-AI must use this. Recommended for newer ASICs. */
+#define AMDGPU_VM_MTYPE_DEFAULT (0 << 5)
+/* Use Non Coherent MTYPE instead of default MTYPE */
+#define AMDGPU_VM_MTYPE_NC (1 << 5)
+/* Use Write Combine MTYPE instead of default MTYPE */
+#define AMDGPU_VM_MTYPE_WC (2 << 5)
+/* Use Cache Coherent MTYPE instead of default MTYPE */
+#define AMDGPU_VM_MTYPE_CC (3 << 5)
+/* Use UnCached MTYPE instead of default MTYPE */
+#define AMDGPU_VM_MTYPE_UC (4 << 5)
+/* Use Read Write MTYPE instead of default MTYPE */
+#define AMDGPU_VM_MTYPE_RW (5 << 5)
+/* don't allocate MALL */
+#define AMDGPU_VM_PAGE_NOALLOC (1 << 9)
+
+struct drm_amdgpu_gem_va {
+ /** GEM object handle */
+ __u32 handle;
+ __u32 _pad;
+ /** AMDGPU_VA_OP_* */
+ __u32 operation;
+ /** AMDGPU_VM_PAGE_* */
+ __u32 flags;
+ /** va address to assign . Must be correctly aligned.*/
+ __u64 va_address;
+ /** Specify offset inside of BO to assign. Must be correctly aligned.*/
+ __u64 offset_in_bo;
+ /** Specify mapping size. Must be correctly aligned. */
+ __u64 map_size;
+ /**
+ * vm_timeline_point is a sequence number used to add new timeline point.
+ */
+ __u64 vm_timeline_point;
+ /**
+ * The vm page table update fence is installed in given vm_timeline_syncobj_out
+ * at vm_timeline_point.
+ */
+ __u32 vm_timeline_syncobj_out;
+ /** the number of syncobj handles in @input_fence_syncobj_handles */
+ __u32 num_syncobj_handles;
+ /** Array of sync object handle to wait for given input fences */
+ __u64 input_fence_syncobj_handles;
+};
+
+#define AMDGPU_HW_IP_GFX 0
+#define AMDGPU_HW_IP_COMPUTE 1
+#define AMDGPU_HW_IP_DMA 2
+#define AMDGPU_HW_IP_UVD 3
+#define AMDGPU_HW_IP_VCE 4
+#define AMDGPU_HW_IP_UVD_ENC 5
+#define AMDGPU_HW_IP_VCN_DEC 6
+/*
+ * From VCN4, AMDGPU_HW_IP_VCN_ENC is re-used to support
+ * both encoding and decoding jobs.
+ */
+#define AMDGPU_HW_IP_VCN_ENC 7
+#define AMDGPU_HW_IP_VCN_JPEG 8
+#define AMDGPU_HW_IP_VPE 9
+#define AMDGPU_HW_IP_NUM 10
+
+#define AMDGPU_HW_IP_INSTANCE_MAX_COUNT 1
+
+#define AMDGPU_CHUNK_ID_IB 0x01
+#define AMDGPU_CHUNK_ID_FENCE 0x02
+#define AMDGPU_CHUNK_ID_DEPENDENCIES 0x03
+#define AMDGPU_CHUNK_ID_SYNCOBJ_IN 0x04
+#define AMDGPU_CHUNK_ID_SYNCOBJ_OUT 0x05
+#define AMDGPU_CHUNK_ID_BO_HANDLES 0x06
+#define AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES 0x07
+#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT 0x08
+#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL 0x09
+#define AMDGPU_CHUNK_ID_CP_GFX_SHADOW 0x0a
+
+struct drm_amdgpu_cs_chunk {
+ __u32 chunk_id;
+ __u32 length_dw;
+ __u64 chunk_data;
+};
+
+struct drm_amdgpu_cs_in {
+ /** Rendering context id */
+ __u32 ctx_id;
+ /** Handle of resource list associated with CS */
+ __u32 bo_list_handle;
+ __u32 num_chunks;
+ __u32 flags;
+ /** this points to __u64 * which point to cs chunks */
+ __u64 chunks;
+};
+
+struct drm_amdgpu_cs_out {
+ __u64 handle;
+};
+
+union drm_amdgpu_cs {
+ struct drm_amdgpu_cs_in in;
+ struct drm_amdgpu_cs_out out;
+};
+
+/* Specify flags to be used for IB */
+
+/* This IB should be submitted to CE */
+#define AMDGPU_IB_FLAG_CE (1<<0)
+
+/* Preamble flag, which means the IB could be dropped if no context switch */
+#define AMDGPU_IB_FLAG_PREAMBLE (1<<1)
+
+/* Preempt flag, IB should set Pre_enb bit if PREEMPT flag detected */
+#define AMDGPU_IB_FLAG_PREEMPT (1<<2)
+
+/* The IB fence should do the L2 writeback but not invalidate any shader
+ * caches (L2/vL1/sL1/I$). */
+#define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3)
+
+/* Set GDS_COMPUTE_MAX_WAVE_ID = DEFAULT before PACKET3_INDIRECT_BUFFER.
+ * This will reset wave ID counters for the IB.
+ */
+#define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4)
+
+/* Flag the IB as secure (TMZ)
+ */
+#define AMDGPU_IB_FLAGS_SECURE (1 << 5)
+
+/* Tell KMD to flush and invalidate caches
+ */
+#define AMDGPU_IB_FLAG_EMIT_MEM_SYNC (1 << 6)
+
+struct drm_amdgpu_cs_chunk_ib {
+ __u32 _pad;
+ /** AMDGPU_IB_FLAG_* */
+ __u32 flags;
+ /** Virtual address to begin IB execution */
+ __u64 va_start;
+ /** Size of submission */
+ __u32 ib_bytes;
+ /** HW IP to submit to */
+ __u32 ip_type;
+ /** HW IP index of the same type to submit to */
+ __u32 ip_instance;
+ /** Ring index to submit to */
+ __u32 ring;
+};
+
+struct drm_amdgpu_cs_chunk_dep {
+ __u32 ip_type;
+ __u32 ip_instance;
+ __u32 ring;
+ __u32 ctx_id;
+ __u64 handle;
+};
+
+struct drm_amdgpu_cs_chunk_fence {
+ __u32 handle;
+ __u32 offset;
+};
+
+struct drm_amdgpu_cs_chunk_sem {
+ __u32 handle;
+};
+
+struct drm_amdgpu_cs_chunk_syncobj {
+ __u32 handle;
+ __u32 flags;
+ __u64 point;
+};
+
+#define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ 0
+#define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD 1
+#define AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD 2
+
+union drm_amdgpu_fence_to_handle {
+ struct {
+ struct drm_amdgpu_fence fence;
+ __u32 what;
+ __u32 pad;
+ } in;
+ struct {
+ __u32 handle;
+ } out;
+};
+
+struct drm_amdgpu_cs_chunk_data {
+ union {
+ struct drm_amdgpu_cs_chunk_ib ib_data;
+ struct drm_amdgpu_cs_chunk_fence fence_data;
+ };
+};
+
+#define AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW 0x1
+
+struct drm_amdgpu_cs_chunk_cp_gfx_shadow {
+ __u64 shadow_va;
+ __u64 csa_va;
+ __u64 gds_va;
+ __u64 flags;
+};
+
+/*
+ * Query h/w info: Flag that this is integrated (a.h.a. fusion) GPU
+ *
+ */
+#define AMDGPU_IDS_FLAGS_FUSION 0x1
+#define AMDGPU_IDS_FLAGS_PREEMPTION 0x2
+#define AMDGPU_IDS_FLAGS_TMZ 0x4
+#define AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD 0x8
+
+/* indicate if acceleration can be working */
+#define AMDGPU_INFO_ACCEL_WORKING 0x00
+/* get the crtc_id from the mode object id? */
+#define AMDGPU_INFO_CRTC_FROM_ID 0x01
+/* query hw IP info */
+#define AMDGPU_INFO_HW_IP_INFO 0x02
+/* query hw IP instance count for the specified type */
+#define AMDGPU_INFO_HW_IP_COUNT 0x03
+/* timestamp for GL_ARB_timer_query */
+#define AMDGPU_INFO_TIMESTAMP 0x05
+/* Query the firmware version */
+#define AMDGPU_INFO_FW_VERSION 0x0e
+ /* Subquery id: Query VCE firmware version */
+ #define AMDGPU_INFO_FW_VCE 0x1
+ /* Subquery id: Query UVD firmware version */
+ #define AMDGPU_INFO_FW_UVD 0x2
+ /* Subquery id: Query GMC firmware version */
+ #define AMDGPU_INFO_FW_GMC 0x03
+ /* Subquery id: Query GFX ME firmware version */
+ #define AMDGPU_INFO_FW_GFX_ME 0x04
+ /* Subquery id: Query GFX PFP firmware version */
+ #define AMDGPU_INFO_FW_GFX_PFP 0x05
+ /* Subquery id: Query GFX CE firmware version */
+ #define AMDGPU_INFO_FW_GFX_CE 0x06
+ /* Subquery id: Query GFX RLC firmware version */
+ #define AMDGPU_INFO_FW_GFX_RLC 0x07
+ /* Subquery id: Query GFX MEC firmware version */
+ #define AMDGPU_INFO_FW_GFX_MEC 0x08
+ /* Subquery id: Query SMC firmware version */
+ #define AMDGPU_INFO_FW_SMC 0x0a
+ /* Subquery id: Query SDMA firmware version */
+ #define AMDGPU_INFO_FW_SDMA 0x0b
+ /* Subquery id: Query PSP SOS firmware version */
+ #define AMDGPU_INFO_FW_SOS 0x0c
+ /* Subquery id: Query PSP ASD firmware version */
+ #define AMDGPU_INFO_FW_ASD 0x0d
+ /* Subquery id: Query VCN firmware version */
+ #define AMDGPU_INFO_FW_VCN 0x0e
+ /* Subquery id: Query GFX RLC SRLC firmware version */
+ #define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL 0x0f
+ /* Subquery id: Query GFX RLC SRLG firmware version */
+ #define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM 0x10
+ /* Subquery id: Query GFX RLC SRLS firmware version */
+ #define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM 0x11
+ /* Subquery id: Query DMCU firmware version */
+ #define AMDGPU_INFO_FW_DMCU 0x12
+ #define AMDGPU_INFO_FW_TA 0x13
+ /* Subquery id: Query DMCUB firmware version */
+ #define AMDGPU_INFO_FW_DMCUB 0x14
+ /* Subquery id: Query TOC firmware version */
+ #define AMDGPU_INFO_FW_TOC 0x15
+ /* Subquery id: Query CAP firmware version */
+ #define AMDGPU_INFO_FW_CAP 0x16
+ /* Subquery id: Query GFX RLCP firmware version */
+ #define AMDGPU_INFO_FW_GFX_RLCP 0x17
+ /* Subquery id: Query GFX RLCV firmware version */
+ #define AMDGPU_INFO_FW_GFX_RLCV 0x18
+ /* Subquery id: Query MES_KIQ firmware version */
+ #define AMDGPU_INFO_FW_MES_KIQ 0x19
+ /* Subquery id: Query MES firmware version */
+ #define AMDGPU_INFO_FW_MES 0x1a
+ /* Subquery id: Query IMU firmware version */
+ #define AMDGPU_INFO_FW_IMU 0x1b
+ /* Subquery id: Query VPE firmware version */
+ #define AMDGPU_INFO_FW_VPE 0x1c
+
+/* number of bytes moved for TTM migration */
+#define AMDGPU_INFO_NUM_BYTES_MOVED 0x0f
+/* the used VRAM size */
+#define AMDGPU_INFO_VRAM_USAGE 0x10
+/* the used GTT size */
+#define AMDGPU_INFO_GTT_USAGE 0x11
+/* Information about GDS, etc. resource configuration */
+#define AMDGPU_INFO_GDS_CONFIG 0x13
+/* Query information about VRAM and GTT domains */
+#define AMDGPU_INFO_VRAM_GTT 0x14
+/* Query information about register in MMR address space*/
+#define AMDGPU_INFO_READ_MMR_REG 0x15
+/* Query information about device: rev id, family, etc. */
+#define AMDGPU_INFO_DEV_INFO 0x16
+/* visible vram usage */
+#define AMDGPU_INFO_VIS_VRAM_USAGE 0x17
+/* number of TTM buffer evictions */
+#define AMDGPU_INFO_NUM_EVICTIONS 0x18
+/* Query memory about VRAM and GTT domains */
+#define AMDGPU_INFO_MEMORY 0x19
+/* Query vce clock table */
+#define AMDGPU_INFO_VCE_CLOCK_TABLE 0x1A
+/* Query vbios related information */
+#define AMDGPU_INFO_VBIOS 0x1B
+ /* Subquery id: Query vbios size */
+ #define AMDGPU_INFO_VBIOS_SIZE 0x1
+ /* Subquery id: Query vbios image */
+ #define AMDGPU_INFO_VBIOS_IMAGE 0x2
+ /* Subquery id: Query vbios info */
+ #define AMDGPU_INFO_VBIOS_INFO 0x3
+/* Query UVD handles */
+#define AMDGPU_INFO_NUM_HANDLES 0x1C
+/* Query sensor related information */
+#define AMDGPU_INFO_SENSOR 0x1D
+ /* Subquery id: Query GPU shader clock */
+ #define AMDGPU_INFO_SENSOR_GFX_SCLK 0x1
+ /* Subquery id: Query GPU memory clock */
+ #define AMDGPU_INFO_SENSOR_GFX_MCLK 0x2
+ /* Subquery id: Query GPU temperature */
+ #define AMDGPU_INFO_SENSOR_GPU_TEMP 0x3
+ /* Subquery id: Query GPU load */
+ #define AMDGPU_INFO_SENSOR_GPU_LOAD 0x4
+ /* Subquery id: Query average GPU power */
+ #define AMDGPU_INFO_SENSOR_GPU_AVG_POWER 0x5
+ /* Subquery id: Query northbridge voltage */
+ #define AMDGPU_INFO_SENSOR_VDDNB 0x6
+ /* Subquery id: Query graphics voltage */
+ #define AMDGPU_INFO_SENSOR_VDDGFX 0x7
+ /* Subquery id: Query GPU stable pstate shader clock */
+ #define AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_SCLK 0x8
+ /* Subquery id: Query GPU stable pstate memory clock */
+ #define AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_MCLK 0x9
+ /* Subquery id: Query GPU peak pstate shader clock */
+ #define AMDGPU_INFO_SENSOR_PEAK_PSTATE_GFX_SCLK 0xa
+ /* Subquery id: Query GPU peak pstate memory clock */
+ #define AMDGPU_INFO_SENSOR_PEAK_PSTATE_GFX_MCLK 0xb
+ /* Subquery id: Query input GPU power */
+ #define AMDGPU_INFO_SENSOR_GPU_INPUT_POWER 0xc
+/* Number of VRAM page faults on CPU access. */
+#define AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS 0x1E
+#define AMDGPU_INFO_VRAM_LOST_COUNTER 0x1F
+/* query ras mask of enabled features*/
+#define AMDGPU_INFO_RAS_ENABLED_FEATURES 0x20
+/* RAS MASK: UMC (VRAM) */
+#define AMDGPU_INFO_RAS_ENABLED_UMC (1 << 0)
+/* RAS MASK: SDMA */
+#define AMDGPU_INFO_RAS_ENABLED_SDMA (1 << 1)
+/* RAS MASK: GFX */
+#define AMDGPU_INFO_RAS_ENABLED_GFX (1 << 2)
+/* RAS MASK: MMHUB */
+#define AMDGPU_INFO_RAS_ENABLED_MMHUB (1 << 3)
+/* RAS MASK: ATHUB */
+#define AMDGPU_INFO_RAS_ENABLED_ATHUB (1 << 4)
+/* RAS MASK: PCIE */
+#define AMDGPU_INFO_RAS_ENABLED_PCIE (1 << 5)
+/* RAS MASK: HDP */
+#define AMDGPU_INFO_RAS_ENABLED_HDP (1 << 6)
+/* RAS MASK: XGMI */
+#define AMDGPU_INFO_RAS_ENABLED_XGMI (1 << 7)
+/* RAS MASK: DF */
+#define AMDGPU_INFO_RAS_ENABLED_DF (1 << 8)
+/* RAS MASK: SMN */
+#define AMDGPU_INFO_RAS_ENABLED_SMN (1 << 9)
+/* RAS MASK: SEM */
+#define AMDGPU_INFO_RAS_ENABLED_SEM (1 << 10)
+/* RAS MASK: MP0 */
+#define AMDGPU_INFO_RAS_ENABLED_MP0 (1 << 11)
+/* RAS MASK: MP1 */
+#define AMDGPU_INFO_RAS_ENABLED_MP1 (1 << 12)
+/* RAS MASK: FUSE */
+#define AMDGPU_INFO_RAS_ENABLED_FUSE (1 << 13)
+/* query video encode/decode caps */
+#define AMDGPU_INFO_VIDEO_CAPS 0x21
+ /* Subquery id: Decode */
+ #define AMDGPU_INFO_VIDEO_CAPS_DECODE 0
+ /* Subquery id: Encode */
+ #define AMDGPU_INFO_VIDEO_CAPS_ENCODE 1
+/* Query the max number of IBs per gang per submission */
+#define AMDGPU_INFO_MAX_IBS 0x22
+/* query last page fault info */
+#define AMDGPU_INFO_GPUVM_FAULT 0x23
+/* query FW object size and alignment */
+#define AMDGPU_INFO_UQ_FW_AREAS 0x24
+
+#define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0
+#define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff
+#define AMDGPU_INFO_MMR_SH_INDEX_SHIFT 8
+#define AMDGPU_INFO_MMR_SH_INDEX_MASK 0xff
+
+struct drm_amdgpu_query_fw {
+ /** AMDGPU_INFO_FW_* */
+ __u32 fw_type;
+ /**
+ * Index of the IP if there are more IPs of
+ * the same type.
+ */
+ __u32 ip_instance;
+ /**
+ * Index of the engine. Whether this is used depends
+ * on the firmware type. (e.g. MEC, SDMA)
+ */
+ __u32 index;
+ __u32 _pad;
+};
+
+/* Input structure for the INFO ioctl */
+struct drm_amdgpu_info {
+ /* Where the return value will be stored */
+ __u64 return_pointer;
+ /* The size of the return value. Just like "size" in "snprintf",
+ * it limits how many bytes the kernel can write. */
+ __u32 return_size;
+ /* The query request id. */
+ __u32 query;
+
+ union {
+ struct {
+ __u32 id;
+ __u32 _pad;
+ } mode_crtc;
+
+ struct {
+ /** AMDGPU_HW_IP_* */
+ __u32 type;
+ /**
+ * Index of the IP if there are more IPs of the same
+ * type. Ignored by AMDGPU_INFO_HW_IP_COUNT.
+ */
+ __u32 ip_instance;
+ } query_hw_ip;
+
+ struct {
+ __u32 dword_offset;
+ /** number of registers to read */
+ __u32 count;
+ __u32 instance;
+ /** For future use, no flags defined so far */
+ __u32 flags;
+ } read_mmr_reg;
+
+ struct drm_amdgpu_query_fw query_fw;
+
+ struct {
+ __u32 type;
+ __u32 offset;
+ } vbios_info;
+
+ struct {
+ __u32 type;
+ } sensor_info;
+
+ struct {
+ __u32 type;
+ } video_cap;
+ };
+};
+
+struct drm_amdgpu_info_gds {
+ /** GDS GFX partition size */
+ __u32 gds_gfx_partition_size;
+ /** GDS compute partition size */
+ __u32 compute_partition_size;
+ /** total GDS memory size */
+ __u32 gds_total_size;
+ /** GWS size per GFX partition */
+ __u32 gws_per_gfx_partition;
+ /** GSW size per compute partition */
+ __u32 gws_per_compute_partition;
+ /** OA size per GFX partition */
+ __u32 oa_per_gfx_partition;
+ /** OA size per compute partition */
+ __u32 oa_per_compute_partition;
+ __u32 _pad;
+};
+
+struct drm_amdgpu_info_vram_gtt {
+ __u64 vram_size;
+ __u64 vram_cpu_accessible_size;
+ __u64 gtt_size;
+};
+
+struct drm_amdgpu_heap_info {
+ /** max. physical memory */
+ __u64 total_heap_size;
+
+ /** Theoretical max. available memory in the given heap */
+ __u64 usable_heap_size;
+
+ /**
+ * Number of bytes allocated in the heap. This includes all processes
+ * and private allocations in the kernel. It changes when new buffers
+ * are allocated, freed, and moved. It cannot be larger than
+ * heap_size.
+ */
+ __u64 heap_usage;
+
+ /**
+ * Theoretical possible max. size of buffer which
+ * could be allocated in the given heap
+ */
+ __u64 max_allocation;
+};
+
+struct drm_amdgpu_memory_info {
+ struct drm_amdgpu_heap_info vram;
+ struct drm_amdgpu_heap_info cpu_accessible_vram;
+ struct drm_amdgpu_heap_info gtt;
+};
+
+struct drm_amdgpu_info_firmware {
+ __u32 ver;
+ __u32 feature;
+};
+
+struct drm_amdgpu_info_vbios {
+ __u8 name[64];
+ __u8 vbios_pn[64];
+ __u32 version;
+ __u32 pad;
+ __u8 vbios_ver_str[32];
+ __u8 date[32];
+};
+
+#define AMDGPU_VRAM_TYPE_UNKNOWN 0
+#define AMDGPU_VRAM_TYPE_GDDR1 1
+#define AMDGPU_VRAM_TYPE_DDR2 2
+#define AMDGPU_VRAM_TYPE_GDDR3 3
+#define AMDGPU_VRAM_TYPE_GDDR4 4
+#define AMDGPU_VRAM_TYPE_GDDR5 5
+#define AMDGPU_VRAM_TYPE_HBM 6
+#define AMDGPU_VRAM_TYPE_DDR3 7
+#define AMDGPU_VRAM_TYPE_DDR4 8
+#define AMDGPU_VRAM_TYPE_GDDR6 9
+#define AMDGPU_VRAM_TYPE_DDR5 10
+#define AMDGPU_VRAM_TYPE_LPDDR4 11
+#define AMDGPU_VRAM_TYPE_LPDDR5 12
+
+struct drm_amdgpu_info_device {
+ /** PCI Device ID */
+ __u32 device_id;
+ /** Internal chip revision: A0, A1, etc.) */
+ __u32 chip_rev;
+ __u32 external_rev;
+ /** Revision id in PCI Config space */
+ __u32 pci_rev;
+ __u32 family;
+ __u32 num_shader_engines;
+ __u32 num_shader_arrays_per_engine;
+ /* in KHz */
+ __u32 gpu_counter_freq;
+ __u64 max_engine_clock;
+ __u64 max_memory_clock;
+ /* cu information */
+ __u32 cu_active_number;
+ /* NOTE: cu_ao_mask is INVALID, DON'T use it */
+ __u32 cu_ao_mask;
+ __u32 cu_bitmap[4][4];
+ /** Render backend pipe mask. One render backend is CB+DB. */
+ __u32 enabled_rb_pipes_mask;
+ __u32 num_rb_pipes;
+ __u32 num_hw_gfx_contexts;
+ /* PCIe version (the smaller of the GPU and the CPU/motherboard) */
+ __u32 pcie_gen;
+ __u64 ids_flags;
+ /** Starting virtual address for UMDs. */
+ __u64 virtual_address_offset;
+ /** The maximum virtual address */
+ __u64 virtual_address_max;
+ /** Required alignment of virtual addresses. */
+ __u32 virtual_address_alignment;
+ /** Page table entry - fragment size */
+ __u32 pte_fragment_size;
+ __u32 gart_page_size;
+ /** constant engine ram size*/
+ __u32 ce_ram_size;
+ /** video memory type info*/
+ __u32 vram_type;
+ /** video memory bit width*/
+ __u32 vram_bit_width;
+ /* vce harvesting instance */
+ __u32 vce_harvest_config;
+ /* gfx double offchip LDS buffers */
+ __u32 gc_double_offchip_lds_buf;
+ /* NGG Primitive Buffer */
+ __u64 prim_buf_gpu_addr;
+ /* NGG Position Buffer */
+ __u64 pos_buf_gpu_addr;
+ /* NGG Control Sideband */
+ __u64 cntl_sb_buf_gpu_addr;
+ /* NGG Parameter Cache */
+ __u64 param_buf_gpu_addr;
+ __u32 prim_buf_size;
+ __u32 pos_buf_size;
+ __u32 cntl_sb_buf_size;
+ __u32 param_buf_size;
+ /* wavefront size*/
+ __u32 wave_front_size;
+ /* shader visible vgprs*/
+ __u32 num_shader_visible_vgprs;
+ /* CU per shader array*/
+ __u32 num_cu_per_sh;
+ /* number of tcc blocks*/
+ __u32 num_tcc_blocks;
+ /* gs vgt table depth*/
+ __u32 gs_vgt_table_depth;
+ /* gs primitive buffer depth*/
+ __u32 gs_prim_buffer_depth;
+ /* max gs wavefront per vgt*/
+ __u32 max_gs_waves_per_vgt;
+ /* PCIe number of lanes (the smaller of the GPU and the CPU/motherboard) */
+ __u32 pcie_num_lanes;
+ /* always on cu bitmap */
+ __u32 cu_ao_bitmap[4][4];
+ /** Starting high virtual address for UMDs. */
+ __u64 high_va_offset;
+ /** The maximum high virtual address */
+ __u64 high_va_max;
+ /* gfx10 pa_sc_tile_steering_override */
+ __u32 pa_sc_tile_steering_override;
+ /* disabled TCCs */
+ __u64 tcc_disabled_mask;
+ __u64 min_engine_clock;
+ __u64 min_memory_clock;
+ /* The following fields are only set on gfx11+, older chips set 0. */
+ __u32 tcp_cache_size; /* AKA GL0, VMEM cache */
+ __u32 num_sqc_per_wgp;
+ __u32 sqc_data_cache_size; /* AKA SMEM cache */
+ __u32 sqc_inst_cache_size;
+ __u32 gl1c_cache_size;
+ __u32 gl2c_cache_size;
+ __u64 mall_size; /* AKA infinity cache */
+ /* high 32 bits of the rb pipes mask */
+ __u32 enabled_rb_pipes_mask_hi;
+ /* shadow area size for gfx11 */
+ __u32 shadow_size;
+ /* shadow area base virtual alignment for gfx11 */
+ __u32 shadow_alignment;
+ /* context save area size for gfx11 */
+ __u32 csa_size;
+ /* context save area base virtual alignment for gfx11 */
+ __u32 csa_alignment;
+};
+
+struct drm_amdgpu_info_hw_ip {
+ /** Version of h/w IP */
+ __u32 hw_ip_version_major;
+ __u32 hw_ip_version_minor;
+ /** Capabilities */
+ __u64 capabilities_flags;
+ /** command buffer address start alignment*/
+ __u32 ib_start_alignment;
+ /** command buffer size alignment*/
+ __u32 ib_size_alignment;
+ /** Bitmask of available rings. Bit 0 means ring 0, etc. */
+ __u32 available_rings;
+ /** version info: bits 23:16 major, 15:8 minor, 7:0 revision */
+ __u32 ip_discovery_version;
+};
+
+/* GFX metadata BO sizes and alignment info (in bytes) */
+struct drm_amdgpu_info_uq_fw_areas_gfx {
+ /* shadow area size */
+ __u32 shadow_size;
+ /* shadow area base virtual mem alignment */
+ __u32 shadow_alignment;
+ /* context save area size */
+ __u32 csa_size;
+ /* context save area base virtual mem alignment */
+ __u32 csa_alignment;
+};
+
+/* IP specific fw related information used in the
+ * subquery AMDGPU_INFO_UQ_FW_AREAS
+ */
+struct drm_amdgpu_info_uq_fw_areas {
+ union {
+ struct drm_amdgpu_info_uq_fw_areas_gfx gfx;
+ };
+};
+
+struct drm_amdgpu_info_num_handles {
+ /** Max handles as supported by firmware for UVD */
+ __u32 uvd_max_handles;
+ /** Handles currently in use for UVD */
+ __u32 uvd_used_handles;
+};
+
+#define AMDGPU_VCE_CLOCK_TABLE_ENTRIES 6
+
+struct drm_amdgpu_info_vce_clock_table_entry {
+ /** System clock */
+ __u32 sclk;
+ /** Memory clock */
+ __u32 mclk;
+ /** VCE clock */
+ __u32 eclk;
+ __u32 pad;
+};
+
+struct drm_amdgpu_info_vce_clock_table {
+ struct drm_amdgpu_info_vce_clock_table_entry entries[AMDGPU_VCE_CLOCK_TABLE_ENTRIES];
+ __u32 num_valid_entries;
+ __u32 pad;
+};
+
+/* query video encode/decode caps */
+#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2 0
+#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4 1
+#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1 2
+#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC 3
+#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC 4
+#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG 5
+#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9 6
+#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1 7
+#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_COUNT 8
+
+struct drm_amdgpu_info_video_codec_info {
+ __u32 valid;
+ __u32 max_width;
+ __u32 max_height;
+ __u32 max_pixels_per_frame;
+ __u32 max_level;
+ __u32 pad;
+};
+
+struct drm_amdgpu_info_video_caps {
+ struct drm_amdgpu_info_video_codec_info codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_COUNT];
+};
+
+#define AMDGPU_VMHUB_TYPE_MASK 0xff
+#define AMDGPU_VMHUB_TYPE_SHIFT 0
+#define AMDGPU_VMHUB_TYPE_GFX 0
+#define AMDGPU_VMHUB_TYPE_MM0 1
+#define AMDGPU_VMHUB_TYPE_MM1 2
+#define AMDGPU_VMHUB_IDX_MASK 0xff00
+#define AMDGPU_VMHUB_IDX_SHIFT 8
+
+struct drm_amdgpu_info_gpuvm_fault {
+ __u64 addr;
+ __u32 status;
+ __u32 vmhub;
+};
+
+struct drm_amdgpu_info_uq_metadata_gfx {
+ /* shadow area size for gfx11 */
+ __u32 shadow_size;
+ /* shadow area base virtual alignment for gfx11 */
+ __u32 shadow_alignment;
+ /* context save area size for gfx11 */
+ __u32 csa_size;
+ /* context save area base virtual alignment for gfx11 */
+ __u32 csa_alignment;
+};
+
+struct drm_amdgpu_info_uq_metadata {
+ union {
+ struct drm_amdgpu_info_uq_metadata_gfx gfx;
+ };
+};
+
+/*
+ * Supported GPU families
+ */
+#define AMDGPU_FAMILY_UNKNOWN 0
+#define AMDGPU_FAMILY_SI 110 /* Hainan, Oland, Verde, Pitcairn, Tahiti */
+#define AMDGPU_FAMILY_CI 120 /* Bonaire, Hawaii */
+#define AMDGPU_FAMILY_KV 125 /* Kaveri, Kabini, Mullins */
+#define AMDGPU_FAMILY_VI 130 /* Iceland, Tonga */
+#define AMDGPU_FAMILY_CZ 135 /* Carrizo, Stoney */
+#define AMDGPU_FAMILY_AI 141 /* Vega10 */
+#define AMDGPU_FAMILY_RV 142 /* Raven */
+#define AMDGPU_FAMILY_NV 143 /* Navi10 */
+#define AMDGPU_FAMILY_VGH 144 /* Van Gogh */
+#define AMDGPU_FAMILY_GC_11_0_0 145 /* GC 11.0.0 */
+#define AMDGPU_FAMILY_YC 146 /* Yellow Carp */
+#define AMDGPU_FAMILY_GC_11_0_1 148 /* GC 11.0.1 */
+#define AMDGPU_FAMILY_GC_10_3_6 149 /* GC 10.3.6 */
+#define AMDGPU_FAMILY_GC_10_3_7 151 /* GC 10.3.7 */
+#define AMDGPU_FAMILY_GC_11_5_0 150 /* GC 11.5.0 */
+#define AMDGPU_FAMILY_GC_12_0_0 152 /* GC 12.0.0 */
+
+/* FIXME wrong namespace! */
+struct drm_color_ctm_3x4 {
+ /*
+ * Conversion matrix with 3x4 dimensions in S31.32 sign-magnitude
+ * (not two's complement!) format.
+ */
+ __u64 matrix[12];
+};
+
+/* CRIU ioctl
+ *
+ * When checkpointing a process, the CRIU amdgpu plugin will perform:
+ * 1. INFO op to get information about state that needs to be saved. This
+ * pauses execution until the checkpoint is done.
+ * 2. CHECKPOINT op to save state (BOs for now, TODO: CS contexts)
+ * 3. UNPAUSE op to resume execution when the checkpoint is done.
+ *
+ * When restoring a process, the CRIU amdgpu plugin will perform:
+ *
+ * 1. RESTORE op to restore state
+ * 2. RESUME op to restore userptr mappings (TODO)
+ */
+enum drm_amdgpu_criu_op {
+ AMDGPU_CRIU_OP_PROCESS_INFO,
+ AMDGPU_CRIU_OP_CHECKPOINT,
+ AMDGPU_CRIU_OP_UNPAUSE,
+ AMDGPU_CRIU_OP_RESTORE,
+ AMDGPU_CRIU_OP_RESUME,
+};
+
+struct drm_amdgpu_criu_args {
+ __u64 bos; /* user pointer to bos array */
+ __u64 priv_data; /* user pointer to private data */
+ __u64 priv_data_size;
+ __u32 num_bos;
+ __u32 num_objs;
+ __u32 pid;
+ __u32 op;
+ __u8 is_retry: 1;
+};
+
+struct drm_amdgpu_criu_bo_bucket {
+ __u64 addr;
+ __u64 size;
+ __u64 offset;
+ __u64 restored_offset; /* During restore, updated offset for BO */
+ __u64 alloc_flags;
+ __u32 preferred_domains;
+ __u32 dmabuf_fd;
+ __u8 is_import: 1;
+ __u8 skip: 1;
+};
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
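
The checkpoint side is expected to drive the new ioctl roughly as follows. This is a sketch only: checkpoint_process is an illustrative name, error handling and the actual image writing are omitted, and DRM_IOCTL_AMDGPU_CRIU_OP is assumed to be the request number introduced alongside this header.

#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include "amdgpu_drm.h"

/* Sketch: drive AMDGPU_CRIU_OP_* against an open render-node fd of the
 * target process.  Allocation checks and image writing are omitted.
 */
static int checkpoint_process(int fd)
{
	struct drm_amdgpu_criu_args args = { 0 };

	/* 1. PROCESS_INFO pauses the process and reports sizes/counts. */
	args.op = AMDGPU_CRIU_OP_PROCESS_INFO;
	if (ioctl(fd, DRM_IOCTL_AMDGPU_CRIU_OP, &args) == -1)
		return -1;

	/* 2. Size the BO bucket array and private data from the reply. */
	args.bos = (uintptr_t)calloc(args.num_bos,
				     sizeof(struct drm_amdgpu_criu_bo_bucket));
	args.priv_data = (uintptr_t)malloc(args.priv_data_size);

	/* 3. CHECKPOINT fills the buckets (incl. dmabuf_fd) and priv data. */
	args.op = AMDGPU_CRIU_OP_CHECKPOINT;
	if (ioctl(fd, DRM_IOCTL_AMDGPU_CRIU_OP, &args) == -1)
		return -1;

	/* ... serialize buckets and priv data, drain BO contents ... */

	/* 4. UNPAUSE lets the process run again once the dump is done. */
	args.op = AMDGPU_CRIU_OP_UNPAUSE;
	return ioctl(fd, DRM_IOCTL_AMDGPU_CRIU_OP, &args);
}
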
diff --git a/plugins/amdgpu/amdgpu_plugin.c b/plugins/amdgpu/amdgpu_plugin.c
index 96c086162..dd169c07c 100644
--- a/plugins/amdgpu/amdgpu_plugin.c
+++ b/plugins/amdgpu/amdgpu_plugin.c
@@ -49,13 +49,6 @@ struct vma_metadata {
/************************************ Global Variables ********************************************/
-/**
- * FD of KFD device used to checkpoint. On a multi-process
- * tree the order of checkpointing goes from parent to child
- * and so on - so saving the FD will not be overwritten
- */
-static int kfd_checkpoint_fd;
-
static LIST_HEAD(update_vma_info_list);
size_t kfd_max_buffer_size;
@@ -79,7 +72,7 @@ int kmtIoctl(int fd, unsigned long request, void *arg)
/* In case pthread_atfork didn't catch it, this will
* make any subsequent hsaKmt calls fail in CHECK_KFD_OPEN.
*/
- pr_perror("KFD file descriptor not valid in this process");
+ pr_perror("KFD file descriptor not valid in this process: %d\n", fd);
return ret;
}
@@ -110,7 +103,9 @@ static int allocate_device_entries(CriuKfd *e, int num_of_devices)
}
for (int i = 0; i < num_of_devices; i++) {
- KfdDeviceEntry *entry = xzalloc(sizeof(*entry));
+ KfdDeviceEntry *entry;
+
+ entry = xzalloc(sizeof(*entry));
if (!entry) {
pr_err("Failed to allocate entry\n");
@@ -503,11 +498,11 @@ void free_and_unmap(uint64_t size, amdgpu_bo_handle h_bo, amdgpu_va_handle h_va,
amdgpu_bo_free(h_bo);
}
-static int sdma_copy_bo(struct kfd_criu_bo_bucket bo_bucket, FILE *storage_fp,
+int sdma_copy_bo(int shared_fd, uint64_t size, FILE *storage_fp,
void *buffer, size_t buffer_size, amdgpu_device_handle h_dev,
- uint64_t max_copy_size, enum sdma_op_type type)
+ uint64_t max_copy_size, enum sdma_op_type type, bool do_not_free)
{
- uint64_t size, src_bo_size, dst_bo_size, buffer_bo_size, bytes_remain, buffer_space_remain;
+ uint64_t src_bo_size, dst_bo_size, buffer_bo_size, bytes_remain, buffer_space_remain;
uint64_t gpu_addr_src, gpu_addr_dst, gpu_addr_ib, copy_src, copy_dst, copy_size;
amdgpu_va_handle h_va_src, h_va_dst, h_va_ib;
amdgpu_bo_handle h_bo_src, h_bo_dst, h_bo_ib;
@@ -520,10 +515,8 @@ static int sdma_copy_bo(struct kfd_criu_bo_bucket bo_bucket, FILE *storage_fp,
uint32_t expired;
amdgpu_context_handle h_ctx;
uint32_t *ib = NULL;
- int j, err, shared_fd, packets_per_buffer;
+ int j, err, packets_per_buffer;
- shared_fd = bo_bucket.dmabuf_fd;
- size = bo_bucket.size;
buffer_bo_size = min(size, buffer_size);
packets_per_buffer = ((buffer_bo_size - 1) / max_copy_size) + 1;
src_bo_size = (type == SDMA_OP_VRAM_WRITE) ? buffer_bo_size : size;
@@ -734,7 +727,8 @@ err_dst_bo_map:
if (err)
pr_perror("dest range free failed");
err_dst_va:
- err = amdgpu_bo_free(h_bo_dst);
+ if (!do_not_free)
+ err = amdgpu_bo_free(h_bo_dst);
if (err)
pr_perror("dest bo free failed");
err_dst_bo_prep:
@@ -822,8 +816,9 @@ void *dump_bo_contents(void *_thread_data)
num_bos++;
/* perform sDMA based vram copy */
- ret = sdma_copy_bo(bo_buckets[i], bo_contents_fp, buffer, buffer_size, h_dev, max_copy_size,
- SDMA_OP_VRAM_READ);
+ ret = sdma_copy_bo(bo_buckets[i].dmabuf_fd, bo_buckets[i].size, bo_contents_fp, buffer, buffer_size, h_dev, max_copy_size,
+ SDMA_OP_VRAM_READ, false);
+
if (ret) {
pr_err("Failed to drain the BO using sDMA: bo_buckets[%d]\n", i);
break;
@@ -920,8 +915,8 @@ void *restore_bo_contents(void *_thread_data)
num_bos++;
- ret = sdma_copy_bo(bo_buckets[i], bo_contents_fp, buffer, buffer_size, h_dev, max_copy_size,
- SDMA_OP_VRAM_WRITE);
+ ret = sdma_copy_bo(bo_buckets[i].dmabuf_fd, bo_buckets[i].size, bo_contents_fp, buffer, buffer_size, h_dev, max_copy_size,
+ SDMA_OP_VRAM_WRITE, false);
if (ret) {
pr_err("Failed to fill the BO using sDMA: bo_buckets[%d]\n", i);
break;
@@ -1007,28 +1002,39 @@ int restore_hsakmt_shared_mem(const uint64_t shared_mem_size, const uint32_t sha
return 0;
}
-static int unpause_process(int fd)
+int amdgpu_unpause_processes(int pid)
{
int ret = 0;
struct kfd_ioctl_criu_args args = { 0 };
+ struct list_head *l = get_dumped_fds();
+ struct dumped_fd *st;
+
+ list_for_each_entry(st, l, l) {
+ if (st->is_drm) {
+ ret = amdgpu_plugin_drm_unpause_file(st->fd);
+ if (ret) {
+ pr_perror("Failed to unpause drm device file");
+ goto exit;
+ }
+ /* the fd is closed later by clear_dumped_fds() */
+ } else {
+ args.op = KFD_CRIU_OP_UNPAUSE;
- args.op = KFD_CRIU_OP_UNPAUSE;
-
- ret = kmtIoctl(fd, AMDKFD_IOC_CRIU_OP, &args);
- if (ret) {
- pr_perror("Failed to unpause process");
- goto exit;
+ ret = kmtIoctl(st->fd, AMDKFD_IOC_CRIU_OP, &args);
+ if (ret) {
+ pr_perror("Failed to unpause process");
+ goto exit;
+ }
+ }
}
- // Reset the KFD FD
- kfd_checkpoint_fd = -1;
- sys_close_drm_render_devices(&src_topology);
-
exit:
pr_info("Process unpaused %s (ret:%d)\n", ret ? "Failed" : "Ok", ret);
+ clear_dumped_fds();
return ret;
}
+CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__DUMP_DEVICE_LATE, amdgpu_unpause_processes)
static int save_devices(int fd, struct kfd_ioctl_criu_args *args, struct kfd_criu_device_bucket *device_buckets,
CriuKfd *e)
@@ -1072,6 +1078,8 @@ static int save_bos(int id, int fd, struct kfd_ioctl_criu_args *args, struct kfd
{
struct thread_data *thread_datas;
int ret = 0, i;
+ amdgpu_device_handle h_dev;
+ uint32_t major, minor;
pr_debug("Dumping %d BOs\n", args->num_bos);
@@ -1095,6 +1103,21 @@ static int save_bos(int id, int fd, struct kfd_ioctl_criu_args *args, struct kfd
boinfo->size = bo_bucket->size;
boinfo->offset = bo_bucket->offset;
boinfo->alloc_flags = bo_bucket->alloc_flags;
+
+ ret = amdgpu_device_initialize(node_get_drm_render_device(sys_get_node_by_gpu_id(&src_topology, bo_bucket->gpu_id)), &major, &minor, &h_dev);
+
+ boinfo->handle = get_gem_handle(h_dev, bo_bucket->dmabuf_fd);
+
+ amdgpu_device_deinitialize(h_dev);
+
+ boinfo->is_import = bo_bucket->is_import | shared_bo_has_exporter(boinfo->handle);
+ }
+ for (i = 0; i < e->num_of_bos; i++) {
+ KfdBoEntry *boinfo = e->bo_entries[i];
+
+ ret = record_shared_bo(boinfo->handle, boinfo->is_import);
+ if (ret)
+ goto exit;
}
for (int i = 0; i < e->num_of_gpus; i++) {
@@ -1215,11 +1238,10 @@ int amdgpu_plugin_dump_file(int fd, int id)
return -1;
}
- /* Initialize number of device files that will be checkpointed */
- init_gpu_count(&src_topology);
-
/* Check whether this plugin was called for kfd or render nodes */
if (major(st.st_rdev) != major(st_kfd.st_rdev) || minor(st.st_rdev) != 0) {
+ char buffer[64];
+ char path[64];
/* This is RenderD dumper plugin, for now just save renderD
* minor number to be used during restore. In later phases this
@@ -1229,11 +1251,11 @@ int amdgpu_plugin_dump_file(int fd, int id)
if (ret)
return ret;
- /* Invoke unpause process if needed */
- decrement_checkpoint_count();
- if (checkpoint_is_complete()) {
- ret = unpause_process(kfd_checkpoint_fd);
- }
+ ret = record_dumped_fd(fd, true);
+ if (ret)
+ return ret;
+ sprintf(path, "/proc/self/fd/%d", fd);
+ readlink(path, buffer, 64);
/* Need to return success here so that criu can call plugins for renderD nodes */
return ret;
@@ -1331,14 +1353,11 @@ int amdgpu_plugin_dump_file(int fd, int id)
xfree(buf);
-exit:
- /* Restore all queues if conditions permit */
- kfd_checkpoint_fd = fd;
- decrement_checkpoint_count();
- if (checkpoint_is_complete()) {
- ret = unpause_process(fd);
- }
+ ret = record_dumped_fd(fd, false);
+ if (ret)
+ goto exit;
+exit:
xfree((void *)args.devices);
xfree((void *)args.bos);
xfree((void *)args.priv_data);
@@ -1409,6 +1428,7 @@ exit:
static int restore_bos(struct kfd_ioctl_criu_args *args, CriuKfd *e)
{
struct kfd_criu_bo_bucket *bo_buckets;
+ bool retry_needed = false;
pr_debug("Restoring %ld BOs\n", e->num_of_bos);
@@ -1422,18 +1442,59 @@ static int restore_bos(struct kfd_ioctl_criu_args *args, CriuKfd *e)
for (int i = 0; i < args->num_bos; i++) {
struct kfd_criu_bo_bucket *bo_bucket = &bo_buckets[i];
KfdBoEntry *bo_entry = e->bo_entries[i];
+ int dmabuf_fd = -1;
- bo_bucket->gpu_id = bo_entry->gpu_id;
bo_bucket->addr = bo_entry->addr;
+
+ if (work_already_completed(bo_entry->handle, bo_entry->gpu_id)) {
+ bo_bucket->skip = 1;
+ } else if (bo_entry->handle != -1) {
+ if (bo_entry->is_import) {
+ dmabuf_fd = dmabuf_fd_for_handle(bo_entry->handle);
+ if (dmabuf_fd == -1) {
+ bo_bucket->skip = 1;
+ retry_needed = true;
+ }
+ }
+ }
+
+ bo_bucket->is_import = bo_entry->is_import;
+
+ bo_bucket->gpu_id = bo_entry->gpu_id;
bo_bucket->size = bo_entry->size;
bo_bucket->offset = bo_entry->offset;
bo_bucket->alloc_flags = bo_entry->alloc_flags;
+ bo_bucket->dmabuf_fd = dmabuf_fd;
plugin_log_msg("BO [%d] gpu_id:%x addr:%llx size:%llx offset:%llx\n", i, bo_bucket->gpu_id,
bo_bucket->addr, bo_bucket->size, bo_bucket->offset);
+
}
pr_info("Restore BOs Ok\n");
+
+ return retry_needed;
+}
+
+int save_vma_updates(uint64_t offset, uint64_t addr, uint64_t restored_offset, int fd)
+{
+ struct vma_metadata *vma_md;
+
+ vma_md = xmalloc(sizeof(*vma_md));
+ if (!vma_md) {
+ return -ENOMEM;
+ }
+
+ memset(vma_md, 0, sizeof(*vma_md));
+
+ vma_md->old_pgoff = offset;
+ vma_md->vma_entry = addr;
+
+ vma_md->new_pgoff = restored_offset;
+ vma_md->fd = fd;
+
+ list_add_tail(&vma_md->list, &update_vma_info_list);
+
return 0;
}
@@ -1452,6 +1513,9 @@ static int restore_bo_data(int id, struct kfd_criu_bo_bucket *bo_buckets, CriuKf
struct kfd_criu_bo_bucket *bo_bucket = &bo_buckets[i];
struct tp_node *tp_node;
+ if (bo_bucket->skip)
+ continue;
+
if (bo_bucket->alloc_flags & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT |
KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP | KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL)) {
struct vma_metadata *vma_md;
@@ -1481,10 +1545,6 @@ static int restore_bo_data(int id, struct kfd_criu_bo_bucket *bo_buckets, CriuKf
vma_md->new_pgoff = bo_bucket->restored_offset;
vma_md->fd = node_get_drm_render_device(tp_node);
- plugin_log_msg("adding vma_entry:addr:0x%lx old-off:0x%lx "
- "new_off:0x%lx new_minor:%d\n",
- vma_md->vma_entry, vma_md->old_pgoff, vma_md->new_pgoff, vma_md->new_minor);
-
list_add_tail(&vma_md->list, &update_vma_info_list);
}
}
@@ -1551,7 +1611,7 @@ exit:
return ret;
}
-int amdgpu_plugin_restore_file(int id)
+int amdgpu_plugin_restore_file(int id, bool *retry_needed)
{
int ret = 0, fd;
char img_path[PATH_MAX];
@@ -1562,6 +1622,8 @@ int amdgpu_plugin_restore_file(int id)
size_t img_size;
FILE *img_fp = NULL;
+ *retry_needed = false;
+
if (plugin_disabled)
return -ENOTSUP;
@@ -1609,8 +1671,6 @@ int amdgpu_plugin_restore_file(int id)
}
fclose(img_fp);
- pr_info("render node gpu_id = 0x%04x\n", rd->gpu_id);
-
target_gpu_id = maps_get_dest_gpu(&restore_maps, rd->gpu_id);
if (!target_gpu_id) {
fd = -ENODEV;
@@ -1626,8 +1686,18 @@ int amdgpu_plugin_restore_file(int id)
pr_info("render node destination gpu_id = 0x%04x\n", tp_node->gpu_id);
fd = node_get_drm_render_device(tp_node);
- if (fd < 0)
+ if (fd < 0) {
pr_err("Failed to open render device (minor:%d)\n", tp_node->drm_render_minor);
+ return -1;
+ }
+
+ ret = amdgpu_plugin_drm_restore_file(fd, rd);
+ if (ret == 1)
+ *retry_needed = true;
+ if (ret < 0) {
+ fd = ret;
+ goto fail;
+ }
fail:
criu_render_node__free_unpacked(rd, NULL);
xfree(buf);
@@ -1639,12 +1709,19 @@ int amdgpu_plugin_restore_file(int id)
* copy of the fd. CRIU core owns the duplicated returned fd, and amdgpu_plugin owns the fd stored in
* tp_node.
*/
- fd = dup(fd);
- if (fd == -1) {
- pr_perror("unable to duplicate the render fd");
- return -1;
+
+ if (fd < 0)
+ return fd;
+
+ if (!(*retry_needed)) {
+ fd = dup(fd);
+ if (fd == -1) {
+ pr_perror("unable to duplicate the render fd");
+ return -1;
+ }
+ return fd;
}
- return fd;
+ return 0;
}
fd = open(AMDGPU_KFD_DEVICE, O_RDWR | O_CLOEXEC);
@@ -1688,13 +1765,16 @@ int amdgpu_plugin_restore_file(int id)
* This way, we know that the file descriptors we store will not conflict with file descriptors inside core
* CRIU.
*/
- fd_next = find_unused_fd_pid(e->pid);
- if (fd_next <= 0) {
- pr_err("Failed to find unused fd (fd:%d)\n", fd_next);
- ret = -EINVAL;
- goto exit;
+ if (fd_next == -1) {
+ fd_next = find_unused_fd_pid(e->pid);
+ if (fd_next <= 0) {
+ pr_err("Failed to find unused fd (fd:%d)\n", fd_next);
+ ret = -EINVAL;
+ goto exit;
+ }
}
+
ret = devinfo_to_topology(e->device_entries, e->num_of_gpus + e->num_of_cpus, &src_topology);
if (ret) {
pr_err("Failed to convert stored device information to topology\n");
@@ -1714,17 +1794,21 @@ int amdgpu_plugin_restore_file(int id)
goto exit;
}
+
ret = restore_devices(&args, e);
if (ret)
goto exit;
ret = restore_bos(&args, e);
- if (ret)
+ if (ret == 1)
+ *retry_needed = true;
+ else if (ret)
goto exit;
args.num_objects = e->num_of_objects;
args.priv_data_size = e->priv_data.len;
args.priv_data = (uintptr_t)e->priv_data.data;
+ args.is_retry = work_already_completed(-1, -1);
args.op = KFD_CRIU_OP_RESTORE;
if (kmtIoctl(fd, AMDKFD_IOC_CRIU_OP, &args) == -1) {
@@ -1733,6 +1817,22 @@ int amdgpu_plugin_restore_file(int id)
goto exit;
}
+ ret = record_completed_work(-1, -1);
+ if (ret < 0)
+ goto exit;
+ for (int i = 0; i < args.num_bos; i++) {
+ struct kfd_criu_bo_bucket *bo_bucket = &((struct kfd_criu_bo_bucket *)args.bos)[i];
+ KfdBoEntry *bo_entry = e->bo_entries[i];
+
+ if (!bo_bucket->skip && !work_already_completed(bo_entry->handle, bo_entry->gpu_id)) {
+ ret = record_completed_work(bo_entry->handle, bo_entry->gpu_id);
+ if (ret < 0)
+ goto exit;
+ if (!bo_entry->is_import)
+ serve_out_dmabuf_fd(bo_entry->handle, bo_bucket->dmabuf_fd);
+ }
+ }
+
ret = restore_bo_data(id, (struct kfd_criu_bo_bucket *)args.bos, e);
if (ret)
goto exit;
@@ -1758,6 +1858,10 @@ exit:
}
CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__RESTORE_EXT_FILE, amdgpu_plugin_restore_file)
+int amdgpu_plugin_dmabuf_fd_update(int handle, int fd) {
+ return record_shared_dmabuf_fd(handle, fd);
+}
+CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__DMABUF_FD, amdgpu_plugin_dmabuf_fd_update)
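
The exporter/importer split works through this hook: when the process that owns a shared BO finishes its RESTORE op, it pushes the resulting dmabuf fd to every other restoring task over the new DMABUF service socket, and each task's hook above just records the (handle, fd) pair so a later retry pass can pick it up. A sketch of the two sides, assuming the helpers added in amdgpu_plugin_util.c below; publish_exported_bo and lookup_imported_bo are illustrative names, and serve_out_dmabuf_fd() is the new core helper whose exact prototype lives in the CRIU core changes.

#include <stdbool.h>
#include "amdgpu_plugin_util.h"

/* Exporter side: called for each shared BO this process owns, right after
 * the kernel RESTORE op produced a dmabuf fd for it.
 */
static int publish_exported_bo(int handle, int dmabuf_fd, int id)
{
	int ret = record_completed_work(handle, id);

	if (ret < 0)
		return ret;
	/* Broadcast (handle, fd); other tasks receive it via the hook above. */
	serve_out_dmabuf_fd(handle, dmabuf_fd);
	return 0;
}

/* Importer side: resolve the fd on a (re)try, or ask for another pass. */
static int lookup_imported_bo(int handle, int *dmabuf_fd, bool *retry_needed)
{
	*dmabuf_fd = dmabuf_fd_for_handle(handle);
	if (*dmabuf_fd == -1)
		*retry_needed = true;	/* exporter has not restored it yet */
	return 0;
}
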
/* return 0 if no match found
* return -1 for error.
* return 1 if vmap map must be adjusted.
@@ -1857,6 +1961,8 @@ int amdgpu_plugin_resume_devices_late(int target_pid)
}
}
+ clear_completed_work_and_dmabuf_fds();
+
close(fd);
return exit_code;
}
diff --git a/plugins/amdgpu/amdgpu_plugin_drm.c b/plugins/amdgpu/amdgpu_plugin_drm.c
index d54cd937d..030392a85 100644
--- a/plugins/amdgpu/amdgpu_plugin_drm.c
+++ b/plugins/amdgpu/amdgpu_plugin_drm.c
@@ -19,6 +19,7 @@
#include <dirent.h>
#include "common/list.h"
+#include "files.h"
#include "criu-amdgpu.pb-c.h"
@@ -27,12 +28,79 @@
#include "xmalloc.h"
#include "criu-log.h"
-#include "kfd_ioctl.h"
+#include "amdgpu_drm.h"
#include "amdgpu_plugin_drm.h"
#include "amdgpu_plugin_util.h"
#include "amdgpu_plugin_topology.h"
+#include "util.h"
+#include "common/scm.h"
+
+int get_gem_handle(amdgpu_device_handle h_dev, int dmabuf_fd)
+{
+ uint32_t handle;
+ int fd = amdgpu_device_get_fd(h_dev);
+
+ if (dmabuf_fd == -1) {
+ return -1;
+ }
+
+ if (drmPrimeFDToHandle(fd, dmabuf_fd, &handle))
+ return -1;
+ return handle;
+}
+
+int drmIoctl(int fd, unsigned long request, void *arg)
+{
+ int ret, max_retries = 200;
+
+ do {
+ ret = ioctl(fd, request, arg);
+ } while (ret == -1 && max_retries-- > 0 && (errno == EINTR || errno == EAGAIN));
+
+ if (ret == -1 && errno == EBADF)
+ /* The render node fd is no longer valid in this
+ * process (for example, it was closed after a fork).
+ */
+ pr_perror("DRM file descriptor not valid in this process");
+ return ret;
+}
+
+static int allocate_bo_entries(CriuRenderNode *e, int num_bos)
+{
+ e->bo_entries = xmalloc(sizeof(DrmBoEntry *) * num_bos);
+ if (!e->bo_entries) {
+ pr_err("Failed to allocate bo_info\n");
+ return -ENOMEM;
+ }
+
+ for (int i = 0; i < num_bos; i++) {
+ DrmBoEntry *entry = xzalloc(sizeof(*entry));
+
+ if (!entry) {
+ pr_err("Failed to allocate botest\n");
+ return -ENOMEM;
+ }
+
+ drm_bo_entry__init(entry);
+
+ e->bo_entries[i] = entry;
+ e->n_bo_entries++;
+ }
+ return 0;
+}
+
+static void free_e(CriuRenderNode *e)
+{
+ for (int i = 0; i < e->n_bo_entries; i++) {
+ if (e->bo_entries[i])
+ xfree(e->bo_entries[i]);
+ }
+
+ xfree(e);
+}
+
int amdgpu_plugin_drm_handle_device_vma(int fd, const struct stat *st)
{
char path[PATH_MAX];
@@ -60,19 +128,195 @@ int amdgpu_plugin_drm_handle_device_vma(int fd, const struct stat *st)
return 0;
}
+static int restore_bo_contents_drm(int drm_render_minor, pid_t pid, int drm_fd, uint64_t num_of_bos, struct drm_amdgpu_criu_bo_bucket *bo_buckets)
+{
+ size_t image_size = 0, total_bo_size = 0, max_bo_size = 0, buffer_size;
+ struct amdgpu_gpu_info gpu_info = { 0 };
+ amdgpu_device_handle h_dev;
+ uint64_t max_copy_size;
+ uint32_t major, minor;
+ FILE *bo_contents_fp = NULL;
+ void *buffer = NULL;
+ char img_path[40];
+ int num_bos = 0;
+ int i, ret = 0;
+
+ ret = amdgpu_device_initialize(drm_fd, &major, &minor, &h_dev);
+ if (ret) {
+ pr_perror("failed to initialize device");
+ goto exit;
+ }
+ plugin_log_msg("libdrm initialized successfully\n");
+
+ ret = amdgpu_query_gpu_info(h_dev, &gpu_info);
+ if (ret) {
+ pr_perror("failed to query gpuinfo via libdrm");
+ goto exit;
+ }
+
+ max_copy_size = (gpu_info.family_id >= AMDGPU_FAMILY_AI) ? SDMA_LINEAR_COPY_MAX_SIZE :
+ SDMA_LINEAR_COPY_MAX_SIZE - 1;
+
+ for (i = 0; i < num_of_bos; i++) {
+ if (bo_buckets[i].preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) {
+ total_bo_size += bo_buckets[i].size;
+
+ if (bo_buckets[i].size > max_bo_size)
+ max_bo_size = bo_buckets[i].size;
+ }
+ }
+
+ buffer_size = max_bo_size;
+
+ posix_memalign(&buffer, sysconf(_SC_PAGE_SIZE), buffer_size);
+ if (!buffer) {
+ pr_perror("Failed to alloc aligned memory. Consider setting KFD_MAX_BUFFER_SIZE.");
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ for (i = 0; i < num_of_bos; i++) {
+
+ if (!(bo_buckets[i].preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)))
+ continue;
+
+ if (bo_buckets[i].addr == -1)
+ continue;
+
+ num_bos++;
+
+ snprintf(img_path, sizeof(img_path), IMG_DRM_PAGES_FILE, pid, drm_render_minor, i);
+ bo_contents_fp = open_img_file(img_path, false, &image_size);
+
+ ret = sdma_copy_bo(bo_buckets[i].dmabuf_fd, bo_buckets[i].size, bo_contents_fp, buffer, buffer_size, h_dev, max_copy_size,
+ SDMA_OP_VRAM_WRITE, true);
+ if (ret) {
+ pr_err("Failed to fill the BO using sDMA: bo_buckets[%d]\n", i);
+ break;
+ }
+ plugin_log_msg("** Successfully filled the BO using sDMA: bo_buckets[%d] **\n", i);
+
+ if (bo_contents_fp)
+ fclose(bo_contents_fp);
+ }
+
+exit:
+ for (int i = 0; i < num_of_bos; i++) {
+ if (bo_buckets[i].dmabuf_fd != KFD_INVALID_FD)
+ close(bo_buckets[i].dmabuf_fd);
+ }
+
+ xfree(buffer);
+
+ amdgpu_device_deinitialize(h_dev);
+ return ret;
+}
int amdgpu_plugin_drm_dump_file(int fd, int id, struct stat *drm)
{
- CriuRenderNode rd = CRIU_RENDER_NODE__INIT;
- struct tp_node *tp_node;
+ CriuRenderNode *rd = NULL;
char path[PATH_MAX];
unsigned char *buf;
int minor;
int len;
int ret;
+ struct drm_amdgpu_criu_args args = {0};
+ size_t image_size;
+ struct tp_node *tp_node;
+
+ rd = xmalloc(sizeof(*rd));
+ if (!rd) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+ criu_render_node__init(rd);
/* Get the topology node of the DRM device */
minor = minor(drm->st_rdev);
+ rd->drm_render_minor = minor;
+
+ args.op = AMDGPU_CRIU_OP_PROCESS_INFO;
+ if (drmIoctl(fd, DRM_IOCTL_AMDGPU_CRIU_OP, &args) == -1) {
+ pr_perror("Failed to call process info ioctl");
+ ret = -1;
+ goto exit;
+ }
+
+ rd->pid = args.pid;
+ rd->num_of_bos = args.num_bos;
+ rd->num_of_objects = args.num_objs;
+ ret = allocate_bo_entries(rd, args.num_bos);
+ if (ret)
+ goto exit;
+
+ args.bos = (uintptr_t)xzalloc((args.num_bos * sizeof(struct drm_amdgpu_criu_bo_bucket)));
+ if (!args.bos) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ args.priv_data = (uintptr_t)xzalloc((args.priv_data_size));
+ if (!args.priv_data) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ args.op = AMDGPU_CRIU_OP_CHECKPOINT;
+ ret = drmIoctl(fd, DRM_IOCTL_AMDGPU_CRIU_OP, &args);
+ if (ret) {
+ pr_perror("Failed to call dumper (process) ioctl");
+ goto exit;
+ }
+
+ rd->priv_data.data = (void *)args.priv_data;
+ rd->priv_data.len = args.priv_data_size;
+
+ for (int i = 0; i < args.num_bos; i++) {
+ struct drm_amdgpu_criu_bo_bucket bo_bucket = ((struct drm_amdgpu_criu_bo_bucket *)args.bos)[i];
+ uint32_t major, minor;
+ amdgpu_device_handle h_dev;
+ void *buffer = NULL;
+ char img_path[40];
+ FILE *bo_contents_fp = NULL;
+ DrmBoEntry *boinfo = rd->bo_entries[i];
+
+ boinfo->addr = bo_bucket.addr;
+ boinfo->size = bo_bucket.size;
+ boinfo->offset = bo_bucket.offset;
+ boinfo->alloc_flags = bo_bucket.alloc_flags;
+ boinfo->preferred_domains = bo_bucket.preferred_domains;
+
+ ret = amdgpu_device_initialize(fd, &major, &minor, &h_dev);
+
+ snprintf(img_path, sizeof(img_path), IMG_DRM_PAGES_FILE, rd->pid, rd->drm_render_minor, i); //TODO: needs to be unique by process and by device, and recoverable by restore
+ bo_contents_fp = open_img_file(img_path, true, &image_size);
+
+ posix_memalign(&buffer, sysconf(_SC_PAGE_SIZE), bo_bucket.size);
+
+ ret = sdma_copy_bo(bo_bucket.dmabuf_fd, bo_bucket.size, bo_contents_fp, buffer, bo_bucket.size, h_dev, 0x1000,
+ SDMA_OP_VRAM_READ, false);
+
+ boinfo->handle = get_gem_handle(h_dev, bo_bucket.dmabuf_fd);
+ boinfo->is_import = bo_bucket.is_import | shared_bo_has_exporter(boinfo->handle);
+
+ if (bo_bucket.dmabuf_fd != KFD_INVALID_FD)
+ close(bo_bucket.dmabuf_fd);
+
+ if (bo_contents_fp)
+ fclose(bo_contents_fp);
+
+ ret = amdgpu_device_deinitialize(h_dev);
+ if (ret)
+ goto exit;
+ }
+ for (int i = 0; i < args.num_bos; i++) {
+ DrmBoEntry *boinfo = rd->bo_entries[i];
+
+ ret = record_shared_bo(boinfo->handle, boinfo->is_import);
+ if (ret)
+ goto exit;
+ }
+
tp_node = sys_get_node_by_render_minor(&src_topology, minor);
if (!tp_node) {
pr_err("Failed to find a device with minor number = %d\n", minor);
@@ -80,21 +324,129 @@ int amdgpu_plugin_drm_dump_file(int fd, int id, struct stat *drm)
}
/* Get the GPU_ID of the DRM device */
- rd.gpu_id = maps_get_dest_gpu(&checkpoint_maps, tp_node->gpu_id);
- if (!rd.gpu_id) {
- pr_err("Failed to find valid gpu_id for the device = %d\n", rd.gpu_id);
+ rd->gpu_id = maps_get_dest_gpu(&checkpoint_maps, tp_node->gpu_id);
+ if (!rd->gpu_id) {
+ pr_err("Failed to find valid gpu_id for the device = %d\n", rd->gpu_id);
return -ENODEV;
}
- len = criu_render_node__get_packed_size(&rd);
+ len = criu_render_node__get_packed_size(rd);
buf = xmalloc(len);
if (!buf)
return -ENOMEM;
- criu_render_node__pack(&rd, buf);
+ criu_render_node__pack(rd, buf);
snprintf(path, sizeof(path), IMG_DRM_FILE, id);
ret = write_img_file(path, buf, len);
+
+ exit:
+ xfree((void *)args.bos);
+ xfree((void *)args.priv_data);
xfree(buf);
+ free_e(rd);
+ return ret;
+}
+
+int amdgpu_plugin_drm_unpause_file(int fd) {
+ struct drm_amdgpu_criu_args args = {0};
+ int ret = 0;
+
+ args.op = AMDGPU_CRIU_OP_UNPAUSE;
+ if (drmIoctl(fd, DRM_IOCTL_AMDGPU_CRIU_OP, &args) == -1) {
+ pr_perror("Failed to call unpause ioctl");
+ ret = -1;
+ goto exit;
+ }
+
+ exit:
return ret;
}
+
+int amdgpu_plugin_drm_restore_file(int fd, CriuRenderNode *rd)
+{
+ struct drm_amdgpu_criu_args args = {0};
+ int ret = 0;
+ bool retry_needed = false;
+
+ args.num_bos = rd->num_of_bos;
+ args.num_objs = rd->num_of_objects;
+ args.priv_data = (uint64_t)rd->priv_data.data;
+ args.priv_data_size = rd->priv_data.len;
+ args.bos = (uint64_t)xzalloc(sizeof(struct drm_amdgpu_criu_bo_bucket) * rd->num_of_bos);
+
+ for (int i = 0; i < args.num_bos; i++) {
+ struct drm_amdgpu_criu_bo_bucket *bo_bucket = &((struct drm_amdgpu_criu_bo_bucket *)args.bos)[i];
+ DrmBoEntry *boinfo = rd->bo_entries[i];
+ int dmabuf_fd = -1;
+
+ bo_bucket->addr = boinfo->addr;
+
+ if (work_already_completed(boinfo->handle, rd->drm_render_minor)) {
+ bo_bucket->skip = 1;
+ } else if (boinfo->handle != -1) {
+ if (boinfo->is_import) {
+ dmabuf_fd = dmabuf_fd_for_handle(boinfo->handle);
+ if (dmabuf_fd == -1) {
+ bo_bucket->skip = 1;
+ retry_needed = true;
+ }
+ }
+ }
+
+ bo_bucket->is_import = boinfo->is_import;
+
+ bo_bucket->dmabuf_fd = dmabuf_fd;
+ bo_bucket->size = boinfo->size;
+ bo_bucket->offset = boinfo->offset;
+ bo_bucket->alloc_flags = boinfo->alloc_flags;
+ bo_bucket->preferred_domains = boinfo->preferred_domains;
+ }
+
+ args.op = AMDGPU_CRIU_OP_RESTORE;
+ if (drmIoctl(fd, DRM_IOCTL_AMDGPU_CRIU_OP, &args) == -1) {
+ pr_perror("Failed to call restore ioctl");
+ ret = -1;
+ goto exit;
+ }
+
+ for (int i = 0; i < args.num_bos; i++) {
+ struct drm_amdgpu_criu_bo_bucket *bo_bucket = &((struct drm_amdgpu_criu_bo_bucket *)args.bos)[i];
+ DrmBoEntry *boinfo = rd->bo_entries[i];
+
+ if (!bo_bucket->skip && !work_already_completed(boinfo->handle, rd->drm_render_minor)) {
+ ret = record_completed_work(boinfo->handle, rd->drm_render_minor);
+ if (ret)
+ goto exit;
+ if (!boinfo->is_import) {
+ serve_out_dmabuf_fd(boinfo->handle, bo_bucket->dmabuf_fd);
+ }
+ }
+ }
+ ret = record_completed_work(-1, rd->drm_render_minor);
+ if (ret)
+ goto exit;
+
+ if (args.num_bos > 0) {
+
+ for (int i = 0; i < args.num_bos; i++) {
+ struct drm_amdgpu_criu_bo_bucket *bo_bucket = &((struct drm_amdgpu_criu_bo_bucket *)args.bos)[i];
+
+ if (!bo_bucket->skip)
+ ret = save_vma_updates(bo_bucket->offset, bo_bucket->addr, bo_bucket->restored_offset, fd);
+ if (ret < 0)
+ goto exit;
+ }
+
+ ret = restore_bo_contents_drm(rd->drm_render_minor, rd->pid, fd, args.num_bos, (struct drm_amdgpu_criu_bo_bucket *)args.bos);
+ if (ret)
+ goto exit;
+ }
+
+
+ exit:
+ if (ret < 0)
+ return ret;
+
+ return retry_needed;
+}
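
amdgpu_plugin_drm_restore_file() therefore has a three-way return: negative on hard failure, 0 when the render node is fully restored, and 1 when at least one imported BO is still waiting for its exporter and the whole file restore should be retried. A caller is expected to fold that into the retry flag handed back to CRIU core, roughly like this; try_restore_render_node is an illustrative wrapper, not part of the patch.

#include <stdbool.h>
#include "amdgpu_plugin_drm.h"

static int try_restore_render_node(int fd, CriuRenderNode *rd, bool *retry_needed)
{
	int ret = amdgpu_plugin_drm_restore_file(fd, rd);

	if (ret < 0)
		return ret;		/* hard failure, abort the restore */
	if (ret == 1)
		*retry_needed = true;	/* run this file again in a later pass */
	return 0;
}
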
diff --git a/plugins/amdgpu/amdgpu_plugin_drm.h b/plugins/amdgpu/amdgpu_plugin_drm.h
index 6f0c1a9a6..be91912b0 100644
--- a/plugins/amdgpu/amdgpu_plugin_drm.h
+++ b/plugins/amdgpu/amdgpu_plugin_drm.h
@@ -11,6 +11,7 @@
#include "amdgpu_plugin_topology.h"
+
/**
* Determines if VMA's of input file descriptor belong to amdgpu's
* DRM device and are therefore supported
@@ -24,5 +25,13 @@ int amdgpu_plugin_drm_handle_device_vma(int fd, const struct stat *drm);
*/
int amdgpu_plugin_drm_dump_file(int fd, int id, struct stat *drm);
+int amdgpu_plugin_drm_restore_file(int fd, CriuRenderNode *rd);
+
+int amdgpu_plugin_drm_unpause_file(int fd);
+
+int get_gem_handle(amdgpu_device_handle h_dev, int dmabuf_fd);
+
+int save_vma_updates(uint64_t offset, uint64_t addr, uint64_t restored_offset, int gpu_id);
+
#endif /* __AMDGPU_PLUGIN_DRM_H__ */
diff --git a/plugins/amdgpu/amdgpu_plugin_util.c b/plugins/amdgpu/amdgpu_plugin_util.c
index a165fc9cd..3a4b0652f 100644
--- a/plugins/amdgpu/amdgpu_plugin_util.c
+++ b/plugins/amdgpu/amdgpu_plugin_util.c
@@ -39,7 +39,12 @@
#include "amdgpu_plugin_topology.h"
/* Tracks number of device files that need to be checkpointed */
-static int dev_file_cnt = 0;
+
+
+static LIST_HEAD(dumped_fds);
+static LIST_HEAD(shared_bos);
+static LIST_HEAD(shared_dmabuf_fds);
+static LIST_HEAD(completed_work);
/* Helper structures to encode device topology of SRC and DEST platforms */
struct tp_system src_topology;
@@ -49,37 +54,149 @@ struct tp_system dest_topology;
struct device_maps checkpoint_maps;
struct device_maps restore_maps;
-bool checkpoint_is_complete()
-{
- return (dev_file_cnt == 0);
+int record_dumped_fd(int fd, bool is_drm) {
+ int newfd = dup(fd);
+
+ if (newfd < 0)
+ return newfd;
+ struct dumped_fd *st = malloc(sizeof(struct dumped_fd));
+ if (!st)
+ return -1;
+ st->fd = newfd;
+ st->is_drm = is_drm;
+ list_add(&st->l, &dumped_fds);
+
+ return 0;
}
-void decrement_checkpoint_count()
-{
- dev_file_cnt--;
+struct list_head *get_dumped_fds() {
+ return &dumped_fds;
}
-void init_gpu_count(struct tp_system *topo)
-{
- if (dev_file_cnt != 0)
- return;
+bool shared_bo_has_exporter(int handle) {
+ struct shared_bo *bo;
+
+ if (handle == -1) {
+ return false;
+ }
- /* We add ONE to include checkpointing of KFD device */
- dev_file_cnt = 1 + topology_gpu_count(topo);
+ list_for_each_entry(bo, &shared_bos, l) {
+ if (bo->handle == handle) {
+ return bo->has_exporter;
+ }
+ }
+
+ return false;
}
-int read_fp(FILE *fp, void *buf, const size_t buf_len)
-{
- size_t len_read;
+int record_shared_bo(int handle, bool is_imported) {
+ struct shared_bo *bo;
- len_read = fread(buf, 1, buf_len, fp);
- if (len_read != buf_len) {
- pr_err("Unable to read file (read:%ld buf_len:%ld)\n", len_read, buf_len);
- return -EIO;
+ if (handle == -1)
+ return 0;
+
+ list_for_each_entry(bo, &shared_bos, l) {
+ if (bo->handle == handle) {
+ return 0;
+ }
}
+ bo = malloc(sizeof(struct shared_bo));
+ if (!bo)
+ return -1;
+ bo->handle = handle;
+ bo->has_exporter = !is_imported;
+ list_add(&bo->l, &shared_bos);
+
return 0;
}
+int record_shared_dmabuf_fd(int handle, int dmabuf_fd) {
+ struct shared_dmabuf *bo;
+
+ bo = malloc(sizeof(struct shared_dmabuf));
+ if(!bo)
+ return -1;
+ bo->handle = handle;
+ bo->dmabuf_fd = dmabuf_fd;
+ list_add(&bo->l, &shared_dmabuf_fds);
+
+ return 0;
+}
+
+int dmabuf_fd_for_handle(int handle) {
+ struct shared_dmabuf *bo;
+
+ list_for_each_entry(bo, &shared_dmabuf_fds, l) {
+ if (bo->handle == handle) {
+ return bo->dmabuf_fd;
+ }
+ }
+
+ return -1;
+}
+
+int record_completed_work(int handle, int id) {
+ struct restore_completed_work *work;
+
+ work = malloc(sizeof(struct restore_completed_work));
+ if (!work)
+ return -1;
+ work->handle = handle;
+ work->id = id;
+ list_add(&work->l, &completed_work);
+
+ return 0;
+}
+
+bool work_already_completed(int handle, int id) {
+ struct restore_completed_work *work;
+
+ list_for_each_entry(work, &completed_work, l) {
+ if (work->handle == handle && work->id == id) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void clear_completed_work_and_dmabuf_fds() {
+ while (!list_empty(&shared_dmabuf_fds)) {
+ struct shared_dmabuf *st = list_first_entry(&shared_dmabuf_fds, struct shared_dmabuf, l);
+ list_del(&st->l);
+ close(st->dmabuf_fd);
+ free(st);
+ }
+
+ while (!list_empty(&completed_work)) {
+ struct restore_completed_work *st = list_first_entry(&completed_work, struct restore_completed_work, l);
+ list_del(&st->l);
+ free(st);
+ }
+}
+
+void clear_dumped_fds() {
+ while (!list_empty(&dumped_fds)) {
+ struct dumped_fd *st = list_first_entry(&dumped_fds, struct dumped_fd, l);
+ list_del(&st->l);
+ close(st->fd);
+ free(st);
+ }
+}
+
+int read_fp(FILE *fp, void *buf, const size_t buf_len)
+{
+ size_t len_read;
+
+ len_read = fread(buf, 1, buf_len, fp);
+ if (len_read != buf_len) {
+ pr_err("Unable to read file (read:%ld buf_len:%ld)\n", len_read, buf_len);
+ return -EIO;
+
+ }
+ return 0;
+}
+
int write_fp(FILE *fp, const void *buf, const size_t buf_len)
{
size_t len_write;
diff --git a/plugins/amdgpu/amdgpu_plugin_util.h b/plugins/amdgpu/amdgpu_plugin_util.h
index aacca3a28..d64d18a02 100644
--- a/plugins/amdgpu/amdgpu_plugin_util.h
+++ b/plugins/amdgpu/amdgpu_plugin_util.h
@@ -1,6 +1,8 @@
#ifndef __AMDGPU_PLUGIN_UTIL_H__
#define __AMDGPU_PLUGIN_UTIL_H__
+#include <libdrm/amdgpu.h>
+
#ifndef _GNU_SOURCE
#define _GNU_SOURCE 1
#endif
@@ -52,7 +54,7 @@
#define IMG_DRM_FILE "amdgpu-renderD-%d.img"
/* Name of file having serialized data of DRM device buffer objects (BOs) */
-#define IMG_DRM_PAGES_FILE "amdgpu-drm-pages-%d-%04x.img"
+#define IMG_DRM_PAGES_FILE "amdgpu-drm-pages-%d-%d-%04x.img"
/* Helper macros to Checkpoint and Restore a ROCm file */
#define HSAKMT_SHM_PATH "/dev/shm/hsakmt_shared_mem"
@@ -73,6 +75,30 @@ enum sdma_op_type {
SDMA_OP_VRAM_WRITE,
};
+struct dumped_fd {
+ struct list_head l;
+ int fd;
+ bool is_drm;
+};
+
+struct shared_bo {
+ struct list_head l;
+ int handle;
+ bool has_exporter;
+};
+
+struct shared_dmabuf {
+ struct list_head l;
+ int handle;
+ int dmabuf_fd;
+};
+
+struct restore_completed_work {
+ struct list_head l;
+ int handle;
+ int id;
+};
+
/* Helper structures to encode device topology of SRC and DEST platforms */
extern struct tp_system src_topology;
extern struct tp_system dest_topology;
@@ -97,10 +123,25 @@ int read_file(const char *file_path, void *buf, const size_t buf_len);
int write_img_file(char *path, const void *buf, const size_t buf_len);
FILE *open_img_file(char *path, bool write, size_t *size);
-bool checkpoint_is_complete();
-void decrement_checkpoint_count();
-void init_gpu_count(struct tp_system *topology);
+int record_dumped_fd(int fd, bool is_drm);
+struct list_head *get_dumped_fds();
+void clear_dumped_fds();
+
+bool shared_bo_has_exporter(int handle);
+int record_shared_bo(int handle, bool is_imported);
+
+int record_shared_dmabuf_fd(int handle, int dmabuf_fd);
+int dmabuf_fd_for_handle(int handle);
+
+int record_completed_work(int handle, int id);
+bool work_already_completed(int handle, int id);
+
+void clear_completed_work_and_dmabuf_fds();
void print_kfd_bo_stat(int bo_cnt, struct kfd_criu_bo_bucket *bo_list);
+int sdma_copy_bo(int shared_fd, uint64_t size, FILE *storage_fp,
+ void *buffer, size_t buffer_size, amdgpu_device_handle h_dev,
+ uint64_t max_copy_size, enum sdma_op_type type, bool do_not_free);
+
#endif /* __AMDGPU_PLUGIN_UTIL_H__ */
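
On the dump side, the shared-BO helpers above are meant to be used per BO, in the order the plugin visits the processes: the first process that dumps a given GEM handle without the kernel marking it as an import becomes the exporter, and every later sighting of the same handle is forced to is_import so restore knows which side must publish the dmabuf fd. A minimal sketch; classify_bo is an illustrative name.

#include <stdbool.h>
#include "amdgpu_plugin_util.h"

static int classify_bo(int handle, bool kernel_says_import, bool *is_import)
{
	/* Import if the kernel flagged it, or if an earlier dump in this
	 * CRIU run already recorded an exporter for the same handle.
	 */
	*is_import = kernel_says_import || shared_bo_has_exporter(handle);

	/* The first non-import sighting of a handle registers the exporter. */
	return record_shared_bo(handle, *is_import);
}
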
diff --git a/plugins/amdgpu/criu-amdgpu.proto b/plugins/amdgpu/criu-amdgpu.proto
index 078b67650..55b48a1c2 100644
--- a/plugins/amdgpu/criu-amdgpu.proto
+++ b/plugins/amdgpu/criu-amdgpu.proto
@@ -46,6 +46,8 @@ message kfd_bo_entry {
required uint64 offset = 3;
required uint32 alloc_flags = 4;
required uint32 gpu_id = 5;
+ required uint32 handle = 6;
+ required uint32 is_import = 7;
}
message criu_kfd {
@@ -61,6 +63,22 @@ message criu_kfd {
required bytes priv_data = 10;
}
+message drm_bo_entry {
+ required uint64 addr = 1;
+ required uint64 size = 2;
+ required uint64 offset = 3;
+ required uint64 alloc_flags = 4;
+ required uint32 preferred_domains = 5;
+ required uint32 handle = 6;
+ required uint32 is_import = 7;
+}
+
message criu_render_node {
required uint32 gpu_id = 1;
+ required uint32 pid = 2;
+ required uint32 drm_render_minor = 3;
+ required uint64 num_of_bos = 4;
+ repeated drm_bo_entry bo_entries = 5;
+ required uint32 num_of_objects = 6;
+ required bytes priv_data = 7;
}
diff --git a/plugins/amdgpu/kfd_ioctl.h b/plugins/amdgpu/kfd_ioctl.h
index 1a3bcea95..8c3f3a518 100644
--- a/plugins/amdgpu/kfd_ioctl.h
+++ b/plugins/amdgpu/kfd_ioctl.h
@@ -23,7 +23,7 @@
#ifndef KFD_IOCTL_H_INCLUDED
#define KFD_IOCTL_H_INCLUDED
-#include <libdrm/drm.h>
+#include <drm/drm.h>
#include <linux/ioctl.h>
/*
@@ -34,84 +34,128 @@
* - 1.6 - Query clear flags in SVM get_attr API
* - 1.7 - Checkpoint Restore (CRIU) API
* - 1.8 - CRIU - Support for SDMA transfers with GTT BOs
+ * - 1.9 - Add available memory ioctl
+ * - 1.10 - Add SMI profiler event log
+ * - 1.11 - Add unified memory for ctx save/restore area
+ * - 1.12 - Add DMA buf export ioctl
+ * - 1.13 - Add debugger API
+ * - 1.14 - Update kfd_event_data
+ * - 1.15 - Enable managing mappings in compute VMs with GEM_VA ioctl
+ * - 1.16 - Add contiguous VRAM allocation flag
+ * - 1.17 - Add SDMA queue creation with target SDMA engine ID
*/
#define KFD_IOCTL_MAJOR_VERSION 1
-#define KFD_IOCTL_MINOR_VERSION 8
+#define KFD_IOCTL_MINOR_VERSION 17
struct kfd_ioctl_get_version_args {
- uint32_t major_version; /* from KFD */
- uint32_t minor_version; /* from KFD */
+ __u32 major_version; /* from KFD */
+ __u32 minor_version; /* from KFD */
};
/* For kfd_ioctl_create_queue_args.queue_type. */
-#define KFD_IOC_QUEUE_TYPE_COMPUTE 0x0
-#define KFD_IOC_QUEUE_TYPE_SDMA 0x1
-#define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 0x2
-#define KFD_IOC_QUEUE_TYPE_SDMA_XGMI 0x3
+#define KFD_IOC_QUEUE_TYPE_COMPUTE 0x0
+#define KFD_IOC_QUEUE_TYPE_SDMA 0x1
+#define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 0x2
+#define KFD_IOC_QUEUE_TYPE_SDMA_XGMI 0x3
+#define KFD_IOC_QUEUE_TYPE_SDMA_BY_ENG_ID 0x4
-#define KFD_MAX_QUEUE_PERCENTAGE 100
-#define KFD_MAX_QUEUE_PRIORITY 15
+#define KFD_MAX_QUEUE_PERCENTAGE 100
+#define KFD_MAX_QUEUE_PRIORITY 15
struct kfd_ioctl_create_queue_args {
- uint64_t ring_base_address; /* to KFD */
- uint64_t write_pointer_address; /* from KFD */
- uint64_t read_pointer_address; /* from KFD */
- uint64_t doorbell_offset; /* from KFD */
-
- uint32_t ring_size; /* to KFD */
- uint32_t gpu_id; /* to KFD */
- uint32_t queue_type; /* to KFD */
- uint32_t queue_percentage; /* to KFD */
- uint32_t queue_priority; /* to KFD */
- uint32_t queue_id; /* from KFD */
-
- uint64_t eop_buffer_address; /* to KFD */
- uint64_t eop_buffer_size; /* to KFD */
- uint64_t ctx_save_restore_address; /* to KFD */
- uint32_t ctx_save_restore_size; /* to KFD */
- uint32_t ctl_stack_size; /* to KFD */
+ __u64 ring_base_address; /* to KFD */
+ __u64 write_pointer_address; /* from KFD */
+ __u64 read_pointer_address; /* from KFD */
+ __u64 doorbell_offset; /* from KFD */
+
+ __u32 ring_size; /* to KFD */
+ __u32 gpu_id; /* to KFD */
+ __u32 queue_type; /* to KFD */
+ __u32 queue_percentage; /* to KFD */
+ __u32 queue_priority; /* to KFD */
+ __u32 queue_id; /* from KFD */
+
+ __u64 eop_buffer_address; /* to KFD */
+ __u64 eop_buffer_size; /* to KFD */
+ __u64 ctx_save_restore_address; /* to KFD */
+ __u32 ctx_save_restore_size; /* to KFD */
+ __u32 ctl_stack_size; /* to KFD */
+ __u32 sdma_engine_id; /* to KFD */
+ __u32 pad;
};
struct kfd_ioctl_destroy_queue_args {
- uint32_t queue_id; /* to KFD */
- uint32_t pad;
+ __u32 queue_id; /* to KFD */
+ __u32 pad;
};
struct kfd_ioctl_update_queue_args {
- uint64_t ring_base_address; /* to KFD */
+ __u64 ring_base_address; /* to KFD */
- uint32_t queue_id; /* to KFD */
- uint32_t ring_size; /* to KFD */
- uint32_t queue_percentage; /* to KFD */
- uint32_t queue_priority; /* to KFD */
+ __u32 queue_id; /* to KFD */
+ __u32 ring_size; /* to KFD */
+ __u32 queue_percentage; /* to KFD */
+ __u32 queue_priority; /* to KFD */
};
struct kfd_ioctl_set_cu_mask_args {
- uint32_t queue_id; /* to KFD */
- uint32_t num_cu_mask; /* to KFD */
- uint64_t cu_mask_ptr; /* to KFD */
+ __u32 queue_id; /* to KFD */
+ __u32 num_cu_mask; /* to KFD */
+ __u64 cu_mask_ptr; /* to KFD */
};
struct kfd_ioctl_get_queue_wave_state_args {
- uint64_t ctl_stack_address; /* to KFD */
- uint32_t ctl_stack_used_size; /* from KFD */
- uint32_t save_area_used_size; /* from KFD */
- uint32_t queue_id; /* to KFD */
- uint32_t pad;
+ __u64 ctl_stack_address; /* to KFD */
+ __u32 ctl_stack_used_size; /* from KFD */
+ __u32 save_area_used_size; /* from KFD */
+ __u32 queue_id; /* to KFD */
+ __u32 pad;
+};
+
+struct kfd_ioctl_get_available_memory_args {
+ __u64 available; /* from KFD */
+ __u32 gpu_id; /* to KFD */
+ __u32 pad;
+};
+
+struct kfd_dbg_device_info_entry {
+ __u64 exception_status;
+ __u64 lds_base;
+ __u64 lds_limit;
+ __u64 scratch_base;
+ __u64 scratch_limit;
+ __u64 gpuvm_base;
+ __u64 gpuvm_limit;
+ __u32 gpu_id;
+ __u32 location_id;
+ __u32 vendor_id;
+ __u32 device_id;
+ __u32 revision_id;
+ __u32 subsystem_vendor_id;
+ __u32 subsystem_device_id;
+ __u32 fw_version;
+ __u32 gfx_target_version;
+ __u32 simd_count;
+ __u32 max_waves_per_simd;
+ __u32 array_count;
+ __u32 simd_arrays_per_engine;
+ __u32 num_xcc;
+ __u32 capability;
+ __u32 debug_prop;
};
/* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */
-#define KFD_IOC_CACHE_POLICY_COHERENT 0
+#define KFD_IOC_CACHE_POLICY_COHERENT 0
#define KFD_IOC_CACHE_POLICY_NONCOHERENT 1
struct kfd_ioctl_set_memory_policy_args {
- uint64_t alternate_aperture_base; /* to KFD */
- uint64_t alternate_aperture_size; /* to KFD */
+ __u64 alternate_aperture_base; /* to KFD */
+ __u64 alternate_aperture_size; /* to KFD */
- uint32_t gpu_id; /* to KFD */
- uint32_t default_policy; /* to KFD */
- uint32_t alternate_policy; /* to KFD */
- uint32_t pad;
+ __u32 gpu_id; /* to KFD */
+ __u32 default_policy; /* to KFD */
+ __u32 alternate_policy; /* to KFD */
+ __u32 pad;
};
/*
@@ -122,24 +166,24 @@ struct kfd_ioctl_set_memory_policy_args {
*/
struct kfd_ioctl_get_clock_counters_args {
- uint64_t gpu_clock_counter; /* from KFD */
- uint64_t cpu_clock_counter; /* from KFD */
- uint64_t system_clock_counter; /* from KFD */
- uint64_t system_clock_freq; /* from KFD */
+ __u64 gpu_clock_counter; /* from KFD */
+ __u64 cpu_clock_counter; /* from KFD */
+ __u64 system_clock_counter; /* from KFD */
+ __u64 system_clock_freq; /* from KFD */
- uint32_t gpu_id; /* to KFD */
- uint32_t pad;
+ __u32 gpu_id; /* to KFD */
+ __u32 pad;
};
struct kfd_process_device_apertures {
- uint64_t lds_base; /* from KFD */
- uint64_t lds_limit; /* from KFD */
- uint64_t scratch_base; /* from KFD */
- uint64_t scratch_limit; /* from KFD */
- uint64_t gpuvm_base; /* from KFD */
- uint64_t gpuvm_limit; /* from KFD */
- uint32_t gpu_id; /* from KFD */
- uint32_t pad;
+ __u64 lds_base; /* from KFD */
+ __u64 lds_limit; /* from KFD */
+ __u64 scratch_base; /* from KFD */
+ __u64 scratch_limit; /* from KFD */
+ __u64 gpuvm_base; /* from KFD */
+ __u64 gpuvm_limit; /* from KFD */
+ __u32 gpu_id; /* from KFD */
+ __u32 pad;
};
/*
@@ -149,122 +193,125 @@ struct kfd_process_device_apertures {
*/
#define NUM_OF_SUPPORTED_GPUS 7
struct kfd_ioctl_get_process_apertures_args {
- struct kfd_process_device_apertures process_apertures[NUM_OF_SUPPORTED_GPUS]; /* from KFD */
+ struct kfd_process_device_apertures
+ process_apertures[NUM_OF_SUPPORTED_GPUS];/* from KFD */
/* from KFD, should be in the range [1 - NUM_OF_SUPPORTED_GPUS] */
- uint32_t num_of_nodes;
- uint32_t pad;
+ __u32 num_of_nodes;
+ __u32 pad;
};
struct kfd_ioctl_get_process_apertures_new_args {
/* User allocated. Pointer to struct kfd_process_device_apertures
* filled in by Kernel
*/
- uint64_t kfd_process_device_apertures_ptr;
- /* to KFD - indicates amount of memory present in kfd_process_device_apertures_ptr
+ __u64 kfd_process_device_apertures_ptr;
+ /* to KFD - indicates amount of memory present in
+ * kfd_process_device_apertures_ptr
* from KFD - Number of entries filled by KFD.
*/
- uint32_t num_of_nodes;
- uint32_t pad;
+ __u32 num_of_nodes;
+ __u32 pad;
};
-#define MAX_ALLOWED_NUM_POINTS 100
-#define MAX_ALLOWED_AW_BUFF_SIZE 4096
-#define MAX_ALLOWED_WAC_BUFF_SIZE 128
+#define MAX_ALLOWED_NUM_POINTS 100
+#define MAX_ALLOWED_AW_BUFF_SIZE 4096
+#define MAX_ALLOWED_WAC_BUFF_SIZE 128
struct kfd_ioctl_dbg_register_args {
- uint32_t gpu_id; /* to KFD */
- uint32_t pad;
+ __u32 gpu_id; /* to KFD */
+ __u32 pad;
};
struct kfd_ioctl_dbg_unregister_args {
- uint32_t gpu_id; /* to KFD */
- uint32_t pad;
+ __u32 gpu_id; /* to KFD */
+ __u32 pad;
};
struct kfd_ioctl_dbg_address_watch_args {
- uint64_t content_ptr; /* a pointer to the actual content */
- uint32_t gpu_id; /* to KFD */
- uint32_t buf_size_in_bytes; /*including gpu_id and buf_size */
+ __u64 content_ptr; /* a pointer to the actual content */
+ __u32 gpu_id; /* to KFD */
+ __u32 buf_size_in_bytes; /*including gpu_id and buf_size */
};
struct kfd_ioctl_dbg_wave_control_args {
- uint64_t content_ptr; /* a pointer to the actual content */
- uint32_t gpu_id; /* to KFD */
- uint32_t buf_size_in_bytes; /*including gpu_id and buf_size */
+ __u64 content_ptr; /* a pointer to the actual content */
+ __u32 gpu_id; /* to KFD */
+ __u32 buf_size_in_bytes; /*including gpu_id and buf_size */
};
-#define KFD_INVALID_FD 0xffffffff
+#define KFD_INVALID_FD 0xffffffff
/* Matching HSA_EVENTTYPE */
-#define KFD_IOC_EVENT_SIGNAL 0
-#define KFD_IOC_EVENT_NODECHANGE 1
-#define KFD_IOC_EVENT_DEVICESTATECHANGE 2
-#define KFD_IOC_EVENT_HW_EXCEPTION 3
-#define KFD_IOC_EVENT_SYSTEM_EVENT 4
-#define KFD_IOC_EVENT_DEBUG_EVENT 5
-#define KFD_IOC_EVENT_PROFILE_EVENT 6
-#define KFD_IOC_EVENT_QUEUE_EVENT 7
-#define KFD_IOC_EVENT_MEMORY 8
-
-#define KFD_IOC_WAIT_RESULT_COMPLETE 0
-#define KFD_IOC_WAIT_RESULT_TIMEOUT 1
-#define KFD_IOC_WAIT_RESULT_FAIL 2
-
-#define KFD_SIGNAL_EVENT_LIMIT 4096
+#define KFD_IOC_EVENT_SIGNAL 0
+#define KFD_IOC_EVENT_NODECHANGE 1
+#define KFD_IOC_EVENT_DEVICESTATECHANGE 2
+#define KFD_IOC_EVENT_HW_EXCEPTION 3
+#define KFD_IOC_EVENT_SYSTEM_EVENT 4
+#define KFD_IOC_EVENT_DEBUG_EVENT 5
+#define KFD_IOC_EVENT_PROFILE_EVENT 6
+#define KFD_IOC_EVENT_QUEUE_EVENT 7
+#define KFD_IOC_EVENT_MEMORY 8
+
+#define KFD_IOC_WAIT_RESULT_COMPLETE 0
+#define KFD_IOC_WAIT_RESULT_TIMEOUT 1
+#define KFD_IOC_WAIT_RESULT_FAIL 2
+
+#define KFD_SIGNAL_EVENT_LIMIT 4096
/* For kfd_event_data.hw_exception_data.reset_type. */
-#define KFD_HW_EXCEPTION_WHOLE_GPU_RESET 0
-#define KFD_HW_EXCEPTION_PER_ENGINE_RESET 1
+#define KFD_HW_EXCEPTION_WHOLE_GPU_RESET 0
+#define KFD_HW_EXCEPTION_PER_ENGINE_RESET 1
/* For kfd_event_data.hw_exception_data.reset_cause. */
-#define KFD_HW_EXCEPTION_GPU_HANG 0
-#define KFD_HW_EXCEPTION_ECC 1
+#define KFD_HW_EXCEPTION_GPU_HANG 0
+#define KFD_HW_EXCEPTION_ECC 1
/* For kfd_hsa_memory_exception_data.ErrorType */
-#define KFD_MEM_ERR_NO_RAS 0
-#define KFD_MEM_ERR_SRAM_ECC 1
-#define KFD_MEM_ERR_POISON_CONSUMED 2
-#define KFD_MEM_ERR_GPU_HANG 3
+#define KFD_MEM_ERR_NO_RAS 0
+#define KFD_MEM_ERR_SRAM_ECC 1
+#define KFD_MEM_ERR_POISON_CONSUMED 2
+#define KFD_MEM_ERR_GPU_HANG 3
struct kfd_ioctl_create_event_args {
- uint64_t event_page_offset; /* from KFD */
- uint32_t event_trigger_data; /* from KFD - signal events only */
- uint32_t event_type; /* to KFD */
- uint32_t auto_reset; /* to KFD */
- uint32_t node_id; /* to KFD - only valid for certain event types */
- uint32_t event_id; /* from KFD */
- uint32_t event_slot_index; /* from KFD */
+ __u64 event_page_offset; /* from KFD */
+ __u32 event_trigger_data; /* from KFD - signal events only */
+ __u32 event_type; /* to KFD */
+ __u32 auto_reset; /* to KFD */
+ __u32 node_id; /* to KFD - only valid for certain
+ event types */
+ __u32 event_id; /* from KFD */
+ __u32 event_slot_index; /* from KFD */
};
struct kfd_ioctl_destroy_event_args {
- uint32_t event_id; /* to KFD */
- uint32_t pad;
+ __u32 event_id; /* to KFD */
+ __u32 pad;
};
struct kfd_ioctl_set_event_args {
- uint32_t event_id; /* to KFD */
- uint32_t pad;
+ __u32 event_id; /* to KFD */
+ __u32 pad;
};
struct kfd_ioctl_reset_event_args {
- uint32_t event_id; /* to KFD */
- uint32_t pad;
+ __u32 event_id; /* to KFD */
+ __u32 pad;
};
struct kfd_memory_exception_failure {
- uint32_t NotPresent; /* Page not present or supervisor privilege */
- uint32_t ReadOnly; /* Write access to a read-only page */
- uint32_t NoExecute; /* Execute access to a page marked NX */
- uint32_t imprecise; /* Can't determine the exact fault address */
+ __u32 NotPresent; /* Page not present or supervisor privilege */
+ __u32 ReadOnly; /* Write access to a read-only page */
+ __u32 NoExecute; /* Execute access to a page marked NX */
+ __u32 imprecise; /* Can't determine the exact fault address */
};
/* memory exception data */
struct kfd_hsa_memory_exception_data {
struct kfd_memory_exception_failure failure;
- uint64_t va;
- uint32_t gpu_id;
- uint32_t ErrorType; /* 0 = no RAS error,
+ __u64 va;
+ __u32 gpu_id;
+ __u32 ErrorType; /* 0 = no RAS error,
* 1 = ECC_SRAM,
* 2 = Link_SYNFLOOD (poison),
* 3 = GPU hang (not attributable to a specific cause),
@@ -274,85 +321,98 @@ struct kfd_hsa_memory_exception_data {
/* hw exception data */
struct kfd_hsa_hw_exception_data {
- uint32_t reset_type;
- uint32_t reset_cause;
- uint32_t memory_lost;
- uint32_t gpu_id;
+ __u32 reset_type;
+ __u32 reset_cause;
+ __u32 memory_lost;
+ __u32 gpu_id;
+};
+
+/* hsa signal event data */
+struct kfd_hsa_signal_event_data {
+ __u64 last_event_age; /* to and from KFD */
};
/* Event data */
struct kfd_event_data {
union {
+ /* From KFD */
struct kfd_hsa_memory_exception_data memory_exception_data;
struct kfd_hsa_hw_exception_data hw_exception_data;
- }; /* From KFD */
- uint64_t kfd_event_data_ext; /* pointer to an extension structure for future exception types */
- uint32_t event_id; /* to KFD */
- uint32_t pad;
+ /* To and From KFD */
+ struct kfd_hsa_signal_event_data signal_event_data;
+ };
+ __u64 kfd_event_data_ext; /* pointer to an extension structure
+ for future exception types */
+ __u32 event_id; /* to KFD */
+ __u32 pad;
};
struct kfd_ioctl_wait_events_args {
- uint64_t events_ptr; /* pointed to struct kfd_event_data array, to KFD */
- uint32_t num_events; /* to KFD */
- uint32_t wait_for_all; /* to KFD */
- uint32_t timeout; /* to KFD */
- uint32_t wait_result; /* from KFD */
+ __u64 events_ptr; /* pointed to struct
+ kfd_event_data array, to KFD */
+ __u32 num_events; /* to KFD */
+ __u32 wait_for_all; /* to KFD */
+ __u32 timeout; /* to KFD */
+ __u32 wait_result; /* from KFD */
};
struct kfd_ioctl_set_scratch_backing_va_args {
- uint64_t va_addr; /* to KFD */
- uint32_t gpu_id; /* to KFD */
- uint32_t pad;
+ __u64 va_addr; /* to KFD */
+ __u32 gpu_id; /* to KFD */
+ __u32 pad;
};
struct kfd_ioctl_get_tile_config_args {
/* to KFD: pointer to tile array */
- uint64_t tile_config_ptr;
+ __u64 tile_config_ptr;
/* to KFD: pointer to macro tile array */
- uint64_t macro_tile_config_ptr;
+ __u64 macro_tile_config_ptr;
/* to KFD: array size allocated by user mode
* from KFD: array size filled by kernel
*/
- uint32_t num_tile_configs;
+ __u32 num_tile_configs;
/* to KFD: array size allocated by user mode
* from KFD: array size filled by kernel
*/
- uint32_t num_macro_tile_configs;
-
- uint32_t gpu_id; /* to KFD */
- uint32_t gb_addr_config; /* from KFD */
- uint32_t num_banks; /* from KFD */
- uint32_t num_ranks; /* from KFD */
-
- /* struct size can be extended later if needed without breaking ABI compatibility */
+ __u32 num_macro_tile_configs;
+
+ __u32 gpu_id; /* to KFD */
+ __u32 gb_addr_config; /* from KFD */
+ __u32 num_banks; /* from KFD */
+ __u32 num_ranks; /* from KFD */
+ /* struct size can be extended later if needed
+ * without breaking ABI compatibility
+ */
};
struct kfd_ioctl_set_trap_handler_args {
- uint64_t tba_addr; /* to KFD */
- uint64_t tma_addr; /* to KFD */
- uint32_t gpu_id; /* to KFD */
- uint32_t pad;
+ __u64 tba_addr; /* to KFD */
+ __u64 tma_addr; /* to KFD */
+ __u32 gpu_id; /* to KFD */
+ __u32 pad;
};
struct kfd_ioctl_acquire_vm_args {
- uint32_t drm_fd; /* to KFD */
- uint32_t gpu_id; /* to KFD */
+ __u32 drm_fd; /* to KFD */
+ __u32 gpu_id; /* to KFD */
};
/* Allocation flags: memory types */
-#define KFD_IOC_ALLOC_MEM_FLAGS_VRAM (1 << 0)
-#define KFD_IOC_ALLOC_MEM_FLAGS_GTT (1 << 1)
-#define KFD_IOC_ALLOC_MEM_FLAGS_USERPTR (1 << 2)
-#define KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL (1 << 3)
-#define KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP (1 << 4)
+#define KFD_IOC_ALLOC_MEM_FLAGS_VRAM (1 << 0)
+#define KFD_IOC_ALLOC_MEM_FLAGS_GTT (1 << 1)
+#define KFD_IOC_ALLOC_MEM_FLAGS_USERPTR (1 << 2)
+#define KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL (1 << 3)
+#define KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP (1 << 4)
/* Allocation flags: attributes/access options */
-#define KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE (1 << 31)
-#define KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE (1 << 30)
-#define KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC (1 << 29)
-#define KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE (1 << 28)
-#define KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM (1 << 27)
-#define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT (1 << 26)
-#define KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED (1 << 25)
+#define KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE (1 << 31)
+#define KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE (1 << 30)
+#define KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC (1 << 29)
+#define KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE (1 << 28)
+#define KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM (1 << 27)
+#define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT (1 << 26)
+#define KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED (1 << 25)
+#define KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENT (1 << 24)
+#define KFD_IOC_ALLOC_MEM_FLAGS_CONTIGUOUS (1 << 23)
/* Allocate memory for later SVM (shared virtual memory) mapping.
*
@@ -367,12 +427,12 @@ struct kfd_ioctl_acquire_vm_args {
* @flags: memory type and attributes. See KFD_IOC_ALLOC_MEM_FLAGS above
*/
struct kfd_ioctl_alloc_memory_of_gpu_args {
- uint64_t va_addr; /* to KFD */
- uint64_t size; /* to KFD */
- uint64_t handle; /* from KFD */
- uint64_t mmap_offset; /* to KFD (userptr), from KFD (mmap offset) */
- uint32_t gpu_id; /* to KFD */
- uint32_t flags;
+ __u64 va_addr; /* to KFD */
+ __u64 size; /* to KFD */
+ __u64 handle; /* from KFD */
+ __u64 mmap_offset; /* to KFD (userptr), from KFD (mmap offset) */
+ __u32 gpu_id; /* to KFD */
+ __u32 flags;
};
/* Free memory allocated with kfd_ioctl_alloc_memory_of_gpu
@@ -380,13 +440,13 @@ struct kfd_ioctl_alloc_memory_of_gpu_args {
* @handle: memory handle returned by alloc
*/
struct kfd_ioctl_free_memory_of_gpu_args {
- uint64_t handle; /* to KFD */
+ __u64 handle; /* to KFD */
};
/* Map memory to one or more GPUs
*
* @handle: memory handle returned by alloc
- * @device_ids_array_ptr: array of gpu_ids (uint32_t per device)
+ * @device_ids_array_ptr: array of gpu_ids (__u32 per device)
* @n_devices: number of devices in the array
* @n_success: number of devices mapped successfully
*
@@ -399,10 +459,10 @@ struct kfd_ioctl_free_memory_of_gpu_args {
* n_devices.
*/
struct kfd_ioctl_map_memory_to_gpu_args {
- uint64_t handle; /* to KFD */
- uint64_t device_ids_array_ptr; /* to KFD */
- uint32_t n_devices; /* to KFD */
- uint32_t n_success; /* to/from KFD */
+ __u64 handle; /* to KFD */
+ __u64 device_ids_array_ptr; /* to KFD */
+ __u32 n_devices; /* to KFD */
+ __u32 n_success; /* to/from KFD */
};
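For orientation only (not part of the patch): a rough user-space sketch of how these two ioctls chain together, allocating VRAM on one GPU and mapping it there. alloc_and_map_vram() is a hypothetical helper; kfd_fd, gpu_id and the VA range are assumed to come from the usual Thunk/libhsakmt setup, and error handling/cleanup is trimmed.

#include <stdint.h>
#include <sys/ioctl.h>
#include "kfd_ioctl.h"

/* Hypothetical helper: allocate VRAM on one GPU and map it on that same GPU. */
static int alloc_and_map_vram(int kfd_fd, uint32_t gpu_id,
			      uint64_t va_addr, uint64_t size,
			      uint64_t *handle_out)
{
	struct kfd_ioctl_alloc_memory_of_gpu_args alloc = {
		.va_addr = va_addr,
		.size = size,
		.gpu_id = gpu_id,
		.flags = KFD_IOC_ALLOC_MEM_FLAGS_VRAM |
			 KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE,
	};
	struct kfd_ioctl_map_memory_to_gpu_args map = { 0 };

	if (ioctl(kfd_fd, AMDKFD_IOC_ALLOC_MEMORY_OF_GPU, &alloc))
		return -1;

	map.handle = alloc.handle;
	map.device_ids_array_ptr = (uint64_t)(uintptr_t)&gpu_id;	/* one gpu_id */
	map.n_devices = 1;
	if (ioctl(kfd_fd, AMDKFD_IOC_MAP_MEMORY_TO_GPU, &map))
		return -1;

	*handle_out = alloc.handle;
	return 0;
}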
/* Unmap memory from one or more GPUs
@@ -410,10 +470,10 @@ struct kfd_ioctl_map_memory_to_gpu_args {
* same arguments as for mapping
*/
struct kfd_ioctl_unmap_memory_from_gpu_args {
- uint64_t handle; /* to KFD */
- uint64_t device_ids_array_ptr; /* to KFD */
- uint32_t n_devices; /* to KFD */
- uint32_t n_success; /* to/from KFD */
+ __u64 handle; /* to KFD */
+ __u64 device_ids_array_ptr; /* to KFD */
+ __u32 n_devices; /* to KFD */
+ __u32 n_success; /* to/from KFD */
};
/* Allocate GWS for specific queue
@@ -424,49 +484,167 @@ struct kfd_ioctl_unmap_memory_from_gpu_args {
* only support contiguous GWS allocation
*/
struct kfd_ioctl_alloc_queue_gws_args {
- uint32_t queue_id; /* to KFD */
- uint32_t num_gws; /* to KFD */
- uint32_t first_gws; /* from KFD */
- uint32_t pad;
+ __u32 queue_id; /* to KFD */
+ __u32 num_gws; /* to KFD */
+ __u32 first_gws; /* from KFD */
+ __u32 pad;
};
struct kfd_ioctl_get_dmabuf_info_args {
- uint64_t size; /* from KFD */
- uint64_t metadata_ptr; /* to KFD */
- uint32_t metadata_size; /* to KFD (space allocated by user)
- * from KFD (actual metadata size)
- */
- uint32_t gpu_id; /* from KFD */
- uint32_t flags; /* from KFD (KFD_IOC_ALLOC_MEM_FLAGS) */
- uint32_t dmabuf_fd; /* to KFD */
+ __u64 size; /* from KFD */
+ __u64 metadata_ptr; /* to KFD */
+ __u32 metadata_size; /* to KFD (space allocated by user)
+ * from KFD (actual metadata size)
+ */
+ __u32 gpu_id; /* from KFD */
+ __u32 flags; /* from KFD (KFD_IOC_ALLOC_MEM_FLAGS) */
+ __u32 dmabuf_fd; /* to KFD */
};
struct kfd_ioctl_import_dmabuf_args {
- uint64_t va_addr; /* to KFD */
- uint64_t handle; /* from KFD */
- uint32_t gpu_id; /* to KFD */
- uint32_t dmabuf_fd; /* to KFD */
+ __u64 va_addr; /* to KFD */
+ __u64 handle; /* from KFD */
+ __u32 gpu_id; /* to KFD */
+ __u32 dmabuf_fd; /* to KFD */
+};
+
+struct kfd_ioctl_export_dmabuf_args {
+ __u64 handle; /* to KFD */
+ __u32 flags; /* to KFD */
+ __u32 dmabuf_fd; /* from KFD */
};
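The export ioctl added here is what the plugin's designated exporter process uses to hand a BO to its peers. A rough sketch with hypothetical helpers (kfd_fd is an open /dev/kfd descriptor, handle/gpu_id/va_addr come from the checkpointed BO bucket, and the export flags are assumed to be the fd flags for the new dmabuf fd, e.g. O_CLOEXEC):

#include <stdint.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include "kfd_ioctl.h"

/* Exporter side: turn a KFD BO handle into a dmabuf fd that can be
 * passed to other restore tasks over a unix socket. */
static int export_bo(int kfd_fd, uint64_t handle, int *dmabuf_fd)
{
	struct kfd_ioctl_export_dmabuf_args args = {
		.handle = handle,
		.flags = O_CLOEXEC,	/* fd flags for the returned dmabuf fd */
	};

	if (ioctl(kfd_fd, AMDKFD_IOC_EXPORT_DMABUF, &args))
		return -1;
	*dmabuf_fd = args.dmabuf_fd;
	return 0;
}

/* Importer side: re-attach the received dmabuf fd at the original VA. */
static int import_bo(int kfd_fd, uint32_t gpu_id, uint64_t va_addr,
		     int dmabuf_fd, uint64_t *handle)
{
	struct kfd_ioctl_import_dmabuf_args args = {
		.va_addr = va_addr,
		.gpu_id = gpu_id,
		.dmabuf_fd = dmabuf_fd,
	};

	if (ioctl(kfd_fd, AMDKFD_IOC_IMPORT_DMABUF, &args))
		return -1;
	*handle = args.handle;
	return 0;
}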
/*
* KFD SMI(System Management Interface) events
*/
enum kfd_smi_event {
- KFD_SMI_EVENT_NONE = 0, /* not used */
+ KFD_SMI_EVENT_NONE = 0, /* not used */
KFD_SMI_EVENT_VMFAULT = 1, /* event start counting at 1 */
KFD_SMI_EVENT_THERMAL_THROTTLE = 2,
KFD_SMI_EVENT_GPU_PRE_RESET = 3,
KFD_SMI_EVENT_GPU_POST_RESET = 4,
+ KFD_SMI_EVENT_MIGRATE_START = 5,
+ KFD_SMI_EVENT_MIGRATE_END = 6,
+ KFD_SMI_EVENT_PAGE_FAULT_START = 7,
+ KFD_SMI_EVENT_PAGE_FAULT_END = 8,
+ KFD_SMI_EVENT_QUEUE_EVICTION = 9,
+ KFD_SMI_EVENT_QUEUE_RESTORE = 10,
+ KFD_SMI_EVENT_UNMAP_FROM_GPU = 11,
+
+ /*
+	 * max event number, used as a flag bit to get events from all processes;
+	 * this requires super-user permission, otherwise no events will be
+	 * received from any process. Without this flag, only events from the
+	 * calling process are received.
+ */
+ KFD_SMI_EVENT_ALL_PROCESS = 64
+};
+
+/* The reason for the page migration event */
+enum KFD_MIGRATE_TRIGGERS {
+ KFD_MIGRATE_TRIGGER_PREFETCH, /* Prefetch to GPU VRAM or system memory */
+ KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, /* GPU page fault recover */
+ KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU, /* CPU page fault recover */
+ KFD_MIGRATE_TRIGGER_TTM_EVICTION /* TTM eviction */
+};
+
+/* The reason for the user queue eviction event */
+enum KFD_QUEUE_EVICTION_TRIGGERS {
+ KFD_QUEUE_EVICTION_TRIGGER_SVM, /* SVM buffer migration */
+ KFD_QUEUE_EVICTION_TRIGGER_USERPTR, /* userptr movement */
+ KFD_QUEUE_EVICTION_TRIGGER_TTM, /* TTM move buffer */
+ KFD_QUEUE_EVICTION_TRIGGER_SUSPEND, /* GPU suspend */
+ KFD_QUEUE_EVICTION_CRIU_CHECKPOINT, /* CRIU checkpoint */
+ KFD_QUEUE_EVICTION_CRIU_RESTORE /* CRIU restore */
+};
+
+/* The reason for the unmap-buffer-from-GPU event */
+enum KFD_SVM_UNMAP_TRIGGERS {
+ KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY, /* MMU notifier CPU buffer movement */
+ KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE,/* MMU notifier page migration */
+ KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU /* Unmap to free the buffer */
};
-#define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i)-1))
-#define KFD_SMI_EVENT_MSG_SIZE 96
+#define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1))
+#define KFD_SMI_EVENT_MSG_SIZE 96
struct kfd_ioctl_smi_events_args {
- uint32_t gpuid; /* to KFD */
- uint32_t anon_fd; /* from KFD */
+ __u32 gpuid; /* to KFD */
+ __u32 anon_fd; /* from KFD */
};
+/*
+ * SVM event tracing via SMI system management interface
+ *
+ * Open event file descriptor
+ * use ioctl AMDKFD_IOC_SMI_EVENTS, pass in the gpuid, and an anonymous file
+ * descriptor for receiving SMI events is returned.
+ * If called with sudo permission, the file descriptor can be used to receive
+ * SVM events from all processes; otherwise it only receives SVM events of the
+ * calling process.
+ *
+ * To enable the SVM event
+ * Write a KFD_SMI_EVENT_MASK_FROM_INDEX(event) bitmap mask to the event file
+ * descriptor to start recording the event to the kfifo; combine bitmap masks
+ * for multiple events. A new event mask overwrites the previous event mask.
+ * The KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_ALL_PROCESS) bit requires
+ * sudo permission to receive SVM events from all processes.
+ *
+ * To receive the event
+ * Application can poll file descriptor to wait for the events, then read event
+ * from the file into a buffer. Each event is one line string message, starting
+ * with the event id, then the event specific information.
+ *
+ * To decode event information
+ * The following event format string macro can be used with sscanf to decode
+ * the specific event information.
+ * event triggers: the reason to generate the event, defined as enum for unmap,
+ * eviction and migrate events.
+ * node, from, to, prefetch_loc, preferred_loc: GPU ID, or 0 for system memory.
+ * addr: user mode address, in pages
+ * size: in pages
+ * pid: the process ID to generate the event
+ * ns: timestamp in nanosecond-resolution, starts at system boot time but
+ * stops during suspend
+ * migrate_update: GPU page fault is recovered by 'M' for migrate, 'U' for update
+ * rw: 'W' for write page fault, 'R' for read page fault
+ * rescheduled: 'R' if the queue restore failed and rescheduled to try again
+ * error_code: migrate failure error code, 0 if no error
+ */
+#define KFD_EVENT_FMT_UPDATE_GPU_RESET(reset_seq_num, reset_cause)\
+ "%x %s\n", (reset_seq_num), (reset_cause)
+
+#define KFD_EVENT_FMT_THERMAL_THROTTLING(bitmask, counter)\
+ "%llx:%llx\n", (bitmask), (counter)
+
+#define KFD_EVENT_FMT_VMFAULT(pid, task_name)\
+ "%x:%s\n", (pid), (task_name)
+
+#define KFD_EVENT_FMT_PAGEFAULT_START(ns, pid, addr, node, rw)\
+ "%lld -%d @%lx(%x) %c\n", (ns), (pid), (addr), (node), (rw)
+
+#define KFD_EVENT_FMT_PAGEFAULT_END(ns, pid, addr, node, migrate_update)\
+ "%lld -%d @%lx(%x) %c\n", (ns), (pid), (addr), (node), (migrate_update)
+
+#define KFD_EVENT_FMT_MIGRATE_START(ns, pid, start, size, from, to, prefetch_loc,\
+ preferred_loc, migrate_trigger)\
+ "%lld -%d @%lx(%lx) %x->%x %x:%x %d\n", (ns), (pid), (start), (size),\
+ (from), (to), (prefetch_loc), (preferred_loc), (migrate_trigger)
+
+#define KFD_EVENT_FMT_MIGRATE_END(ns, pid, start, size, from, to, migrate_trigger, error_code) \
+ "%lld -%d @%lx(%lx) %x->%x %d %d\n", (ns), (pid), (start), (size),\
+ (from), (to), (migrate_trigger), (error_code)
+
+#define KFD_EVENT_FMT_QUEUE_EVICTION(ns, pid, node, evict_trigger)\
+ "%lld -%d %x %d\n", (ns), (pid), (node), (evict_trigger)
+
+#define KFD_EVENT_FMT_QUEUE_RESTORE(ns, pid, node, rescheduled)\
+ "%lld -%d %x %c\n", (ns), (pid), (node), (rescheduled)
+
+#define KFD_EVENT_FMT_UNMAP_FROM_GPU(ns, pid, addr, size, node, unmap_trigger)\
+ "%lld -%d @%lx(%lx) %x %d\n", (ns), (pid), (addr), (size),\
+ (node), (unmap_trigger)
+
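Not part of the patch, but to make the workflow described in the comment above concrete, a small hypothetical sketch of the subscribe/poll/read loop. It assumes the event mask is written to the anonymous fd as a raw 64-bit value and simply prints each event line as received:

#include <poll.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include "kfd_ioctl.h"

/* Hypothetical helper: subscribe to migration events on one GPU and print them. */
static int watch_migrations(int kfd_fd, uint32_t gpuid)
{
	struct kfd_ioctl_smi_events_args args = { .gpuid = gpuid };
	struct pollfd pfd;
	uint64_t mask;
	char buf[512];
	ssize_t len;

	if (ioctl(kfd_fd, AMDKFD_IOC_SMI_EVENTS, &args))
		return -1;

	mask = KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_MIGRATE_START) |
	       KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_MIGRATE_END);
	if (write(args.anon_fd, &mask, sizeof(mask)) != sizeof(mask))
		return -1;

	pfd.fd = args.anon_fd;
	pfd.events = POLLIN;
	while (poll(&pfd, 1, -1) > 0) {
		len = read(args.anon_fd, buf, sizeof(buf) - 1);
		if (len <= 0)
			break;
		buf[len] = '\0';
		printf("%s", buf);	/* "<event id> <event specific info>\n" */
	}
	return 0;
}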
/**************************************************************************************************
* CRIU IOCTLs (Checkpoint Restore In Userspace)
*
@@ -503,40 +681,43 @@ enum kfd_criu_op {
* @priv_data_size: [in/out] Size of priv_data in bytes
* @num_devices: [in/out] Number of GPUs used by process. Size of @devices array.
* @num_bos [in/out] Number of BOs used by process. Size of @bos array.
- * @num_objects: [in/out] Number of objects used by process. Objects are opaque to user application.
+ * @num_objects: [in/out] Number of objects used by process. Objects are opaque to
+ * user application.
* @pid: [in/out] PID of the process being checkpointed
* @op [in] Type of operation (kfd_criu_op)
*
* Return: 0 on success, -errno on failure
*/
struct kfd_ioctl_criu_args {
- uint64_t devices; /* Used during ops: CHECKPOINT, RESTORE */
- uint64_t bos; /* Used during ops: CHECKPOINT, RESTORE */
- uint64_t priv_data; /* Used during ops: CHECKPOINT, RESTORE */
- uint64_t priv_data_size; /* Used during ops: PROCESS_INFO, RESTORE */
- uint32_t num_devices; /* Used during ops: PROCESS_INFO, RESTORE */
- uint32_t num_bos; /* Used during ops: PROCESS_INFO, RESTORE */
- uint32_t num_objects; /* Used during ops: PROCESS_INFO, RESTORE */
- uint32_t pid; /* Used during ops: PROCESS_INFO, RESUME */
- uint32_t op;
+ __u64 devices; /* Used during ops: CHECKPOINT, RESTORE */
+ __u64 bos; /* Used during ops: CHECKPOINT, RESTORE */
+ __u64 priv_data; /* Used during ops: CHECKPOINT, RESTORE */
+ __u64 priv_data_size; /* Used during ops: PROCESS_INFO, RESTORE */
+ __u32 num_devices; /* Used during ops: PROCESS_INFO, RESTORE */
+ __u32 num_bos; /* Used during ops: PROCESS_INFO, RESTORE */
+ __u32 num_objects; /* Used during ops: PROCESS_INFO, RESTORE */
+ __u32 pid; /* Used during ops: PROCESS_INFO, RESUME */
+ __u32 op;
+ __u8 is_retry: 1;
};
struct kfd_criu_device_bucket {
- uint32_t user_gpu_id;
- uint32_t actual_gpu_id;
- uint32_t drm_fd;
- uint32_t pad;
+ __u32 user_gpu_id;
+ __u32 actual_gpu_id;
+ __u32 drm_fd;
+ __u32 pad;
};
struct kfd_criu_bo_bucket {
- uint64_t addr;
- uint64_t size;
- uint64_t offset;
- uint64_t restored_offset; /* During restore, updated offset for BO */
- uint32_t gpu_id; /* This is the user_gpu_id */
- uint32_t alloc_flags;
- uint32_t dmabuf_fd;
- uint32_t pad;
+ __u64 addr;
+ __u64 size;
+ __u64 offset;
+ __u64 restored_offset; /* During restore, updated offset for BO */
+ __u32 gpu_id; /* This is the user_gpu_id */
+ __u32 alloc_flags;
+ __u32 dmabuf_fd;
+ __u8 is_import: 1;
+ __u8 skip: 1;
};
/* CRIU IOCTLs - END */
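For context, a rough sketch of the PROCESS_INFO -> CHECKPOINT handshake a caller of AMDKFD_IOC_CRIU_OP goes through. criu_checkpoint() is a hypothetical helper, not the plugin's actual code; on a restore retry the same args structure also carries the is_retry bit added by this patch. Error handling and cleanup are trimmed.

#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include "kfd_ioctl.h"

/* Hypothetical helper: query bucket sizes, allocate them, then checkpoint. */
static int criu_checkpoint(int kfd_fd, uint32_t pid)
{
	struct kfd_ioctl_criu_args args = {
		.pid = pid,
		.op = KFD_CRIU_OP_PROCESS_INFO,
	};
	struct kfd_criu_device_bucket *devs;
	struct kfd_criu_bo_bucket *bos;
	void *priv;

	/* Step 1: PROCESS_INFO fills num_devices, num_bos, priv_data_size. */
	if (ioctl(kfd_fd, AMDKFD_IOC_CRIU_OP, &args))
		return -1;

	devs = calloc(args.num_devices, sizeof(*devs));
	bos = calloc(args.num_bos, sizeof(*bos));
	priv = malloc(args.priv_data_size);
	if (!devs || !bos || !priv)
		return -1;

	/* Step 2: CHECKPOINT fills the buckets and private data. */
	args.devices = (uint64_t)(uintptr_t)devs;
	args.bos = (uint64_t)(uintptr_t)bos;
	args.priv_data = (uint64_t)(uintptr_t)priv;
	args.op = KFD_CRIU_OP_CHECKPOINT;

	return ioctl(kfd_fd, AMDKFD_IOC_CRIU_OP, &args);
}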
@@ -552,15 +733,19 @@ enum kfd_mmio_remap {
/* Guarantee host access to memory */
#define KFD_IOCTL_SVM_FLAG_HOST_ACCESS 0x00000001
/* Fine grained coherency between all devices with access */
-#define KFD_IOCTL_SVM_FLAG_COHERENT 0x00000002
+#define KFD_IOCTL_SVM_FLAG_COHERENT 0x00000002
/* Use any GPU in same hive as preferred device */
-#define KFD_IOCTL_SVM_FLAG_HIVE_LOCAL 0x00000004
+#define KFD_IOCTL_SVM_FLAG_HIVE_LOCAL 0x00000004
/* GPUs only read, allows replication */
-#define KFD_IOCTL_SVM_FLAG_GPU_RO 0x00000008
+#define KFD_IOCTL_SVM_FLAG_GPU_RO 0x00000008
/* Allow execution on GPU */
-#define KFD_IOCTL_SVM_FLAG_GPU_EXEC 0x00000010
+#define KFD_IOCTL_SVM_FLAG_GPU_EXEC 0x00000010
/* GPUs mostly read, may allow similar optimizations as RO, but writes fault */
-#define KFD_IOCTL_SVM_FLAG_GPU_READ_MOSTLY 0x00000020
+#define KFD_IOCTL_SVM_FLAG_GPU_READ_MOSTLY 0x00000020
+/* Keep GPU memory mapping always valid as if XNACK is disabled */
+#define KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED 0x00000040
+/* Fine grained coherency between all devices using device-scope atomics */
+#define KFD_IOCTL_SVM_FLAG_EXT_COHERENT 0x00000080
/**
* kfd_ioctl_svm_op - SVM ioctl operations
@@ -568,7 +753,10 @@ enum kfd_mmio_remap {
* @KFD_IOCTL_SVM_OP_SET_ATTR: Modify one or more attributes
* @KFD_IOCTL_SVM_OP_GET_ATTR: Query one or more attributes
*/
-enum kfd_ioctl_svm_op { KFD_IOCTL_SVM_OP_SET_ATTR, KFD_IOCTL_SVM_OP_GET_ATTR };
+enum kfd_ioctl_svm_op {
+ KFD_IOCTL_SVM_OP_SET_ATTR,
+ KFD_IOCTL_SVM_OP_GET_ATTR
+};
/** kfd_ioctl_svm_location - Enum for preferred and prefetch locations
*
@@ -576,7 +764,10 @@ enum kfd_ioctl_svm_op { KFD_IOCTL_SVM_OP_SET_ATTR, KFD_IOCTL_SVM_OP_GET_ATTR };
* Below definitions are used for system memory or for leaving the preferred
* location unspecified.
*/
-enum kfd_ioctl_svm_location { KFD_IOCTL_SVM_LOCATION_SYSMEM = 0, KFD_IOCTL_SVM_LOCATION_UNDEFINED = 0xffffffff };
+enum kfd_ioctl_svm_location {
+ KFD_IOCTL_SVM_LOCATION_SYSMEM = 0,
+ KFD_IOCTL_SVM_LOCATION_UNDEFINED = 0xffffffff
+};
/**
* kfd_ioctl_svm_attr_type - SVM attribute types
@@ -616,8 +807,8 @@ enum kfd_ioctl_svm_attr_type {
* @value: attribute value
*/
struct kfd_ioctl_svm_attribute {
- uint32_t type;
- uint32_t value;
+ __u32 type;
+ __u32 value;
};
/**
@@ -659,12 +850,12 @@ struct kfd_ioctl_svm_attribute {
* attribute type to indicate the access for the specified GPU.
*/
struct kfd_ioctl_svm_args {
- uint64_t start_addr;
- uint64_t size;
- uint32_t op;
- uint32_t nattr;
+ __u64 start_addr;
+ __u64 size;
+ __u32 op;
+ __u32 nattr;
/* Variable length array of attributes */
- struct kfd_ioctl_svm_attribute attrs[0];
+ struct kfd_ioctl_svm_attribute attrs[];
};
/**
@@ -705,81 +896,773 @@ struct kfd_ioctl_set_xnack_mode_args {
__s32 xnack_enabled;
};
-#define AMDKFD_IOCTL_BASE 'K'
-#define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr)
-#define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type)
-#define AMDKFD_IOW(nr, type) _IOW(AMDKFD_IOCTL_BASE, nr, type)
-#define AMDKFD_IOWR(nr, type) _IOWR(AMDKFD_IOCTL_BASE, nr, type)
+/* Wave launch override modes */
+enum kfd_dbg_trap_override_mode {
+ KFD_DBG_TRAP_OVERRIDE_OR = 0,
+ KFD_DBG_TRAP_OVERRIDE_REPLACE = 1
+};
+
+/* Wave launch overrides */
+enum kfd_dbg_trap_mask {
+ KFD_DBG_TRAP_MASK_FP_INVALID = 1,
+ KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL = 2,
+ KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO = 4,
+ KFD_DBG_TRAP_MASK_FP_OVERFLOW = 8,
+ KFD_DBG_TRAP_MASK_FP_UNDERFLOW = 16,
+ KFD_DBG_TRAP_MASK_FP_INEXACT = 32,
+ KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO = 64,
+ KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH = 128,
+ KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION = 256,
+ KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START = (1 << 30),
+ KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END = (1 << 31)
+};
+
+/* Wave launch modes */
+enum kfd_dbg_trap_wave_launch_mode {
+ KFD_DBG_TRAP_WAVE_LAUNCH_MODE_NORMAL = 0,
+ KFD_DBG_TRAP_WAVE_LAUNCH_MODE_HALT = 1,
+ KFD_DBG_TRAP_WAVE_LAUNCH_MODE_DEBUG = 3
+};
+
+/* Address watch modes */
+enum kfd_dbg_trap_address_watch_mode {
+ KFD_DBG_TRAP_ADDRESS_WATCH_MODE_READ = 0,
+ KFD_DBG_TRAP_ADDRESS_WATCH_MODE_NONREAD = 1,
+ KFD_DBG_TRAP_ADDRESS_WATCH_MODE_ATOMIC = 2,
+ KFD_DBG_TRAP_ADDRESS_WATCH_MODE_ALL = 3
+};
+
+/* Additional wave settings */
+enum kfd_dbg_trap_flags {
+ KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP = 1,
+ KFD_DBG_TRAP_FLAG_SINGLE_ALU_OP = 2,
+};
+
+/* Trap exceptions */
+enum kfd_dbg_trap_exception_code {
+ EC_NONE = 0,
+ /* per queue */
+ EC_QUEUE_WAVE_ABORT = 1,
+ EC_QUEUE_WAVE_TRAP = 2,
+ EC_QUEUE_WAVE_MATH_ERROR = 3,
+ EC_QUEUE_WAVE_ILLEGAL_INSTRUCTION = 4,
+ EC_QUEUE_WAVE_MEMORY_VIOLATION = 5,
+ EC_QUEUE_WAVE_APERTURE_VIOLATION = 6,
+ EC_QUEUE_PACKET_DISPATCH_DIM_INVALID = 16,
+ EC_QUEUE_PACKET_DISPATCH_GROUP_SEGMENT_SIZE_INVALID = 17,
+ EC_QUEUE_PACKET_DISPATCH_CODE_INVALID = 18,
+ EC_QUEUE_PACKET_RESERVED = 19,
+ EC_QUEUE_PACKET_UNSUPPORTED = 20,
+ EC_QUEUE_PACKET_DISPATCH_WORK_GROUP_SIZE_INVALID = 21,
+ EC_QUEUE_PACKET_DISPATCH_REGISTER_INVALID = 22,
+ EC_QUEUE_PACKET_VENDOR_UNSUPPORTED = 23,
+ EC_QUEUE_PREEMPTION_ERROR = 30,
+ EC_QUEUE_NEW = 31,
+ /* per device */
+ EC_DEVICE_QUEUE_DELETE = 32,
+ EC_DEVICE_MEMORY_VIOLATION = 33,
+ EC_DEVICE_RAS_ERROR = 34,
+ EC_DEVICE_FATAL_HALT = 35,
+ EC_DEVICE_NEW = 36,
+ /* per process */
+ EC_PROCESS_RUNTIME = 48,
+ EC_PROCESS_DEVICE_REMOVE = 49,
+ EC_MAX
+};
+
+/* Mask generated by ecode in kfd_dbg_trap_exception_code */
+#define KFD_EC_MASK(ecode) (1ULL << (ecode - 1))
+
+/* Masks for exception code type checks below */
+#define KFD_EC_MASK_QUEUE (KFD_EC_MASK(EC_QUEUE_WAVE_ABORT) | \
+ KFD_EC_MASK(EC_QUEUE_WAVE_TRAP) | \
+ KFD_EC_MASK(EC_QUEUE_WAVE_MATH_ERROR) | \
+ KFD_EC_MASK(EC_QUEUE_WAVE_ILLEGAL_INSTRUCTION) | \
+ KFD_EC_MASK(EC_QUEUE_WAVE_MEMORY_VIOLATION) | \
+ KFD_EC_MASK(EC_QUEUE_WAVE_APERTURE_VIOLATION) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_DIM_INVALID) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_GROUP_SEGMENT_SIZE_INVALID) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_CODE_INVALID) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_RESERVED) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_UNSUPPORTED) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_WORK_GROUP_SIZE_INVALID) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_REGISTER_INVALID) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_VENDOR_UNSUPPORTED) | \
+ KFD_EC_MASK(EC_QUEUE_PREEMPTION_ERROR) | \
+ KFD_EC_MASK(EC_QUEUE_NEW))
+#define KFD_EC_MASK_DEVICE (KFD_EC_MASK(EC_DEVICE_QUEUE_DELETE) | \
+ KFD_EC_MASK(EC_DEVICE_RAS_ERROR) | \
+ KFD_EC_MASK(EC_DEVICE_FATAL_HALT) | \
+ KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION) | \
+ KFD_EC_MASK(EC_DEVICE_NEW))
+#define KFD_EC_MASK_PROCESS (KFD_EC_MASK(EC_PROCESS_RUNTIME) | \
+ KFD_EC_MASK(EC_PROCESS_DEVICE_REMOVE))
+#define KFD_EC_MASK_PACKET (KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_DIM_INVALID) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_GROUP_SEGMENT_SIZE_INVALID) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_CODE_INVALID) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_RESERVED) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_UNSUPPORTED) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_WORK_GROUP_SIZE_INVALID) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_REGISTER_INVALID) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_VENDOR_UNSUPPORTED))
+
+/* Checks for exception code types for KFD search */
+#define KFD_DBG_EC_IS_VALID(ecode) (ecode > EC_NONE && ecode < EC_MAX)
+#define KFD_DBG_EC_TYPE_IS_QUEUE(ecode) \
+ (KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_QUEUE))
+#define KFD_DBG_EC_TYPE_IS_DEVICE(ecode) \
+ (KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_DEVICE))
+#define KFD_DBG_EC_TYPE_IS_PROCESS(ecode) \
+ (KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_PROCESS))
+#define KFD_DBG_EC_TYPE_IS_PACKET(ecode) \
+ (KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_PACKET))
+
+
+/* Runtime enable states */
+enum kfd_dbg_runtime_state {
+ DEBUG_RUNTIME_STATE_DISABLED = 0,
+ DEBUG_RUNTIME_STATE_ENABLED = 1,
+ DEBUG_RUNTIME_STATE_ENABLED_BUSY = 2,
+ DEBUG_RUNTIME_STATE_ENABLED_ERROR = 3
+};
+
+/* Runtime enable status */
+struct kfd_runtime_info {
+ __u64 r_debug;
+ __u32 runtime_state;
+ __u32 ttmp_setup;
+};
+
+/* Enable modes for runtime enable */
+#define KFD_RUNTIME_ENABLE_MODE_ENABLE_MASK 1
+#define KFD_RUNTIME_ENABLE_MODE_TTMP_SAVE_MASK 2
+
+/**
+ * kfd_ioctl_runtime_enable_args - Arguments for runtime enable
+ *
+ * Coordinates debug exception signalling and debug device enablement with runtime.
+ *
+ * @r_debug - pointer to user struct for sharing information between ROCr and the debugger
+ * @mode_mask - mask to set mode
+ * KFD_RUNTIME_ENABLE_MODE_ENABLE_MASK - enable runtime for debugging, otherwise disable
+ * KFD_RUNTIME_ENABLE_MODE_TTMP_SAVE_MASK - enable trap temporary setup (ignore on disable)
+ * @capabilities_mask - mask to notify runtime on what KFD supports
+ *
+ * Return - 0 on SUCCESS.
+ * - EBUSY if runtime enable call already pending.
+ * - EEXIST if user queues already active prior to call.
+ * If process is debug enabled, runtime enable will enable debug devices and
+ * wait for debugger process to send runtime exception EC_PROCESS_RUNTIME
+ * to unblock - see kfd_ioctl_dbg_trap_args.
+ *
+ */
+struct kfd_ioctl_runtime_enable_args {
+ __u64 r_debug;
+ __u32 mode_mask;
+ __u32 capabilities_mask;
+};
+
+/* Queue information */
+struct kfd_queue_snapshot_entry {
+ __u64 exception_status;
+ __u64 ring_base_address;
+ __u64 write_pointer_address;
+ __u64 read_pointer_address;
+ __u64 ctx_save_restore_address;
+ __u32 queue_id;
+ __u32 gpu_id;
+ __u32 ring_size;
+ __u32 queue_type;
+ __u32 ctx_save_restore_area_size;
+ __u32 reserved;
+};
+
+/* Queue status return for suspend/resume */
+#define KFD_DBG_QUEUE_ERROR_BIT 30
+#define KFD_DBG_QUEUE_INVALID_BIT 31
+#define KFD_DBG_QUEUE_ERROR_MASK (1 << KFD_DBG_QUEUE_ERROR_BIT)
+#define KFD_DBG_QUEUE_INVALID_MASK (1 << KFD_DBG_QUEUE_INVALID_BIT)
+
+/* Context save area header information */
+struct kfd_context_save_area_header {
+ struct {
+ __u32 control_stack_offset;
+ __u32 control_stack_size;
+ __u32 wave_state_offset;
+ __u32 wave_state_size;
+ } wave_state;
+ __u32 debug_offset;
+ __u32 debug_size;
+ __u64 err_payload_addr;
+ __u32 err_event_id;
+ __u32 reserved1;
+};
+
+/*
+ * Debug operations
+ *
+ * For specifics on usage and return values, see documentation per operation
+ * below. Otherwise, generic error returns apply:
+ * - ESRCH if the process to debug does not exist.
+ *
+ * - EINVAL (with KFD_IOC_DBG_TRAP_ENABLE exempt) if operation
+ * KFD_IOC_DBG_TRAP_ENABLE has not succeeded prior.
+ * Also returns this error if GPU hardware scheduling is not supported.
+ *
+ * - EPERM (with KFD_IOC_DBG_TRAP_DISABLE exempt) if target process is not
+ * PTRACE_ATTACHED. KFD_IOC_DBG_TRAP_DISABLE is exempt to allow
+ * clean up of debug mode as long as process is debug enabled.
+ *
+ * - EACCES if any DBG_HW_OP (debug hardware operation) is requested when
+ * AMDKFD_IOC_RUNTIME_ENABLE has not succeeded prior.
+ *
+ * - ENODEV if any GPU does not support debugging on a DBG_HW_OP call.
+ *
+ * - Other errors may be returned when a DBG_HW_OP occurs while the GPU
+ * is in a fatal state.
+ *
+ */
+enum kfd_dbg_trap_operations {
+ KFD_IOC_DBG_TRAP_ENABLE = 0,
+ KFD_IOC_DBG_TRAP_DISABLE = 1,
+ KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT = 2,
+ KFD_IOC_DBG_TRAP_SET_EXCEPTIONS_ENABLED = 3,
+ KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE = 4, /* DBG_HW_OP */
+ KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE = 5, /* DBG_HW_OP */
+ KFD_IOC_DBG_TRAP_SUSPEND_QUEUES = 6, /* DBG_HW_OP */
+ KFD_IOC_DBG_TRAP_RESUME_QUEUES = 7, /* DBG_HW_OP */
+ KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH = 8, /* DBG_HW_OP */
+ KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH = 9, /* DBG_HW_OP */
+ KFD_IOC_DBG_TRAP_SET_FLAGS = 10,
+ KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT = 11,
+ KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO = 12,
+ KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT = 13,
+ KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT = 14
+};
+
+/**
+ * kfd_ioctl_dbg_trap_enable_args
+ *
+ * Arguments for KFD_IOC_DBG_TRAP_ENABLE.
+ *
+ * Enables debug session for target process. Call @op KFD_IOC_DBG_TRAP_DISABLE in
+ * kfd_ioctl_dbg_trap_args to disable debug session.
+ *
+ * @exception_mask (IN) - exceptions to raise to the debugger
+ * @rinfo_ptr (IN) - pointer to runtime info buffer (see kfd_runtime_info)
+ * @rinfo_size (IN/OUT) - size of runtime info buffer in bytes
+ * @dbg_fd (IN) - fd that KFD will use to notify the debugger of raised
+ * exceptions set in exception_mask.
+ *
+ * Generic errors apply (see kfd_dbg_trap_operations).
+ * Return - 0 on SUCCESS.
+ * Copies KFD saved kfd_runtime_info to @rinfo_ptr on enable.
+ * Size of kfd_runtime saved by the KFD returned to @rinfo_size.
+ * - EBADF if KFD cannot get a reference to dbg_fd.
+ * - EFAULT if KFD cannot copy runtime info to rinfo_ptr.
+ * - EINVAL if target process is already debug enabled.
+ *
+ */
+struct kfd_ioctl_dbg_trap_enable_args {
+ __u64 exception_mask;
+ __u64 rinfo_ptr;
+ __u32 rinfo_size;
+ __u32 dbg_fd;
+};
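To illustrate how the kfd_ioctl_dbg_trap_args union defined further down gets used with this operation, a minimal hypothetical enable sketch. It assumes the target pid is already PTRACE-attached and that notify_fd is whatever fd the debugger hands KFD for exception notification; error handling is trimmed.

#include <stdint.h>
#include <sys/ioctl.h>
#include "kfd_ioctl.h"

/* Hypothetical sketch: start a debug session and fetch the saved runtime info. */
static int debug_enable(int kfd_fd, uint32_t pid, int notify_fd,
			struct kfd_runtime_info *rinfo)
{
	struct kfd_ioctl_dbg_trap_args args = {
		.pid = pid,
		.op = KFD_IOC_DBG_TRAP_ENABLE,
	};

	args.enable.exception_mask = KFD_EC_MASK(EC_QUEUE_NEW) |
				     KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION);
	args.enable.rinfo_ptr = (uint64_t)(uintptr_t)rinfo;
	args.enable.rinfo_size = sizeof(*rinfo);
	args.enable.dbg_fd = notify_fd;

	return ioctl(kfd_fd, AMDKFD_IOC_DBG_TRAP, &args);
}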
+
+/**
+ * kfd_ioctl_dbg_trap_send_runtime_event_args
+ *
+ *
+ * Arguments for KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT.
+ * Raises exceptions to runtime.
+ *
+ * @exception_mask (IN) - exceptions to raise to runtime
+ * @gpu_id (IN) - target device id
+ * @queue_id (IN) - target queue id
+ *
+ * Generic errors apply (see kfd_dbg_trap_operations).
+ * Return - 0 on SUCCESS.
+ * - ENODEV if gpu_id not found.
+ * If exception_mask contains EC_PROCESS_RUNTIME, unblocks pending
+ * AMDKFD_IOC_RUNTIME_ENABLE call - see kfd_ioctl_runtime_enable_args.
+ * All other exceptions are raised to runtime through err_payload_addr.
+ * See kfd_context_save_area_header.
+ */
+struct kfd_ioctl_dbg_trap_send_runtime_event_args {
+ __u64 exception_mask;
+ __u32 gpu_id;
+ __u32 queue_id;
+};
+
+/**
+ * kfd_ioctl_dbg_trap_set_exceptions_enabled_args
+ *
+ * Arguments for KFD_IOC_DBG_TRAP_SET_EXCEPTIONS_ENABLED
+ * Set new exceptions to be raised to the debugger.
+ *
+ * @exception_mask (IN) - new exceptions to raise to the debugger
+ *
+ * Generic errors apply (see kfd_dbg_trap_operations).
+ * Return - 0 on SUCCESS.
+ */
+struct kfd_ioctl_dbg_trap_set_exceptions_enabled_args {
+ __u64 exception_mask;
+};
+
+/**
+ * kfd_ioctl_dbg_trap_set_wave_launch_override_args
+ *
+ * Arguments for KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE
+ * Enable HW exceptions to raise trap.
+ *
+ * @override_mode (IN) - see kfd_dbg_trap_override_mode
+ * @enable_mask (IN/OUT) - reference kfd_dbg_trap_mask.
+ * IN is the override modes requested to be enabled.
+ * OUT is referenced in Return below.
+ * @support_request_mask (IN/OUT) - reference kfd_dbg_trap_mask.
+ * IN is the override modes requested for support check.
+ * OUT is referenced in Return below.
+ *
+ * Generic errors apply (see kfd_dbg_trap_operations).
+ * Return - 0 on SUCCESS.
+ * Previous enablement is returned in @enable_mask.
+ * Actual override support is returned in @support_request_mask.
+ * - EINVAL if override mode is not supported.
+ * - EACCES if trap support requested is not actually supported.
+ * i.e. enable_mask (IN) is not a subset of support_request_mask (OUT).
+ * Otherwise it is considered a generic error (see kfd_dbg_trap_operations).
+ */
+struct kfd_ioctl_dbg_trap_set_wave_launch_override_args {
+ __u32 override_mode;
+ __u32 enable_mask;
+ __u32 support_request_mask;
+ __u32 pad;
+};
+
+/**
+ * kfd_ioctl_dbg_trap_set_wave_launch_mode_args
+ *
+ * Arguments for KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE
+ * Set wave launch mode.
+ *
+ * @mode (IN) - see kfd_dbg_trap_wave_launch_mode
+ *
+ * Generic errors apply (see kfd_dbg_trap_operations).
+ * Return - 0 on SUCCESS.
+ */
+struct kfd_ioctl_dbg_trap_set_wave_launch_mode_args {
+ __u32 launch_mode;
+ __u32 pad;
+};
+
+/**
+ * kfd_ioctl_dbg_trap_suspend_queues_args
+ *
+ * Arguments for KFD_IOC_DBG_TRAP_SUSPEND_QUEUES
+ * Suspend queues.
+ *
+ * @exception_mask (IN) - raised exceptions to clear
+ * @queue_array_ptr (IN) - pointer to array of queue ids (u32 per queue id)
+ * to suspend
+ * @num_queues (IN) - number of queues to suspend in @queue_array_ptr
+ * @grace_period (IN) - wave time allowance before preemption
+ * per 1K GPU clock cycle unit
+ *
+ * Generic errors apply (see kfd_dbg_trap_operations).
+ * Destruction of a suspended queue is blocked until the queue is
+ * resumed. This allows the debugger to access queue information and
+ * its context save area without running into a race condition on
+ * queue destruction.
+ * Automatically copies per queue context save area header information
+ * into the save area base
+ * (see kfd_queue_snapshot_entry and kfd_context_save_area_header).
+ *
+ * Return - Number of queues suspended on SUCCESS.
+ * . KFD_DBG_QUEUE_ERROR_MASK and KFD_DBG_QUEUE_INVALID_MASK masked
+ * for each queue id in @queue_array_ptr array reports unsuccessful
+ * suspend reason.
+ * KFD_DBG_QUEUE_ERROR_MASK = HW failure.
+ * KFD_DBG_QUEUE_INVALID_MASK = queue does not exist, is new or
+ * is being destroyed.
+ */
+struct kfd_ioctl_dbg_trap_suspend_queues_args {
+ __u64 exception_mask;
+ __u64 queue_array_ptr;
+ __u32 num_queues;
+ __u32 grace_period;
+};
+
+/**
+ * kfd_ioctl_dbg_trap_resume_queues_args
+ *
+ * Arguments for KFD_IOC_DBG_TRAP_RESUME_QUEUES
+ * Resume queues.
+ *
+ * @queue_array_ptr (IN) - pointer to array of queue ids (u32 per queue id)
+ * to resume
+ * @num_queues (IN) - number of queues to resume in @queue_array_ptr
+ *
+ * Generic errors apply (see kfd_dbg_trap_operations).
+ * Return - Number of queues resumed on SUCCESS.
+ * KFD_DBG_QUEUE_ERROR_MASK and KFD_DBG_QUEUE_INVALID_MASK mask
+ * for each queue id in @queue_array_ptr array reports unsuccessful
+ * resume reason.
+ * KFD_DBG_QUEUE_ERROR_MASK = HW failure.
+ * KFD_DBG_QUEUE_INVALID_MASK = queue does not exist.
+ */
+struct kfd_ioctl_dbg_trap_resume_queues_args {
+ __u64 queue_array_ptr;
+ __u32 num_queues;
+ __u32 pad;
+};
+
+/**
+ * kfd_ioctl_dbg_trap_set_node_address_watch_args
+ *
+ * Arguments for KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH
+ * Sets address watch for device.
+ *
+ * @address (IN) - watch address to set
+ * @mode (IN) - see kfd_dbg_trap_address_watch_mode
+ * @mask (IN) - watch address mask
+ * @gpu_id (IN) - target gpu to set watch point
+ * @id (OUT) - watch id allocated
+ *
+ * Generic errors apply (see kfd_dbg_trap_operations).
+ * Return - 0 on SUCCESS.
+ * Allocated watch ID returned to @id.
+ * - ENODEV if gpu_id not found.
+ * - ENOMEM if watch IDs cannot be allocated
+ */
+struct kfd_ioctl_dbg_trap_set_node_address_watch_args {
+ __u64 address;
+ __u32 mode;
+ __u32 mask;
+ __u32 gpu_id;
+ __u32 id;
+};
+
+/**
+ * kfd_ioctl_dbg_trap_clear_node_address_watch_args
+ *
+ * Arguments for KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH
+ * Clear address watch for device.
+ *
+ * @gpu_id (IN) - target device to clear watch point
+ * @id (IN) - allocated watch id to clear
+ *
+ * Generic errors apply (see kfd_dbg_trap_operations).
+ * Return - 0 on SUCCESS.
+ * - ENODEV if gpu_id not found.
+ * - EINVAL if watch ID has not been allocated.
+ */
+struct kfd_ioctl_dbg_trap_clear_node_address_watch_args {
+ __u32 gpu_id;
+ __u32 id;
+};
+
+/**
+ * kfd_ioctl_dbg_trap_set_flags_args
+ *
+ * Arguments for KFD_IOC_DBG_TRAP_SET_FLAGS
+ * Sets flags for wave behaviour.
+ *
+ * @flags (IN/OUT) - IN = flags to enable, OUT = flags previously enabled
+ *
+ * Generic errors apply (see kfd_dbg_trap_operations).
+ * Return - 0 on SUCCESS.
+ * - EACCES if any debug device does not allow flag options.
+ */
+struct kfd_ioctl_dbg_trap_set_flags_args {
+ __u32 flags;
+ __u32 pad;
+};
+
+/**
+ * kfd_ioctl_dbg_trap_query_debug_event_args
+ *
+ * Arguments for KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT
+ *
+ * Find one or more raised exceptions. This function can return multiple
+ * exceptions from a single queue or a single device with one call. To find
+ * all raised exceptions, this function must be called repeatedly until it
+ * returns -EAGAIN. Returned exceptions can optionally be cleared by
+ * setting the corresponding bit in the @exception_mask input parameter.
+ * However, clearing an exception prevents retrieving further information
+ * about it with KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO.
+ *
+ * @exception_mask (IN/OUT) - exception to clear (IN) and raised (OUT)
+ * @gpu_id (OUT) - gpu id of exceptions raised
+ * @queue_id (OUT) - queue id of exceptions raised
+ *
+ * Generic errors apply (see kfd_dbg_trap_operations).
+ * Return - 0 on raised exception found
+ * Raised exceptions found are returned in @exception_mask
+ * with reported source id returned in @gpu_id or @queue_id.
+ * - EAGAIN if no raised exception has been found
+ */
+struct kfd_ioctl_dbg_trap_query_debug_event_args {
+ __u64 exception_mask;
+ __u32 gpu_id;
+ __u32 queue_id;
+};
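A minimal, hypothetical drain loop matching the "call until -EAGAIN" contract described above; it assumes debug is already enabled for pid and clears EC_QUEUE_NEW notifications as they are reported.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include "kfd_ioctl.h"

/* Hypothetical sketch: drain all pending debug events for a debugged process. */
static void drain_debug_events(int kfd_fd, uint32_t pid)
{
	struct kfd_ioctl_dbg_trap_args args;

	for (;;) {
		memset(&args, 0, sizeof(args));
		args.pid = pid;
		args.op = KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT;
		/* clear-on-report for the exceptions we care about */
		args.query_debug_event.exception_mask = KFD_EC_MASK(EC_QUEUE_NEW);

		if (ioctl(kfd_fd, AMDKFD_IOC_DBG_TRAP, &args)) {
			/* EAGAIN just means nothing is left to report */
			if (errno != EAGAIN)
				perror("AMDKFD_IOC_DBG_TRAP");
			break;
		}
		/* args.query_debug_event.exception_mask now holds the raised
		 * exceptions; gpu_id/queue_id identify the source. */
	}
}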
+
+/**
+ * kfd_ioctl_dbg_trap_query_exception_info_args
+ *
+ * Arguments for KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO
+ * Get additional info on raised exception.
+ *
+ * @info_ptr (IN) - pointer to exception info buffer to copy to
+ * @info_size (IN/OUT) - exception info buffer size (bytes)
+ * @source_id (IN) - target gpu or queue id
+ * @exception_code (IN) - target exception
+ * @clear_exception (IN) - clear raised @exception_code exception
+ * (0 = false, 1 = true)
+ *
+ * Generic errors apply (see kfd_dbg_trap_operations).
+ * Return - 0 on SUCCESS.
+ * If @exception_code is EC_DEVICE_MEMORY_VIOLATION, copy @info_size(OUT)
+ * bytes of memory exception data to @info_ptr.
+ * If @exception_code is EC_PROCESS_RUNTIME, copy saved
+ * kfd_runtime_info to @info_ptr.
+ * Actual required @info_ptr size (bytes) is returned in @info_size.
+ */
+struct kfd_ioctl_dbg_trap_query_exception_info_args {
+ __u64 info_ptr;
+ __u32 info_size;
+ __u32 source_id;
+ __u32 exception_code;
+ __u32 clear_exception;
+};
+
+/**
+ * kfd_ioctl_dbg_trap_get_queue_snapshot_args
+ *
+ * Arguments for KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT
+ * Get queue information.
+ *
+ * @exception_mask (IN) - exceptions raised to clear
+ * @snapshot_buf_ptr (IN) - queue snapshot entry buffer (see kfd_queue_snapshot_entry)
+ * @num_queues (IN/OUT) - number of queue snapshot entries
+ * The debugger specifies the size of the array allocated in @num_queues.
+ * KFD returns the number of queues that actually existed. If this is
+ * larger than the size specified by the debugger, KFD will not overflow
+ * the array allocated by the debugger.
+ *
+ * @entry_size (IN/OUT) - size per entry in bytes
+ * The debugger specifies sizeof(struct kfd_queue_snapshot_entry) in
+ * @entry_size. KFD returns the number of bytes actually populated per
+ * entry. The debugger should use the KFD_IOCTL_MINOR_VERSION to determine,
+ * which fields in struct kfd_queue_snapshot_entry are valid. This allows
+ * growing the ABI in a backwards compatible manner.
+ * Note that entry_size(IN) should still be used to stride the snapshot buffer in the
+ * event that it's larger than actual kfd_queue_snapshot_entry.
+ *
+ * Generic errors apply (see kfd_dbg_trap_operations).
+ * Return - 0 on SUCCESS.
+ * Copies @num_queues(IN) queue snapshot entries of size @entry_size(IN)
+ * into @snapshot_buf_ptr if @num_queues(IN) > 0.
+ * Otherwise return @num_queues(OUT) queue snapshot entries that exist.
+ */
+struct kfd_ioctl_dbg_trap_queue_snapshot_args {
+ __u64 exception_mask;
+ __u64 snapshot_buf_ptr;
+ __u32 num_queues;
+ __u32 entry_size;
+};
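A short, hypothetical sketch of the two-pass snapshot pattern described above: size the snapshot first, then fetch it and stride the buffer with entry_size(IN) as documented. Error handling is trimmed.

#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include "kfd_ioctl.h"

/* Hypothetical sketch: snapshot all queues of a debugged process. */
static void snapshot_queues(int kfd_fd, uint32_t pid)
{
	uint32_t stride = sizeof(struct kfd_queue_snapshot_entry);
	struct kfd_ioctl_dbg_trap_args args = { .pid = pid };
	uint32_t i, n;
	char *buf;

	/* Pass 1: num_queues = 0 only reports how many queues exist. */
	args.op = KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT;
	args.queue_snapshot.entry_size = stride;
	if (ioctl(kfd_fd, AMDKFD_IOC_DBG_TRAP, &args))
		return;
	n = args.queue_snapshot.num_queues;
	if (!n)
		return;

	buf = calloc(n, stride);
	if (!buf)
		return;

	/* Pass 2: fetch up to n entries into the buffer. */
	args.queue_snapshot.snapshot_buf_ptr = (uint64_t)(uintptr_t)buf;
	args.queue_snapshot.num_queues = n;
	args.queue_snapshot.entry_size = stride;
	if (!ioctl(kfd_fd, AMDKFD_IOC_DBG_TRAP, &args)) {
		for (i = 0; i < n; i++) {
			struct kfd_queue_snapshot_entry *e =
				(void *)(buf + (size_t)i * stride);
			/* e->queue_id, e->gpu_id, e->ring_base_address, ... */
			(void)e;
		}
	}
	free(buf);
}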
+
+/**
+ * kfd_ioctl_dbg_trap_get_device_snapshot_args
+ *
+ * Arguments for KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT
+ * Get device information.
+ *
+ * @exception_mask (IN) - exceptions raised to clear
+ * @snapshot_buf_ptr (IN) - pointer to snapshot buffer (see kfd_dbg_device_info_entry)
+ * @num_devices (IN/OUT) - number of debug devices to snapshot
+ * The debugger specifies the size of the array allocated in @num_devices.
+ * KFD returns the number of devices that actually existed. If this is
+ * larger than the size specified by the debugger, KFD will not overflow
+ * the array allocated by the debugger.
+ *
+ * @entry_size (IN/OUT) - size per entry in bytes
+ * The debugger specifies sizeof(struct kfd_dbg_device_info_entry) in
+ * @entry_size. KFD returns the number of bytes actually populated. The
+ * debugger should use KFD_IOCTL_MINOR_VERSION to determine which fields
+ * in struct kfd_dbg_device_info_entry are valid. This allows growing the
+ * ABI in a backwards compatible manner.
+ * Note that entry_size(IN) should still be used to stride the snapshot buffer in the
+ * event that it's larger than actual kfd_dbg_device_info_entry.
+ *
+ * Generic errors apply (see kfd_dbg_trap_operations).
+ * Return - 0 on SUCCESS.
+ * Copies @num_devices(IN) device snapshot entries of size @entry_size(IN)
+ * into @snapshot_buf_ptr if @num_devices(IN) > 0.
+ * Otherwise return @num_devices(OUT) device snapshot entries that exist.
+ */
+struct kfd_ioctl_dbg_trap_device_snapshot_args {
+ __u64 exception_mask;
+ __u64 snapshot_buf_ptr;
+ __u32 num_devices;
+ __u32 entry_size;
+};
+
+/**
+ * kfd_ioctl_dbg_trap_args
+ *
+ * Arguments to debug target process.
+ *
+ * @pid - target process to debug
+ * @op - debug operation (see kfd_dbg_trap_operations)
+ *
+ * @op determines which union struct args to use.
+ * Refer to kern docs for each kfd_ioctl_dbg_trap_*_args struct.
+ */
+struct kfd_ioctl_dbg_trap_args {
+ __u32 pid;
+ __u32 op;
+
+ union {
+ struct kfd_ioctl_dbg_trap_enable_args enable;
+ struct kfd_ioctl_dbg_trap_send_runtime_event_args send_runtime_event;
+ struct kfd_ioctl_dbg_trap_set_exceptions_enabled_args set_exceptions_enabled;
+ struct kfd_ioctl_dbg_trap_set_wave_launch_override_args launch_override;
+ struct kfd_ioctl_dbg_trap_set_wave_launch_mode_args launch_mode;
+ struct kfd_ioctl_dbg_trap_suspend_queues_args suspend_queues;
+ struct kfd_ioctl_dbg_trap_resume_queues_args resume_queues;
+ struct kfd_ioctl_dbg_trap_set_node_address_watch_args set_node_address_watch;
+ struct kfd_ioctl_dbg_trap_clear_node_address_watch_args clear_node_address_watch;
+ struct kfd_ioctl_dbg_trap_set_flags_args set_flags;
+ struct kfd_ioctl_dbg_trap_query_debug_event_args query_debug_event;
+ struct kfd_ioctl_dbg_trap_query_exception_info_args query_exception_info;
+ struct kfd_ioctl_dbg_trap_queue_snapshot_args queue_snapshot;
+ struct kfd_ioctl_dbg_trap_device_snapshot_args device_snapshot;
+ };
+};
+
+#define AMDKFD_IOCTL_BASE 'K'
+#define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr)
+#define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type)
+#define AMDKFD_IOW(nr, type) _IOW(AMDKFD_IOCTL_BASE, nr, type)
+#define AMDKFD_IOWR(nr, type) _IOWR(AMDKFD_IOCTL_BASE, nr, type)
+
+#define AMDKFD_IOC_GET_VERSION \
+ AMDKFD_IOR(0x01, struct kfd_ioctl_get_version_args)
+
+#define AMDKFD_IOC_CREATE_QUEUE \
+ AMDKFD_IOWR(0x02, struct kfd_ioctl_create_queue_args)
+
+#define AMDKFD_IOC_DESTROY_QUEUE \
+ AMDKFD_IOWR(0x03, struct kfd_ioctl_destroy_queue_args)
+
+#define AMDKFD_IOC_SET_MEMORY_POLICY \
+ AMDKFD_IOW(0x04, struct kfd_ioctl_set_memory_policy_args)
-#define AMDKFD_IOC_GET_VERSION AMDKFD_IOR(0x01, struct kfd_ioctl_get_version_args)
+#define AMDKFD_IOC_GET_CLOCK_COUNTERS \
+ AMDKFD_IOWR(0x05, struct kfd_ioctl_get_clock_counters_args)
-#define AMDKFD_IOC_CREATE_QUEUE AMDKFD_IOWR(0x02, struct kfd_ioctl_create_queue_args)
+#define AMDKFD_IOC_GET_PROCESS_APERTURES \
+ AMDKFD_IOR(0x06, struct kfd_ioctl_get_process_apertures_args)
-#define AMDKFD_IOC_DESTROY_QUEUE AMDKFD_IOWR(0x03, struct kfd_ioctl_destroy_queue_args)
+#define AMDKFD_IOC_UPDATE_QUEUE \
+ AMDKFD_IOW(0x07, struct kfd_ioctl_update_queue_args)
-#define AMDKFD_IOC_SET_MEMORY_POLICY AMDKFD_IOW(0x04, struct kfd_ioctl_set_memory_policy_args)
+#define AMDKFD_IOC_CREATE_EVENT \
+ AMDKFD_IOWR(0x08, struct kfd_ioctl_create_event_args)
-#define AMDKFD_IOC_GET_CLOCK_COUNTERS AMDKFD_IOWR(0x05, struct kfd_ioctl_get_clock_counters_args)
+#define AMDKFD_IOC_DESTROY_EVENT \
+ AMDKFD_IOW(0x09, struct kfd_ioctl_destroy_event_args)
-#define AMDKFD_IOC_GET_PROCESS_APERTURES AMDKFD_IOR(0x06, struct kfd_ioctl_get_process_apertures_args)
+#define AMDKFD_IOC_SET_EVENT \
+ AMDKFD_IOW(0x0A, struct kfd_ioctl_set_event_args)
-#define AMDKFD_IOC_UPDATE_QUEUE AMDKFD_IOW(0x07, struct kfd_ioctl_update_queue_args)
+#define AMDKFD_IOC_RESET_EVENT \
+ AMDKFD_IOW(0x0B, struct kfd_ioctl_reset_event_args)
-#define AMDKFD_IOC_CREATE_EVENT AMDKFD_IOWR(0x08, struct kfd_ioctl_create_event_args)
+#define AMDKFD_IOC_WAIT_EVENTS \
+ AMDKFD_IOWR(0x0C, struct kfd_ioctl_wait_events_args)
-#define AMDKFD_IOC_DESTROY_EVENT AMDKFD_IOW(0x09, struct kfd_ioctl_destroy_event_args)
+#define AMDKFD_IOC_DBG_REGISTER_DEPRECATED \
+ AMDKFD_IOW(0x0D, struct kfd_ioctl_dbg_register_args)
-#define AMDKFD_IOC_SET_EVENT AMDKFD_IOW(0x0A, struct kfd_ioctl_set_event_args)
+#define AMDKFD_IOC_DBG_UNREGISTER_DEPRECATED \
+ AMDKFD_IOW(0x0E, struct kfd_ioctl_dbg_unregister_args)
-#define AMDKFD_IOC_RESET_EVENT AMDKFD_IOW(0x0B, struct kfd_ioctl_reset_event_args)
+#define AMDKFD_IOC_DBG_ADDRESS_WATCH_DEPRECATED \
+ AMDKFD_IOW(0x0F, struct kfd_ioctl_dbg_address_watch_args)
-#define AMDKFD_IOC_WAIT_EVENTS AMDKFD_IOWR(0x0C, struct kfd_ioctl_wait_events_args)
+#define AMDKFD_IOC_DBG_WAVE_CONTROL_DEPRECATED \
+ AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args)
-#define AMDKFD_IOC_DBG_REGISTER_DEPRECATED AMDKFD_IOW(0x0D, struct kfd_ioctl_dbg_register_args)
+#define AMDKFD_IOC_SET_SCRATCH_BACKING_VA \
+ AMDKFD_IOWR(0x11, struct kfd_ioctl_set_scratch_backing_va_args)
-#define AMDKFD_IOC_DBG_UNREGISTER_DEPRECATED AMDKFD_IOW(0x0E, struct kfd_ioctl_dbg_unregister_args)
+#define AMDKFD_IOC_GET_TILE_CONFIG \
+ AMDKFD_IOWR(0x12, struct kfd_ioctl_get_tile_config_args)
-#define AMDKFD_IOC_DBG_ADDRESS_WATCH_DEPRECATED AMDKFD_IOW(0x0F, struct kfd_ioctl_dbg_address_watch_args)
+#define AMDKFD_IOC_SET_TRAP_HANDLER \
+ AMDKFD_IOW(0x13, struct kfd_ioctl_set_trap_handler_args)
-#define AMDKFD_IOC_DBG_WAVE_CONTROL_DEPRECATED AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args)
+#define AMDKFD_IOC_GET_PROCESS_APERTURES_NEW \
+ AMDKFD_IOWR(0x14, \
+ struct kfd_ioctl_get_process_apertures_new_args)
-#define AMDKFD_IOC_SET_SCRATCH_BACKING_VA AMDKFD_IOWR(0x11, struct kfd_ioctl_set_scratch_backing_va_args)
+#define AMDKFD_IOC_ACQUIRE_VM \
+ AMDKFD_IOW(0x15, struct kfd_ioctl_acquire_vm_args)
-#define AMDKFD_IOC_GET_TILE_CONFIG AMDKFD_IOWR(0x12, struct kfd_ioctl_get_tile_config_args)
+#define AMDKFD_IOC_ALLOC_MEMORY_OF_GPU \
+ AMDKFD_IOWR(0x16, struct kfd_ioctl_alloc_memory_of_gpu_args)
-#define AMDKFD_IOC_SET_TRAP_HANDLER AMDKFD_IOW(0x13, struct kfd_ioctl_set_trap_handler_args)
+#define AMDKFD_IOC_FREE_MEMORY_OF_GPU \
+ AMDKFD_IOW(0x17, struct kfd_ioctl_free_memory_of_gpu_args)
-#define AMDKFD_IOC_GET_PROCESS_APERTURES_NEW AMDKFD_IOWR(0x14, struct kfd_ioctl_get_process_apertures_new_args)
+#define AMDKFD_IOC_MAP_MEMORY_TO_GPU \
+ AMDKFD_IOWR(0x18, struct kfd_ioctl_map_memory_to_gpu_args)
-#define AMDKFD_IOC_ACQUIRE_VM AMDKFD_IOW(0x15, struct kfd_ioctl_acquire_vm_args)
+#define AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU \
+ AMDKFD_IOWR(0x19, struct kfd_ioctl_unmap_memory_from_gpu_args)
-#define AMDKFD_IOC_ALLOC_MEMORY_OF_GPU AMDKFD_IOWR(0x16, struct kfd_ioctl_alloc_memory_of_gpu_args)
+#define AMDKFD_IOC_SET_CU_MASK \
+ AMDKFD_IOW(0x1A, struct kfd_ioctl_set_cu_mask_args)
-#define AMDKFD_IOC_FREE_MEMORY_OF_GPU AMDKFD_IOW(0x17, struct kfd_ioctl_free_memory_of_gpu_args)
+#define AMDKFD_IOC_GET_QUEUE_WAVE_STATE \
+ AMDKFD_IOWR(0x1B, struct kfd_ioctl_get_queue_wave_state_args)
-#define AMDKFD_IOC_MAP_MEMORY_TO_GPU AMDKFD_IOWR(0x18, struct kfd_ioctl_map_memory_to_gpu_args)
+#define AMDKFD_IOC_GET_DMABUF_INFO \
+ AMDKFD_IOWR(0x1C, struct kfd_ioctl_get_dmabuf_info_args)
-#define AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU AMDKFD_IOWR(0x19, struct kfd_ioctl_unmap_memory_from_gpu_args)
+#define AMDKFD_IOC_IMPORT_DMABUF \
+ AMDKFD_IOWR(0x1D, struct kfd_ioctl_import_dmabuf_args)
-#define AMDKFD_IOC_SET_CU_MASK AMDKFD_IOW(0x1A, struct kfd_ioctl_set_cu_mask_args)
+#define AMDKFD_IOC_ALLOC_QUEUE_GWS \
+ AMDKFD_IOWR(0x1E, struct kfd_ioctl_alloc_queue_gws_args)
-#define AMDKFD_IOC_GET_QUEUE_WAVE_STATE AMDKFD_IOWR(0x1B, struct kfd_ioctl_get_queue_wave_state_args)
+#define AMDKFD_IOC_SMI_EVENTS \
+ AMDKFD_IOWR(0x1F, struct kfd_ioctl_smi_events_args)
-#define AMDKFD_IOC_GET_DMABUF_INFO AMDKFD_IOWR(0x1C, struct kfd_ioctl_get_dmabuf_info_args)
+#define AMDKFD_IOC_SVM AMDKFD_IOWR(0x20, struct kfd_ioctl_svm_args)
-#define AMDKFD_IOC_IMPORT_DMABUF AMDKFD_IOWR(0x1D, struct kfd_ioctl_import_dmabuf_args)
+#define AMDKFD_IOC_SET_XNACK_MODE \
+ AMDKFD_IOWR(0x21, struct kfd_ioctl_set_xnack_mode_args)
-#define AMDKFD_IOC_ALLOC_QUEUE_GWS AMDKFD_IOWR(0x1E, struct kfd_ioctl_alloc_queue_gws_args)
+#define AMDKFD_IOC_CRIU_OP \
+ AMDKFD_IOWR(0x22, struct kfd_ioctl_criu_args)
-#define AMDKFD_IOC_SMI_EVENTS AMDKFD_IOWR(0x1F, struct kfd_ioctl_smi_events_args)
+#define AMDKFD_IOC_AVAILABLE_MEMORY \
+ AMDKFD_IOWR(0x23, struct kfd_ioctl_get_available_memory_args)
-#define AMDKFD_IOC_SVM AMDKFD_IOWR(0x20, struct kfd_ioctl_svm_args)
+#define AMDKFD_IOC_EXPORT_DMABUF \
+ AMDKFD_IOWR(0x24, struct kfd_ioctl_export_dmabuf_args)
-#define AMDKFD_IOC_SET_XNACK_MODE AMDKFD_IOWR(0x21, struct kfd_ioctl_set_xnack_mode_args)
+#define AMDKFD_IOC_RUNTIME_ENABLE \
+ AMDKFD_IOWR(0x25, struct kfd_ioctl_runtime_enable_args)
-#define AMDKFD_IOC_CRIU_OP AMDKFD_IOWR(0x22, struct kfd_ioctl_criu_args)
+#define AMDKFD_IOC_DBG_TRAP \
+ AMDKFD_IOWR(0x26, struct kfd_ioctl_dbg_trap_args)
-#define AMDKFD_COMMAND_START 0x01
-#define AMDKFD_COMMAND_END 0x23
+#define AMDKFD_COMMAND_START 0x01
+#define AMDKFD_COMMAND_END 0x27
#endif
--
2.34.1