From David.Francis at amd.com Sat May 17 00:05:32 2025 From: David.Francis at amd.com (David Francis) Date: Fri, 16 May 2025 17:05:32 -0400 Subject: [CRIU] [PATCH v3] Add CRIU support for amdgpu dmabuf Message-ID: <20250516210539.3537211-1-David.Francis@amd.com> This patch series adds support for CRIU checkpointing of processes that share memory via the amdgpu dmabuf interface. In this v3, the sockets used by CRIU to exchange dmabuf fds between restoring processes have been refactored to be created within the amdgpu plugin itself. In the accompanying kernel patch set, the drm interfaces have been changed from creating buffer objects with specified gem handles to changing the gem handle of an existing buffer object.
From David.Francis at amd.com Sat May 17 00:05:34 2025 From: David.Francis at amd.com (David Francis) Date: Fri, 16 May 2025 17:05:34 -0400 Subject: [CRIU] [PATCH 2/7] servicefd: Add mechanism to find unused high fds In-Reply-To: <20250516210539.3537211-1-David.Francis@amd.com> References: <20250516210539.3537211-1-David.Francis@amd.com> Message-ID: <20250516210539.3537211-3-David.Francis@amd.com> During restore, the amdgpu plugin must hold onto fds for dmabufs as they are transferred from one process to another. These fds must be chosen so as not to conflict with other fds used during restore. Extend the service_fd system, which already finds unused fds, to allow requesting an unused high fd. Signed-off-by: David Francis --- criu/include/servicefd.h | 1 + criu/servicefd.c | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/criu/include/servicefd.h b/criu/include/servicefd.h index 4265d94ed..f74d8ef43 100644 --- a/criu/include/servicefd.h +++ b/criu/include/servicefd.h @@ -47,5 +47,6 @@ extern int install_service_fd(enum sfd_type type, int fd); extern int close_service_fd(enum sfd_type type); extern void __close_service_fd(enum sfd_type type); extern int clone_service_fd(struct pstree_item *me); +extern int get_unused_high_fd(void); #endif /* __CR_SERVICE_FD_H__ */ diff --git a/criu/servicefd.c b/criu/servicefd.c index 06a8d3eba..5034a19f3 100644 --- a/criu/servicefd.c +++ b/criu/servicefd.c @@ -25,6 +25,7 @@ int service_fd_rlim_cur; /* Base of current process service fds set */ static int service_fd_base; +static int next_high_fd; /* Id of current process in shared fdt */ static int service_fd_id = 0; @@ -312,5 +313,15 @@ int clone_service_fd(struct pstree_item *me) service_fd_id = id; ret = 0; + next_high_fd = service_fd_base + 1024; + return ret; } + +int get_unused_high_fd(void) +{ + if (next_high_fd > service_fd_rlim_cur) + return -1; + next_high_fd += 1; + return next_high_fd - 1; +} -- 2.34.1
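[Editor's note: for illustration only, a minimal sketch (not part of the series) of how a caller would use the new helper: reserve a free high slot with get_unused_high_fd(), then pin an existing fd onto it with F_DUPFD. This is the same pattern the amdgpu plugin applies to its dmabuf sockets later in this series; park_fd_high() is a hypothetical name.]

#include <fcntl.h>
#include <unistd.h>

#include "servicefd.h" /* get_unused_high_fd(), added above */

/* Hypothetical helper: move an open fd to a reserved slot above the
 * service-fd range so it cannot collide with fds created later during
 * restore. Returns the new fd number, or -1 on failure. */
static int park_fd_high(int fd)
{
	int slot, parked;

	slot = get_unused_high_fd();
	if (slot < 0)
		return -1; /* reserved high range exhausted */

	parked = fcntl(fd, F_DUPFD, slot); /* dup to lowest fd >= slot */
	if (parked != slot) {
		if (parked >= 0)
			close(parked); /* slot was unexpectedly taken */
		return -1;
	}

	close(fd); /* keep only the parked copy */
	return slot;
}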
From David.Francis at amd.com Sat May 17 00:05:33 2025 From: David.Francis at amd.com (David Francis) Date: Fri, 16 May 2025 17:05:33 -0400 Subject: [CRIU] [PATCH 1/7] restorer: Skip non-regular VMAs In-Reply-To: <20250516210539.3537211-1-David.Francis@amd.com> References: <20250516210539.3537211-1-David.Francis@amd.com> Message-ID: <20250516210539.3537211-2-David.Francis@amd.com> amdgpu represents allocated device memory as a memory mapping of the device file. This is a non-standard VMA that must be handled by the plugin, not the normal VMA code. Skip madvise handling for these non-regular VMAs in the restorer. Signed-off-by: David Francis --- criu/pie/restorer.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c index 6d048c3f1..3a56ed210 100644 --- a/criu/pie/restorer.c +++ b/criu/pie/restorer.c @@ -1920,6 +1920,10 @@ __visible long __export_restore_task(struct task_restore_args *args) for (m = 0; m < sizeof(vma_entry->madv) * 8; m++) { if (vma_entry->madv & (1ul << m)) { + + if (!(vma_entry_is(vma_entry, VMA_AREA_REGULAR))) + continue; + ret = sys_madvise(vma_entry->start, vma_entry_len(vma_entry), m); if (ret) { pr_err("madvise(%" PRIx64 ", %" PRIu64 ", %ld) " -- 2.34.1
From David.Francis at amd.com Sat May 17 00:05:35 2025 From: David.Francis at amd.com (David Francis) Date: Fri, 16 May 2025 17:05:35 -0400 Subject: [CRIU] [PATCH 3/7] files-ext: Allow plugin files to retry In-Reply-To: <20250516210539.3537211-1-David.Francis@amd.com> References: <20250516210539.3537211-1-David.Francis@amd.com> Message-ID: <20250516210539.3537211-4-David.Francis@amd.com> amdgpu dmabuf CRIU requires that the amdgpu plugin be able to retry a restore. Change files-ext.c to treat a response of 1 from a plugin restore function as a request to retry. Signed-off-by: David Francis --- criu/files-ext.c | 10 +++++++--- criu/include/criu-plugin.h | 2 +- plugins/amdgpu/amdgpu_plugin.c | 4 +++- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/criu/files-ext.c b/criu/files-ext.c index 95ec8e37c..4cc99d921 100644 --- a/criu/files-ext.c +++ b/criu/files-ext.c @@ -45,10 +45,11 @@ static int open_fd(struct file_desc *d, int *new_fd) { struct ext_file_info *xfi; int fd; + bool retry_needed; xfi = container_of(d, struct ext_file_info, d); - fd = run_plugins(RESTORE_EXT_FILE, xfi->xfe->id); + fd = run_plugins(RESTORE_EXT_FILE, xfi->xfe->id, &retry_needed); if (fd < 0) { pr_err("Unable to restore %#x\n", xfi->xfe->id); return -1; @@ -57,8 +58,11 @@ static int open_fd(struct file_desc *d, int *new_fd) if (restore_fown(fd, xfi->xfe->fown)) return -1; - *new_fd = fd; - return 0; + if (!retry_needed) + *new_fd = fd; + else + *new_fd = -1; + return retry_needed; } static struct file_desc_ops ext_desc_ops = { diff --git a/criu/include/criu-plugin.h b/criu/include/criu-plugin.h index 392ea9f53..b844dca4d 100644 --- a/criu/include/criu-plugin.h +++ b/criu/include/criu-plugin.h @@ -68,7 +68,7 @@ enum { DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_UNIX_SK, int fd, int id); DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_UNIX_SK, int id); DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_EXT_FILE, int fd, int id); -DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_EXT_FILE, int id); +DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_EXT_FILE, int id, bool *retry_needed); DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_EXT_MOUNT, char *mountpoint, int id); DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_EXT_MOUNT, int id, char *mountpoint, char *old_root, int *is_file); DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_EXT_LINK, int index, int type, char *kind); diff --git a/plugins/amdgpu/amdgpu_plugin.c b/plugins/amdgpu/amdgpu_plugin.c index 96c086162..4e2b0a789 100644 --- a/plugins/amdgpu/amdgpu_plugin.c +++ b/plugins/amdgpu/amdgpu_plugin.c @@ -1551,7 +1551,7 @@ exit: return ret; } -int amdgpu_plugin_restore_file(int id) +int amdgpu_plugin_restore_file(int id, bool *retry_needed) { int ret = 0, fd; char img_path[PATH_MAX]; @@ -1562,6 +1562,8 @@ int amdgpu_plugin_restore_file(int id) size_t img_size; FILE *img_fp = NULL; + *retry_needed = false; + if (plugin_disabled) return -ENOTSUP; -- 2.34.1
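[Editor's note: to make the new contract concrete, a minimal sketch of a RESTORE_EXT_FILE hook under it. The two helper functions are illustrative stand-ins, not functions from this series: a real plugin would check, e.g., whether the exporter of a shared buffer has been restored yet. The hook returns an open fd on success, a negative value on error, and sets *retry_needed when the file cannot be restored yet; files-ext.c then returns 1 from open_fd() so CRIU's file-open engine calls the hook again on a later pass.]

#include <stdbool.h>

/* Illustrative stand-ins for plugin-private state. */
static bool dependency_missing(int id) { (void)id; return false; /* illustrative */ }
static int open_restored_fd(int id) { (void)id; return -1; /* illustrative */ }

/* Sketch of a plugin restore hook under the new retry contract. */
int example_plugin_restore_file(int id, bool *retry_needed)
{
	*retry_needed = false;

	if (dependency_missing(id)) {
		*retry_needed = true;
		return 0; /* no fd yet; core will retry this file later */
	}

	return open_restored_fd(id); /* >= 0 on success, < 0 on error */
}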
From David.Francis at amd.com Sat May 17 00:05:38 2025 From: David.Francis at amd.com (David Francis) Date: Fri, 16 May 2025 17:05:38 -0400 Subject: [CRIU] [PATCH 6/7] plugin/amdgpu: Add handling for amdgpu drm ioctl In-Reply-To: <20250516210539.3537211-1-David.Francis@amd.com> References: <20250516210539.3537211-1-David.Francis@amd.com> Message-ID: <20250516210539.3537211-7-David.Francis@amd.com> Buffer objects held by the amdgpu drm driver are checkpointed with the new DRM_IOCTL_AMDGPU_CRIU_OP ioctl. Handling for this ioctl is in amdgpu_plugin_drm.c. Handling of imported buffer objects may require dmabuf fds to be transferred between processes. These transfers occur over sockets created by the amdgpu plugin. There are two new plugin callbacks: COLLECT_FILE to identify the processes that have amdgpu files and so need a socket, and RESUME_DEVICES_EARLY to create the sockets before any files are restored. Before each amdgpu file restore, check the socket and record the received dmabuf_fds. During checkpoint, track shared buffer objects, so that buffer objects that are shared across processes can be identified. During restore, track which buffer objects have been restored. Retry restore of a drm file if a buffer object is imported and the original has not been exported yet. Skip buffer objects that have already been completed or cannot be completed in the current restore. So that drm code can use sdma_copy_bo(), that function no longer requires kfd bo structs. Update the protobuf messages with new amdgpu drm information. Signed-off-by: David Francis --- criu/cr-restore.c | 3 + criu/files.c | 3 + criu/include/criu-plugin.h | 7 + criu/plugin.c | 2 + plugins/amdgpu/amdgpu_plugin.c | 259 ++++++++++++++-- plugins/amdgpu/amdgpu_plugin_drm.c | 452 +++++++++++++++++++++++++++- plugins/amdgpu/amdgpu_plugin_drm.h | 8 + plugins/amdgpu/amdgpu_plugin_util.c | 121 +++++++- plugins/amdgpu/amdgpu_plugin_util.h | 50 ++- plugins/amdgpu/criu-amdgpu.proto | 25 ++ 10 files changed, 890 insertions(+), 40 deletions(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index ddca6b8ec..0b4acb99b 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -1651,6 +1651,9 @@ static int __restore_task_with_children(void *_arg) if (open_transport_socket()) goto err; + if (run_plugins(RESUME_DEVICES_EARLY, current->pid->real)) + goto err; + timing_start(TIME_FORK); if (create_children_and_session()) diff --git a/criu/files.c b/criu/files.c index 31e705bcc..326f23cf7 100644 --- a/criu/files.c +++ b/criu/files.c @@ -836,6 +836,9 @@ struct fdinfo_list_entry *collect_fd_to(int pid, FdinfoEntry *e, struct rst_info { struct fdinfo_list_entry *new_le; + if (fdesc->ops->type == FD_TYPES__EXT) + run_plugins(COLLECT_FILE, pid, fdesc->id); + new_le = alloc_fle(pid, e); if (new_le) { new_le->fake = (!!fake); diff --git a/criu/include/criu-plugin.h b/criu/include/criu-plugin.h index b844dca4d..aaf4b0b94 100644 --- a/criu/include/criu-plugin.h +++ b/criu/include/criu-plugin.h @@ -60,6 +60,10 @@ enum { CR_PLUGIN_HOOK__CHECKPOINT_DEVICES = 11, + CR_PLUGIN_HOOK__RESUME_DEVICES_EARLY = 12, + + CR_PLUGIN_HOOK__COLLECT_FILE = 13, + CR_PLUGIN_HOOK__MAX }; @@ -78,6 +82,9 @@ DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__UPDATE_VMA_MAP, const char *path, const DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESUME_DEVICES_LATE, int pid); DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__PAUSE_DEVICES, int pid); DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__CHECKPOINT_DEVICES, int pid); +DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESUME_DEVICES_EARLY, int pid);
+DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__COLLECT_FILE, int pid, int fd); + enum { CR_PLUGIN_STAGE__DUMP, diff --git a/criu/plugin.c b/criu/plugin.c index 65e79a069..cfb19e9f0 100644 --- a/criu/plugin.c +++ b/criu/plugin.c @@ -59,6 +59,8 @@ static cr_plugin_desc_t *cr_gen_plugin_desc(void *h, char *path) __assign_hook(RESUME_DEVICES_LATE, "cr_plugin_resume_devices_late"); __assign_hook(PAUSE_DEVICES, "cr_plugin_pause_devices"); __assign_hook(CHECKPOINT_DEVICES, "cr_plugin_checkpoint_devices"); + __assign_hook(RESUME_DEVICES_EARLY, "cr_plugin_resume_devices_early"); + __assign_hook(COLLECT_FILE, "cr_plugin_collect_file"); #undef __assign_hook diff --git a/plugins/amdgpu/amdgpu_plugin.c b/plugins/amdgpu/amdgpu_plugin.c index 4e2b0a789..ad66e4659 100644 --- a/plugins/amdgpu/amdgpu_plugin.c +++ b/plugins/amdgpu/amdgpu_plugin.c @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include #include @@ -23,11 +25,14 @@ #include "criu-plugin.h" #include "plugin.h" #include "criu-amdgpu.pb-c.h" +#include "util.h" +#include "util-pie.h" #include "kfd_ioctl.h" #include "xmalloc.h" #include "criu-log.h" #include "files.h" +#include "sockets.h" #include "common/list.h" #include "amdgpu_plugin_drm.h" @@ -58,12 +63,18 @@ static int kfd_checkpoint_fd; static LIST_HEAD(update_vma_info_list); +static LIST_HEAD(amdgpu_processes); + size_t kfd_max_buffer_size; bool plugin_added_to_inventory = false; bool plugin_disabled = false; +int dmabuf_socket_fd = -1; + +int current_pid; + /**************************************************************************************************/ /* Call ioctl, restarting if it is interrupted */ @@ -503,11 +514,11 @@ void free_and_unmap(uint64_t size, amdgpu_bo_handle h_bo, amdgpu_va_handle h_va, amdgpu_bo_free(h_bo); } -static int sdma_copy_bo(struct kfd_criu_bo_bucket bo_bucket, FILE *storage_fp, +int sdma_copy_bo(int shared_fd, uint64_t size, FILE *storage_fp, void *buffer, size_t buffer_size, amdgpu_device_handle h_dev, - uint64_t max_copy_size, enum sdma_op_type type) + uint64_t max_copy_size, enum sdma_op_type type, bool do_not_free) { - uint64_t size, src_bo_size, dst_bo_size, buffer_bo_size, bytes_remain, buffer_space_remain; + uint64_t src_bo_size, dst_bo_size, buffer_bo_size, bytes_remain, buffer_space_remain; uint64_t gpu_addr_src, gpu_addr_dst, gpu_addr_ib, copy_src, copy_dst, copy_size; amdgpu_va_handle h_va_src, h_va_dst, h_va_ib; amdgpu_bo_handle h_bo_src, h_bo_dst, h_bo_ib; @@ -520,10 +531,8 @@ static int sdma_copy_bo(struct kfd_criu_bo_bucket bo_bucket, FILE *storage_fp, uint32_t expired; amdgpu_context_handle h_ctx; uint32_t *ib = NULL; - int j, err, shared_fd, packets_per_buffer; + int j, err, packets_per_buffer; - shared_fd = bo_bucket.dmabuf_fd; - size = bo_bucket.size; buffer_bo_size = min(size, buffer_size); packets_per_buffer = ((buffer_bo_size - 1) / max_copy_size) + 1; src_bo_size = (type == SDMA_OP_VRAM_WRITE) ? 
buffer_bo_size : size; @@ -734,7 +743,8 @@ err_dst_bo_map: if (err) pr_perror("dest range free failed"); err_dst_va: - err = amdgpu_bo_free(h_bo_dst); + if (!do_not_free) + err = amdgpu_bo_free(h_bo_dst); if (err) pr_perror("dest bo free failed"); err_dst_bo_prep: @@ -822,8 +832,9 @@ void *dump_bo_contents(void *_thread_data) num_bos++; /* perform sDMA based vram copy */ - ret = sdma_copy_bo(bo_buckets[i], bo_contents_fp, buffer, buffer_size, h_dev, max_copy_size, - SDMA_OP_VRAM_READ); + ret = sdma_copy_bo(bo_buckets[i].dmabuf_fd, bo_buckets[i].size, bo_contents_fp, buffer, buffer_size, h_dev, max_copy_size, + SDMA_OP_VRAM_READ, false); + if (ret) { pr_err("Failed to drain the BO using sDMA: bo_buckets[%d]\n", i); break; @@ -920,8 +931,8 @@ void *restore_bo_contents(void *_thread_data) num_bos++; - ret = sdma_copy_bo(bo_buckets[i], bo_contents_fp, buffer, buffer_size, h_dev, max_copy_size, - SDMA_OP_VRAM_WRITE); + ret = sdma_copy_bo(bo_buckets[i].dmabuf_fd, bo_buckets[i].size, bo_contents_fp, buffer, buffer_size, h_dev, max_copy_size, + SDMA_OP_VRAM_WRITE, false); if (ret) { pr_err("Failed to fill the BO using sDMA: bo_buckets[%d]\n", i); break; @@ -1030,6 +1041,124 @@ exit: return ret; } +static void dmabuf_socket_name_gen(struct sockaddr_un *addr, int *len, int pid) +{ + addr->sun_family = AF_UNIX; + snprintf(addr->sun_path, UNIX_PATH_MAX, "x/crtools-amdgpu-dmabuf-%d-%" PRIx64, pid, criu_run_id); + *len = SUN_LEN(addr); + *addr->sun_path = '\0'; +} + +int amdgpu_make_socket(int pid) +{ + int ret = 0; + struct amdgpu_process *p; + struct sockaddr_un saddr; + int sock, slen; + + list_for_each_entry(p, &amdgpu_processes, l) { + if (p->pid == pid) { + dmabuf_socket_fd = get_unused_high_fd(); + current_pid = pid; + + sock = socket(PF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0); + if (sock < 0) { + pr_perror("Can't create socket"); + ret = -1; + goto out; + } + + dmabuf_socket_name_gen(&saddr, &slen, pid); + if (bind(sock, (struct sockaddr *)&saddr, slen) < 0) { + pr_perror("Can't bind dmabuf socket %s", saddr.sun_path + 1); + close(sock); + ret = -1; + goto out; + } + + ret = fcntl(sock, F_DUPFD, dmabuf_socket_fd); + if (ret < 0) { + close(sock); + goto out; + } else if (ret != dmabuf_socket_fd) { + close(dmabuf_socket_fd); + close(sock); + ret = -1; + goto out; + } + close(sock); + ret = 0; + } + } + + out: + + return ret; +} +CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__RESUME_DEVICES_EARLY, amdgpu_make_socket) + +int serve_out_dmabuf_fd(int handle, int fd) +{ + int ret = 0; + struct amdgpu_process *p; + struct sockaddr_un saddr; + int len; + + list_for_each_entry(p, &amdgpu_processes, l) { + dmabuf_socket_name_gen(&saddr, &len, p->pid); + + ret = send_fds(dmabuf_socket_fd, &saddr, len, &fd, 1, (void *)&handle, sizeof(handle)); + if (ret < 0) + goto out; + } + +out: + return ret; +} + +int amdgpu_collect_file(int pid, int fd) +{ + struct amdgpu_process *p; + + list_for_each_entry(p, &amdgpu_processes, l) + if (p->pid == pid) + return 0; + + p = malloc(sizeof(struct amdgpu_process)); + + if (!p) + return -ENOMEM; + + p->pid = pid; + + list_add(&p->l, &amdgpu_processes); + + return 0; +} +CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__COLLECT_FILE, amdgpu_collect_file) + +static int recv_dmabuf_fds(void) +{ + int fd, newfd, ret, handle; + + while (true) { + ret = __recv_fds(dmabuf_socket_fd, &fd, 1, (void *)&handle, sizeof(handle), MSG_DONTWAIT); + + if (ret == -EAGAIN || ret == -EWOULDBLOCK) + return 0; + else if (ret) + return -1; + + newfd = get_unused_high_fd(); + + reopen_fd_as(newfd, fd); + + 
record_shared_dmabuf_fd(handle, newfd); + } + + return 0; +} + static int save_devices(int fd, struct kfd_ioctl_criu_args *args, struct kfd_criu_device_bucket *device_buckets, CriuKfd *e) { @@ -1072,6 +1201,8 @@ static int save_bos(int id, int fd, struct kfd_ioctl_criu_args *args, struct kfd { struct thread_data *thread_datas; int ret = 0, i; + amdgpu_device_handle h_dev; + uint32_t major, minor; pr_debug("Dumping %d BOs\n", args->num_bos); @@ -1095,6 +1226,19 @@ static int save_bos(int id, int fd, struct kfd_ioctl_criu_args *args, struct kfd boinfo->size = bo_bucket->size; boinfo->offset = bo_bucket->offset; boinfo->alloc_flags = bo_bucket->alloc_flags; + + ret = amdgpu_device_initialize(node_get_drm_render_device(sys_get_node_by_gpu_id(&src_topology, bo_bucket->gpu_id)), &major, &minor, &h_dev); + + boinfo->handle = get_gem_handle(h_dev, bo_bucket->dmabuf_fd); + + amdgpu_device_deinitialize(h_dev); + } + for (i = 0; i < e->num_of_bos; i++) { + KfdBoEntry *boinfo = e->bo_entries[i]; + + ret = record_shared_bo(boinfo->handle, false); + if (ret) + goto exit; } for (int i = 0; i < e->num_of_gpus; i++) { @@ -1431,9 +1575,33 @@ static int restore_bos(struct kfd_ioctl_criu_args *args, CriuKfd *e) plugin_log_msg("BO [%d] gpu_id:%x addr:%llx size:%llx offset:%llx\n", i, bo_bucket->gpu_id, bo_bucket->addr, bo_bucket->size, bo_bucket->offset); + } pr_info("Restore BOs Ok\n"); + + return 0; +} + +int save_vma_updates(uint64_t offset, uint64_t addr, uint64_t restored_offset, int fd) +{ + struct vma_metadata *vma_md; + + vma_md = xmalloc(sizeof(*vma_md)); + if (!vma_md) { + return -ENOMEM; + } + + memset(vma_md, 0, sizeof(*vma_md)); + + vma_md->old_pgoff = offset; + vma_md->vma_entry = addr; + + vma_md->new_pgoff = restored_offset; + vma_md->fd = fd; + + list_add_tail(&vma_md->list, &update_vma_info_list); + return 0; } @@ -1567,6 +1735,10 @@ int amdgpu_plugin_restore_file(int id, bool *retry_needed) if (plugin_disabled) return -ENOTSUP; + ret = recv_dmabuf_fds(); + if (ret) + return ret; + pr_info("Initialized kfd plugin restorer with ID = %d\n", id); snprintf(img_path, sizeof(img_path), IMG_KFD_FILE, id); @@ -1628,8 +1800,18 @@ int amdgpu_plugin_restore_file(int id, bool *retry_needed) pr_info("render node destination gpu_id = 0x%04x\n", tp_node->gpu_id); fd = node_get_drm_render_device(tp_node); - if (fd < 0) + if (fd < 0) { pr_err("Failed to open render device (minor:%d)\n", tp_node->drm_render_minor); + return -1; + } + + ret = amdgpu_plugin_drm_restore_file(fd, rd); + if (ret == 1) + *retry_needed = true; + if (ret < 0) { + fd = ret; + goto fail; + } fail: criu_render_node__free_unpacked(rd, NULL); xfree(buf); @@ -1641,12 +1823,20 @@ int amdgpu_plugin_restore_file(int id, bool *retry_needed) * copy of the fd. CRIU core owns the duplicated returned fd, and amdgpu_plugin owns the fd stored in * tp_node. */ - fd = dup(fd); - if (fd == -1) { - pr_perror("unable to duplicate the render fd"); - return -1; + + if (fd < 0) + return fd; + + if (!(*retry_needed)) { + fd = dup(fd); + if (fd == -1) { + pr_perror("unable to duplicate the render fd"); + return -1; + } + return fd; } - return fd; + + return 0; } fd = open(AMDGPU_KFD_DEVICE, O_RDWR | O_CLOEXEC); @@ -1690,13 +1880,16 @@ int amdgpu_plugin_restore_file(int id, bool *retry_needed) * This way, we know that the file descriptors we store will not conflict with file descriptors inside core * CRIU. 
*/ - fd_next = find_unused_fd_pid(e->pid); - if (fd_next <= 0) { - pr_err("Failed to find unused fd (fd:%d)\n", fd_next); - ret = -EINVAL; - goto exit; + if (fd_next == -1) { + fd_next = find_unused_fd_pid(e->pid); + if (fd_next <= 0) { + pr_err("Failed to find unused fd (fd:%d)\n", fd_next); + ret = -EINVAL; + goto exit; + } } + ret = devinfo_to_topology(e->device_entries, e->num_of_gpus + e->num_of_cpus, &src_topology); if (ret) { pr_err("Failed to convert stored device information to topology\n"); @@ -1727,14 +1920,26 @@ int amdgpu_plugin_restore_file(int id, bool *retry_needed) args.num_objects = e->num_of_objects; args.priv_data_size = e->priv_data.len; args.priv_data = (uintptr_t)e->priv_data.data; - args.op = KFD_CRIU_OP_RESTORE; + if (kmtIoctl(fd, AMDKFD_IOC_CRIU_OP, &args) == -1) { pr_perror("Restore ioctl failed"); ret = -1; goto exit; } + if (ret < 0) + goto exit; + + for (int i = 0; i < args.num_bos; i++) { + struct kfd_criu_bo_bucket *bo_bucket = &((struct kfd_criu_bo_bucket *)args.bos)[i]; + KfdBoEntry *bo_entry = e->bo_entries[i]; + + if (bo_entry->handle != -1) { + serve_out_dmabuf_fd(bo_entry->handle, bo_bucket->dmabuf_fd); + } + } + ret = restore_bo_data(id, (struct kfd_criu_bo_bucket *)args.bos, e); if (ret) goto exit; @@ -1859,6 +2064,14 @@ int amdgpu_plugin_resume_devices_late(int target_pid) } } + clear_restore_state(); + close(dmabuf_socket_fd); + while (!list_empty(&amdgpu_processes)) { + struct amdgpu_process *st = list_first_entry(&amdgpu_processes, struct amdgpu_process, l); + list_del(&st->l); + free(st); + } + close(fd); return exit_code; } diff --git a/plugins/amdgpu/amdgpu_plugin_drm.c b/plugins/amdgpu/amdgpu_plugin_drm.c index d54cd937d..43e95d1db 100644 --- a/plugins/amdgpu/amdgpu_plugin_drm.c +++ b/plugins/amdgpu/amdgpu_plugin_drm.c @@ -19,20 +19,113 @@ #include #include "common/list.h" +#include "files.h" #include "criu-amdgpu.pb-c.h" +#define __user +#include "drm.h" #include #include #include "xmalloc.h" -#include "criu-log.h" -#include "kfd_ioctl.h" +#include "amdgpu_drm.h" #include "amdgpu_plugin_drm.h" #include "amdgpu_plugin_util.h" #include "amdgpu_plugin_topology.h" +#include "util.h" +#include "common/scm.h" + +int get_gem_handle(amdgpu_device_handle h_dev, int dmabuf_fd) +{ + uint32_t handle; + int fd = amdgpu_device_get_fd(h_dev); + + if (dmabuf_fd == -1) { + return -1; + } + + drmPrimeFDToHandle(fd, dmabuf_fd, &handle); + + return handle; +} + +int drmIoctl(int fd, unsigned long request, void *arg) +{ + int ret, max_retries = 200; + + do { + ret = ioctl(fd, request, arg); + } while (ret == -1 && max_retries-- > 0 && (errno == EINTR || errno == EAGAIN)); + + if (ret == -1 && errno == EBADF) + /* In case pthread_atfork didn't catch it, this will + * make any subsequent hsaKmt calls fail in CHECK_KFD_OPEN. 
+ */ + pr_perror("KFD file descriptor not valid in this process"); + return ret; +} + +static int allocate_bo_entries(CriuRenderNode *e, int num_bos) +{ + e->bo_entries = xmalloc(sizeof(DrmBoEntry *) * num_bos); + if (!e->bo_entries) { + pr_err("Failed to allocate bo_info\n"); + return -ENOMEM; + } + + for (int i = 0; i < num_bos; i++) { + DrmBoEntry *entry = xzalloc(sizeof(*entry)); + + if (!entry) { + pr_err("Failed to allocate botest\n"); + return -ENOMEM; + } + + drm_bo_entry__init(entry); + + e->bo_entries[i] = entry; + e->n_bo_entries++; + } + return 0; +} + +static int allocate_vm_entries(CriuRenderNode *e, int num_vms) +{ + e->vm_entries = xmalloc(sizeof(DrmVmEntry *) * num_vms); + if (!e->vm_entries) { + pr_err("Failed to allocate bo_info\n"); + return -ENOMEM; + } + + for (int i = 0; i < num_vms; i++) { + DrmVmEntry *entry = xzalloc(sizeof(*entry)); + + if (!entry) { + pr_err("Failed to allocate botest\n"); + return -ENOMEM; + } + + drm_vm_entry__init(entry); + + e->vm_entries[i] = entry; + e->n_vm_entries++; + } + return 0; +} + +static void free_e(CriuRenderNode *e) +{ + for (int i = 0; i < e->n_bo_entries; i++) { + if (e->bo_entries[i]) + xfree(e->bo_entries[i]); + } + + xfree(e); +} + int amdgpu_plugin_drm_handle_device_vma(int fd, const struct stat *st) { char path[PATH_MAX]; @@ -60,19 +153,209 @@ int amdgpu_plugin_drm_handle_device_vma(int fd, const struct stat *st) return 0; } +static int restore_bo_contents_drm(int drm_render_minor, pid_t pid, int drm_fd, uint64_t num_of_bos, struct drm_amdgpu_criu_bo_bucket *bo_buckets) +{ + size_t image_size = 0, total_bo_size = 0, max_bo_size = 0, buffer_size; + struct amdgpu_gpu_info gpu_info = { 0 }; + amdgpu_device_handle h_dev; + uint64_t max_copy_size; + uint32_t major, minor; + FILE *bo_contents_fp = NULL; + void *buffer = NULL; + char img_path[40]; + int num_bos = 0; + int i, ret = 0; + + ret = amdgpu_device_initialize(drm_fd, &major, &minor, &h_dev); + if (ret) { + pr_perror("failed to initialize device"); + goto exit; + } + plugin_log_msg("libdrm initialized successfully\n"); + + ret = amdgpu_query_gpu_info(h_dev, &gpu_info); + if (ret) { + pr_perror("failed to query gpuinfo via libdrm"); + goto exit; + } + + max_copy_size = (gpu_info.family_id >= AMDGPU_FAMILY_AI) ? SDMA_LINEAR_COPY_MAX_SIZE : + SDMA_LINEAR_COPY_MAX_SIZE - 1; + + for (i = 0; i < num_of_bos; i++) { + if (bo_buckets[i].preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) { + total_bo_size += bo_buckets[i].size; + + if (bo_buckets[i].size > max_bo_size) + max_bo_size = bo_buckets[i].size; + } + } + + buffer_size = max_bo_size; + + posix_memalign(&buffer, sysconf(_SC_PAGE_SIZE), buffer_size); + if (!buffer) { + pr_perror("Failed to alloc aligned memory. 
Consider setting KFD_MAX_BUFFER_SIZE."); + ret = -ENOMEM; + goto exit; + } + + for (i = 0; i < num_of_bos; i++) { + + if (!(bo_buckets[i].preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT))) + continue; + + if (bo_buckets[i].addr == -1) + continue; + + num_bos++; + + snprintf(img_path, sizeof(img_path), IMG_DRM_PAGES_FILE, pid, drm_render_minor, i); + + bo_contents_fp = open_img_file(img_path, false, &image_size); + + ret = sdma_copy_bo(bo_buckets[i].dmabuf_fd, bo_buckets[i].size, bo_contents_fp, buffer, buffer_size, h_dev, max_copy_size, + SDMA_OP_VRAM_WRITE, true); + if (ret) { + pr_err("Failed to fill the BO using sDMA: bo_buckets[%d]\n", i); + break; + } + plugin_log_msg("** Successfully filled the BO using sDMA: bo_buckets[%d] **\n", i); + + + if (bo_contents_fp) + fclose(bo_contents_fp); + + } + +exit: + for (int i = 0; i < num_of_bos; i++) { + if (bo_buckets[i].dmabuf_fd != KFD_INVALID_FD) + close(bo_buckets[i].dmabuf_fd); + } + + xfree(buffer); + + amdgpu_device_deinitialize(h_dev); + return ret; +} int amdgpu_plugin_drm_dump_file(int fd, int id, struct stat *drm) { - CriuRenderNode rd = CRIU_RENDER_NODE__INIT; - struct tp_node *tp_node; + CriuRenderNode *rd = NULL; char path[PATH_MAX]; unsigned char *buf; int minor; int len; int ret; + struct drm_amdgpu_criu_args args = {0}; + size_t image_size; + struct tp_node *tp_node; + + rd = xmalloc(sizeof(*rd)); + if (!rd) { + ret = -ENOMEM; + goto exit; + } + criu_render_node__init(rd); /* Get the topology node of the DRM device */ minor = minor(drm->st_rdev); + rd->drm_render_minor = minor; + + args.op = AMDGPU_CRIU_OP_PROCESS_INFO; + if (drmIoctl(fd, DRM_IOCTL_AMDGPU_CRIU_OP, &args) == -1) { + pr_perror("Failed to call process info ioctl"); + ret = -1; + goto exit; + } + + rd->pid = args.pid; + rd->num_of_bos = args.num_bos; + rd->num_of_vms = args.num_vms; + ret = allocate_bo_entries(rd, args.num_bos); + if (ret) + goto exit; + ret = allocate_vm_entries(rd, args.num_vms); + if (ret) + goto exit; + + args.bos = (uintptr_t)xzalloc((args.num_bos * sizeof(struct drm_amdgpu_criu_bo_bucket))); + if (!args.bos) { + ret = -ENOMEM; + goto exit; + } + + args.vms = (uintptr_t)xzalloc((args.num_vms * sizeof(struct drm_amdgpu_criu_vm_bucket))); + if (!args.bos) { + ret = -ENOMEM; + goto exit; + } + + args.op = AMDGPU_CRIU_OP_CHECKPOINT; + ret = drmIoctl(fd, DRM_IOCTL_AMDGPU_CRIU_OP, &args); + if (ret) { + pr_perror("Failed to call dumper (process) ioctl"); + goto exit; + } + + for (int i = 0; i < args.num_bos; i++) { + struct drm_amdgpu_criu_bo_bucket bo_bucket = ((struct drm_amdgpu_criu_bo_bucket *)args.bos)[i]; + uint32_t major, minor; + amdgpu_device_handle h_dev; + void *buffer = NULL; + char img_path[40]; + FILE *bo_contents_fp = NULL; + DrmBoEntry *boinfo = rd->bo_entries[i]; + + boinfo->addr = bo_bucket.addr; + boinfo->size = bo_bucket.size; + boinfo->offset = bo_bucket.offset; + boinfo->alloc_flags = bo_bucket.alloc_flags; + boinfo->preferred_domains = bo_bucket.preferred_domains; + + ret = amdgpu_device_initialize(fd, &major, &minor, &h_dev); + + snprintf(img_path, sizeof(img_path), IMG_DRM_PAGES_FILE, rd->pid, rd->drm_render_minor, i); + bo_contents_fp = open_img_file(img_path, true, &image_size); + + posix_memalign(&buffer, sysconf(_SC_PAGE_SIZE), bo_bucket.size); + + ret = sdma_copy_bo(bo_bucket.dmabuf_fd, bo_bucket.size, bo_contents_fp, buffer, bo_bucket.size, h_dev, 0x1000, + SDMA_OP_VRAM_READ, false); + + boinfo->handle = get_gem_handle(h_dev, bo_bucket.dmabuf_fd); + boinfo->is_import = (bo_bucket.flags & 
AMDGPU_CRIU_BO_FLAG_IS_IMPORT) + || shared_bo_has_exporter(boinfo->handle); + + if (bo_bucket.dmabuf_fd != KFD_INVALID_FD) + close(bo_bucket.dmabuf_fd); + + if (bo_contents_fp) + fclose(bo_contents_fp); + + ret = amdgpu_device_deinitialize(h_dev); + if (ret) + goto exit; + } + for (int i = 0; i < args.num_bos; i++) { + DrmBoEntry *boinfo = rd->bo_entries[i]; + + ret = record_shared_bo(boinfo->handle, boinfo->is_import); + if (ret) + goto exit; + } + for (int i = 0; i < args.num_vms; i++) { + DrmVmEntry *vminfo = rd->vm_entries[i]; + struct drm_amdgpu_criu_vm_bucket vm_bucket = ((struct drm_amdgpu_criu_vm_bucket *)args.vms)[i]; + + vminfo->start = vm_bucket.start; + vminfo->last = vm_bucket.last; + vminfo->offset = vm_bucket.offset; + vminfo->flags = vm_bucket.flags; + vminfo->gem_handle = vm_bucket.gem_handle; + } + tp_node = sys_get_node_by_render_minor(&src_topology, minor); if (!tp_node) { pr_err("Failed to find a device with minor number = %d\n", minor); @@ -80,21 +363,172 @@ int amdgpu_plugin_drm_dump_file(int fd, int id, struct stat *drm) } /* Get the GPU_ID of the DRM device */ - rd.gpu_id = maps_get_dest_gpu(&checkpoint_maps, tp_node->gpu_id); - if (!rd.gpu_id) { - pr_err("Failed to find valid gpu_id for the device = %d\n", rd.gpu_id); + rd->gpu_id = maps_get_dest_gpu(&checkpoint_maps, tp_node->gpu_id); + if (!rd->gpu_id) { + pr_err("Failed to find valid gpu_id for the device = %d\n", rd->gpu_id); return -ENODEV; } - len = criu_render_node__get_packed_size(&rd); + len = criu_render_node__get_packed_size(rd); buf = xmalloc(len); if (!buf) return -ENOMEM; - criu_render_node__pack(&rd, buf); + criu_render_node__pack(rd, buf); snprintf(path, sizeof(path), IMG_DRM_FILE, id); ret = write_img_file(path, buf, len); + + exit: + xfree((void *)args.bos); + xfree((void *)args.vms); xfree(buf); + free_e(rd); return ret; } + +int amdgpu_plugin_drm_restore_file(int fd, CriuRenderNode *rd) +{ + struct drm_amdgpu_criu_args args = {0}; + int ret = 0; + bool retry_needed = false; + uint32_t major, minor; + amdgpu_device_handle h_dev; + int device_fd; + + args.num_bos = rd->num_of_bos; + args.num_vms = rd->num_of_vms; + args.bos = (uint64_t)xzalloc(sizeof(struct drm_amdgpu_criu_bo_bucket) * rd->num_of_bos); + + ret = amdgpu_device_initialize(fd, &major, &minor, &h_dev); + if (ret) { + pr_info("Error in init amdgpu device\n"); + goto exit; + } + + device_fd = amdgpu_device_get_fd(h_dev); + + for (int i = 0; i < args.num_bos; i++) { + struct drm_amdgpu_criu_bo_bucket *bo_bucket = &((struct drm_amdgpu_criu_bo_bucket *)args.bos)[i]; + DrmBoEntry *boinfo = rd->bo_entries[i]; + int dmabuf_fd = -1; + uint32_t handle; + struct drm_prime_change_gem_handle change_args = {0}; + union drm_amdgpu_gem_mmap mmap_args = {0}; + struct drm_amdgpu_gem_va va_args = {0}; + + bo_bucket->addr = boinfo->addr; + + if (work_already_completed(boinfo->handle, rd->drm_render_minor)) { + bo_bucket->addr = -1; + continue; + } else if (boinfo->handle != -1) { + if (boinfo->is_import) { + dmabuf_fd = dmabuf_fd_for_handle(boinfo->handle); + if (dmabuf_fd == -1) { + bo_bucket->addr = -1; + continue; + } + } + } + + bo_bucket->dmabuf_fd = dmabuf_fd; + bo_bucket->size = boinfo->size; + bo_bucket->preferred_domains = boinfo->preferred_domains; + + if (boinfo->is_import) { + drmPrimeFDToHandle(device_fd, dmabuf_fd, &handle); + } else { + union drm_amdgpu_gem_create create_args = {0}; + + create_args.in.bo_size = boinfo->size; + create_args.in.alignment = 0x1000; + create_args.in.domains = boinfo->preferred_domains; + 
create_args.in.domain_flags = boinfo->alloc_flags; + + if (drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_CREATE, &create_args) == -1) { + pr_perror("Error Failed to call create ioctl"); + ret = -1; + goto exit; + } + handle = create_args.out.handle; + + drmPrimeHandleToFD(device_fd, handle, 0, &dmabuf_fd); + } + + change_args.handle = handle; + change_args.new_handle = boinfo->handle; + + if (drmIoctl(fd, DRM_IOCTL_PRIME_CHANGE_GEM_HANDLE, &change_args) == -1) { + pr_perror("Error Failed to call change ioctl"); + ret = -1; + goto exit; + } + + if (!boinfo->is_import) + serve_out_dmabuf_fd(boinfo->handle, dmabuf_fd); + + bo_bucket->dmabuf_fd = dmabuf_fd; + + ret = record_completed_work(boinfo->handle, rd->drm_render_minor); + if (ret) + goto exit; + + mmap_args.in.handle = boinfo->handle; + if (drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_MMAP, &mmap_args) == -1) { + pr_perror("Error Failed to call mmap ioctl"); + ret = -1; + goto exit; + } + + for (int j = 0; j < args.num_vms; j++) { + DrmVmEntry *vminfo = rd->vm_entries[j]; + + if (vminfo->gem_handle != boinfo->handle) + continue; + + va_args.handle = boinfo->handle; + va_args.operation = AMDGPU_VA_OP_MAP; + va_args.flags = vminfo->flags; + va_args.va_address = vminfo->start * 0x1000; + va_args.offset_in_bo = vminfo->offset; + va_args.map_size = (vminfo->last - vminfo->start + 1) * 0x1000; + + + if (drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_VA, &va_args) == -1) { + pr_perror("Error Failed to call mmap ioctl"); + ret = -1; + goto exit; + } + + } + + ret = save_vma_updates(boinfo->offset, boinfo->addr, mmap_args.out.addr_ptr, fd); + if (ret < 0) + goto exit; + + } + + if (ret) { + pr_info("Error in deinit amdgpu device\n"); + goto exit; + } + + ret = record_completed_work(-1, rd->drm_render_minor); + if (ret) + goto exit; + + ret = amdgpu_device_deinitialize(h_dev); + + if (args.num_bos > 0) { + ret = restore_bo_contents_drm(rd->drm_render_minor, rd->pid, fd, args.num_bos, (struct drm_amdgpu_criu_bo_bucket *)args.bos); + if (ret) + goto exit; + } + + exit: + if (ret < 0) + return ret; + + return retry_needed; +} diff --git a/plugins/amdgpu/amdgpu_plugin_drm.h b/plugins/amdgpu/amdgpu_plugin_drm.h index 6f0c1a9a6..3dd4499a6 100644 --- a/plugins/amdgpu/amdgpu_plugin_drm.h +++ b/plugins/amdgpu/amdgpu_plugin_drm.h @@ -24,5 +24,13 @@ int amdgpu_plugin_drm_handle_device_vma(int fd, const struct stat *drm); */ int amdgpu_plugin_drm_dump_file(int fd, int id, struct stat *drm); +int amdgpu_plugin_drm_restore_file(int fd, CriuRenderNode *rd); + +int amdgpu_plugin_drm_unpause_file(int fd); + +int get_gem_handle(amdgpu_device_handle h_dev, int dmabuf_fd); + +int save_vma_updates(uint64_t offset, uint64_t addr, uint64_t restored_offset, int gpu_id); + #endif /* __AMDGPU_PLUGIN_DRM_H__ */ diff --git a/plugins/amdgpu/amdgpu_plugin_util.c b/plugins/amdgpu/amdgpu_plugin_util.c index a165fc9cd..4b3ae0cdd 100644 --- a/plugins/amdgpu/amdgpu_plugin_util.c +++ b/plugins/amdgpu/amdgpu_plugin_util.c @@ -41,6 +41,10 @@ /* Tracks number of device files that need to be checkpointed */ static int dev_file_cnt = 0; +static LIST_HEAD(shared_bos); +static LIST_HEAD(shared_dmabuf_fds); +static LIST_HEAD(completed_work); + /* Helper structures to encode device topology of SRC and DEST platforms */ struct tp_system src_topology; struct tp_system dest_topology; @@ -68,18 +72,121 @@ void init_gpu_count(struct tp_system *topo) dev_file_cnt = 1 + topology_gpu_count(topo); } -int read_fp(FILE *fp, void *buf, const size_t buf_len) -{ - size_t len_read; +bool shared_bo_has_exporter(int handle) { + struct 
shared_bo *bo; - len_read = fread(buf, 1, buf_len, fp); - if (len_read != buf_len) { - pr_err("Unable to read file (read:%ld buf_len:%ld)\n", len_read, buf_len); - return -EIO; + if (handle == -1) { + return false; + } + + list_for_each_entry(bo, &shared_bos, l) { + if (bo->handle == handle) { + return bo->has_exporter; + } + } + + return false; +} + +int record_shared_bo(int handle, bool is_imported) { + struct shared_bo *bo; + + if (handle == -1) + return 0; + + list_for_each_entry(bo, &shared_bos, l) { + if (bo->handle == handle) { + return 0; + } + } + bo = malloc(sizeof(struct shared_bo)); + if (!bo) + return -1; + bo->handle = handle; + bo->has_exporter = !is_imported; + list_add(&bo->l, &shared_bos); + + return 0; +} + +int record_shared_dmabuf_fd(int handle, int dmabuf_fd) { + struct shared_dmabuf *bo; + + bo = malloc(sizeof(struct shared_dmabuf)); + if(!bo) + return -1; + bo->handle = handle; + bo->dmabuf_fd = dmabuf_fd; + list_add(&bo->l, &shared_dmabuf_fds); + + return 0; +} + +int dmabuf_fd_for_handle(int handle) { + struct shared_dmabuf *bo; + + list_for_each_entry(bo, &shared_dmabuf_fds, l) { + if (bo->handle == handle) { + return bo->dmabuf_fd; + } } + + return -1; +} + +int record_completed_work(int handle, int id) { + struct restore_completed_work *work; + + work = malloc(sizeof(struct restore_completed_work)); + if (!work) + return -1; + work->handle = handle; + work->id = id; + list_add(&work->l, &completed_work); + return 0; } +bool work_already_completed(int handle, int id) { + struct restore_completed_work *work; + + list_for_each_entry(work, &completed_work, l) { + if (work->handle == handle && work->id == id) { + return true; + } + } + + return false; +} + +void clear_restore_state() { + while (!list_empty(&shared_dmabuf_fds)) { + struct shared_dmabuf *st = list_first_entry(&shared_dmabuf_fds, struct shared_dmabuf, l); + list_del(&st->l); + close(st->dmabuf_fd); + free(st); + } + + while (!list_empty(&completed_work)) { + struct restore_completed_work *st = list_first_entry(&completed_work, struct restore_completed_work, l); + list_del(&st->l); + free(st); + } +} + +int read_fp(FILE *fp, void *buf, const size_t buf_len) +{ + size_t len_read; + + len_read = fread(buf, 1, buf_len, fp); + if (len_read != buf_len) { + pr_err("Unable to read file (read:%ld buf_len:%ld)\n", len_read, buf_len); + return -EIO; + + } + return 0; +} + int write_fp(FILE *fp, const void *buf, const size_t buf_len) { size_t len_write; diff --git a/plugins/amdgpu/amdgpu_plugin_util.h b/plugins/amdgpu/amdgpu_plugin_util.h index aacca3a28..bd23fc6d4 100644 --- a/plugins/amdgpu/amdgpu_plugin_util.h +++ b/plugins/amdgpu/amdgpu_plugin_util.h @@ -1,6 +1,8 @@ #ifndef __AMDGPU_PLUGIN_UTIL_H__ #define __AMDGPU_PLUGIN_UTIL_H__ +#include + #ifndef _GNU_SOURCE #define _GNU_SOURCE 1 #endif @@ -52,7 +54,7 @@ #define IMG_DRM_FILE "amdgpu-renderD-%d.img" /* Name of file having serialized data of DRM device buffer objects (BOs) */ -#define IMG_DRM_PAGES_FILE "amdgpu-drm-pages-%d-%04x.img" +#define IMG_DRM_PAGES_FILE "amdgpu-drm-pages-%d-%d-%04x.img" /* Helper macros to Checkpoint and Restore a ROCm file */ #define HSAKMT_SHM_PATH "/dev/shm/hsakmt_shared_mem" @@ -73,6 +75,35 @@ enum sdma_op_type { SDMA_OP_VRAM_WRITE, }; +struct dumped_fd { + struct list_head l; + int fd; + bool is_drm; +}; + +struct shared_bo { + struct list_head l; + int handle; + bool has_exporter; +}; + +struct shared_dmabuf { + struct list_head l; + int handle; + int dmabuf_fd; +}; + +struct restore_completed_work { + struct list_head l; + int 
handle; + int id; +}; + +struct amdgpu_process { + struct list_head l; + int pid; +}; + /* Helper structures to encode device topology of SRC and DEST platforms */ extern struct tp_system src_topology; extern struct tp_system dest_topology; @@ -101,6 +132,23 @@ bool checkpoint_is_complete(); void decrement_checkpoint_count(); void init_gpu_count(struct tp_system *topology); +bool shared_bo_has_exporter(int handle); +int record_shared_bo(int handle, bool is_imported); + +int record_shared_dmabuf_fd(int handle, int dmabuf_fd); +int dmabuf_fd_for_handle(int handle); + +int record_completed_work(int handle, int id); +bool work_already_completed(int handle, int id); + +void clear_restore_state(); + void print_kfd_bo_stat(int bo_cnt, struct kfd_criu_bo_bucket *bo_list); +int sdma_copy_bo(int shared_fd, uint64_t size, FILE *storage_fp, + void *buffer, size_t buffer_size, amdgpu_device_handle h_dev, + uint64_t max_copy_size, enum sdma_op_type type, bool do_not_free); + +int serve_out_dmabuf_fd(int handle, int fd); + #endif /* __AMDGPU_PLUGIN_UTIL_H__ */ diff --git a/plugins/amdgpu/criu-amdgpu.proto b/plugins/amdgpu/criu-amdgpu.proto index 078b67650..8f198410b 100644 --- a/plugins/amdgpu/criu-amdgpu.proto +++ b/plugins/amdgpu/criu-amdgpu.proto @@ -46,6 +46,7 @@ message kfd_bo_entry { required uint64 offset = 3; required uint32 alloc_flags = 4; required uint32 gpu_id = 5; + required uint32 handle = 6; } message criu_kfd { @@ -61,6 +62,30 @@ message criu_kfd { required bytes priv_data = 10; } +message drm_bo_entry { + required uint64 addr = 1; + required uint64 size = 2; + required uint64 offset = 3; + required uint64 alloc_flags = 4; + required uint32 preferred_domains = 5; + required uint32 handle = 6; + required uint32 is_import = 7; +} + +message drm_vm_entry { + required uint64 start = 1; + required uint64 last = 2; + required uint64 offset = 3; + required uint64 flags = 4; + required uint32 gem_handle = 5; +} + message criu_render_node { required uint32 gpu_id = 1; + required uint32 pid = 2; + required uint32 drm_render_minor = 3; + required uint64 num_of_bos = 4; + repeated drm_bo_entry bo_entries = 5; + required uint32 num_of_vms = 6; + repeated drm_vm_entry vm_entries = 7; } -- 2.34.1
From David.Francis at amd.com Sat May 17 00:05:36 2025 From: David.Francis at amd.com (David Francis) Date: Fri, 16 May 2025 17:05:36 -0400 Subject: [CRIU] [PATCH 4/7] plugin/amdgpu: Add amdgpu drm header In-Reply-To: <20250516210539.3537211-1-David.Francis@amd.com> References: <20250516210539.3537211-1-David.Francis@amd.com> Message-ID: <20250516210539.3537211-5-David.Francis@amd.com> For the amdgpu plugin to call the new amdgpu drm CRIU ioctls, it needs the amdgpu drm header file, copied from the kernel's includes. Signed-off-by: David Francis --- plugins/amdgpu/amdgpu_drm.h | 1637 +++++++++++++++++++++++++++++++++++ 1 file changed, 1637 insertions(+) create mode 100644 plugins/amdgpu/amdgpu_drm.h diff --git a/plugins/amdgpu/amdgpu_drm.h b/plugins/amdgpu/amdgpu_drm.h new file mode 100644 index 000000000..365c9fb96 --- /dev/null +++ b/plugins/amdgpu/amdgpu_drm.h @@ -0,0 +1,1637 @@ +/* amdgpu_drm.h -- Public header for the amdgpu driver -*- linux-c -*- + * + * Copyright 2000 Precision Insight, Inc., Cedar Park, Texas. + * Copyright 2000 VA Linux Systems, Inc., Fremont, California. + * Copyright 2002 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2014 Advanced Micro Devices, Inc.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Kevin E. Martin + * Gareth Hughes + * Keith Whitwell + */ + +#ifndef __AMDGPU_DRM_H__ +#define __AMDGPU_DRM_H__ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +#define DRM_AMDGPU_GEM_CREATE 0x00 +#define DRM_AMDGPU_GEM_MMAP 0x01 +#define DRM_AMDGPU_CTX 0x02 +#define DRM_AMDGPU_BO_LIST 0x03 +#define DRM_AMDGPU_CS 0x04 +#define DRM_AMDGPU_INFO 0x05 +#define DRM_AMDGPU_GEM_METADATA 0x06 +#define DRM_AMDGPU_GEM_WAIT_IDLE 0x07 +#define DRM_AMDGPU_GEM_VA 0x08 +#define DRM_AMDGPU_WAIT_CS 0x09 +#define DRM_AMDGPU_GEM_OP 0x10 +#define DRM_AMDGPU_GEM_USERPTR 0x11 +#define DRM_AMDGPU_WAIT_FENCES 0x12 +#define DRM_AMDGPU_VM 0x13 +#define DRM_AMDGPU_FENCE_TO_HANDLE 0x14 +#define DRM_AMDGPU_SCHED 0x15 +#define DRM_AMDGPU_USERQ 0x16 +#define DRM_AMDGPU_USERQ_SIGNAL 0x17 +#define DRM_AMDGPU_USERQ_WAIT 0x18 +#define DRM_AMDGPU_CRIU_OP 0x19 + +#define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create) +#define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap) +#define DRM_IOCTL_AMDGPU_CTX DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CTX, union drm_amdgpu_ctx) +#define DRM_IOCTL_AMDGPU_BO_LIST DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_BO_LIST, union drm_amdgpu_bo_list) +#define DRM_IOCTL_AMDGPU_CS DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CS, union drm_amdgpu_cs) +#define DRM_IOCTL_AMDGPU_INFO DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_INFO, struct drm_amdgpu_info) +#define DRM_IOCTL_AMDGPU_GEM_METADATA DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_METADATA, struct drm_amdgpu_gem_metadata) +#define DRM_IOCTL_AMDGPU_GEM_WAIT_IDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_WAIT_IDLE, union drm_amdgpu_gem_wait_idle) +#define DRM_IOCTL_AMDGPU_GEM_VA DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_VA, struct drm_amdgpu_gem_va) +#define DRM_IOCTL_AMDGPU_WAIT_CS DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_CS, union drm_amdgpu_wait_cs) +#define DRM_IOCTL_AMDGPU_GEM_OP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op) +#define DRM_IOCTL_AMDGPU_GEM_USERPTR DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr) +#define DRM_IOCTL_AMDGPU_WAIT_FENCES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences) +#define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm) +#define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE 
DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle) +#define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched) +#define DRM_IOCTL_AMDGPU_USERQ DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq) +#define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal) +#define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait) +#define DRM_IOCTL_AMDGPU_CRIU_OP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CRIU_OP, struct drm_amdgpu_criu_args) + +/** + * DOC: memory domains + * + * %AMDGPU_GEM_DOMAIN_CPU System memory that is not GPU accessible. + * Memory in this pool could be swapped out to disk if there is pressure. + * + * %AMDGPU_GEM_DOMAIN_GTT GPU accessible system memory, mapped into the + * GPU's virtual address space via gart. Gart memory linearizes non-contiguous + * pages of system memory, allows GPU access system memory in a linearized + * fashion. + * + * %AMDGPU_GEM_DOMAIN_VRAM Local video memory. For APUs, it is memory + * carved out by the BIOS. + * + * %AMDGPU_GEM_DOMAIN_GDS Global on-chip data storage used to share data + * across shader threads. + * + * %AMDGPU_GEM_DOMAIN_GWS Global wave sync, used to synchronize the + * execution of all the waves on a device. + * + * %AMDGPU_GEM_DOMAIN_OA Ordered append, used by 3D or Compute engines + * for appending data. + * + * %AMDGPU_GEM_DOMAIN_DOORBELL Doorbell. It is an MMIO region for + * signalling user mode queues. + */ +#define AMDGPU_GEM_DOMAIN_CPU 0x1 +#define AMDGPU_GEM_DOMAIN_GTT 0x2 +#define AMDGPU_GEM_DOMAIN_VRAM 0x4 +#define AMDGPU_GEM_DOMAIN_GDS 0x8 +#define AMDGPU_GEM_DOMAIN_GWS 0x10 +#define AMDGPU_GEM_DOMAIN_OA 0x20 +#define AMDGPU_GEM_DOMAIN_DOORBELL 0x40 +#define AMDGPU_GEM_DOMAIN_MASK (AMDGPU_GEM_DOMAIN_CPU | \ + AMDGPU_GEM_DOMAIN_GTT | \ + AMDGPU_GEM_DOMAIN_VRAM | \ + AMDGPU_GEM_DOMAIN_GDS | \ + AMDGPU_GEM_DOMAIN_GWS | \ + AMDGPU_GEM_DOMAIN_OA | \ + AMDGPU_GEM_DOMAIN_DOORBELL) + +/* Flag that CPU access will be required for the case of VRAM domain */ +#define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED (1 << 0) +/* Flag that CPU access will not work, this VRAM domain is invisible */ +#define AMDGPU_GEM_CREATE_NO_CPU_ACCESS (1 << 1) +/* Flag that USWC attributes should be used for GTT */ +#define AMDGPU_GEM_CREATE_CPU_GTT_USWC (1 << 2) +/* Flag that the memory should be in VRAM and cleared */ +#define AMDGPU_GEM_CREATE_VRAM_CLEARED (1 << 3) +/* Flag that allocating the BO should use linear VRAM */ +#define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS (1 << 5) +/* Flag that BO is always valid in this VM */ +#define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID (1 << 6) +/* Flag that BO sharing will be explicitly synchronized */ +#define AMDGPU_GEM_CREATE_EXPLICIT_SYNC (1 << 7) +/* Flag that indicates allocating MQD gart on GFX9, where the mtype + * for the second page onward should be set to NC. It should never + * be used by user space applications. 
+ */ +#define AMDGPU_GEM_CREATE_CP_MQD_GFX9 (1 << 8) +/* Flag that BO may contain sensitive data that must be wiped before + * releasing the memory + */ +#define AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE (1 << 9) +/* Flag that BO will be encrypted and that the TMZ bit should be + * set in the PTEs when mapping this buffer via GPUVM or + * accessing it with various hw blocks + */ +#define AMDGPU_GEM_CREATE_ENCRYPTED (1 << 10) +/* Flag that BO will be used only in preemptible context, which does + * not require GTT memory accounting + */ +#define AMDGPU_GEM_CREATE_PREEMPTIBLE (1 << 11) +/* Flag that BO can be discarded under memory pressure without keeping the + * content. + */ +#define AMDGPU_GEM_CREATE_DISCARDABLE (1 << 12) +/* Flag that BO is shared coherently between multiple devices or CPU threads. + * May depend on GPU instructions to flush caches to system scope explicitly. + * + * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and + * may override the MTYPE selected in AMDGPU_VA_OP_MAP. + */ +#define AMDGPU_GEM_CREATE_COHERENT (1 << 13) +/* Flag that BO should not be cached by GPU. Coherent without having to flush + * GPU caches explicitly + * + * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and + * may override the MTYPE selected in AMDGPU_VA_OP_MAP. + */ +#define AMDGPU_GEM_CREATE_UNCACHED (1 << 14) +/* Flag that BO should be coherent across devices when using device-level + * atomics. May depend on GPU instructions to flush caches to device scope + * explicitly, promoting them to system scope automatically. + * + * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and + * may override the MTYPE selected in AMDGPU_VA_OP_MAP. + */ +#define AMDGPU_GEM_CREATE_EXT_COHERENT (1 << 15) +/* Set PTE.D and recompress during GTT->VRAM moves according to TILING flags. */ +#define AMDGPU_GEM_CREATE_GFX12_DCC (1 << 16) + +struct drm_amdgpu_gem_create_in { + /** the requested memory size */ + __u64 bo_size; + /** physical start_addr alignment in bytes for some HW requirements */ + __u64 alignment; + /** the requested memory domains */ + __u64 domains; + /** allocation flags */ + __u64 domain_flags; +}; + +struct drm_amdgpu_gem_create_out { + /** returned GEM object handle */ + __u32 handle; + __u32 _pad; +}; + +union drm_amdgpu_gem_create { + struct drm_amdgpu_gem_create_in in; + struct drm_amdgpu_gem_create_out out; +}; + +/** Opcode to create new residency list. 
*/ +#define AMDGPU_BO_LIST_OP_CREATE 0 +/** Opcode to destroy previously created residency list */ +#define AMDGPU_BO_LIST_OP_DESTROY 1 +/** Opcode to update resource information in the list */ +#define AMDGPU_BO_LIST_OP_UPDATE 2 + +struct drm_amdgpu_bo_list_in { + /** Type of operation */ + __u32 operation; + /** Handle of list or 0 if we want to create one */ + __u32 list_handle; + /** Number of BOs in list */ + __u32 bo_number; + /** Size of each element describing BO */ + __u32 bo_info_size; + /** Pointer to array describing BOs */ + __u64 bo_info_ptr; +}; + +struct drm_amdgpu_bo_list_entry { + /** Handle of BO */ + __u32 bo_handle; + /** New (if specified) BO priority to be used during migration */ + __u32 bo_priority; +}; + +struct drm_amdgpu_bo_list_out { + /** Handle of resource list */ + __u32 list_handle; + __u32 _pad; +}; + +union drm_amdgpu_bo_list { + struct drm_amdgpu_bo_list_in in; + struct drm_amdgpu_bo_list_out out; +}; + +/* context related */ +#define AMDGPU_CTX_OP_ALLOC_CTX 1 +#define AMDGPU_CTX_OP_FREE_CTX 2 +#define AMDGPU_CTX_OP_QUERY_STATE 3 +#define AMDGPU_CTX_OP_QUERY_STATE2 4 +#define AMDGPU_CTX_OP_GET_STABLE_PSTATE 5 +#define AMDGPU_CTX_OP_SET_STABLE_PSTATE 6 + +/* GPU reset status */ +#define AMDGPU_CTX_NO_RESET 0 +/* this the context caused it */ +#define AMDGPU_CTX_GUILTY_RESET 1 +/* some other context caused it */ +#define AMDGPU_CTX_INNOCENT_RESET 2 +/* unknown cause */ +#define AMDGPU_CTX_UNKNOWN_RESET 3 + +/* indicate gpu reset occurred after ctx created */ +#define AMDGPU_CTX_QUERY2_FLAGS_RESET (1<<0) +/* indicate vram lost occurred after ctx created */ +#define AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST (1<<1) +/* indicate some job from this context once cause gpu hang */ +#define AMDGPU_CTX_QUERY2_FLAGS_GUILTY (1<<2) +/* indicate some errors are detected by RAS */ +#define AMDGPU_CTX_QUERY2_FLAGS_RAS_CE (1<<3) +#define AMDGPU_CTX_QUERY2_FLAGS_RAS_UE (1<<4) +/* indicate that the reset hasn't completed yet */ +#define AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS (1<<5) + +/* Context priority level */ +#define AMDGPU_CTX_PRIORITY_UNSET -2048 +#define AMDGPU_CTX_PRIORITY_VERY_LOW -1023 +#define AMDGPU_CTX_PRIORITY_LOW -512 +#define AMDGPU_CTX_PRIORITY_NORMAL 0 +/* + * When used in struct drm_amdgpu_ctx_in, a priority above NORMAL requires + * CAP_SYS_NICE or DRM_MASTER +*/ +#define AMDGPU_CTX_PRIORITY_HIGH 512 +#define AMDGPU_CTX_PRIORITY_VERY_HIGH 1023 + +/* select a stable profiling pstate for perfmon tools */ +#define AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK 0xf +#define AMDGPU_CTX_STABLE_PSTATE_NONE 0 +#define AMDGPU_CTX_STABLE_PSTATE_STANDARD 1 +#define AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK 2 +#define AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK 3 +#define AMDGPU_CTX_STABLE_PSTATE_PEAK 4 + +struct drm_amdgpu_ctx_in { + /** AMDGPU_CTX_OP_* */ + __u32 op; + /** Flags */ + __u32 flags; + __u32 ctx_id; + /** AMDGPU_CTX_PRIORITY_* */ + __s32 priority; +}; + +union drm_amdgpu_ctx_out { + struct { + __u32 ctx_id; + __u32 _pad; + } alloc; + + struct { + /** For future use, no flags defined so far */ + __u64 flags; + /** Number of resets caused by this context so far. */ + __u32 hangs; + /** Reset status since the last call of the ioctl. 
*/ + __u32 reset_status; + } state; + + struct { + __u32 flags; + __u32 _pad; + } pstate; +}; + +union drm_amdgpu_ctx { + struct drm_amdgpu_ctx_in in; + union drm_amdgpu_ctx_out out; +}; + +/* user queue IOCTL operations */ +#define AMDGPU_USERQ_OP_CREATE 1 +#define AMDGPU_USERQ_OP_FREE 2 + +/* + * This structure is a container to pass input configuration + * info for all supported userqueue related operations. + * For operation AMDGPU_USERQ_OP_CREATE: user is expected + * to set all fields, excep the parameter 'queue_id'. + * For operation AMDGPU_USERQ_OP_FREE: the only input parameter expected + * to be set is 'queue_id', eveything else is ignored. + */ +struct drm_amdgpu_userq_in { + /** AMDGPU_USERQ_OP_* */ + __u32 op; + /** Queue id passed for operation USERQ_OP_FREE */ + __u32 queue_id; + /** the target GPU engine to execute workload (AMDGPU_HW_IP_*) */ + __u32 ip_type; + /** + * @doorbell_handle: the handle of doorbell GEM object + * associated with this userqueue client. + */ + __u32 doorbell_handle; + /** + * @doorbell_offset: 32-bit offset of the doorbell in the doorbell bo. + * Kernel will generate absolute doorbell offset using doorbell_handle + * and doorbell_offset in the doorbell bo. + */ + __u32 doorbell_offset; + __u32 _pad; + /** + * @queue_va: Virtual address of the GPU memory which holds the queue + * object. The queue holds the workload packets. + */ + __u64 queue_va; + /** + * @queue_size: Size of the queue in bytes, this needs to be 256-byte + * aligned. + */ + __u64 queue_size; + /** + * @rptr_va : Virtual address of the GPU memory which holds the ring RPTR. + * This object must be at least 8 byte in size and aligned to 8-byte offset. + */ + __u64 rptr_va; + /** + * @wptr_va : Virtual address of the GPU memory which holds the ring WPTR. + * This object must be at least 8 byte in size and aligned to 8-byte offset. + * + * Queue, RPTR and WPTR can come from the same object, as long as the size + * and alignment related requirements are met. + */ + __u64 wptr_va; + /** + * @mqd: MQD (memory queue descriptor) is a set of parameters which allow + * the GPU to uniquely define and identify a usermode queue. + * + * MQD data can be of different size for different GPU IP/engine and + * their respective versions/revisions, so this points to a __u64 * + * which holds IP specific MQD of this usermode queue. + */ + __u64 mqd; + /** + * @size: size of MQD data in bytes, it must match the MQD structure + * size of the respective engine/revision defined in UAPI for ex, for + * gfx11 workloads, size = sizeof(drm_amdgpu_userq_mqd_gfx11). + */ + __u64 mqd_size; +}; + +/* The structure to carry output of userqueue ops */ +struct drm_amdgpu_userq_out { + /** + * For operation AMDGPU_USERQ_OP_CREATE: This field contains a unique + * queue ID to represent the newly created userqueue in the system, otherwise + * it should be ignored. + */ + __u32 queue_id; + __u32 _pad; +}; + +union drm_amdgpu_userq { + struct drm_amdgpu_userq_in in; + struct drm_amdgpu_userq_out out; +}; + +/* GFX V11 IP specific MQD parameters */ +struct drm_amdgpu_userq_mqd_gfx11 { + /** + * @shadow_va: Virtual address of the GPU memory to hold the shadow buffer. + * Use AMDGPU_INFO_IOCTL to find the exact size of the object. + */ + __u64 shadow_va; + /** + * @csa_va: Virtual address of the GPU memory to hold the CSA buffer. + * Use AMDGPU_INFO_IOCTL to find the exact size of the object. 
+ */ + __u64 csa_va; +}; + +/* GFX V11 SDMA IP specific MQD parameters */ +struct drm_amdgpu_userq_mqd_sdma_gfx11 { + /** + * @csa_va: Virtual address of the GPU memory to hold the CSA buffer. + * This must be from a separate GPU object, and use AMDGPU_INFO IOCTL + * to get the size. + */ + __u64 csa_va; +}; + +/* GFX V11 Compute IP specific MQD parameters */ +struct drm_amdgpu_userq_mqd_compute_gfx11 { + /** + * @eop_va: Virtual address of the GPU memory to hold the EOP buffer. + * This must be from a separate GPU object, and use AMDGPU_INFO IOCTL + * to get the size. + */ + __u64 eop_va; +}; + +/* userq signal/wait ioctl */ +struct drm_amdgpu_userq_signal { + /** + * @queue_id: Queue handle used by the userq fence creation function + * to retrieve the WPTR. + */ + __u32 queue_id; + __u32 pad; + /** + * @syncobj_handles: The list of syncobj handles submitted by the user queue + * job to be signaled. + */ + __u64 syncobj_handles; + /** + * @num_syncobj_handles: A count that represents the number of syncobj handles in + * @syncobj_handles. + */ + __u64 num_syncobj_handles; + /** + * @bo_read_handles: The list of BO handles that the submitted user queue job + * is using for read only. This will update BO fences in the kernel. + */ + __u64 bo_read_handles; + /** + * @bo_write_handles: The list of BO handles that the submitted user queue job + * is using for write only. This will update BO fences in the kernel. + */ + __u64 bo_write_handles; + /** + * @num_bo_read_handles: A count that represents the number of read BO handles in + * @bo_read_handles. + */ + __u32 num_bo_read_handles; + /** + * @num_bo_write_handles: A count that represents the number of write BO handles in + * @bo_write_handles. + */ + __u32 num_bo_write_handles; +}; + +struct drm_amdgpu_userq_fence_info { + /** + * @va: A gpu address allocated for each queue which stores the + * read pointer (RPTR) value. + */ + __u64 va; + /** + * @value: A 64 bit value that represents the write pointer (WPTR) of the + * queue commands, which is compared with the RPTR value to signal the + * fences. + */ + __u64 value; +}; + +struct drm_amdgpu_userq_wait { + /** + * @syncobj_handles: The list of syncobj handles submitted by the user queue + * job to get the va/value pairs. + */ + __u64 syncobj_handles; + /** + * @syncobj_timeline_handles: The list of timeline syncobj handles submitted by + * the user queue job to get the va/value pairs at given @syncobj_timeline_points. + */ + __u64 syncobj_timeline_handles; + /** + * @syncobj_timeline_points: The list of timeline syncobj points submitted by the + * user queue job for the corresponding @syncobj_timeline_handles. + */ + __u64 syncobj_timeline_points; + /** + * @bo_read_handles: The list of read BO handles submitted by the user queue + * job to get the va/value pairs. + */ + __u64 bo_read_handles; + /** + * @bo_write_handles: The list of write BO handles submitted by the user queue + * job to get the va/value pairs. + */ + __u64 bo_write_handles; + /** + * @num_syncobj_timeline_handles: A count that represents the number of timeline + * syncobj handles in @syncobj_timeline_handles. + */ + __u16 num_syncobj_timeline_handles; + /** + * @num_fences: This field can be used both as input and output. As input it defines + * the maximum number of fences that can be returned and as output it will specify + * how many fences were actually returned from the ioctl. + */ + __u16 num_fences; + /** + * @num_syncobj_handles: A count that represents the number of syncobj handles in + * @syncobj_handles.
+ */ + __u32 num_syncobj_handles; + /** + * @num_bo_read_handles: A count that represents the number of read BO handles in + * @bo_read_handles. + */ + __u32 num_bo_read_handles; + /** + * @num_bo_write_handles: A count that represents the number of write BO handles in + * @bo_write_handles. + */ + __u32 num_bo_write_handles; + /** + * @out_fences: The field is a return value from the ioctl containing the list of + * address/value pairs to wait for. + */ + __u64 out_fences; +}; + +/* vm ioctl */ +#define AMDGPU_VM_OP_RESERVE_VMID 1 +#define AMDGPU_VM_OP_UNRESERVE_VMID 2 + +struct drm_amdgpu_vm_in { + /** AMDGPU_VM_OP_* */ + __u32 op; + __u32 flags; +}; + +struct drm_amdgpu_vm_out { + /** For future use, no flags defined so far */ + __u64 flags; +}; + +union drm_amdgpu_vm { + struct drm_amdgpu_vm_in in; + struct drm_amdgpu_vm_out out; +}; + +/* sched ioctl */ +#define AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE 1 +#define AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE 2 + +struct drm_amdgpu_sched_in { + /* AMDGPU_SCHED_OP_* */ + __u32 op; + __u32 fd; + /** AMDGPU_CTX_PRIORITY_* */ + __s32 priority; + __u32 ctx_id; +}; + +union drm_amdgpu_sched { + struct drm_amdgpu_sched_in in; +}; + +/* + * This is not a reliable API and you should expect it to fail for any + * number of reasons and have a fallback path that does not use userptr to + * perform any operation. + */ +#define AMDGPU_GEM_USERPTR_READONLY (1 << 0) +#define AMDGPU_GEM_USERPTR_ANONONLY (1 << 1) +#define AMDGPU_GEM_USERPTR_VALIDATE (1 << 2) +#define AMDGPU_GEM_USERPTR_REGISTER (1 << 3) + +struct drm_amdgpu_gem_userptr { + __u64 addr; + __u64 size; + /* AMDGPU_GEM_USERPTR_* */ + __u32 flags; + /* Resulting GEM handle */ + __u32 handle; +}; + +/* SI-CI-VI: */ +/* same meaning as the GB_TILE_MODE and GL_MACRO_TILE_MODE fields */ +#define AMDGPU_TILING_ARRAY_MODE_SHIFT 0 +#define AMDGPU_TILING_ARRAY_MODE_MASK 0xf +#define AMDGPU_TILING_PIPE_CONFIG_SHIFT 4 +#define AMDGPU_TILING_PIPE_CONFIG_MASK 0x1f +#define AMDGPU_TILING_TILE_SPLIT_SHIFT 9 +#define AMDGPU_TILING_TILE_SPLIT_MASK 0x7 +#define AMDGPU_TILING_MICRO_TILE_MODE_SHIFT 12 +#define AMDGPU_TILING_MICRO_TILE_MODE_MASK 0x7 +#define AMDGPU_TILING_BANK_WIDTH_SHIFT 15 +#define AMDGPU_TILING_BANK_WIDTH_MASK 0x3 +#define AMDGPU_TILING_BANK_HEIGHT_SHIFT 17 +#define AMDGPU_TILING_BANK_HEIGHT_MASK 0x3 +#define AMDGPU_TILING_MACRO_TILE_ASPECT_SHIFT 19 +#define AMDGPU_TILING_MACRO_TILE_ASPECT_MASK 0x3 +#define AMDGPU_TILING_NUM_BANKS_SHIFT 21 +#define AMDGPU_TILING_NUM_BANKS_MASK 0x3 + +/* GFX9 - GFX11: */ +#define AMDGPU_TILING_SWIZZLE_MODE_SHIFT 0 +#define AMDGPU_TILING_SWIZZLE_MODE_MASK 0x1f +#define AMDGPU_TILING_DCC_OFFSET_256B_SHIFT 5 +#define AMDGPU_TILING_DCC_OFFSET_256B_MASK 0xFFFFFF +#define AMDGPU_TILING_DCC_PITCH_MAX_SHIFT 29 +#define AMDGPU_TILING_DCC_PITCH_MAX_MASK 0x3FFF +#define AMDGPU_TILING_DCC_INDEPENDENT_64B_SHIFT 43 +#define AMDGPU_TILING_DCC_INDEPENDENT_64B_MASK 0x1 +#define AMDGPU_TILING_DCC_INDEPENDENT_128B_SHIFT 44 +#define AMDGPU_TILING_DCC_INDEPENDENT_128B_MASK 0x1 +#define AMDGPU_TILING_SCANOUT_SHIFT 63 +#define AMDGPU_TILING_SCANOUT_MASK 0x1 + +/* GFX12 and later: */ +#define AMDGPU_TILING_GFX12_SWIZZLE_MODE_SHIFT 0 +#define AMDGPU_TILING_GFX12_SWIZZLE_MODE_MASK 0x7 +/* These are DCC recompression settings for memory management: */ +#define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_SHIFT 3 +#define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_MASK 0x3 /* 0:64B, 1:128B, 2:256B */ +#define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_SHIFT 5 +#define
AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_MASK 0x7 /* CB_COLOR0_INFO.NUMBER_TYPE */ +#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_SHIFT 8 +#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_MASK 0x3f /* [0:4]:CB_COLOR0_INFO.FORMAT, [5]:MM */ + +/* Set/Get helpers for tiling flags. */ +#define AMDGPU_TILING_SET(field, value) \ + (((__u64)(value) & AMDGPU_TILING_##field##_MASK) << AMDGPU_TILING_##field##_SHIFT) +#define AMDGPU_TILING_GET(value, field) \ + (((__u64)(value) >> AMDGPU_TILING_##field##_SHIFT) & AMDGPU_TILING_##field##_MASK) + +#define AMDGPU_GEM_METADATA_OP_SET_METADATA 1 +#define AMDGPU_GEM_METADATA_OP_GET_METADATA 2 + +/** The same structure is shared for input/output */ +struct drm_amdgpu_gem_metadata { + /** GEM Object handle */ + __u32 handle; + /** Do we want to get or set metadata */ + __u32 op; + struct { + /** For future use, no flags defined so far */ + __u64 flags; + /** family specific tiling info */ + __u64 tiling_info; + __u32 data_size_bytes; + __u32 data[64]; + } data; +}; + +struct drm_amdgpu_gem_mmap_in { + /** the GEM object handle */ + __u32 handle; + __u32 _pad; +}; + +struct drm_amdgpu_gem_mmap_out { + /** mmap offset from the vma offset manager */ + __u64 addr_ptr; +}; + +union drm_amdgpu_gem_mmap { + struct drm_amdgpu_gem_mmap_in in; + struct drm_amdgpu_gem_mmap_out out; +}; + +struct drm_amdgpu_gem_wait_idle_in { + /** GEM object handle */ + __u32 handle; + /** For future use, no flags defined so far */ + __u32 flags; + /** Absolute timeout to wait */ + __u64 timeout; +}; + +struct drm_amdgpu_gem_wait_idle_out { + /** BO status: 0 - BO is idle, 1 - BO is busy */ + __u32 status; + /** Returned current memory domain */ + __u32 domain; +}; + +union drm_amdgpu_gem_wait_idle { + struct drm_amdgpu_gem_wait_idle_in in; + struct drm_amdgpu_gem_wait_idle_out out; +}; + +struct drm_amdgpu_wait_cs_in { + /* Command submission handle + * handle equals 0 means none to wait for + * handle equals ~0ull means wait for the latest sequence number + */ + __u64 handle; + /** Absolute timeout to wait */ + __u64 timeout; + __u32 ip_type; + __u32 ip_instance; + __u32 ring; + __u32 ctx_id; +}; + +struct drm_amdgpu_wait_cs_out { + /** CS status: 0 - CS completed, 1 - CS still busy */ + __u64 status; +}; + +union drm_amdgpu_wait_cs { + struct drm_amdgpu_wait_cs_in in; + struct drm_amdgpu_wait_cs_out out; +}; + +struct drm_amdgpu_fence { + __u32 ctx_id; + __u32 ip_type; + __u32 ip_instance; + __u32 ring; + __u64 seq_no; +}; + +struct drm_amdgpu_wait_fences_in { + /** This points to uint64_t * which points to fences */ + __u64 fences; + __u32 fence_count; + __u32 wait_all; + __u64 timeout_ns; +}; + +struct drm_amdgpu_wait_fences_out { + __u32 status; + __u32 first_signaled; +}; + +union drm_amdgpu_wait_fences { + struct drm_amdgpu_wait_fences_in in; + struct drm_amdgpu_wait_fences_out out; +}; + +#define AMDGPU_GEM_OP_GET_GEM_CREATE_INFO 0 +#define AMDGPU_GEM_OP_SET_PLACEMENT 1 + +/* Sets or returns a value associated with a buffer.
*/ +struct drm_amdgpu_gem_op { + /** GEM object handle */ + __u32 handle; + /** AMDGPU_GEM_OP_* */ + __u32 op; + /** Input or return value */ + __u64 value; +}; + +#define AMDGPU_VA_OP_MAP 1 +#define AMDGPU_VA_OP_UNMAP 2 +#define AMDGPU_VA_OP_CLEAR 3 +#define AMDGPU_VA_OP_REPLACE 4 + +/* Delay the page table update till the next CS */ +#define AMDGPU_VM_DELAY_UPDATE (1 << 0) + +/* Mapping flags */ +/* readable mapping */ +#define AMDGPU_VM_PAGE_READABLE (1 << 1) +/* writable mapping */ +#define AMDGPU_VM_PAGE_WRITEABLE (1 << 2) +/* executable mapping, new for VI */ +#define AMDGPU_VM_PAGE_EXECUTABLE (1 << 3) +/* partially resident texture */ +#define AMDGPU_VM_PAGE_PRT (1 << 4) +/* MTYPE flags use bit 5 to 8 */ +#define AMDGPU_VM_MTYPE_MASK (0xf << 5) +/* Default MTYPE. Pre-AI must use this. Recommended for newer ASICs. */ +#define AMDGPU_VM_MTYPE_DEFAULT (0 << 5) +/* Use Non Coherent MTYPE instead of default MTYPE */ +#define AMDGPU_VM_MTYPE_NC (1 << 5) +/* Use Write Combine MTYPE instead of default MTYPE */ +#define AMDGPU_VM_MTYPE_WC (2 << 5) +/* Use Cache Coherent MTYPE instead of default MTYPE */ +#define AMDGPU_VM_MTYPE_CC (3 << 5) +/* Use UnCached MTYPE instead of default MTYPE */ +#define AMDGPU_VM_MTYPE_UC (4 << 5) +/* Use Read Write MTYPE instead of default MTYPE */ +#define AMDGPU_VM_MTYPE_RW (5 << 5) +/* don't allocate MALL */ +#define AMDGPU_VM_PAGE_NOALLOC (1 << 9) + +struct drm_amdgpu_gem_va { + /** GEM object handle */ + __u32 handle; + __u32 _pad; + /** AMDGPU_VA_OP_* */ + __u32 operation; + /** AMDGPU_VM_PAGE_* */ + __u32 flags; + /** va address to assign . Must be correctly aligned.*/ + __u64 va_address; + /** Specify offset inside of BO to assign. Must be correctly aligned.*/ + __u64 offset_in_bo; + /** Specify mapping size. Must be correctly aligned. */ + __u64 map_size; + /** + * vm_timeline_point is a sequence number used to add new timeline point. + */ + __u64 vm_timeline_point; + /** + * The vm page table update fence is installed in given vm_timeline_syncobj_out + * at vm_timeline_point. + */ + __u32 vm_timeline_syncobj_out; + /** the number of syncobj handles in @input_fence_syncobj_handles */ + __u32 num_syncobj_handles; + /** Array of sync object handle to wait for given input fences */ + __u64 input_fence_syncobj_handles; +}; + +#define AMDGPU_HW_IP_GFX 0 +#define AMDGPU_HW_IP_COMPUTE 1 +#define AMDGPU_HW_IP_DMA 2 +#define AMDGPU_HW_IP_UVD 3 +#define AMDGPU_HW_IP_VCE 4 +#define AMDGPU_HW_IP_UVD_ENC 5 +#define AMDGPU_HW_IP_VCN_DEC 6 +/* + * From VCN4, AMDGPU_HW_IP_VCN_ENC is re-used to support + * both encoding and decoding jobs. 
+ */ +#define AMDGPU_HW_IP_VCN_ENC 7 +#define AMDGPU_HW_IP_VCN_JPEG 8 +#define AMDGPU_HW_IP_VPE 9 +#define AMDGPU_HW_IP_NUM 10 + +#define AMDGPU_HW_IP_INSTANCE_MAX_COUNT 1 + +#define AMDGPU_CHUNK_ID_IB 0x01 +#define AMDGPU_CHUNK_ID_FENCE 0x02 +#define AMDGPU_CHUNK_ID_DEPENDENCIES 0x03 +#define AMDGPU_CHUNK_ID_SYNCOBJ_IN 0x04 +#define AMDGPU_CHUNK_ID_SYNCOBJ_OUT 0x05 +#define AMDGPU_CHUNK_ID_BO_HANDLES 0x06 +#define AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES 0x07 +#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT 0x08 +#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL 0x09 +#define AMDGPU_CHUNK_ID_CP_GFX_SHADOW 0x0a + +struct drm_amdgpu_cs_chunk { + __u32 chunk_id; + __u32 length_dw; + __u64 chunk_data; +}; + +struct drm_amdgpu_cs_in { + /** Rendering context id */ + __u32 ctx_id; + /** Handle of resource list associated with CS */ + __u32 bo_list_handle; + __u32 num_chunks; + __u32 flags; + /** this points to __u64 * which point to cs chunks */ + __u64 chunks; +}; + +struct drm_amdgpu_cs_out { + __u64 handle; +}; + +union drm_amdgpu_cs { + struct drm_amdgpu_cs_in in; + struct drm_amdgpu_cs_out out; +}; + +/* Specify flags to be used for IB */ + +/* This IB should be submitted to CE */ +#define AMDGPU_IB_FLAG_CE (1<<0) + +/* Preamble flag, which means the IB could be dropped if no context switch */ +#define AMDGPU_IB_FLAG_PREAMBLE (1<<1) + +/* Preempt flag, IB should set Pre_enb bit if PREEMPT flag detected */ +#define AMDGPU_IB_FLAG_PREEMPT (1<<2) + +/* The IB fence should do the L2 writeback but not invalidate any shader + * caches (L2/vL1/sL1/I$). */ +#define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3) + +/* Set GDS_COMPUTE_MAX_WAVE_ID = DEFAULT before PACKET3_INDIRECT_BUFFER. + * This will reset wave ID counters for the IB. + */ +#define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4) + +/* Flag the IB as secure (TMZ) + */ +#define AMDGPU_IB_FLAGS_SECURE (1 << 5) + +/* Tell KMD to flush and invalidate caches + */ +#define AMDGPU_IB_FLAG_EMIT_MEM_SYNC (1 << 6) + +struct drm_amdgpu_cs_chunk_ib { + __u32 _pad; + /** AMDGPU_IB_FLAG_* */ + __u32 flags; + /** Virtual address to begin IB execution */ + __u64 va_start; + /** Size of submission */ + __u32 ib_bytes; + /** HW IP to submit to */ + __u32 ip_type; + /** HW IP index of the same type to submit to */ + __u32 ip_instance; + /** Ring index to submit to */ + __u32 ring; +}; + +struct drm_amdgpu_cs_chunk_dep { + __u32 ip_type; + __u32 ip_instance; + __u32 ring; + __u32 ctx_id; + __u64 handle; +}; + +struct drm_amdgpu_cs_chunk_fence { + __u32 handle; + __u32 offset; +}; + +struct drm_amdgpu_cs_chunk_sem { + __u32 handle; +}; + +struct drm_amdgpu_cs_chunk_syncobj { + __u32 handle; + __u32 flags; + __u64 point; +}; + +#define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ 0 +#define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD 1 +#define AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD 2 + +union drm_amdgpu_fence_to_handle { + struct { + struct drm_amdgpu_fence fence; + __u32 what; + __u32 pad; + } in; + struct { + __u32 handle; + } out; +}; + +struct drm_amdgpu_cs_chunk_data { + union { + struct drm_amdgpu_cs_chunk_ib ib_data; + struct drm_amdgpu_cs_chunk_fence fence_data; + }; +}; + +#define AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW 0x1 + +struct drm_amdgpu_cs_chunk_cp_gfx_shadow { + __u64 shadow_va; + __u64 csa_va; + __u64 gds_va; + __u64 flags; +}; + +/* + * Query h/w info: Flag that this is integrated (a.k.a.
fusion) GPU + * + */ +#define AMDGPU_IDS_FLAGS_FUSION 0x1 +#define AMDGPU_IDS_FLAGS_PREEMPTION 0x2 +#define AMDGPU_IDS_FLAGS_TMZ 0x4 +#define AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD 0x8 + +/* indicate if acceleration can be working */ +#define AMDGPU_INFO_ACCEL_WORKING 0x00 +/* get the crtc_id from the mode object id? */ +#define AMDGPU_INFO_CRTC_FROM_ID 0x01 +/* query hw IP info */ +#define AMDGPU_INFO_HW_IP_INFO 0x02 +/* query hw IP instance count for the specified type */ +#define AMDGPU_INFO_HW_IP_COUNT 0x03 +/* timestamp for GL_ARB_timer_query */ +#define AMDGPU_INFO_TIMESTAMP 0x05 +/* Query the firmware version */ +#define AMDGPU_INFO_FW_VERSION 0x0e + /* Subquery id: Query VCE firmware version */ + #define AMDGPU_INFO_FW_VCE 0x1 + /* Subquery id: Query UVD firmware version */ + #define AMDGPU_INFO_FW_UVD 0x2 + /* Subquery id: Query GMC firmware version */ + #define AMDGPU_INFO_FW_GMC 0x03 + /* Subquery id: Query GFX ME firmware version */ + #define AMDGPU_INFO_FW_GFX_ME 0x04 + /* Subquery id: Query GFX PFP firmware version */ + #define AMDGPU_INFO_FW_GFX_PFP 0x05 + /* Subquery id: Query GFX CE firmware version */ + #define AMDGPU_INFO_FW_GFX_CE 0x06 + /* Subquery id: Query GFX RLC firmware version */ + #define AMDGPU_INFO_FW_GFX_RLC 0x07 + /* Subquery id: Query GFX MEC firmware version */ + #define AMDGPU_INFO_FW_GFX_MEC 0x08 + /* Subquery id: Query SMC firmware version */ + #define AMDGPU_INFO_FW_SMC 0x0a + /* Subquery id: Query SDMA firmware version */ + #define AMDGPU_INFO_FW_SDMA 0x0b + /* Subquery id: Query PSP SOS firmware version */ + #define AMDGPU_INFO_FW_SOS 0x0c + /* Subquery id: Query PSP ASD firmware version */ + #define AMDGPU_INFO_FW_ASD 0x0d + /* Subquery id: Query VCN firmware version */ + #define AMDGPU_INFO_FW_VCN 0x0e + /* Subquery id: Query GFX RLC SRLC firmware version */ + #define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL 0x0f + /* Subquery id: Query GFX RLC SRLG firmware version */ + #define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM 0x10 + /* Subquery id: Query GFX RLC SRLS firmware version */ + #define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM 0x11 + /* Subquery id: Query DMCU firmware version */ + #define AMDGPU_INFO_FW_DMCU 0x12 + #define AMDGPU_INFO_FW_TA 0x13 + /* Subquery id: Query DMCUB firmware version */ + #define AMDGPU_INFO_FW_DMCUB 0x14 + /* Subquery id: Query TOC firmware version */ + #define AMDGPU_INFO_FW_TOC 0x15 + /* Subquery id: Query CAP firmware version */ + #define AMDGPU_INFO_FW_CAP 0x16 + /* Subquery id: Query GFX RLCP firmware version */ + #define AMDGPU_INFO_FW_GFX_RLCP 0x17 + /* Subquery id: Query GFX RLCV firmware version */ + #define AMDGPU_INFO_FW_GFX_RLCV 0x18 + /* Subquery id: Query MES_KIQ firmware version */ + #define AMDGPU_INFO_FW_MES_KIQ 0x19 + /* Subquery id: Query MES firmware version */ + #define AMDGPU_INFO_FW_MES 0x1a + /* Subquery id: Query IMU firmware version */ + #define AMDGPU_INFO_FW_IMU 0x1b + /* Subquery id: Query VPE firmware version */ + #define AMDGPU_INFO_FW_VPE 0x1c + +/* number of bytes moved for TTM migration */ +#define AMDGPU_INFO_NUM_BYTES_MOVED 0x0f +/* the used VRAM size */ +#define AMDGPU_INFO_VRAM_USAGE 0x10 +/* the used GTT size */ +#define AMDGPU_INFO_GTT_USAGE 0x11 +/* Information about GDS, etc. 
resource configuration */ +#define AMDGPU_INFO_GDS_CONFIG 0x13 +/* Query information about VRAM and GTT domains */ +#define AMDGPU_INFO_VRAM_GTT 0x14 +/* Query information about register in MMR address space*/ +#define AMDGPU_INFO_READ_MMR_REG 0x15 +/* Query information about device: rev id, family, etc. */ +#define AMDGPU_INFO_DEV_INFO 0x16 +/* visible vram usage */ +#define AMDGPU_INFO_VIS_VRAM_USAGE 0x17 +/* number of TTM buffer evictions */ +#define AMDGPU_INFO_NUM_EVICTIONS 0x18 +/* Query memory about VRAM and GTT domains */ +#define AMDGPU_INFO_MEMORY 0x19 +/* Query vce clock table */ +#define AMDGPU_INFO_VCE_CLOCK_TABLE 0x1A +/* Query vbios related information */ +#define AMDGPU_INFO_VBIOS 0x1B + /* Subquery id: Query vbios size */ + #define AMDGPU_INFO_VBIOS_SIZE 0x1 + /* Subquery id: Query vbios image */ + #define AMDGPU_INFO_VBIOS_IMAGE 0x2 + /* Subquery id: Query vbios info */ + #define AMDGPU_INFO_VBIOS_INFO 0x3 +/* Query UVD handles */ +#define AMDGPU_INFO_NUM_HANDLES 0x1C +/* Query sensor related information */ +#define AMDGPU_INFO_SENSOR 0x1D + /* Subquery id: Query GPU shader clock */ + #define AMDGPU_INFO_SENSOR_GFX_SCLK 0x1 + /* Subquery id: Query GPU memory clock */ + #define AMDGPU_INFO_SENSOR_GFX_MCLK 0x2 + /* Subquery id: Query GPU temperature */ + #define AMDGPU_INFO_SENSOR_GPU_TEMP 0x3 + /* Subquery id: Query GPU load */ + #define AMDGPU_INFO_SENSOR_GPU_LOAD 0x4 + /* Subquery id: Query average GPU power */ + #define AMDGPU_INFO_SENSOR_GPU_AVG_POWER 0x5 + /* Subquery id: Query northbridge voltage */ + #define AMDGPU_INFO_SENSOR_VDDNB 0x6 + /* Subquery id: Query graphics voltage */ + #define AMDGPU_INFO_SENSOR_VDDGFX 0x7 + /* Subquery id: Query GPU stable pstate shader clock */ + #define AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_SCLK 0x8 + /* Subquery id: Query GPU stable pstate memory clock */ + #define AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_MCLK 0x9 + /* Subquery id: Query GPU peak pstate shader clock */ + #define AMDGPU_INFO_SENSOR_PEAK_PSTATE_GFX_SCLK 0xa + /* Subquery id: Query GPU peak pstate memory clock */ + #define AMDGPU_INFO_SENSOR_PEAK_PSTATE_GFX_MCLK 0xb + /* Subquery id: Query input GPU power */ + #define AMDGPU_INFO_SENSOR_GPU_INPUT_POWER 0xc +/* Number of VRAM page faults on CPU access. 
*/ +#define AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS 0x1E +#define AMDGPU_INFO_VRAM_LOST_COUNTER 0x1F +/* query ras mask of enabled features*/ +#define AMDGPU_INFO_RAS_ENABLED_FEATURES 0x20 +/* RAS MASK: UMC (VRAM) */ +#define AMDGPU_INFO_RAS_ENABLED_UMC (1 << 0) +/* RAS MASK: SDMA */ +#define AMDGPU_INFO_RAS_ENABLED_SDMA (1 << 1) +/* RAS MASK: GFX */ +#define AMDGPU_INFO_RAS_ENABLED_GFX (1 << 2) +/* RAS MASK: MMHUB */ +#define AMDGPU_INFO_RAS_ENABLED_MMHUB (1 << 3) +/* RAS MASK: ATHUB */ +#define AMDGPU_INFO_RAS_ENABLED_ATHUB (1 << 4) +/* RAS MASK: PCIE */ +#define AMDGPU_INFO_RAS_ENABLED_PCIE (1 << 5) +/* RAS MASK: HDP */ +#define AMDGPU_INFO_RAS_ENABLED_HDP (1 << 6) +/* RAS MASK: XGMI */ +#define AMDGPU_INFO_RAS_ENABLED_XGMI (1 << 7) +/* RAS MASK: DF */ +#define AMDGPU_INFO_RAS_ENABLED_DF (1 << 8) +/* RAS MASK: SMN */ +#define AMDGPU_INFO_RAS_ENABLED_SMN (1 << 9) +/* RAS MASK: SEM */ +#define AMDGPU_INFO_RAS_ENABLED_SEM (1 << 10) +/* RAS MASK: MP0 */ +#define AMDGPU_INFO_RAS_ENABLED_MP0 (1 << 11) +/* RAS MASK: MP1 */ +#define AMDGPU_INFO_RAS_ENABLED_MP1 (1 << 12) +/* RAS MASK: FUSE */ +#define AMDGPU_INFO_RAS_ENABLED_FUSE (1 << 13) +/* query video encode/decode caps */ +#define AMDGPU_INFO_VIDEO_CAPS 0x21 + /* Subquery id: Decode */ + #define AMDGPU_INFO_VIDEO_CAPS_DECODE 0 + /* Subquery id: Encode */ + #define AMDGPU_INFO_VIDEO_CAPS_ENCODE 1 +/* Query the max number of IBs per gang per submission */ +#define AMDGPU_INFO_MAX_IBS 0x22 +/* query last page fault info */ +#define AMDGPU_INFO_GPUVM_FAULT 0x23 +/* query FW object size and alignment */ +#define AMDGPU_INFO_UQ_FW_AREAS 0x24 + +#define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0 +#define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff +#define AMDGPU_INFO_MMR_SH_INDEX_SHIFT 8 +#define AMDGPU_INFO_MMR_SH_INDEX_MASK 0xff + +struct drm_amdgpu_query_fw { + /** AMDGPU_INFO_FW_* */ + __u32 fw_type; + /** + * Index of the IP if there are more IPs of + * the same type. + */ + __u32 ip_instance; + /** + * Index of the engine. Whether this is used depends + * on the firmware type. (e.g. MEC, SDMA) + */ + __u32 index; + __u32 _pad; +}; + +/* Input structure for the INFO ioctl */ +struct drm_amdgpu_info { + /* Where the return value will be stored */ + __u64 return_pointer; + /* The size of the return value. Just like "size" in "snprintf", + * it limits how many bytes the kernel can write. */ + __u32 return_size; + /* The query request id. */ + __u32 query; + + union { + struct { + __u32 id; + __u32 _pad; + } mode_crtc; + + struct { + /** AMDGPU_HW_IP_* */ + __u32 type; + /** + * Index of the IP if there are more IPs of the same + * type. Ignored by AMDGPU_INFO_HW_IP_COUNT. 
+ */ + __u32 ip_instance; + } query_hw_ip; + + struct { + __u32 dword_offset; + /** number of registers to read */ + __u32 count; + __u32 instance; + /** For future use, no flags defined so far */ + __u32 flags; + } read_mmr_reg; + + struct drm_amdgpu_query_fw query_fw; + + struct { + __u32 type; + __u32 offset; + } vbios_info; + + struct { + __u32 type; + } sensor_info; + + struct { + __u32 type; + } video_cap; + }; +}; + +struct drm_amdgpu_info_gds { + /** GDS GFX partition size */ + __u32 gds_gfx_partition_size; + /** GDS compute partition size */ + __u32 compute_partition_size; + /** total GDS memory size */ + __u32 gds_total_size; + /** GWS size per GFX partition */ + __u32 gws_per_gfx_partition; + /** GWS size per compute partition */ + __u32 gws_per_compute_partition; + /** OA size per GFX partition */ + __u32 oa_per_gfx_partition; + /** OA size per compute partition */ + __u32 oa_per_compute_partition; + __u32 _pad; +}; + +struct drm_amdgpu_info_vram_gtt { + __u64 vram_size; + __u64 vram_cpu_accessible_size; + __u64 gtt_size; +}; + +struct drm_amdgpu_heap_info { + /** max. physical memory */ + __u64 total_heap_size; + + /** Theoretical max. available memory in the given heap */ + __u64 usable_heap_size; + + /** + * Number of bytes allocated in the heap. This includes all processes + * and private allocations in the kernel. It changes when new buffers + * are allocated, freed, and moved. It cannot be larger than + * heap_size. + */ + __u64 heap_usage; + + /** + * Theoretical possible max. size of buffer which + * could be allocated in the given heap + */ + __u64 max_allocation; +}; + +struct drm_amdgpu_memory_info { + struct drm_amdgpu_heap_info vram; + struct drm_amdgpu_heap_info cpu_accessible_vram; + struct drm_amdgpu_heap_info gtt; +}; + +struct drm_amdgpu_info_firmware { + __u32 ver; + __u32 feature; +}; + +struct drm_amdgpu_info_vbios { + __u8 name[64]; + __u8 vbios_pn[64]; + __u32 version; + __u32 pad; + __u8 vbios_ver_str[32]; + __u8 date[32]; +}; + +#define AMDGPU_VRAM_TYPE_UNKNOWN 0 +#define AMDGPU_VRAM_TYPE_GDDR1 1 +#define AMDGPU_VRAM_TYPE_DDR2 2 +#define AMDGPU_VRAM_TYPE_GDDR3 3 +#define AMDGPU_VRAM_TYPE_GDDR4 4 +#define AMDGPU_VRAM_TYPE_GDDR5 5 +#define AMDGPU_VRAM_TYPE_HBM 6 +#define AMDGPU_VRAM_TYPE_DDR3 7 +#define AMDGPU_VRAM_TYPE_DDR4 8 +#define AMDGPU_VRAM_TYPE_GDDR6 9 +#define AMDGPU_VRAM_TYPE_DDR5 10 +#define AMDGPU_VRAM_TYPE_LPDDR4 11 +#define AMDGPU_VRAM_TYPE_LPDDR5 12 + +struct drm_amdgpu_info_device { + /** PCI Device ID */ + __u32 device_id; + /** Internal chip revision: A0, A1, etc. */ + __u32 chip_rev; + __u32 external_rev; + /** Revision id in PCI Config space */ + __u32 pci_rev; + __u32 family; + __u32 num_shader_engines; + __u32 num_shader_arrays_per_engine; + /* in KHz */ + __u32 gpu_counter_freq; + __u64 max_engine_clock; + __u64 max_memory_clock; + /* cu information */ + __u32 cu_active_number; + /* NOTE: cu_ao_mask is INVALID, DON'T use it */ + __u32 cu_ao_mask; + __u32 cu_bitmap[4][4]; + /** Render backend pipe mask. One render backend is CB+DB. */ + __u32 enabled_rb_pipes_mask; + __u32 num_rb_pipes; + __u32 num_hw_gfx_contexts; + /* PCIe version (the smaller of the GPU and the CPU/motherboard) */ + __u32 pcie_gen; + __u64 ids_flags; + /** Starting virtual address for UMDs. */ + __u64 virtual_address_offset; + /** The maximum virtual address */ + __u64 virtual_address_max; + /** Required alignment of virtual addresses.
*/ + __u32 virtual_address_alignment; + /** Page table entry - fragment size */ + __u32 pte_fragment_size; + __u32 gart_page_size; + /** constant engine ram size*/ + __u32 ce_ram_size; + /** video memory type info*/ + __u32 vram_type; + /** video memory bit width*/ + __u32 vram_bit_width; + /* vce harvesting instance */ + __u32 vce_harvest_config; + /* gfx double offchip LDS buffers */ + __u32 gc_double_offchip_lds_buf; + /* NGG Primitive Buffer */ + __u64 prim_buf_gpu_addr; + /* NGG Position Buffer */ + __u64 pos_buf_gpu_addr; + /* NGG Control Sideband */ + __u64 cntl_sb_buf_gpu_addr; + /* NGG Parameter Cache */ + __u64 param_buf_gpu_addr; + __u32 prim_buf_size; + __u32 pos_buf_size; + __u32 cntl_sb_buf_size; + __u32 param_buf_size; + /* wavefront size*/ + __u32 wave_front_size; + /* shader visible vgprs*/ + __u32 num_shader_visible_vgprs; + /* CU per shader array*/ + __u32 num_cu_per_sh; + /* number of tcc blocks*/ + __u32 num_tcc_blocks; + /* gs vgt table depth*/ + __u32 gs_vgt_table_depth; + /* gs primitive buffer depth*/ + __u32 gs_prim_buffer_depth; + /* max gs wavefront per vgt*/ + __u32 max_gs_waves_per_vgt; + /* PCIe number of lanes (the smaller of the GPU and the CPU/motherboard) */ + __u32 pcie_num_lanes; + /* always on cu bitmap */ + __u32 cu_ao_bitmap[4][4]; + /** Starting high virtual address for UMDs. */ + __u64 high_va_offset; + /** The maximum high virtual address */ + __u64 high_va_max; + /* gfx10 pa_sc_tile_steering_override */ + __u32 pa_sc_tile_steering_override; + /* disabled TCCs */ + __u64 tcc_disabled_mask; + __u64 min_engine_clock; + __u64 min_memory_clock; + /* The following fields are only set on gfx11+, older chips set 0. */ + __u32 tcp_cache_size; /* AKA GL0, VMEM cache */ + __u32 num_sqc_per_wgp; + __u32 sqc_data_cache_size; /* AKA SMEM cache */ + __u32 sqc_inst_cache_size; + __u32 gl1c_cache_size; + __u32 gl2c_cache_size; + __u64 mall_size; /* AKA infinity cache */ + /* high 32 bits of the rb pipes mask */ + __u32 enabled_rb_pipes_mask_hi; + /* shadow area size for gfx11 */ + __u32 shadow_size; + /* shadow area base virtual alignment for gfx11 */ + __u32 shadow_alignment; + /* context save area size for gfx11 */ + __u32 csa_size; + /* context save area base virtual alignment for gfx11 */ + __u32 csa_alignment; +}; + +struct drm_amdgpu_info_hw_ip { + /** Version of h/w IP */ + __u32 hw_ip_version_major; + __u32 hw_ip_version_minor; + /** Capabilities */ + __u64 capabilities_flags; + /** command buffer address start alignment*/ + __u32 ib_start_alignment; + /** command buffer size alignment*/ + __u32 ib_size_alignment; + /** Bitmask of available rings. Bit 0 means ring 0, etc. 
*/ + __u32 available_rings; + /** version info: bits 23:16 major, 15:8 minor, 7:0 revision */ + __u32 ip_discovery_version; +}; + +/* GFX metadata BO sizes and alignment info (in bytes) */ +struct drm_amdgpu_info_uq_fw_areas_gfx { + /* shadow area size */ + __u32 shadow_size; + /* shadow area base virtual mem alignment */ + __u32 shadow_alignment; + /* context save area size */ + __u32 csa_size; + /* context save area base virtual mem alignment */ + __u32 csa_alignment; +}; + +/* IP specific fw related information used in the + * subquery AMDGPU_INFO_UQ_FW_AREAS + */ +struct drm_amdgpu_info_uq_fw_areas { + union { + struct drm_amdgpu_info_uq_fw_areas_gfx gfx; + }; +}; + +struct drm_amdgpu_info_num_handles { + /** Max handles as supported by firmware for UVD */ + __u32 uvd_max_handles; + /** Handles currently in use for UVD */ + __u32 uvd_used_handles; +}; + +#define AMDGPU_VCE_CLOCK_TABLE_ENTRIES 6 + +struct drm_amdgpu_info_vce_clock_table_entry { + /** System clock */ + __u32 sclk; + /** Memory clock */ + __u32 mclk; + /** VCE clock */ + __u32 eclk; + __u32 pad; +}; + +struct drm_amdgpu_info_vce_clock_table { + struct drm_amdgpu_info_vce_clock_table_entry entries[AMDGPU_VCE_CLOCK_TABLE_ENTRIES]; + __u32 num_valid_entries; + __u32 pad; +}; + +/* query video encode/decode caps */ +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2 0 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4 1 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1 2 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC 3 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC 4 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG 5 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9 6 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1 7 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_COUNT 8 + +struct drm_amdgpu_info_video_codec_info { + __u32 valid; + __u32 max_width; + __u32 max_height; + __u32 max_pixels_per_frame; + __u32 max_level; + __u32 pad; +}; + +struct drm_amdgpu_info_video_caps { + struct drm_amdgpu_info_video_codec_info codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_COUNT]; +}; + +#define AMDGPU_VMHUB_TYPE_MASK 0xff +#define AMDGPU_VMHUB_TYPE_SHIFT 0 +#define AMDGPU_VMHUB_TYPE_GFX 0 +#define AMDGPU_VMHUB_TYPE_MM0 1 +#define AMDGPU_VMHUB_TYPE_MM1 2 +#define AMDGPU_VMHUB_IDX_MASK 0xff00 +#define AMDGPU_VMHUB_IDX_SHIFT 8 + +struct drm_amdgpu_info_gpuvm_fault { + __u64 addr; + __u32 status; + __u32 vmhub; +}; + +struct drm_amdgpu_info_uq_metadata_gfx { + /* shadow area size for gfx11 */ + __u32 shadow_size; + /* shadow area base virtual alignment for gfx11 */ + __u32 shadow_alignment; + /* context save area size for gfx11 */ + __u32 csa_size; + /* context save area base virtual alignment for gfx11 */ + __u32 csa_alignment; +}; + +struct drm_amdgpu_info_uq_metadata { + union { + struct drm_amdgpu_info_uq_metadata_gfx gfx; + }; +}; + +/* + * Supported GPU families + */ +#define AMDGPU_FAMILY_UNKNOWN 0 +#define AMDGPU_FAMILY_SI 110 /* Hainan, Oland, Verde, Pitcairn, Tahiti */ +#define AMDGPU_FAMILY_CI 120 /* Bonaire, Hawaii */ +#define AMDGPU_FAMILY_KV 125 /* Kaveri, Kabini, Mullins */ +#define AMDGPU_FAMILY_VI 130 /* Iceland, Tonga */ +#define AMDGPU_FAMILY_CZ 135 /* Carrizo, Stoney */ +#define AMDGPU_FAMILY_AI 141 /* Vega10 */ +#define AMDGPU_FAMILY_RV 142 /* Raven */ +#define AMDGPU_FAMILY_NV 143 /* Navi10 */ +#define AMDGPU_FAMILY_VGH 144 /* Van Gogh */ +#define AMDGPU_FAMILY_GC_11_0_0 145 /* GC 11.0.0 */ +#define AMDGPU_FAMILY_YC 146 /* Yellow Carp */ +#define AMDGPU_FAMILY_GC_11_0_1 148 /* GC 11.0.1 */ +#define AMDGPU_FAMILY_GC_10_3_6 149 /* GC 10.3.6 */ 
+#define AMDGPU_FAMILY_GC_10_3_7 151 /* GC 10.3.7 */ +#define AMDGPU_FAMILY_GC_11_5_0 150 /* GC 11.5.0 */ +#define AMDGPU_FAMILY_GC_12_0_0 152 /* GC 12.0.0 */ + +/* FIXME wrong namespace! */ +struct drm_color_ctm_3x4 { + /* + * Conversion matrix with 3x4 dimensions in S31.32 sign-magnitude + * (not two's complement!) format. + */ + __u64 matrix[12]; +}; + +/* CRIU ioctl + * + * When checkpointing a process, the CRIU amdgpu plugin will perform: + * 1. INFO op to get information about state that needs to be saved. This + * pauses execution until the checkpoint is done. + * 2. CHECKPOINT op to save state + * + * Restore uses other ioctls. + */ +enum drm_amdgpu_criu_op { + AMDGPU_CRIU_OP_PROCESS_INFO, + AMDGPU_CRIU_OP_CHECKPOINT, +}; + +struct drm_amdgpu_criu_args { + __u64 bos; /* user pointer to bos array */ + __u64 vms; /* user pointer to private data */ + __u32 num_bos; + __u32 num_vms; + __u32 pid; + __u32 op; +}; + +#define AMDGPU_CRIU_BO_FLAG_IS_IMPORT (1 << 0) + +struct drm_amdgpu_criu_bo_bucket { + __u64 addr; + __u64 size; + __u64 offset; + __u64 alloc_flags; + __u32 preferred_domains; + __u32 dmabuf_fd; + __u32 flags; +}; + +struct drm_amdgpu_criu_vm_bucket { + __u64 start; + __u64 last; + __u64 offset; + __u64 flags; + __u32 gem_handle; +}; + +#if defined(__cplusplus) +} +#endif + +#endif -- 2.34.1 From David.Francis at amd.com Sat May 17 00:05:39 2025 From: David.Francis at amd.com (David Francis) Date: Fri, 16 May 2025 17:05:39 -0400 Subject: [CRIU] [PATCH 7/7] plugin: Add DUMP_DEVICE_LATE callback In-Reply-To: <20250516210539.3537211-1-David.Francis@amd.com> References: <20250516210539.3537211-1-David.Francis@amd.com> Message-ID: <20250516210539.3537211-8-David.Francis@amd.com> The amdgpu plugin was counting how many files were checkpointed to determine when it should close the device files. The number of device files is not consistent; a process may have multiple copies of the drm device files open. Instead of doing this counting, add a new callback after all files are checkpointed, so plugins can clean up their resources at an appropriate time. 
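To make the intended usage concrete, here is a minimal sketch of a plugin using the new hook (illustrative only, not part of this series: CR_PLUGIN_REGISTER_HOOK, the CR_PLUGIN_HOOK__DUMP_DEVICE_LATE id, and the int-pid hook signature come from criu-plugin.h as changed below; the fd bookkeeping and the my_plugin_* names are hypothetical):

#include <unistd.h>
#include "criu-plugin.h"

/* Hypothetical bookkeeping: device fds the plugin dup()ed in its
 * earlier per-file dump hooks, kept open until dump is complete. */
static int dumped_fds[64];
static int nr_dumped_fds;

int my_plugin_dump_device_late(int pid)
{
	int i;

	/* All files are checkpointed by this point, so the plugin can
	 * safely release the device fds it was holding. */
	for (i = 0; i < nr_dumped_fds; i++)
		close(dumped_fds[i]);
	nr_dumped_fds = 0;

	return 0;
}
CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__DUMP_DEVICE_LATE, my_plugin_dump_device_late)

The amdgpu plugin below follows this pattern via record_dumped_fd()/clear_dumped_fds(), additionally issuing the KFD unpause ioctl for the non-drm fd.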
Signed-off-by: David Francis --- criu/cr-dump.c | 3 ++ criu/include/criu-plugin.h | 4 ++- criu/plugin.c | 1 + plugins/amdgpu/amdgpu_plugin.c | 55 ++++++++++++----------------- plugins/amdgpu/amdgpu_plugin_util.c | 41 ++++++++++++--------- plugins/amdgpu/amdgpu_plugin_util.h | 6 ++-- 6 files changed, 57 insertions(+), 53 deletions(-) diff --git a/criu/cr-dump.c b/criu/cr-dump.c index 1bc5d934f..34e756c7f 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -2225,6 +2225,9 @@ int cr_dump_tasks(pid_t pid) goto err; } + if(run_plugins(DUMP_DEVICE_LATE, pid)) + goto err; + if (parent_ie) { inventory_entry__free_unpacked(parent_ie, NULL); parent_ie = NULL; diff --git a/criu/include/criu-plugin.h b/criu/include/criu-plugin.h index aaf4b0b94..b2a3ffce9 100644 --- a/criu/include/criu-plugin.h +++ b/criu/include/criu-plugin.h @@ -64,6 +64,8 @@ enum { CR_PLUGIN_HOOK__COLLECT_FILE = 13, + CR_PLUGIN_HOOK__DUMP_DEVICE_LATE = 14, + CR_PLUGIN_HOOK__MAX }; @@ -84,7 +86,7 @@ DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__PAUSE_DEVICES, int pid); DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__CHECKPOINT_DEVICES, int pid); DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESUME_DEVICES_EARLY, int pid); DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__COLLECT_FILE, int pid, int fd); - +DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_DEVICE_LATE, int id); enum { CR_PLUGIN_STAGE__DUMP, diff --git a/criu/plugin.c b/criu/plugin.c index cfb19e9f0..a0f27616c 100644 --- a/criu/plugin.c +++ b/criu/plugin.c @@ -61,6 +61,7 @@ static cr_plugin_desc_t *cr_gen_plugin_desc(void *h, char *path) __assign_hook(CHECKPOINT_DEVICES, "cr_plugin_checkpoint_devices"); __assign_hook(RESUME_DEVICES_EARLY, "cr_plugin_resume_devices_early"); __assign_hook(COLLECT_FILE, "cr_plugin_collect_file"); + __assign_hook(DUMP_DEVICE_LATE, "cr_plugin_dump_device_late"); #undef __assign_hook diff --git a/plugins/amdgpu/amdgpu_plugin.c b/plugins/amdgpu/amdgpu_plugin.c index ad66e4659..b39c78175 100644 --- a/plugins/amdgpu/amdgpu_plugin.c +++ b/plugins/amdgpu/amdgpu_plugin.c @@ -54,13 +54,6 @@ struct vma_metadata { /************************************ Global Variables ********************************************/ -/** - * FD of KFD device used to checkpoint. On a multi-process - * tree the order of checkpointing goes from parent to child - * and so on - so saving the FD will not be overwritten - */ -static int kfd_checkpoint_fd; - static LIST_HEAD(update_vma_info_list); static LIST_HEAD(amdgpu_processes); @@ -1018,28 +1011,34 @@ int restore_hsakmt_shared_mem(const uint64_t shared_mem_size, const uint32_t sha return 0; } -static int unpause_process(int fd) +int amdgpu_unpause_processes(int pid) { int ret = 0; struct kfd_ioctl_criu_args args = { 0 }; + struct list_head *l = get_dumped_fds(); + struct dumped_fd *st; - args.op = KFD_CRIU_OP_UNPAUSE; + list_for_each_entry(st, l, l) { + if (st->is_drm) { + close(st->fd); + } else { + args.op = KFD_CRIU_OP_UNPAUSE; - ret = kmtIoctl(fd, AMDKFD_IOC_CRIU_OP, &args); - if (ret) { - pr_perror("Failed to unpause process"); - goto exit; + ret = kmtIoctl(st->fd, AMDKFD_IOC_CRIU_OP, &args); + if (ret) { + pr_perror("Failed to unpause process"); + goto exit; + } + } } - // Reset the KFD FD - kfd_checkpoint_fd = -1; - sys_close_drm_render_devices(&src_topology); - exit: pr_info("Process unpaused %s (ret:%d)\n", ret ? 
"Failed" : "Ok", ret); + clear_dumped_fds(); return ret; } +CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__DUMP_DEVICE_LATE, amdgpu_unpause_processes) static void dmabuf_socket_name_gen(struct sockaddr_un *addr, int *len, int pid) { @@ -1359,9 +1358,6 @@ int amdgpu_plugin_dump_file(int fd, int id) return -1; } - /* Initialize number of device files that will be checkpointed */ - init_gpu_count(&src_topology); - /* Check whether this plugin was called for kfd or render nodes */ if (major(st.st_rdev) != major(st_kfd.st_rdev) || minor(st.st_rdev) != 0) { @@ -1373,11 +1369,9 @@ int amdgpu_plugin_dump_file(int fd, int id) if (ret) return ret; - /* Invoke unpause process if needed */ - decrement_checkpoint_count(); - if (checkpoint_is_complete()) { - ret = unpause_process(kfd_checkpoint_fd); - } + ret = record_dumped_fd(fd, true); + if (ret) + return ret; /* Need to return success here so that criu can call plugins for renderD nodes */ return ret; @@ -1475,14 +1469,11 @@ int amdgpu_plugin_dump_file(int fd, int id) xfree(buf); -exit: - /* Restore all queues if conditions permit */ - kfd_checkpoint_fd = fd; - decrement_checkpoint_count(); - if (checkpoint_is_complete()) { - ret = unpause_process(fd); - } + ret = record_dumped_fd(fd, false); + if (ret) + goto exit; +exit: xfree((void *)args.devices); xfree((void *)args.bos); xfree((void *)args.priv_data); diff --git a/plugins/amdgpu/amdgpu_plugin_util.c b/plugins/amdgpu/amdgpu_plugin_util.c index 4b3ae0cdd..b7d6fe2f3 100644 --- a/plugins/amdgpu/amdgpu_plugin_util.c +++ b/plugins/amdgpu/amdgpu_plugin_util.c @@ -38,9 +38,7 @@ #include "amdgpu_plugin_util.h" #include "amdgpu_plugin_topology.h" -/* Tracks number of device files that need to be checkpointed */ -static int dev_file_cnt = 0; - +static LIST_HEAD(dumped_fds); static LIST_HEAD(shared_bos); static LIST_HEAD(shared_dmabuf_fds); static LIST_HEAD(completed_work); @@ -53,23 +51,23 @@ struct tp_system dest_topology; struct device_maps checkpoint_maps; struct device_maps restore_maps; -bool checkpoint_is_complete() -{ - return (dev_file_cnt == 0); -} +int record_dumped_fd(int fd, bool is_drm) { + int newfd = dup(fd); -void decrement_checkpoint_count() -{ - dev_file_cnt--; -} + if (newfd < 0) + return newfd; + struct dumped_fd *st = malloc(sizeof(struct dumped_fd)); + if (!st) + return -1; + st->fd = newfd; + st->is_drm = is_drm; + list_add(&st->l, &dumped_fds); -void init_gpu_count(struct tp_system *topo) -{ - if (dev_file_cnt != 0) - return; + return 0; +} - /* We add ONE to include checkpointing of KFD device */ - dev_file_cnt = 1 + topology_gpu_count(topo); +struct list_head *get_dumped_fds() { + return &dumped_fds; } bool shared_bo_has_exporter(int handle) { @@ -174,6 +172,15 @@ void clear_restore_state() { } } +void clear_dumped_fds() { + while (!list_empty(&dumped_fds)) { + struct dumped_fd *st = list_first_entry(&dumped_fds, struct dumped_fd, l); + list_del(&st->l); + close(st->fd); + free(st); + } +} + int read_fp(FILE *fp, void *buf, const size_t buf_len) { size_t len_read; diff --git a/plugins/amdgpu/amdgpu_plugin_util.h b/plugins/amdgpu/amdgpu_plugin_util.h index bd23fc6d4..edf0d05f4 100644 --- a/plugins/amdgpu/amdgpu_plugin_util.h +++ b/plugins/amdgpu/amdgpu_plugin_util.h @@ -128,9 +128,9 @@ int read_file(const char *file_path, void *buf, const size_t buf_len); int write_img_file(char *path, const void *buf, const size_t buf_len); FILE *open_img_file(char *path, bool write, size_t *size); -bool checkpoint_is_complete(); -void decrement_checkpoint_count(); -void init_gpu_count(struct tp_system 
*topology); +int record_dumped_fd(int fd, bool is_drm); +struct list_head *get_dumped_fds(); +void clear_dumped_fds(); bool shared_bo_has_exporter(int handle); int record_shared_bo(int handle, bool is_imported); -- 2.34.1 From David.Francis at amd.com Sat May 17 00:05:37 2025 From: David.Francis at amd.com (David Francis) Date: Fri, 16 May 2025 17:05:37 -0400 Subject: [CRIU] [PATCH 5/7] plugin/amdgpu: Add drm header In-Reply-To: <20250516210539.3537211-1-David.Francis@amd.com> References: <20250516210539.3537211-1-David.Francis@amd.com> Message-ID: <20250516210539.3537211-6-David.Francis@amd.com> The amdgpu plugin usually calls drm ioctls through the libdrm wrappers. However, amdgpu restore requires dealing with dmabufs and gem handles directly, which means some drm ioctls must be called without the wrappers. Add the drm.h header (from the kernel's uapi). Signed-off-by: David Francis --- plugins/amdgpu/drm.h | 1440 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1440 insertions(+) create mode 100644 plugins/amdgpu/drm.h diff --git a/plugins/amdgpu/drm.h b/plugins/amdgpu/drm.h new file mode 100644 index 000000000..ae701b8f9 --- /dev/null +++ b/plugins/amdgpu/drm.h @@ -0,0 +1,1440 @@ +/* + * Header for the Direct Rendering Manager + * + * Author: Rickard E. (Rik) Faith + * + * Acknowledgments: + * Dec 1999, Richard Henderson , move to generic cmpxchg. + */ + +/* + * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. + * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _DRM_H_ +#define _DRM_H_ + +#if defined(__KERNEL__) + +#include <linux/types.h> +#include <asm/ioctl.h> +typedef unsigned int drm_handle_t; + +#elif defined(__linux__) + +#include <linux/types.h> +#include <asm/ioctl.h> +typedef unsigned int drm_handle_t; + +#else /* One of the BSDs */ + +#include <stdint.h> +#include <sys/types.h> +#include <sys/ioccom.h> +typedef int8_t __s8; +typedef uint8_t __u8; +typedef int16_t __s16; +typedef uint16_t __u16; +typedef int32_t __s32; +typedef uint32_t __u32; +typedef int64_t __s64; +typedef uint64_t __u64; +typedef size_t __kernel_size_t; +typedef unsigned long drm_handle_t; + +#endif + +#if defined(__cplusplus) +extern "C" { +#endif + +#define DRM_NAME "drm" /**< Name in kernel, /dev, and /proc */ +#define DRM_MIN_ORDER 5 /**< At least 2^5 bytes = 32 bytes */ +#define DRM_MAX_ORDER 22 /**< Up to 2^22 bytes = 4MB */ +#define DRM_RAM_PERCENT 10 /**< How much system ram can we lock?
*/ + +#define _DRM_LOCK_HELD 0x80000000U /**< Hardware lock is held */ +#define _DRM_LOCK_CONT 0x40000000U /**< Hardware lock is contended */ +#define _DRM_LOCK_IS_HELD(lock) ((lock) & _DRM_LOCK_HELD) +#define _DRM_LOCK_IS_CONT(lock) ((lock) & _DRM_LOCK_CONT) +#define _DRM_LOCKING_CONTEXT(lock) ((lock) & ~(_DRM_LOCK_HELD|_DRM_LOCK_CONT)) + +typedef unsigned int drm_context_t; +typedef unsigned int drm_drawable_t; +typedef unsigned int drm_magic_t; + +/* + * Cliprect. + * + * \warning: If you change this structure, make sure you change + * XF86DRIClipRectRec in the server as well + * + * \note KW: Actually it's illegal to change either for + * backwards-compatibility reasons. + */ +struct drm_clip_rect { + unsigned short x1; + unsigned short y1; + unsigned short x2; + unsigned short y2; +}; + +/* + * Drawable information. + */ +struct drm_drawable_info { + unsigned int num_rects; + struct drm_clip_rect *rects; +}; + +/* + * Texture region, + */ +struct drm_tex_region { + unsigned char next; + unsigned char prev; + unsigned char in_use; + unsigned char padding; + unsigned int age; +}; + +/* + * Hardware lock. + * + * The lock structure is a simple cache-line aligned integer. To avoid + * processor bus contention on a multiprocessor system, there should not be any + * other data stored in the same cache line. + */ +struct drm_hw_lock { + __volatile__ unsigned int lock; /**< lock variable */ + char padding[60]; /**< Pad to cache line */ +}; + +/* + * DRM_IOCTL_VERSION ioctl argument type. + * + * \sa drmGetVersion(). + */ +struct drm_version { + int version_major; /**< Major version */ + int version_minor; /**< Minor version */ + int version_patchlevel; /**< Patch level */ + __kernel_size_t name_len; /**< Length of name buffer */ + char __user *name; /**< Name of driver */ + __kernel_size_t date_len; /**< Length of date buffer */ + char __user *date; /**< User-space buffer to hold date */ + __kernel_size_t desc_len; /**< Length of desc buffer */ + char __user *desc; /**< User-space buffer to hold desc */ +}; + +/* + * DRM_IOCTL_GET_UNIQUE ioctl argument type. + * + * \sa drmGetBusid() and drmSetBusId(). + */ +struct drm_unique { + __kernel_size_t unique_len; /**< Length of unique */ + char __user *unique; /**< Unique name for driver instantiation */ +}; + +struct drm_list { + int count; /**< Length of user-space structures */ + struct drm_version __user *version; +}; + +struct drm_block { + int unused; +}; + +/* + * DRM_IOCTL_CONTROL ioctl argument type. + * + * \sa drmCtlInstHandler() and drmCtlUninstHandler(). + */ +struct drm_control { + enum { + DRM_ADD_COMMAND, + DRM_RM_COMMAND, + DRM_INST_HANDLER, + DRM_UNINST_HANDLER + } func; + int irq; +}; + +/* + * Type of memory to map. + */ +enum drm_map_type { + _DRM_FRAME_BUFFER = 0, /**< WC (no caching), no core dump */ + _DRM_REGISTERS = 1, /**< no caching, no core dump */ + _DRM_SHM = 2, /**< shared, cached */ + _DRM_AGP = 3, /**< AGP/GART */ + _DRM_SCATTER_GATHER = 4, /**< Scatter/gather memory for PCI DMA */ + _DRM_CONSISTENT = 5 /**< Consistent memory for PCI DMA */ +}; + +/* + * Memory mapping flags. 
+ */ +enum drm_map_flags { + _DRM_RESTRICTED = 0x01, /**< Cannot be mapped to user-virtual */ + _DRM_READ_ONLY = 0x02, + _DRM_LOCKED = 0x04, /**< shared, cached, locked */ + _DRM_KERNEL = 0x08, /**< kernel requires access */ + _DRM_WRITE_COMBINING = 0x10, /**< use write-combining if available */ + _DRM_CONTAINS_LOCK = 0x20, /**< SHM page that contains lock */ + _DRM_REMOVABLE = 0x40, /**< Removable mapping */ + _DRM_DRIVER = 0x80 /**< Managed by driver */ +}; + +struct drm_ctx_priv_map { + unsigned int ctx_id; /**< Context requesting private mapping */ + void *handle; /**< Handle of map */ +}; + +/* + * DRM_IOCTL_GET_MAP, DRM_IOCTL_ADD_MAP and DRM_IOCTL_RM_MAP ioctls + * argument type. + * + * \sa drmAddMap(). + */ +struct drm_map { + unsigned long offset; /**< Requested physical address (0 for SAREA)*/ + unsigned long size; /**< Requested physical size (bytes) */ + enum drm_map_type type; /**< Type of memory to map */ + enum drm_map_flags flags; /**< Flags */ + void *handle; /**< User-space: "Handle" to pass to mmap() */ + /**< Kernel-space: kernel-virtual address */ + int mtrr; /**< MTRR slot used */ + /* Private data */ +}; + +/* + * DRM_IOCTL_GET_CLIENT ioctl argument type. + */ +struct drm_client { + int idx; /**< Which client desired? */ + int auth; /**< Is client authenticated? */ + unsigned long pid; /**< Process ID */ + unsigned long uid; /**< User ID */ + unsigned long magic; /**< Magic */ + unsigned long iocs; /**< Ioctl count */ +}; + +enum drm_stat_type { + _DRM_STAT_LOCK, + _DRM_STAT_OPENS, + _DRM_STAT_CLOSES, + _DRM_STAT_IOCTLS, + _DRM_STAT_LOCKS, + _DRM_STAT_UNLOCKS, + _DRM_STAT_VALUE, /**< Generic value */ + _DRM_STAT_BYTE, /**< Generic byte counter (1024bytes/K) */ + _DRM_STAT_COUNT, /**< Generic non-byte counter (1000/k) */ + + _DRM_STAT_IRQ, /**< IRQ */ + _DRM_STAT_PRIMARY, /**< Primary DMA bytes */ + _DRM_STAT_SECONDARY, /**< Secondary DMA bytes */ + _DRM_STAT_DMA, /**< DMA */ + _DRM_STAT_SPECIAL, /**< Special DMA (e.g., priority or polled) */ + _DRM_STAT_MISSED /**< Missed DMA opportunity */ + /* Add to the *END* of the list */ +}; + +/* + * DRM_IOCTL_GET_STATS ioctl argument type. + */ +struct drm_stats { + unsigned long count; + struct { + unsigned long value; + enum drm_stat_type type; + } data[15]; +}; + +/* + * Hardware locking flags. + */ +enum drm_lock_flags { + _DRM_LOCK_READY = 0x01, /**< Wait until hardware is ready for DMA */ + _DRM_LOCK_QUIESCENT = 0x02, /**< Wait until hardware quiescent */ + _DRM_LOCK_FLUSH = 0x04, /**< Flush this context's DMA queue first */ + _DRM_LOCK_FLUSH_ALL = 0x08, /**< Flush all DMA queues first */ + /* These *HALT* flags aren't supported yet + -- they will be used to support the + full-screen DGA-like mode. */ + _DRM_HALT_ALL_QUEUES = 0x10, /**< Halt all current and future queues */ + _DRM_HALT_CUR_QUEUES = 0x20 /**< Halt all current queues */ +}; + +/* + * DRM_IOCTL_LOCK, DRM_IOCTL_UNLOCK and DRM_IOCTL_FINISH ioctl argument type. + * + * \sa drmGetLock() and drmUnlock(). + */ +struct drm_lock { + int context; + enum drm_lock_flags flags; +}; + +/* + * DMA flags + * + * \warning + * These values \e must match xf86drm.h. + * + * \sa drm_dma. + */ +enum drm_dma_flags { + /* Flags for DMA buffer dispatch */ + _DRM_DMA_BLOCK = 0x01, /**< + * Block until buffer dispatched. + * + * \note The buffer may not yet have + * been processed by the hardware -- + * getting a hardware lock with the + * hardware quiescent will ensure + * that the buffer has been + * processed. 
+ */ + _DRM_DMA_WHILE_LOCKED = 0x02, /**< Dispatch while lock held */ + _DRM_DMA_PRIORITY = 0x04, /**< High priority dispatch */ + + /* Flags for DMA buffer request */ + _DRM_DMA_WAIT = 0x10, /**< Wait for free buffers */ + _DRM_DMA_SMALLER_OK = 0x20, /**< Smaller-than-requested buffers OK */ + _DRM_DMA_LARGER_OK = 0x40 /**< Larger-than-requested buffers OK */ +}; + +/* + * DRM_IOCTL_ADD_BUFS and DRM_IOCTL_MARK_BUFS ioctl argument type. + * + * \sa drmAddBufs(). + */ +struct drm_buf_desc { + int count; /**< Number of buffers of this size */ + int size; /**< Size in bytes */ + int low_mark; /**< Low water mark */ + int high_mark; /**< High water mark */ + enum { + _DRM_PAGE_ALIGN = 0x01, /**< Align on page boundaries for DMA */ + _DRM_AGP_BUFFER = 0x02, /**< Buffer is in AGP space */ + _DRM_SG_BUFFER = 0x04, /**< Scatter/gather memory buffer */ + _DRM_FB_BUFFER = 0x08, /**< Buffer is in frame buffer */ + _DRM_PCI_BUFFER_RO = 0x10 /**< Map PCI DMA buffer read-only */ + } flags; + unsigned long agp_start; /**< + * Start address of where the AGP buffers are + * in the AGP aperture + */ +}; + +/* + * DRM_IOCTL_INFO_BUFS ioctl argument type. + */ +struct drm_buf_info { + int count; /**< Entries in list */ + struct drm_buf_desc __user *list; +}; + +/* + * DRM_IOCTL_FREE_BUFS ioctl argument type. + */ +struct drm_buf_free { + int count; + int __user *list; +}; + +/* + * Buffer information + * + * \sa drm_buf_map. + */ +struct drm_buf_pub { + int idx; /**< Index into the master buffer list */ + int total; /**< Buffer size */ + int used; /**< Amount of buffer in use (for DMA) */ + void __user *address; /**< Address of buffer */ +}; + +/* + * DRM_IOCTL_MAP_BUFS ioctl argument type. + */ +struct drm_buf_map { + int count; /**< Length of the buffer list */ +#ifdef __cplusplus + void __user *virt; +#else + void __user *virtual; /**< Mmap'd area in user-virtual */ +#endif + struct drm_buf_pub __user *list; /**< Buffer information */ +}; + +/* + * DRM_IOCTL_DMA ioctl argument type. + * + * Indices here refer to the offset into the buffer list in drm_buf_get. + * + * \sa drmDMA(). + */ +struct drm_dma { + int context; /**< Context handle */ + int send_count; /**< Number of buffers to send */ + int __user *send_indices; /**< List of handles to buffers */ + int __user *send_sizes; /**< Lengths of data to send */ + enum drm_dma_flags flags; /**< Flags */ + int request_count; /**< Number of buffers requested */ + int request_size; /**< Desired size for buffers */ + int __user *request_indices; /**< Buffer information */ + int __user *request_sizes; + int granted_count; /**< Number of buffers granted */ +}; + +enum drm_ctx_flags { + _DRM_CONTEXT_PRESERVED = 0x01, + _DRM_CONTEXT_2DONLY = 0x02 +}; + +/* + * DRM_IOCTL_ADD_CTX ioctl argument type. + * + * \sa drmCreateContext() and drmDestroyContext(). + */ +struct drm_ctx { + drm_context_t handle; + enum drm_ctx_flags flags; +}; + +/* + * DRM_IOCTL_RES_CTX ioctl argument type. + */ +struct drm_ctx_res { + int count; + struct drm_ctx __user *contexts; +}; + +/* + * DRM_IOCTL_ADD_DRAW and DRM_IOCTL_RM_DRAW ioctl argument type. + */ +struct drm_draw { + drm_drawable_t handle; +}; + +/* + * DRM_IOCTL_UPDATE_DRAW ioctl argument type. + */ +typedef enum { + DRM_DRAWABLE_CLIPRECTS +} drm_drawable_info_type_t; + +struct drm_update_draw { + drm_drawable_t handle; + unsigned int type; + unsigned int num; + unsigned long long data; +}; + +/* + * DRM_IOCTL_GET_MAGIC and DRM_IOCTL_AUTH_MAGIC ioctl argument type. 
+ */ +struct drm_auth { + drm_magic_t magic; +}; + +/* + * DRM_IOCTL_IRQ_BUSID ioctl argument type. + * + * \sa drmGetInterruptFromBusID(). + */ +struct drm_irq_busid { + int irq; /**< IRQ number */ + int busnum; /**< bus number */ + int devnum; /**< device number */ + int funcnum; /**< function number */ +}; + +enum drm_vblank_seq_type { + _DRM_VBLANK_ABSOLUTE = 0x0, /**< Wait for specific vblank sequence number */ + _DRM_VBLANK_RELATIVE = 0x1, /**< Wait for given number of vblanks */ + /* bits 1-6 are reserved for high crtcs */ + _DRM_VBLANK_HIGH_CRTC_MASK = 0x0000003e, + _DRM_VBLANK_EVENT = 0x4000000, /**< Send event instead of blocking */ + _DRM_VBLANK_FLIP = 0x8000000, /**< Scheduled buffer swap should flip */ + _DRM_VBLANK_NEXTONMISS = 0x10000000, /**< If missed, wait for next vblank */ + _DRM_VBLANK_SECONDARY = 0x20000000, /**< Secondary display controller */ + _DRM_VBLANK_SIGNAL = 0x40000000 /**< Send signal instead of blocking, unsupported */ +}; +#define _DRM_VBLANK_HIGH_CRTC_SHIFT 1 + +#define _DRM_VBLANK_TYPES_MASK (_DRM_VBLANK_ABSOLUTE | _DRM_VBLANK_RELATIVE) +#define _DRM_VBLANK_FLAGS_MASK (_DRM_VBLANK_EVENT | _DRM_VBLANK_SIGNAL | \ + _DRM_VBLANK_SECONDARY | _DRM_VBLANK_NEXTONMISS) + +struct drm_wait_vblank_request { + enum drm_vblank_seq_type type; + unsigned int sequence; + unsigned long signal; +}; + +struct drm_wait_vblank_reply { + enum drm_vblank_seq_type type; + unsigned int sequence; + long tval_sec; + long tval_usec; +}; + +/* + * DRM_IOCTL_WAIT_VBLANK ioctl argument type. + * + * \sa drmWaitVBlank(). + */ +union drm_wait_vblank { + struct drm_wait_vblank_request request; + struct drm_wait_vblank_reply reply; +}; + +#define _DRM_PRE_MODESET 1 +#define _DRM_POST_MODESET 2 + +/* + * DRM_IOCTL_MODESET_CTL ioctl argument type + * + * \sa drmModesetCtl(). + */ +struct drm_modeset_ctl { + __u32 crtc; + __u32 cmd; +}; + +/* + * DRM_IOCTL_AGP_ENABLE ioctl argument type. + * + * \sa drmAgpEnable(). + */ +struct drm_agp_mode { + unsigned long mode; /**< AGP mode */ +}; + +/* + * DRM_IOCTL_AGP_ALLOC and DRM_IOCTL_AGP_FREE ioctls argument type. + * + * \sa drmAgpAlloc() and drmAgpFree(). + */ +struct drm_agp_buffer { + unsigned long size; /**< In bytes -- will round to page boundary */ + unsigned long handle; /**< Used for binding / unbinding */ + unsigned long type; /**< Type of memory to allocate */ + unsigned long physical; /**< Physical used by i810 */ +}; + +/* + * DRM_IOCTL_AGP_BIND and DRM_IOCTL_AGP_UNBIND ioctls argument type. + * + * \sa drmAgpBind() and drmAgpUnbind(). + */ +struct drm_agp_binding { + unsigned long handle; /**< From drm_agp_buffer */ + unsigned long offset; /**< In bytes -- will round to page boundary */ +}; + +/* + * DRM_IOCTL_AGP_INFO ioctl argument type. + * + * \sa drmAgpVersionMajor(), drmAgpVersionMinor(), drmAgpGetMode(), + * drmAgpBase(), drmAgpSize(), drmAgpMemoryUsed(), drmAgpMemoryAvail(), + * drmAgpVendorId() and drmAgpDeviceId(). + */ +struct drm_agp_info { + int agp_version_major; + int agp_version_minor; + unsigned long mode; + unsigned long aperture_base; /* physical address */ + unsigned long aperture_size; /* bytes */ + unsigned long memory_allowed; /* bytes */ + unsigned long memory_used; + + /* PCI information */ + unsigned short id_vendor; + unsigned short id_device; +}; + +/* + * DRM_IOCTL_SG_ALLOC ioctl argument type. 
+ */
+struct drm_scatter_gather {
+	unsigned long size;	/**< In bytes -- will round to page boundary */
+	unsigned long handle;	/**< Used for mapping / unmapping */
+};
+
+/*
+ * DRM_IOCTL_SET_VERSION ioctl argument type.
+ */
+struct drm_set_version {
+	int drm_di_major;
+	int drm_di_minor;
+	int drm_dd_major;
+	int drm_dd_minor;
+};
+
+/* DRM_IOCTL_GEM_CLOSE ioctl argument type */
+struct drm_gem_close {
+	/** Handle of the object to be closed. */
+	__u32 handle;
+	__u32 pad;
+};
+
+/* DRM_IOCTL_GEM_FLINK ioctl argument type */
+struct drm_gem_flink {
+	/** Handle for the object being named */
+	__u32 handle;
+
+	/** Returned global name */
+	__u32 name;
+};
+
+/* DRM_IOCTL_GEM_OPEN ioctl argument type */
+struct drm_gem_open {
+	/** Name of object being opened */
+	__u32 name;
+
+	/** Returned handle for the object */
+	__u32 handle;
+
+	/** Returned size of the object */
+	__u64 size;
+};
+
+struct drm_prime_change_gem_handle {
+	__u32 handle;
+	__u32 new_handle;
+};
+
+/**
+ * DRM_CAP_DUMB_BUFFER
+ *
+ * If set to 1, the driver supports creating dumb buffers via the
+ * &DRM_IOCTL_MODE_CREATE_DUMB ioctl.
+ */
+#define DRM_CAP_DUMB_BUFFER	0x1
+/**
+ * DRM_CAP_VBLANK_HIGH_CRTC
+ *
+ * If set to 1, the kernel supports specifying a :ref:`CRTC index`
+ * in the high bits of &drm_wait_vblank_request.type.
+ *
+ * Starting kernel version 2.6.39, this capability is always set to 1.
+ */
+#define DRM_CAP_VBLANK_HIGH_CRTC	0x2
+/**
+ * DRM_CAP_DUMB_PREFERRED_DEPTH
+ *
+ * The preferred bit depth for dumb buffers.
+ *
+ * The bit depth is the number of bits used to indicate the color of a single
+ * pixel excluding any padding. This is different from the number of bits per
+ * pixel. For instance, XRGB8888 has a bit depth of 24 but has 32 bits per
+ * pixel.
+ *
+ * Note that this preference only applies to dumb buffers, it's irrelevant for
+ * other types of buffers.
+ */
+#define DRM_CAP_DUMB_PREFERRED_DEPTH	0x3
+/**
+ * DRM_CAP_DUMB_PREFER_SHADOW
+ *
+ * If set to 1, the driver prefers userspace to render to a shadow buffer
+ * instead of directly rendering to a dumb buffer. For best speed, userspace
+ * should do streaming ordered memory copies into the dumb buffer and never
+ * read from it.
+ *
+ * Note that this preference only applies to dumb buffers, it's irrelevant for
+ * other types of buffers.
+ */
+#define DRM_CAP_DUMB_PREFER_SHADOW	0x4
+/**
+ * DRM_CAP_PRIME
+ *
+ * Bitfield of supported PRIME sharing capabilities. See &DRM_PRIME_CAP_IMPORT
+ * and &DRM_PRIME_CAP_EXPORT.
+ *
+ * Starting from kernel version 6.6, both &DRM_PRIME_CAP_IMPORT and
+ * &DRM_PRIME_CAP_EXPORT are always advertised.
+ *
+ * PRIME buffers are exposed as dma-buf file descriptors.
+ * See :ref:`prime_buffer_sharing`.
+ */
+#define DRM_CAP_PRIME	0x5
+/**
+ * DRM_PRIME_CAP_IMPORT
+ *
+ * If this bit is set in &DRM_CAP_PRIME, the driver supports importing PRIME
+ * buffers via the &DRM_IOCTL_PRIME_FD_TO_HANDLE ioctl.
+ *
+ * Starting from kernel version 6.6, this bit is always set in &DRM_CAP_PRIME.
+ */
+#define DRM_PRIME_CAP_IMPORT	0x1
+/**
+ * DRM_PRIME_CAP_EXPORT
+ *
+ * If this bit is set in &DRM_CAP_PRIME, the driver supports exporting PRIME
+ * buffers via the &DRM_IOCTL_PRIME_HANDLE_TO_FD ioctl.
+ *
+ * Starting from kernel version 6.6, this bit is always set in &DRM_CAP_PRIME.
+ */
+#define DRM_PRIME_CAP_EXPORT	0x2
+/**
+ * DRM_CAP_TIMESTAMP_MONOTONIC
+ *
+ * If set to 0, the kernel will report timestamps with ``CLOCK_REALTIME`` in
+ * struct drm_event_vblank. If set to 1, the kernel will report timestamps with
+ * ``CLOCK_MONOTONIC``. See ``clock_gettime(2)`` for the definition of these
+ * clocks.
+ *
+ * Starting from kernel version 2.6.39, the default value for this capability
+ * is 1. Starting kernel version 4.15, this capability is always set to 1.
+ */
+#define DRM_CAP_TIMESTAMP_MONOTONIC	0x6
+/**
+ * DRM_CAP_ASYNC_PAGE_FLIP
+ *
+ * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for legacy
+ * page-flips.
+ */
+#define DRM_CAP_ASYNC_PAGE_FLIP	0x7
+/**
+ * DRM_CAP_CURSOR_WIDTH
+ *
+ * The ``CURSOR_WIDTH`` and ``CURSOR_HEIGHT`` capabilities return a valid
+ * width x height combination for the hardware cursor. The intention is that a
+ * hardware-agnostic userspace can query a cursor plane size to use.
+ *
+ * Note that the cross-driver contract is to merely return a valid size;
+ * drivers are free to attach another meaning on top, e.g. i915 returns the
+ * maximum plane size.
+ */
+#define DRM_CAP_CURSOR_WIDTH	0x8
+/**
+ * DRM_CAP_CURSOR_HEIGHT
+ *
+ * See &DRM_CAP_CURSOR_WIDTH.
+ */
+#define DRM_CAP_CURSOR_HEIGHT	0x9
+/**
+ * DRM_CAP_ADDFB2_MODIFIERS
+ *
+ * If set to 1, the driver supports supplying modifiers in the
+ * &DRM_IOCTL_MODE_ADDFB2 ioctl.
+ */
+#define DRM_CAP_ADDFB2_MODIFIERS	0x10
+/**
+ * DRM_CAP_PAGE_FLIP_TARGET
+ *
+ * If set to 1, the driver supports the &DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE and
+ * &DRM_MODE_PAGE_FLIP_TARGET_RELATIVE flags in
+ * &drm_mode_crtc_page_flip_target.flags for the &DRM_IOCTL_MODE_PAGE_FLIP
+ * ioctl.
+ */
+#define DRM_CAP_PAGE_FLIP_TARGET	0x11
+/**
+ * DRM_CAP_CRTC_IN_VBLANK_EVENT
+ *
+ * If set to 1, the kernel supports reporting the CRTC ID in
+ * &drm_event_vblank.crtc_id for the &DRM_EVENT_VBLANK and
+ * &DRM_EVENT_FLIP_COMPLETE events.
+ *
+ * Starting kernel version 4.12, this capability is always set to 1.
+ */
+#define DRM_CAP_CRTC_IN_VBLANK_EVENT	0x12
+/**
+ * DRM_CAP_SYNCOBJ
+ *
+ * If set to 1, the driver supports sync objects. See :ref:`drm_sync_objects`.
+ */
+#define DRM_CAP_SYNCOBJ		0x13
+/**
+ * DRM_CAP_SYNCOBJ_TIMELINE
+ *
+ * If set to 1, the driver supports timeline operations on sync objects. See
+ * :ref:`drm_sync_objects`.
+ */
+#define DRM_CAP_SYNCOBJ_TIMELINE	0x14
+/**
+ * DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP
+ *
+ * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for atomic
+ * commits.
+ */
+#define DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP	0x15
+
+/* DRM_IOCTL_GET_CAP ioctl argument type */
+struct drm_get_cap {
+	__u64 capability;
+	__u64 value;
+};
+
+/**
+ * DRM_CLIENT_CAP_STEREO_3D
+ *
+ * If set to 1, the DRM core will expose the stereo 3D capabilities of the
+ * monitor by advertising the supported 3D layouts in the flags of struct
+ * drm_mode_modeinfo. See ``DRM_MODE_FLAG_3D_*``.
+ *
+ * This capability is always supported for all drivers starting from kernel
+ * version 3.13.
+ */
+#define DRM_CLIENT_CAP_STEREO_3D	1
+
+/**
+ * DRM_CLIENT_CAP_UNIVERSAL_PLANES
+ *
+ * If set to 1, the DRM core will expose all planes (overlay, primary, and
+ * cursor) to userspace.
+ *
+ * This capability has been introduced in kernel version 3.15. Starting from
+ * kernel version 3.17, this capability is always supported for all drivers.
+ */
+#define DRM_CLIENT_CAP_UNIVERSAL_PLANES	2
+
+/**
+ * DRM_CLIENT_CAP_ATOMIC
+ *
+ * If set to 1, the DRM core will expose atomic properties to userspace. This
+ * implicitly enables &DRM_CLIENT_CAP_UNIVERSAL_PLANES and
+ * &DRM_CLIENT_CAP_ASPECT_RATIO.
+ *
+ * If the driver doesn't support atomic mode-setting, enabling this capability
+ * will fail with -EOPNOTSUPP.
+ *
+ * This capability has been introduced in kernel version 4.0. Starting from
+ * kernel version 4.2, this capability is always supported for atomic-capable
+ * drivers.
+ */
+#define DRM_CLIENT_CAP_ATOMIC	3
+
+/**
+ * DRM_CLIENT_CAP_ASPECT_RATIO
+ *
+ * If set to 1, the DRM core will provide aspect ratio information in modes.
+ * See ``DRM_MODE_FLAG_PIC_AR_*``.
+ *
+ * This capability is always supported for all drivers starting from kernel
+ * version 4.18.
+ */
+#define DRM_CLIENT_CAP_ASPECT_RATIO	4
+
+/**
+ * DRM_CLIENT_CAP_WRITEBACK_CONNECTORS
+ *
+ * If set to 1, the DRM core will expose special connectors to be used for
+ * writing back to memory the scene setup in the commit. The client must enable
+ * &DRM_CLIENT_CAP_ATOMIC first.
+ *
+ * This capability is always supported for atomic-capable drivers starting from
+ * kernel version 4.19.
+ */
+#define DRM_CLIENT_CAP_WRITEBACK_CONNECTORS	5
+
+/**
+ * DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT
+ *
+ * Drivers for para-virtualized hardware (e.g. vmwgfx, qxl, virtio and
+ * virtualbox) have additional restrictions for cursor planes (thus
+ * making cursor planes on those drivers not truly universal), e.g.
+ * they need cursor planes to act like one would expect from a mouse
+ * cursor and have correctly set hotspot properties.
+ * If this client cap is not set, the DRM core will hide the cursor plane on
+ * those virtualized drivers because not setting it implies that the
+ * client is not capable of dealing with those extra restrictions.
+ * Clients which do set the cursor hotspot and treat the cursor plane
+ * like a mouse cursor should set this property.
+ * The client must enable &DRM_CLIENT_CAP_ATOMIC first.
+ *
+ * Setting this property on drivers which do not special case
+ * cursor planes (i.e. non-virtualized drivers) will return
+ * EOPNOTSUPP, which can be used by userspace to gauge
+ * requirements of the hardware/drivers they're running on.
+ *
+ * This capability is always supported for atomic-capable virtualized
+ * drivers starting from kernel version 6.6.
+ */
+#define DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT	6
+
+/* DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */
+struct drm_set_client_cap {
+	__u64 capability;
+	__u64 value;
+};
+
+#define DRM_RDWR O_RDWR
+#define DRM_CLOEXEC O_CLOEXEC
+struct drm_prime_handle {
+	__u32 handle;
+
+	/** Flags, only applicable for handle->fd */
+	__u32 flags;
+
+	/** Returned dmabuf file descriptor */
+	__s32 fd;
+};
+
+struct drm_syncobj_create {
+	__u32 handle;
+#define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0)
+	__u32 flags;
+};
+
+struct drm_syncobj_destroy {
+	__u32 handle;
+	__u32 pad;
+};
+
+#define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE (1 << 0)
+#define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE (1 << 0)
+struct drm_syncobj_handle {
+	__u32 handle;
+	__u32 flags;
+
+	__s32 fd;
+	__u32 pad;
+};
+
+struct drm_syncobj_transfer {
+	__u32 src_handle;
+	__u32 dst_handle;
+	__u64 src_point;
+	__u64 dst_point;
+	__u32 flags;
+	__u32 pad;
+};
+
+#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
+#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
+#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2) /* wait for time point to become available */
+#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE (1 << 3) /* set fence deadline to deadline_nsec */
+struct drm_syncobj_wait {
+	__u64 handles;
+	/* absolute timeout */
+	__s64 timeout_nsec;
+	__u32 count_handles;
+	__u32 flags;
+	__u32 first_signaled; /* only valid when not waiting all */
+	__u32 pad;
+	/**
+	 * @deadline_nsec - fence deadline hint
+	 *
+	 * Deadline hint, in absolute CLOCK_MONOTONIC, to set on backing
+	 * fence(s) if the DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE flag is
+	 * set.
+	 */
+	__u64 deadline_nsec;
+};
+
+struct drm_syncobj_timeline_wait {
+	__u64 handles;
+	/* wait on a specific timeline point for each handle */
+	__u64 points;
+	/* absolute timeout */
+	__s64 timeout_nsec;
+	__u32 count_handles;
+	__u32 flags;
+	__u32 first_signaled; /* only valid when not waiting all */
+	__u32 pad;
+	/**
+	 * @deadline_nsec - fence deadline hint
+	 *
+	 * Deadline hint, in absolute CLOCK_MONOTONIC, to set on backing
+	 * fence(s) if the DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE flag is
+	 * set.
+	 */
+	__u64 deadline_nsec;
+};
+
+/**
+ * struct drm_syncobj_eventfd
+ * @handle: syncobj handle.
+ * @flags: Zero to wait for the point to be signalled, or
+ *         &DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE to wait for a fence to be
+ *         available for the point.
+ * @point: syncobj timeline point (set to zero for binary syncobjs).
+ * @fd: Existing eventfd to send events to.
+ * @pad: Must be zero.
+ *
+ * Register an eventfd to be signalled by a syncobj. The eventfd counter will
+ * be incremented by one.
+ */
+struct drm_syncobj_eventfd {
+	__u32 handle;
+	__u32 flags;
+	__u64 point;
+	__s32 fd;
+	__u32 pad;
+};
+
+
+struct drm_syncobj_array {
+	__u64 handles;
+	__u32 count_handles;
+	__u32 pad;
+};
+
+#define DRM_SYNCOBJ_QUERY_FLAGS_LAST_SUBMITTED (1 << 0) /* last available point on timeline syncobj */
+struct drm_syncobj_timeline_array {
+	__u64 handles;
+	__u64 points;
+	__u32 count_handles;
+	__u32 flags;
+};
+
+
+/* Query current scanout sequence number */
+struct drm_crtc_get_sequence {
+	__u32 crtc_id;		/* requested crtc_id */
+	__u32 active;		/* return: crtc output is active */
+	__u64 sequence;		/* return: most recent vblank sequence */
+	__s64 sequence_ns;	/* return: most recent time of first pixel out */
+};
+
+/* Queue event to be delivered at specified sequence. Time stamp marks
+ * when the first pixel of the refresh cycle leaves the display engine
+ * for the display
+ */
+#define DRM_CRTC_SEQUENCE_RELATIVE	0x00000001	/* sequence is relative to current */
+#define DRM_CRTC_SEQUENCE_NEXT_ON_MISS	0x00000002	/* Use next sequence if we've missed */
+
+struct drm_crtc_queue_sequence {
+	__u32 crtc_id;
+	__u32 flags;
+	__u64 sequence;		/* on input, target sequence. on output, actual sequence */
+	__u64 user_data;	/* user data passed to event */
+};
+
+#define DRM_CLIENT_NAME_MAX_LEN 64
+struct drm_set_client_name {
+	__u64 name_len;
+	__u64 name;
+};
+
+
+#if defined(__cplusplus)
+}
+#endif
+
+#include "drm_mode.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define DRM_IOCTL_BASE		'd'
+#define DRM_IO(nr)		_IO(DRM_IOCTL_BASE,nr)
+#define DRM_IOR(nr,type)	_IOR(DRM_IOCTL_BASE,nr,type)
+#define DRM_IOW(nr,type)	_IOW(DRM_IOCTL_BASE,nr,type)
+#define DRM_IOWR(nr,type)	_IOWR(DRM_IOCTL_BASE,nr,type)
+
+#define DRM_IOCTL_VERSION	DRM_IOWR(0x00, struct drm_version)
+#define DRM_IOCTL_GET_UNIQUE	DRM_IOWR(0x01, struct drm_unique)
+#define DRM_IOCTL_GET_MAGIC	DRM_IOR( 0x02, struct drm_auth)
+#define DRM_IOCTL_IRQ_BUSID	DRM_IOWR(0x03, struct drm_irq_busid)
+#define DRM_IOCTL_GET_MAP	DRM_IOWR(0x04, struct drm_map)
+#define DRM_IOCTL_GET_CLIENT	DRM_IOWR(0x05, struct drm_client)
+#define DRM_IOCTL_GET_STATS	DRM_IOR( 0x06, struct drm_stats)
+#define DRM_IOCTL_SET_VERSION	DRM_IOWR(0x07, struct drm_set_version)
+#define DRM_IOCTL_MODESET_CTL	DRM_IOW(0x08, struct drm_modeset_ctl)
+/**
+ * DRM_IOCTL_GEM_CLOSE - Close a GEM handle.
+ *
+ * GEM handles are not reference-counted by the kernel. User-space is
+ * responsible for managing their lifetime. For example, if user-space imports
+ * the same memory object twice on the same DRM file description, the same GEM
+ * handle is returned by both imports, and user-space needs to ensure
+ * &DRM_IOCTL_GEM_CLOSE is performed once only. The same situation can happen
+ * when a memory object is allocated, then exported and imported again on the
+ * same DRM file description. The &DRM_IOCTL_MODE_GETFB2 IOCTL is an exception
+ * and always returns fresh new GEM handles even if an existing GEM handle
+ * already refers to the same memory object before the IOCTL is performed.
+ */
+#define DRM_IOCTL_GEM_CLOSE	DRM_IOW (0x09, struct drm_gem_close)
+#define DRM_IOCTL_GEM_FLINK	DRM_IOWR(0x0a, struct drm_gem_flink)
+#define DRM_IOCTL_GEM_OPEN	DRM_IOWR(0x0b, struct drm_gem_open)
+#define DRM_IOCTL_GET_CAP	DRM_IOWR(0x0c, struct drm_get_cap)
+#define DRM_IOCTL_SET_CLIENT_CAP	DRM_IOW( 0x0d, struct drm_set_client_cap)
+
+#define DRM_IOCTL_SET_UNIQUE	DRM_IOW( 0x10, struct drm_unique)
+#define DRM_IOCTL_AUTH_MAGIC	DRM_IOW( 0x11, struct drm_auth)
+#define DRM_IOCTL_BLOCK		DRM_IOWR(0x12, struct drm_block)
+#define DRM_IOCTL_UNBLOCK	DRM_IOWR(0x13, struct drm_block)
+#define DRM_IOCTL_CONTROL	DRM_IOW( 0x14, struct drm_control)
+#define DRM_IOCTL_ADD_MAP	DRM_IOWR(0x15, struct drm_map)
+#define DRM_IOCTL_ADD_BUFS	DRM_IOWR(0x16, struct drm_buf_desc)
+#define DRM_IOCTL_MARK_BUFS	DRM_IOW( 0x17, struct drm_buf_desc)
+#define DRM_IOCTL_INFO_BUFS	DRM_IOWR(0x18, struct drm_buf_info)
+#define DRM_IOCTL_MAP_BUFS	DRM_IOWR(0x19, struct drm_buf_map)
+#define DRM_IOCTL_FREE_BUFS	DRM_IOW( 0x1a, struct drm_buf_free)
+
+#define DRM_IOCTL_RM_MAP	DRM_IOW( 0x1b, struct drm_map)
+
+#define DRM_IOCTL_SET_SAREA_CTX	DRM_IOW( 0x1c, struct drm_ctx_priv_map)
+#define DRM_IOCTL_GET_SAREA_CTX	DRM_IOWR(0x1d, struct drm_ctx_priv_map)
+
+#define DRM_IOCTL_SET_MASTER	DRM_IO(0x1e)
+#define DRM_IOCTL_DROP_MASTER	DRM_IO(0x1f)
+
+#define DRM_IOCTL_ADD_CTX	DRM_IOWR(0x20, struct drm_ctx)
+#define DRM_IOCTL_RM_CTX	DRM_IOWR(0x21, struct drm_ctx)
+#define DRM_IOCTL_MOD_CTX	DRM_IOW( 0x22, struct drm_ctx)
+#define DRM_IOCTL_GET_CTX	DRM_IOWR(0x23, struct drm_ctx)
+#define DRM_IOCTL_SWITCH_CTX	DRM_IOW( 0x24, struct drm_ctx)
+#define DRM_IOCTL_NEW_CTX	DRM_IOW( 0x25, struct drm_ctx)
+#define DRM_IOCTL_RES_CTX	DRM_IOWR(0x26, struct drm_ctx_res)
+#define DRM_IOCTL_ADD_DRAW	DRM_IOWR(0x27, struct drm_draw)
+#define DRM_IOCTL_RM_DRAW	DRM_IOWR(0x28, struct drm_draw)
+#define DRM_IOCTL_DMA		DRM_IOWR(0x29, struct drm_dma)
+#define DRM_IOCTL_LOCK		DRM_IOW( 0x2a, struct drm_lock)
+#define DRM_IOCTL_UNLOCK	DRM_IOW( 0x2b, struct drm_lock)
+#define DRM_IOCTL_FINISH	DRM_IOW( 0x2c, struct drm_lock)
+
+/**
+ * DRM_IOCTL_PRIME_HANDLE_TO_FD - Convert a GEM handle to a DMA-BUF FD.
+ *
+ * User-space sets &drm_prime_handle.handle with the GEM handle to export and
+ * &drm_prime_handle.flags, and gets back a DMA-BUF file descriptor in
+ * &drm_prime_handle.fd.
+ *
+ * The export can fail for any driver-specific reason, e.g. because export is
+ * not supported for this specific GEM handle (but might be for others).
+ *
+ * Support for exporting DMA-BUFs is advertised via &DRM_PRIME_CAP_EXPORT.
+ */
+#define DRM_IOCTL_PRIME_HANDLE_TO_FD	DRM_IOWR(0x2d, struct drm_prime_handle)
+/**
+ * DRM_IOCTL_PRIME_FD_TO_HANDLE - Convert a DMA-BUF FD to a GEM handle.
+ *
+ * User-space sets &drm_prime_handle.fd with a DMA-BUF file descriptor to
+ * import, and gets back a GEM handle in &drm_prime_handle.handle.
+ * &drm_prime_handle.flags is unused.
+ *
+ * If an existing GEM handle refers to the memory object backing the DMA-BUF,
+ * that GEM handle is returned. Therefore user-space which needs to handle
+ * arbitrary DMA-BUFs must have a user-space lookup data structure to manually
+ * reference-count duplicated GEM handles. For more information see
+ * &DRM_IOCTL_GEM_CLOSE.
+ *
+ * The import can fail for any driver-specific reason, e.g. because import is
+ * only supported for DMA-BUFs allocated on this DRM device.
+ *
+ * Support for importing DMA-BUFs is advertised via &DRM_PRIME_CAP_IMPORT.
+ */
+#define DRM_IOCTL_PRIME_FD_TO_HANDLE	DRM_IOWR(0x2e, struct drm_prime_handle)
+
+#define DRM_IOCTL_AGP_ACQUIRE	DRM_IO( 0x30)
+#define DRM_IOCTL_AGP_RELEASE	DRM_IO( 0x31)
+#define DRM_IOCTL_AGP_ENABLE	DRM_IOW( 0x32, struct drm_agp_mode)
+#define DRM_IOCTL_AGP_INFO	DRM_IOR( 0x33, struct drm_agp_info)
+#define DRM_IOCTL_AGP_ALLOC	DRM_IOWR(0x34, struct drm_agp_buffer)
+#define DRM_IOCTL_AGP_FREE	DRM_IOW( 0x35, struct drm_agp_buffer)
+#define DRM_IOCTL_AGP_BIND	DRM_IOW( 0x36, struct drm_agp_binding)
+#define DRM_IOCTL_AGP_UNBIND	DRM_IOW( 0x37, struct drm_agp_binding)
+
+#define DRM_IOCTL_SG_ALLOC	DRM_IOWR(0x38, struct drm_scatter_gather)
+#define DRM_IOCTL_SG_FREE	DRM_IOW( 0x39, struct drm_scatter_gather)
+
+#define DRM_IOCTL_WAIT_VBLANK	DRM_IOWR(0x3a, union drm_wait_vblank)
+
+#define DRM_IOCTL_CRTC_GET_SEQUENCE	DRM_IOWR(0x3b, struct drm_crtc_get_sequence)
+#define DRM_IOCTL_CRTC_QUEUE_SEQUENCE	DRM_IOWR(0x3c, struct drm_crtc_queue_sequence)
+
+#define DRM_IOCTL_UPDATE_DRAW	DRM_IOW(0x3f, struct drm_update_draw)
+
+#define DRM_IOCTL_MODE_GETRESOURCES	DRM_IOWR(0xA0, struct drm_mode_card_res)
+#define DRM_IOCTL_MODE_GETCRTC		DRM_IOWR(0xA1, struct drm_mode_crtc)
+#define DRM_IOCTL_MODE_SETCRTC		DRM_IOWR(0xA2, struct drm_mode_crtc)
+#define DRM_IOCTL_MODE_CURSOR		DRM_IOWR(0xA3, struct drm_mode_cursor)
+#define DRM_IOCTL_MODE_GETGAMMA		DRM_IOWR(0xA4, struct drm_mode_crtc_lut)
+#define DRM_IOCTL_MODE_SETGAMMA		DRM_IOWR(0xA5, struct drm_mode_crtc_lut)
+#define DRM_IOCTL_MODE_GETENCODER	DRM_IOWR(0xA6, struct drm_mode_get_encoder)
+#define DRM_IOCTL_MODE_GETCONNECTOR	DRM_IOWR(0xA7, struct drm_mode_get_connector)
+#define DRM_IOCTL_MODE_ATTACHMODE	DRM_IOWR(0xA8, struct drm_mode_mode_cmd) /* deprecated (never worked) */
+#define DRM_IOCTL_MODE_DETACHMODE	DRM_IOWR(0xA9, struct drm_mode_mode_cmd) /* deprecated (never worked) */
+
+#define DRM_IOCTL_MODE_GETPROPERTY	DRM_IOWR(0xAA, struct drm_mode_get_property)
+#define DRM_IOCTL_MODE_SETPROPERTY	DRM_IOWR(0xAB, struct drm_mode_connector_set_property)
+#define DRM_IOCTL_MODE_GETPROPBLOB	DRM_IOWR(0xAC, struct drm_mode_get_blob)
+#define DRM_IOCTL_MODE_GETFB		DRM_IOWR(0xAD, struct drm_mode_fb_cmd)
+#define DRM_IOCTL_MODE_ADDFB		DRM_IOWR(0xAE, struct drm_mode_fb_cmd)
+/**
+ * DRM_IOCTL_MODE_RMFB - Remove a framebuffer.
+ *
+ * This removes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL
+ * argument is a framebuffer object ID.
+ *
+ * Warning: removing a framebuffer currently in-use on an enabled plane will
+ * disable that plane. The CRTC the plane is linked to may also be disabled
+ * (depending on driver capabilities).
+ */
+#define DRM_IOCTL_MODE_RMFB		DRM_IOWR(0xAF, unsigned int)
+#define DRM_IOCTL_MODE_PAGE_FLIP	DRM_IOWR(0xB0, struct drm_mode_crtc_page_flip)
+#define DRM_IOCTL_MODE_DIRTYFB		DRM_IOWR(0xB1, struct drm_mode_fb_dirty_cmd)
+
+/**
+ * DRM_IOCTL_MODE_CREATE_DUMB - Create a new dumb buffer object.
+ *
+ * KMS dumb buffers provide a very primitive way to allocate a buffer object
+ * suitable for scanout and map it for software rendering. KMS dumb buffers are
+ * not suitable for hardware-accelerated rendering nor video decoding. KMS dumb
+ * buffers are not suitable to be displayed on any other device than the KMS
+ * device where they were allocated from. Also see
+ * :ref:`kms_dumb_buffer_objects`.
+ *
+ * The IOCTL argument is a struct drm_mode_create_dumb.
+ *
+ * User-space is expected to create a KMS dumb buffer via this IOCTL, then add
+ * it as a KMS framebuffer via &DRM_IOCTL_MODE_ADDFB and map it via
+ * &DRM_IOCTL_MODE_MAP_DUMB.
+ *
+ * &DRM_CAP_DUMB_BUFFER indicates whether this IOCTL is supported.
+ * &DRM_CAP_DUMB_PREFERRED_DEPTH and &DRM_CAP_DUMB_PREFER_SHADOW indicate
+ * driver preferences for dumb buffers.
+ */
+#define DRM_IOCTL_MODE_CREATE_DUMB	DRM_IOWR(0xB2, struct drm_mode_create_dumb)
+#define DRM_IOCTL_MODE_MAP_DUMB		DRM_IOWR(0xB3, struct drm_mode_map_dumb)
+#define DRM_IOCTL_MODE_DESTROY_DUMB	DRM_IOWR(0xB4, struct drm_mode_destroy_dumb)
+#define DRM_IOCTL_MODE_GETPLANERESOURCES	DRM_IOWR(0xB5, struct drm_mode_get_plane_res)
+#define DRM_IOCTL_MODE_GETPLANE		DRM_IOWR(0xB6, struct drm_mode_get_plane)
+#define DRM_IOCTL_MODE_SETPLANE		DRM_IOWR(0xB7, struct drm_mode_set_plane)
+#define DRM_IOCTL_MODE_ADDFB2		DRM_IOWR(0xB8, struct drm_mode_fb_cmd2)
+#define DRM_IOCTL_MODE_OBJ_GETPROPERTIES	DRM_IOWR(0xB9, struct drm_mode_obj_get_properties)
+#define DRM_IOCTL_MODE_OBJ_SETPROPERTY	DRM_IOWR(0xBA, struct drm_mode_obj_set_property)
+#define DRM_IOCTL_MODE_CURSOR2		DRM_IOWR(0xBB, struct drm_mode_cursor2)
+#define DRM_IOCTL_MODE_ATOMIC		DRM_IOWR(0xBC, struct drm_mode_atomic)
+#define DRM_IOCTL_MODE_CREATEPROPBLOB	DRM_IOWR(0xBD, struct drm_mode_create_blob)
+#define DRM_IOCTL_MODE_DESTROYPROPBLOB	DRM_IOWR(0xBE, struct drm_mode_destroy_blob)
+
+#define DRM_IOCTL_SYNCOBJ_CREATE	DRM_IOWR(0xBF, struct drm_syncobj_create)
+#define DRM_IOCTL_SYNCOBJ_DESTROY	DRM_IOWR(0xC0, struct drm_syncobj_destroy)
+#define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD	DRM_IOWR(0xC1, struct drm_syncobj_handle)
+#define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE	DRM_IOWR(0xC2, struct drm_syncobj_handle)
+#define DRM_IOCTL_SYNCOBJ_WAIT		DRM_IOWR(0xC3, struct drm_syncobj_wait)
+#define DRM_IOCTL_SYNCOBJ_RESET		DRM_IOWR(0xC4, struct drm_syncobj_array)
+#define DRM_IOCTL_SYNCOBJ_SIGNAL	DRM_IOWR(0xC5, struct drm_syncobj_array)
+
+#define DRM_IOCTL_MODE_CREATE_LEASE	DRM_IOWR(0xC6, struct drm_mode_create_lease)
+#define DRM_IOCTL_MODE_LIST_LESSEES	DRM_IOWR(0xC7, struct drm_mode_list_lessees)
+#define DRM_IOCTL_MODE_GET_LEASE	DRM_IOWR(0xC8, struct drm_mode_get_lease)
+#define DRM_IOCTL_MODE_REVOKE_LEASE	DRM_IOWR(0xC9, struct drm_mode_revoke_lease)
+
+#define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT	DRM_IOWR(0xCA, struct drm_syncobj_timeline_wait)
+#define DRM_IOCTL_SYNCOBJ_QUERY		DRM_IOWR(0xCB, struct drm_syncobj_timeline_array)
+#define DRM_IOCTL_SYNCOBJ_TRANSFER	DRM_IOWR(0xCC, struct drm_syncobj_transfer)
+#define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL	DRM_IOWR(0xCD, struct drm_syncobj_timeline_array)
+
+/**
+ * DRM_IOCTL_MODE_GETFB2 - Get framebuffer metadata.
+ *
+ * This queries metadata about a framebuffer. User-space fills
+ * &drm_mode_fb_cmd2.fb_id as the input, and the kernel fills the rest of the
+ * struct as the output.
+ *
+ * If the client is DRM master or has &CAP_SYS_ADMIN, &drm_mode_fb_cmd2.handles
+ * will be filled with GEM buffer handles. Fresh new GEM handles are always
+ * returned, even if another GEM handle referring to the same memory object
+ * already exists on the DRM file description. The caller is responsible for
+ * removing the new handles, e.g. via the &DRM_IOCTL_GEM_CLOSE IOCTL. The same
+ * new handle will be returned for multiple planes in case they use the same
+ * memory object. Planes are valid until one has a zero handle -- this can be
+ * used to compute the number of planes.
+ *
+ * Otherwise, &drm_mode_fb_cmd2.handles will be zeroed and planes are valid
+ * until one has a zero &drm_mode_fb_cmd2.pitches.
+ *
+ * If the framebuffer has a format modifier, &DRM_MODE_FB_MODIFIERS will be set
+ * in &drm_mode_fb_cmd2.flags and &drm_mode_fb_cmd2.modifier will contain the
+ * modifier. Otherwise, user-space must ignore &drm_mode_fb_cmd2.modifier.
+ *
+ * To obtain DMA-BUF FDs for each plane without leaking GEM handles, user-space
+ * can export each handle via &DRM_IOCTL_PRIME_HANDLE_TO_FD, then immediately
+ * close each unique handle via &DRM_IOCTL_GEM_CLOSE, making sure to not
+ * double-close handles which are specified multiple times in the array.
+ */
+#define DRM_IOCTL_MODE_GETFB2		DRM_IOWR(0xCE, struct drm_mode_fb_cmd2)
+
+#define DRM_IOCTL_SYNCOBJ_EVENTFD	DRM_IOWR(0xCF, struct drm_syncobj_eventfd)
+
+/**
+ * DRM_IOCTL_MODE_CLOSEFB - Close a framebuffer.
+ *
+ * This closes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL
+ * argument is a framebuffer object ID.
+ *
+ * This IOCTL is similar to &DRM_IOCTL_MODE_RMFB, except it doesn't disable
+ * planes and CRTCs. As long as the framebuffer is used by a plane, it's kept
+ * alive. When the plane no longer uses the framebuffer (because the
+ * framebuffer is replaced with another one, or the plane is disabled), the
+ * framebuffer is cleaned up.
+ *
+ * This is useful to implement flicker-free transitions between two processes.
+ *
+ * Depending on the threat model, user-space may want to ensure that the
+ * framebuffer doesn't expose any sensitive user information: closed
+ * framebuffers attached to a plane can be read back by the next DRM master.
+ */
+#define DRM_IOCTL_MODE_CLOSEFB		DRM_IOWR(0xD0, struct drm_mode_closefb)
+
+/**
+ * DRM_IOCTL_SET_CLIENT_NAME - Attach a name to a drm_file
+ *
+ * Having a name allows for easier tracking and debugging.
+ * The length of the name (without the terminating null character) must be
+ * <= DRM_CLIENT_NAME_MAX_LEN.
+ * The call will fail if the name contains whitespace or non-printable
+ * characters.
+ */
+#define DRM_IOCTL_SET_CLIENT_NAME	DRM_IOWR(0xD1, struct drm_set_client_name)
+
+#define DRM_IOCTL_PRIME_CHANGE_GEM_HANDLE	DRM_IOWR(0xD2, struct drm_prime_change_gem_handle)
+
+/*
+ * Device specific ioctls should only be in their respective headers.
+ * The device specific ioctl range is from 0x40 to 0x9f.
+ * Generic IOCTLS restart at 0xA0.
+ *
+ * \sa drmCommandNone(), drmCommandRead(), drmCommandWrite(), and
+ * drmCommandReadWrite().
+ */
+#define DRM_COMMAND_BASE	0x40
+#define DRM_COMMAND_END		0xA0
+
+/**
+ * struct drm_event - Header for DRM events
+ * @type: event type.
+ * @length: total number of payload bytes (including header).
+ *
+ * This struct is a header for events written back to user-space on the DRM FD.
+ * A read on the DRM FD will always only return complete events: e.g. if the
+ * read buffer is 100 bytes large and there are two 64 byte events pending,
+ * only one will be returned.
+ *
+ * Event types 0 - 0x7fffffff are generic DRM events, 0x80000000 and
+ * up are chipset specific. Generic DRM events include &DRM_EVENT_VBLANK,
+ * &DRM_EVENT_FLIP_COMPLETE and &DRM_EVENT_CRTC_SEQUENCE.
+ */
+struct drm_event {
+	__u32 type;
+	__u32 length;
+};
+
+/**
+ * DRM_EVENT_VBLANK - vertical blanking event
+ *
+ * This event is sent in response to &DRM_IOCTL_WAIT_VBLANK with the
+ * &_DRM_VBLANK_EVENT flag set.
+ *
+ * The event payload is a struct drm_event_vblank.
+ */
+#define DRM_EVENT_VBLANK 0x01
+/**
+ * DRM_EVENT_FLIP_COMPLETE - page-flip completion event
+ *
+ * This event is sent in response to an atomic commit or legacy page-flip with
+ * the &DRM_MODE_PAGE_FLIP_EVENT flag set.
+ *
+ * The event payload is a struct drm_event_vblank.
+ */
+#define DRM_EVENT_FLIP_COMPLETE 0x02
+/**
+ * DRM_EVENT_CRTC_SEQUENCE - CRTC sequence event
+ *
+ * This event is sent in response to &DRM_IOCTL_CRTC_QUEUE_SEQUENCE.
+ *
+ * The event payload is a struct drm_event_crtc_sequence.
+ */
+#define DRM_EVENT_CRTC_SEQUENCE 0x03
+
+struct drm_event_vblank {
+	struct drm_event base;
+	__u64 user_data;
+	__u32 tv_sec;
+	__u32 tv_usec;
+	__u32 sequence;
+	__u32 crtc_id; /* 0 on older kernels that do not support this */
+};
+
+/* Event delivered at sequence. Time stamp marks when the first pixel
+ * of the refresh cycle leaves the display engine for the display
+ */
+struct drm_event_crtc_sequence {
+	struct drm_event base;
+	__u64 user_data;
+	__s64 time_ns;
+	__u64 sequence;
+};
+
+/* typedef area */
+#ifndef __KERNEL__
+typedef struct drm_clip_rect drm_clip_rect_t;
+typedef struct drm_drawable_info drm_drawable_info_t;
+typedef struct drm_tex_region drm_tex_region_t;
+typedef struct drm_hw_lock drm_hw_lock_t;
+typedef struct drm_version drm_version_t;
+typedef struct drm_unique drm_unique_t;
+typedef struct drm_list drm_list_t;
+typedef struct drm_block drm_block_t;
+typedef struct drm_control drm_control_t;
+typedef enum drm_map_type drm_map_type_t;
+typedef enum drm_map_flags drm_map_flags_t;
+typedef struct drm_ctx_priv_map drm_ctx_priv_map_t;
+typedef struct drm_map drm_map_t;
+typedef struct drm_client drm_client_t;
+typedef enum drm_stat_type drm_stat_type_t;
+typedef struct drm_stats drm_stats_t;
+typedef enum drm_lock_flags drm_lock_flags_t;
+typedef struct drm_lock drm_lock_t;
+typedef enum drm_dma_flags drm_dma_flags_t;
+typedef struct drm_buf_desc drm_buf_desc_t;
+typedef struct drm_buf_info drm_buf_info_t;
+typedef struct drm_buf_free drm_buf_free_t;
+typedef struct drm_buf_pub drm_buf_pub_t;
+typedef struct drm_buf_map drm_buf_map_t;
+typedef struct drm_dma drm_dma_t;
+typedef union drm_wait_vblank drm_wait_vblank_t;
+typedef struct drm_agp_mode drm_agp_mode_t;
+typedef enum drm_ctx_flags drm_ctx_flags_t;
+typedef struct drm_ctx drm_ctx_t;
+typedef struct drm_ctx_res drm_ctx_res_t;
+typedef struct drm_draw drm_draw_t;
+typedef struct drm_update_draw drm_update_draw_t;
+typedef struct drm_auth drm_auth_t;
+typedef struct drm_irq_busid drm_irq_busid_t;
+typedef enum drm_vblank_seq_type drm_vblank_seq_type_t;
+
+typedef struct drm_agp_buffer drm_agp_buffer_t;
+typedef struct drm_agp_binding drm_agp_binding_t;
+typedef struct drm_agp_info drm_agp_info_t;
+typedef struct drm_scatter_gather drm_scatter_gather_t;
+typedef struct drm_set_version drm_set_version_t;
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
-- 
2.34.1
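
The capability bits in the header above are queried through DRM_IOCTL_GET_CAP
one at a time. As a minimal illustration (not part of the patch; the
render-node path and the local "drm.h" include are assumptions), a PRIME
support probe might look like this:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include "drm.h"

int main(void)
{
	/* drm_get_cap is an in/out argument: set .capability, read .value. */
	struct drm_get_cap cap = { .capability = DRM_CAP_PRIME };
	int fd = open("/dev/dri/renderD128", O_RDWR | O_CLOEXEC);

	if (fd < 0 || ioctl(fd, DRM_IOCTL_GET_CAP, &cap))
		return 1;

	printf("PRIME import: %s, export: %s\n",
	       (cap.value & DRM_PRIME_CAP_IMPORT) ? "yes" : "no",
	       (cap.value & DRM_PRIME_CAP_EXPORT) ? "yes" : "no");
	return 0;
}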
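
The DRM_IOCTL_GEM_CLOSE and PRIME doc comments describe a subtle ownership
rule: importing a dmabuf on a DRM file description that already holds a
handle for the same memory object returns the existing handle rather than a
new one. A sketch of a same-description export/import round trip follows;
prime_round_trip is a hypothetical helper, drm_fd is assumed to be an open
DRM fd and handle a valid GEM handle, and error handling is reduced to -1:

#include <fcntl.h>
#include <sys/ioctl.h>
#include "drm.h"

static int prime_round_trip(int drm_fd, __u32 handle, int *dmabuf_fd)
{
	struct drm_prime_handle req = {
		.handle = handle,
		.flags = DRM_CLOEXEC | DRM_RDWR,
	};

	/* Export: the kernel fills .fd with a dmabuf file descriptor. */
	if (ioctl(drm_fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &req))
		return -1;
	*dmabuf_fd = req.fd;

	/* Import on the same file description: .flags is unused and the
	 * kernel returns the existing handle, so there is no extra handle
	 * to close here. An import on a different DRM fd would hand back
	 * a handle the caller owns and must drop via DRM_IOCTL_GEM_CLOSE. */
	req.flags = 0;
	if (ioctl(drm_fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &req))
		return -1;

	return req.handle == handle ? 0 : -1;
}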
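
Finally, struct drm_prime_change_gem_handle and
DRM_IOCTL_PRIME_CHANGE_GEM_HANDLE are the additions this patch makes to the
header. Judging from the field names, .handle names an existing buffer
object and .new_handle the value it should be moved to; on that assumption
only, a restorer could recreate a checkpoint-time handle roughly as follows.
restore_at_handle and wanted_handle are illustrative names; real restore
code would take both the dmabuf and the handle value from checkpoint images:

#include <sys/ioctl.h>
#include "drm.h"

static int restore_at_handle(int drm_fd, int dmabuf_fd, __u32 wanted_handle)
{
	struct drm_prime_handle prime = { .fd = dmabuf_fd };
	struct drm_prime_change_gem_handle change;

	/* Import the dmabuf; the kernel picks (or reuses) a handle. */
	if (ioctl(drm_fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &prime))
		return -1;
	if (prime.handle == wanted_handle)
		return 0;

	/* Renumber the object to the handle recorded at dump time. */
	change.handle = prime.handle;
	change.new_handle = wanted_handle;
	return ioctl(drm_fd, DRM_IOCTL_PRIME_CHANGE_GEM_HANDLE, &change);
}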