[Devel] Re: [PATCH 8/8] checkpoint/restart of SysV SHM_HUGETLB regions
Oren Laadan
orenl at cs.columbia.edu
Thu Sep 16 17:40:35 PDT 2010
On 09/14/2010 04:02 PM, Nathan Lynch wrote:
> Large page-backed shm regions require special handling, especially
> during restart. The association of a large page with a shm region's
> inode can occur only in the context of a process causing a fault with
> the region mapped into its mm. In order to restore that association,
> temporarily shmat-attach the restored SHM_HUGETLB region to the
> restarting process's mm, using the just-restored ipc namespace
> instead of the current one (the nsproxy switch hasn't occurred yet).
>
> Since the temporary shmat of the region during restart causes some of
> the shm attributes to be updated, re-restore them from the ipc_shm
> checkpoint header after unmapping.
Would it work to just move the original call to load_ipc_shm_hdr()
further down in restore_ipc_shm(), especially since the mutex is
no longer needed? That way you wouldn't need to re-restore the attributes.
I'm not too familiar with HUGETLB code otherwise, so hoping that
others review those parts while I find time to study it ...
Thanks,
Oren.
>
> Signed-off-by: Nathan Lynch <ntl at pobox.com>
> ---
> ipc/checkpoint_shm.c | 154 ++++++++++++++++++++++++++++++++++++++++++++++----
> 1 files changed, 142 insertions(+), 12 deletions(-)
>
> diff --git a/ipc/checkpoint_shm.c b/ipc/checkpoint_shm.c
> index 69ba35a..7f9d701 100644
> --- a/ipc/checkpoint_shm.c
> +++ b/ipc/checkpoint_shm.c
> @@ -32,6 +32,69 @@
> * ipc checkpoint
> */
>
> +#define CKPT_HDR_HPAGE_LAST ~(0UL)
> +static bool ckpt_hdr_hpage_last(const struct ckpt_hdr_hpage *hdr)
> +{
> + return hdr->index == CKPT_HDR_HPAGE_LAST;
> +}
> +
> +static void ckpt_hdr_hpage_init(struct ckpt_hdr_hpage *hdr, unsigned long shift)
> +{
> + hdr->h.type = CKPT_HDR_HPAGE;
> + hdr->h.len = sizeof(struct ckpt_hdr_hpage);
> + hdr->shift = shift;
> + hdr->index = 0; /* to be filled in by user */
> +}
> +
> +static int shm_hugetlb_checkpoint_contents(struct ckpt_ctx *ctx, struct file *filp)
> +{
> + struct hstate *h = hstate_file(filp);
> + struct address_space *mapping = filp->f_mapping;
> + struct inode *inode = mapping->host;
> + struct ckpt_hdr_hpage hdr;
> + unsigned long end_index;
> + unsigned long index;
> + ssize_t retval = 0;
> + loff_t isize;
> +
> + isize = i_size_read(inode);
> + if (isize == 0)
> + goto out;
> +
> + end_index = (isize - 1) >> huge_page_shift(h);
> +
> + ckpt_hdr_hpage_init(&hdr, huge_page_shift(h));
> +
> + for (index = 0; index < end_index + 1; index++) {
> + struct page *page;
> +
> + page = find_get_page(mapping, index);
> +
> + /* skip holes */
> + if (!page)
> + continue;
> +
> + hdr.index = index;
> +
> + retval = ckpt_write_obj(ctx, &hdr.h);
> + if (retval < 0)
> + goto release;
> +
> + retval = hugetlb_checkpoint_page(ctx, page);
> +release:
> + page_cache_release(page);
> + if (retval < 0)
> + break;
> + }
> +
> + if (retval < 0)
> + goto out;
> + hdr.index = CKPT_HDR_HPAGE_LAST;
> + retval = ckpt_write_obj(ctx, &hdr.h);
> +out:
> + return retval;
> +}
> +
> /* called with the msgids->rw_mutex is read-held */
> static int fill_ipc_shm_hdr(struct ckpt_ctx *ctx,
> struct ckpt_hdr_ipc_shm *h,
> @@ -59,10 +122,8 @@ static int fill_ipc_shm_hdr(struct ckpt_ctx *ctx,
>
> h->flags = 0;
>
> - /* check if shm was setup with SHM_HUGETLB (unsupported yet) */
> if (is_file_hugepages(shp->shm_file)) {
> - pr_warning("c/r: unsupported SHM_HUGETLB\n");
> - ret = -ENOSYS;
> + h->flags |= SHM_HUGETLB;
> } else {
> struct shmem_inode_info *info;
>
> @@ -117,7 +178,10 @@ int checkpoint_ipc_shm(int id, void *p, void *data)
> if (ret < 0)
> goto out;
>
> - ret = checkpoint_memory_contents(ctx, NULL, inode);
> + if (is_file_hugepages(shp->shm_file))
> + ret = shm_hugetlb_checkpoint_contents(ctx, shp->shm_file);
> + else
> + ret = checkpoint_memory_contents(ctx, NULL, inode);
> out:
> ckpt_hdr_put(ctx, h);
> return ret;
> @@ -149,6 +213,75 @@ struct dq_ipcshm_del {
> int id;
> };
>
> +static void __load_ipc_shm_hdr(const struct ckpt_hdr_ipc_shm *h, struct shmid_kernel *shp)
> +{
> + shp->shm_atim = h->shm_atim;
> + shp->shm_dtim = h->shm_dtim;
> + shp->shm_ctim = h->shm_ctim;
> + shp->shm_cprid = h->shm_cprid;
> + shp->shm_lprid = h->shm_lprid;
> +}
> +
> +static int shm_hugetlb_restore_contents(struct ckpt_ctx *ctx, struct ipc_namespace *ipcns, struct shmid_kernel *shp, const struct ckpt_hdr_ipc_shm *hdr)
> +{
> + unsigned long start;
> + int ret;
> +
> + ret = do_shmat_ns_pgoff(ipcns, shp->shm_perm.id, (char __user *)0,
> + 0, &start, 0, 0);
> + if (ret != 0)
> + return ret;
> +
> + ckpt_debug("temporarily using %#lx for huge shm restore\n", start);
> +
> + while (1) {
> + struct ckpt_hdr_hpage *hdr;
> + unsigned long hpagesize;
> + unsigned long index;
> + unsigned long addr;
> + struct page *page;
> + bool last;
> +
> + hdr = ckpt_read_obj_type(ctx, sizeof(*hdr), CKPT_HDR_HPAGE);
> + if (IS_ERR(hdr)) {
> + ret = PTR_ERR(hdr);
> + break;
> + }
> +
> + last = ckpt_hdr_hpage_last(hdr);
> + index = (unsigned long)hdr->index;
> + hpagesize = 1UL << hdr->shift;
> +
> + ckpt_hdr_put(ctx, hdr);
> +
> + if (last)
> + break;
> +
> + addr = start + (hpagesize * index);
> +
> + down_read(&current->mm->mmap_sem);
> + ret = get_user_pages(current, current->mm, addr, 1, 1, 1,
> + &page, NULL);
> + up_read(&current->mm->mmap_sem);
> +
> + if (ret < 0)
> + break;
> +
> + ret = hugetlb_restore_page(ctx, page);
> +
> + page_cache_release(page);
> +
> + if (ret < 0)
> + break;
> + }
> +
> + sys_shmdt((void __user *)start);
> +
> + __load_ipc_shm_hdr(hdr, shp);
> +
> + return ret;
> +}
> +
> static int _ipc_shm_delete(struct ipc_namespace *ns, int id)
> {
> mm_segment_t old_fs;
> @@ -190,11 +323,7 @@ static int load_ipc_shm_hdr(struct ckpt_ctx *ctx,
> if (h->shm_cprid < 0 || h->shm_lprid < 0)
> return -EINVAL;
>
> - shp->shm_atim = h->shm_atim;
> - shp->shm_dtim = h->shm_dtim;
> - shp->shm_ctim = h->shm_ctim;
> - shp->shm_cprid = h->shm_cprid;
> - shp->shm_lprid = h->shm_lprid;
> + __load_ipc_shm_hdr(h, shp);
>
> return 0;
> }
> @@ -224,8 +353,6 @@ int restore_ipc_shm(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
> ret = -ENOSYS;
> if (h->mlock_uid != (unsigned int) -1) /* FIXME: support SHM_LOCK */
> goto out;
> - if (h->flags & SHM_HUGETLB) /* FIXME: support SHM_HUGETLB */
> - goto out;
>
> shmflag = h->flags | h->perms.mode | IPC_CREAT | IPC_EXCL;
> ckpt_debug("shm: do_shmget size %lld flag %#x id %d\n",
> @@ -294,7 +421,10 @@ int restore_ipc_shm(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
> ret = ckpt_obj_insert(ctx, file, h->objref, CKPT_OBJ_FILE);
> if (ret < 0)
> goto fput;
> - ret = restore_memory_contents(ctx, file->f_dentry->d_inode);
> + if (is_file_hugepages(file))
> + ret = shm_hugetlb_restore_contents(ctx, ns, shp, h);
> + else
> + ret = restore_memory_contents(ctx, file->f_dentry->d_inode);
> fput:
> fput(file);
>
_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
More information about the Devel
mailing list