[Devel] [PATCH] c/r: support ipc shm regions which are partially mapped
Oren Laadan
orenl at cs.columbia.edu
Fri Aug 6 14:18:52 PDT 2010
UNTESTED....
Because do_shmat() has limited interface of all-or-nothing mapping,
previous we did not support VMAs that reflect partiaul mapping to a
shared memory from IPC. For example, if a task calls shmat() and
then munmap() part of that mapping.
Now that Dan Smith hit an application that actually does that, it's
time to fix the situation:
Introduce do_shmat_pgoff() that accepts also @shmsize and @shmpgoff.
If these two are 0, then it behaves like do_shmat(). Otherwise, it
attempts to map only the relevant region within the shared memory
segment.
Signed-off-by: Oren Laadan <orenl at cs.columbia.edu>
---
include/linux/shm.h | 3 +++
ipc/shm.c | 50 ++++++++++++++++++++++++++++++++------------------
2 files changed, 35 insertions(+), 18 deletions(-)
diff --git a/include/linux/shm.h b/include/linux/shm.h
index 94ac1a7..1f6af8c 100644
--- a/include/linux/shm.h
+++ b/include/linux/shm.h
@@ -105,6 +105,9 @@ struct shmid_kernel /* private to the kernel */
#ifdef CONFIG_SYSVIPC
long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr);
+long do_shmat_pgoff(int shmid, char __user *shmaddr,
+ int shmflg, unsigned long *addr,
+ unsigned long shmsize, unsigned long shmpgoff);
extern int is_file_shm_hugepages(struct file *file);
#else
static inline long do_shmat(int shmid, char __user *shmaddr,
diff --git a/ipc/shm.c b/ipc/shm.c
index 5a5dfb5..5336dc9 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -323,7 +323,6 @@ int ipcshm_restore(struct ckpt_ctx *ctx, struct mm_struct *mm,
{
struct file *file;
int shmid, shmflg = 0;
- mm_segment_t old_fs;
unsigned long start;
unsigned long addr;
int ret;
@@ -341,22 +340,14 @@ int ipcshm_restore(struct ckpt_ctx *ctx, struct mm_struct *mm,
if (!(h->vm_flags & VM_WRITE))
shmflg |= SHM_RDONLY;
- /*
- * FIX: do_shmat() has limited interface: all-or-nothing
- * mapping. If the vma, however, reflects a partial mapping
- * then we need to modify that function to accomplish the
- * desired outcome. Partial mapping can exist due to the user
- * call shmat() and then unmapping part of the region.
- * Currently, we at least detect this and call it a foul play.
- */
- if (((h->vm_end - h->vm_start) != h->ino_size) || h->vm_pgoff)
- return -ENOSYS;
+ /* just another sanity check; rest is done by do_shmat */
+ if (h->ino_size != i_size_read(file->f_dentry->d_inode))
+ return -EINVAL;
- old_fs = get_fs();
- set_fs(get_ds());
- start = h->vm_start;
- ret = do_shmat(shmid, (char __user *) start, shmflg, &addr);
- set_fs(old_fs);
+ /* do_shmat() below handles partial and offset mapping */
+ start = (unsigned long) h->vm_start;
+ ret = do_shmat_pgoff(shmid, (char __user *) start, shmflg,
+ &addr, h->vm_end - h->vm_start, h->vm_pgoff);
BUG_ON(ret >= 0 && addr != h->vm_start);
return ret;
@@ -887,11 +878,13 @@ out:
* "raddr" thing points to kernel space, and there has to be a wrapper around
* this.
*/
-long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
+long do_shmat_pgoff(int shmid, char __user *shmaddr, int shmflg,
+ ulong *raddr, ulong shmsize, ulong shmpgoff)
{
struct shmid_kernel *shp;
unsigned long addr;
unsigned long size;
+ unsigned long pgoff;
struct file * file;
int err;
unsigned long flags;
@@ -963,6 +956,17 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
size = i_size_read(path.dentry->d_inode);
shm_unlock(shp);
+ pgoff = 0;
+
+ err = -EINVAL;
+ if (shmsize) {
+ if (shmpgoff + shmsize > size ||
+ shmpgoff + shmsize < shmpgoff)
+ goto out_put_dentry;
+ size = shmsize;
+ pgoff = shmpgoff;
+ }
+
err = -ENOMEM;
sfd = kzalloc(sizeof(*sfd), GFP_KERNEL);
if (!sfd)
@@ -996,7 +1000,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
goto invalid;
}
- user_addr = do_mmap (file, addr, size, prot, flags, 0);
+ user_addr = do_mmap (file, addr, size, prot, flags, pgoff);
*raddr = user_addr;
err = 0;
if (IS_ERR_VALUE(user_addr))
@@ -1032,6 +1036,16 @@ out_put_dentry:
goto out_nattch;
}
+/*
+ * NOTE! Despite the name, this is NOT a direct system call entrypoint. The
+ * "raddr" thing points to kernel space, and there has to be a wrapper around
+ * this.
+ */
+long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
+{
+ return do_shmat_pgoff(shmid, shmaddr, shmflg, raddr, 0, 0);
+}
+
SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg)
{
unsigned long ret;
--
1.7.0.4
_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
More information about the Devel
mailing list