[Devel] [RFC][PATCH] clone_with_pids()^w eclone() for x86_64
Dave Hansen
dave at linux.vnet.ibm.com
Wed Nov 18 16:48:38 PST 2009
This is still a bit rough, but I figured I'd post it for kicks.
Most of the process.c stuff is copy-n-paste with i386 and needs
to get consolidated. I also need to give this the new name.
I'd appreciate it if anybody who knows inline assembly well could
check that I'm not being a complete doofus with the call below.
It seems to work, but I'm not confident it is the best way to do it.
/*
 * Syscall number registered by the accompanying kernel patch
 * (arch/x86/include/asm/unistd_64.h).  Installed userspace headers will
 * normally not know it yet, so provide it here when it is missing.
 */
#ifndef __NR_clone_with_pids
#define __NR_clone_with_pids 299
#endif

/*
 * clone_with_pids - raw x86_64 wrapper around the clone_with_pids syscall.
 *
 * @flags_low:  low 32 bits of the clone flags            (passed in rdi)
 * @clone_args: user-space argument block for the kernel  (passed in rsi)
 * @args_size:  size of *clone_args in bytes              (passed in rdx)
 * @pids:       array of requested pids                   (passed in r10)
 *
 * Parent: returns the value the kernel returned (the new task's pid), or
 * -1 with errno set when the kernel returned a negative error code.
 *
 * Child: never returns to the caller.  It pops one word from the top of
 * its stack, calls it as a function, and then issues exit() with that
 * function's return value as the status.
 * NOTE(review): this assumes the caller stored the entry function's
 * address at the top of the child stack described by *clone_args --
 * confirm against the test program that drives this wrapper.
 */
int clone_with_pids(long flags_low, struct clone_args *clone_args, long args_size,
		int *pids)
{
	long retval;
	/*
	 * The 4th syscall argument lives in r10 because the `syscall`
	 * instruction itself overwrites rcx.  Binding a local register
	 * variable lets the compiler load r10, instead of a hand-written
	 * movq in a separate asm statement that the compiler knows nothing
	 * about.
	 */
	register int *pids_reg __asm__("r10") = pids;

	/*
	 * Everything is in ONE asm statement: the argument registers are
	 * operands, so the compiler cannot schedule code between loading
	 * them and executing `syscall`.  No push/pop of rbp is needed:
	 * the kernel preserves every register except rax, rcx and r11, and
	 * pushing from inline asm could overwrite red-zone locals.
	 */
	__asm__ __volatile__(
		"syscall\n\t"			/* Linux/x86_64 system call */
		"testq %[ret],%[ret]\n\t"	/* check return value */
		"jne 1f\n\t"			/* non-zero: parent (or error) */
		"popq %%rbx\n\t"		/* child: get subthread function */
		"call *%%rbx\n\t"		/* child: run subthread function */
		"movq %%rax,%%rdi\n\t"		/* its return value is the exit status */
		"movq %[nr_exit],%[ret]\n\t"
		"syscall\n"			/* exit system call: exit subthread */
		"1:\n\t"
		: [ret] "=a" (retval)
		: "0" ((long)__NR_clone_with_pids),
		  "D" (flags_low),		/* rdi */
		  "S" (clone_args),		/* rsi */
		  "d" (args_size),		/* rdx */
		  "r" (pids_reg),		/* r10 */
		  [nr_exit] "i" (__NR_exit)
		/*
		 * `syscall` destroys rcx and r11; the kernel reads *clone_args
		 * and *pids and may write the tid pointers, hence "memory".
		 * Registers touched only on the child path (rbx, rdi, ...) are
		 * not listed because the child never returns to C code.
		 */
		: "rcx", "r11", "cc", "memory"
	);

	/* The kernel reports failure as a negative errno value. */
	if (retval < 0) {
		errno = (int)-retval;
		retval = -1;
	}
	return (int)retval;
}
---
linux-2.6.git-dave/arch/x86/include/asm/syscalls.h | 5 ++
linux-2.6.git-dave/arch/x86/include/asm/unistd_64.h | 2
linux-2.6.git-dave/arch/x86/kernel/entry_64.S | 8 +++
linux-2.6.git-dave/arch/x86/kernel/process_64.c | 49 ++++++++++++++++++++
linux-2.6.git-dave/kernel/fork.c | 18 +++++++
5 files changed, 82 insertions(+)
diff -puN arch/x86/include/asm/syscalls.h~cwp-x86_64 arch/x86/include/asm/syscalls.h
--- linux-2.6.git/arch/x86/include/asm/syscalls.h~cwp-x86_64 2009-11-18 16:37:09.000000000 -0800
+++ linux-2.6.git-dave/arch/x86/include/asm/syscalls.h 2009-11-18 16:37:09.000000000 -0800
@@ -78,6 +78,11 @@ asmlinkage long sys_iopl(unsigned int, s
asmlinkage long sys_clone(unsigned long, unsigned long,
void __user *, void __user *,
struct pt_regs *);
+asmlinkage long sys_clone_with_pids(unsigned int flags_low,
+ struct clone_args * __user cargs,
+ int cargs_size,
+ pid_t * __user pids,
+ struct pt_regs *pt_regs);
asmlinkage long sys_execve(char __user *, char __user * __user *,
char __user * __user *,
struct pt_regs *);
diff -puN arch/x86/include/asm/unistd_64.h~cwp-x86_64 arch/x86/include/asm/unistd_64.h
--- linux-2.6.git/arch/x86/include/asm/unistd_64.h~cwp-x86_64 2009-11-18 16:37:09.000000000 -0800
+++ linux-2.6.git-dave/arch/x86/include/asm/unistd_64.h 2009-11-18 16:37:09.000000000 -0800
@@ -661,6 +661,8 @@ __SYSCALL(__NR_pwritev, sys_pwritev)
__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
#define __NR_perf_counter_open 298
__SYSCALL(__NR_perf_counter_open, sys_perf_counter_open)
+#define __NR_clone_with_pids 299
+__SYSCALL(__NR_clone_with_pids, stub_clone_with_pids)
#ifndef __NO_STUBS
#define __ARCH_WANT_OLD_READDIR
diff -puN arch/x86/kernel/entry_64.S~cwp-x86_64 arch/x86/kernel/entry_64.S
--- linux-2.6.git/arch/x86/kernel/entry_64.S~cwp-x86_64 2009-11-18 16:37:09.000000000 -0800
+++ linux-2.6.git-dave/arch/x86/kernel/entry_64.S 2009-11-18 16:37:09.000000000 -0800
@@ -684,6 +684,13 @@ END(system_call)
/*
* Certain special system calls that need to save a complete full stack frame.
+ *
+ * 'arg' should be the register that pt_regs will show up in when
+ * 'func' is called. Using normal calling conventions, this is:
+ *
+ * func(%rdi, %rsi, %rdx, %rcx, %r8, %r9)
+ *
+ * So, if you want pt_regs as the third argument, use %rdx.
*/
.macro PTREGSCALL label,func,arg
ENTRY(\label)
@@ -704,6 +711,7 @@ END(\label)
PTREGSCALL stub_vfork, sys_vfork, %rdi
PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
PTREGSCALL stub_iopl, sys_iopl, %rsi
+ PTREGSCALL stub_clone_with_pids, sys_clone_with_pids, %r8
ENTRY(ptregscall_common)
DEFAULT_FRAME 1 8 /* offset 8: return address */
diff -puN arch/x86/kernel/process_64.c~cwp-x86_64 arch/x86/kernel/process_64.c
--- linux-2.6.git/arch/x86/kernel/process_64.c~cwp-x86_64 2009-11-18 16:37:09.000000000 -0800
+++ linux-2.6.git-dave/arch/x86/kernel/process_64.c 2009-11-18 16:37:09.000000000 -0800
@@ -534,6 +534,55 @@ sys_clone(unsigned long clone_flags, uns
return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}
+asmlinkage long
+sys_clone_with_pids(unsigned int flags_low, struct clone_args * __user cargs,
+ int args_size, pid_t * __user pids, struct pt_regs *pt_regs)
+{
+ int rc;
+ struct clone_args kca;
+ unsigned long flags;
+ int __user *parent_tid_ptr;
+ int __user *child_tid_ptr;
+ unsigned long __user child_stack;
+ unsigned long stack_size;
+
+ printk("%s() 0\n", __func__);
+ rc = fetch_clone_args_from_user(cargs, args_size, &kca);
+ if (rc) {
+ printk("%s() 1\n", __func__);
+ return rc;
+ }
+
+ /*
+ * TODO: Convert 'clone-flags' to 64-bits on all architectures.
+ * TODO: When ->clone_flags_high is non-zero, copy it in to the
+ * higher word(s) of 'flags':
+ *
+ * flags = (kca.clone_flags_high << 32) | flags_low;
+ */
+ printk("%s() 2\n", __func__);
+ flags = flags_low;
+ parent_tid_ptr = (int *)kca.parent_tid_ptr;
+ child_tid_ptr = (int *)kca.child_tid_ptr;
+
+ printk("%s() 3\n", __func__);
+ stack_size = (unsigned long)kca.child_stack_size;
+ child_stack = (unsigned long)kca.child_stack_base + stack_size;
+
+ printk("%s() 4\n", __func__);
+ if (!child_stack)
+ child_stack = pt_regs->sp;
+ printk("%s() 5\n", __func__);
+
+ /*
+ * TODO: On 32-bit systems, clone_flags is passed in as 32-bit value
+ * to several functions. Need to convert clone_flags to 64-bit.
+ */
+ return do_fork_with_pids(flags, child_stack, pt_regs, stack_size,
+ parent_tid_ptr, child_tid_ptr, kca.nr_pids,
+ pids);
+}
+
unsigned long get_wchan(struct task_struct *p)
{
unsigned long stack;
diff -puN kernel/fork.c~cwp-x86_64 kernel/fork.c
--- linux-2.6.git/kernel/fork.c~cwp-x86_64 2009-11-18 16:37:09.000000000 -0800
+++ linux-2.6.git-dave/kernel/fork.c 2009-11-18 16:37:09.000000000 -0800
@@ -1359,8 +1359,10 @@ static pid_t *copy_target_pids(int unum_
if (!unum_pids)
return NULL;
+ printk("%s(%d, %p) 0\n", __func__, unum_pids, upids);
knum_pids = task_pid(current)->level + 1;
+ printk("%s(%d, %p) knum_pids: %d\n", __func__, unum_pids, upids, knum_pids);
if (unum_pids > knum_pids)
return ERR_PTR(-EINVAL);
@@ -1407,6 +1409,7 @@ static pid_t *copy_target_pids(int unum_
size = unum_pids * sizeof(pid_t);
rc = copy_from_user(&target_pids[j], upids, size);
+ printk("%s() copy(%p, %p, %d) rc: %d\n", __func__, &target_pids[j], upids, size, rc);
if (rc) {
rc = -EFAULT;
goto out_free;
@@ -1467,6 +1470,8 @@ long do_fork_with_pids(unsigned long clo
long nr;
pid_t *target_pids;
+ if (upids)
+ printk("%s() 0\n", __func__);
/*
* Do some preliminary argument and permissions checking before we
* actually start allocating stuff
@@ -1482,6 +1487,8 @@ long do_fork_with_pids(unsigned long clo
return -EPERM;
}
+ if (upids)
+ printk("%s() 1\n", __func__);
/*
* We hope to recycle these flags after 2.6.26
*/
@@ -1501,6 +1508,7 @@ long do_fork_with_pids(unsigned long clo
target_pids = copy_target_pids(num_pids, upids);
if (target_pids) {
+ printk("%s() 1a\n", __func__);
if (IS_ERR(target_pids))
return PTR_ERR(target_pids);
@@ -1509,6 +1517,8 @@ long do_fork_with_pids(unsigned long clo
goto out_free;
}
+ if (upids)
+ printk("%s() 2\n", __func__);
/*
* When called from kernel_thread, don't do user tracing stuff.
*/
@@ -1517,12 +1527,16 @@ long do_fork_with_pids(unsigned long clo
p = copy_process(clone_flags, stack_start, regs, stack_size,
child_tidptr, NULL, target_pids, trace);
+ if (upids)
+ printk("%s() 3\n", __func__);
/*
* Do this prior waking up the new thread - the thread pointer
* might get invalid after that point, if the thread exits quickly.
*/
if (!IS_ERR(p)) {
struct completion vfork;
+ if (upids)
+ printk("%s() 4\n", __func__);
trace_sched_process_fork(current, p);
@@ -1571,9 +1585,13 @@ long do_fork_with_pids(unsigned long clo
nr = PTR_ERR(p);
}
+ if (upids)
+ printk("%s() 5\n", __func__);
out_free:
kfree(target_pids);
+ if (upids)
+ printk("%s() 6\n", __func__);
return nr;
}
_
_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
More information about the Devel
mailing list