[Devel] Re: [PATCH] An attempt to have an unlimitedly extendable sys_clone
Cedric Le Goater
clg at fr.ibm.com
Tue Jan 15 06:48:13 PST 2008
Cedric Le Goater wrote:
> Pavel Emelyanov wrote:
>> We have one bit in the clone_flags left, so we won't be
>> able to create more namespaces after we make it busy.
>> Besides, for checkpoint/restart jobs we might want to
>> create tasks with pre-defined pids (virtual of course).
>> What else might be required from clone() - nobody knows.
>>
>> This is an attempt to create a extendable API for clone.
>>
>> I use the last bit in the clone_flags for CLONE_NEWCLONE.
>> When set it will denote that the child_tidptr is not a
>> pointer on the tid storage, but the pointer on the struct
>> long_clone_struct which currently looks like this:
>>
>> struct long_clone_arg {
>> int size;
>> };
>>
>> When we want to add a new argument for clone we just put
>> it *at the end of this structure* and adjust the size.
>> The binary compatibility with older long_clone_arg-s is
>> facilitated with the clone_arg_has() macro.
>
> hmm, I wonder how lkml@ will react to this. do we have
> similar apis in the kernel ?
>
>> Sure, we lose the ability to clone tasks with extended
>> argument and the CLONE_CHILD_SETTID/CLEARTID, but do we
>> really need this?
>
> not in the extended clone flag version. I think.
>
>> The same thing is about to be done for unshare - we can
>> add the second argument for it and iff the CLONE_NEWCLONE
>> is specified - try to use it. Binary compatibility with
>> the old ushare will be kept.
>>
>> The new argument is pulled up to the create_new_namespaces
>> so that later we can easily use it w/o sending additional
>> patches.
>>
>> This is a final, but a pre-review patch for sys_clone()
>> that I plan to send to Andrew before we go on developing
>> new namespaces.
>>
>> Made against 2.6.24-rc5-mm1.
>
> The patch looks good and I compiled it and booted on x64 and
> x86_64.
>
> I think we should add the unshare support before sending to
> andrew and also add an extended flag array to show how it will
> be used. I have a mq_namespace patchset pending we could use
> for that and send all together ?
Here's the unshare part if you want to fold that with your patch.
Thanks,
C.
Signed-off-by: Cedric Le Goater <clg at fr.ibm.com>
---
include/linux/nsproxy.h | 2 +-
include/linux/syscalls.h | 2 +-
kernel/fork.c | 19 +++++++++++++++----
kernel/nsproxy.c | 7 ++++---
4 files changed, 21 insertions(+), 9 deletions(-)
Index: 2.6.24-rc5-mm1/include/linux/nsproxy.h
===================================================================
--- 2.6.24-rc5-mm1.orig/include/linux/nsproxy.h
+++ 2.6.24-rc5-mm1/include/linux/nsproxy.h
@@ -68,7 +68,7 @@ void exit_task_namespaces(struct task_st
void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new);
void free_nsproxy(struct nsproxy *ns);
int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **,
- struct fs_struct *);
+ struct fs_struct *, struct long_clone_arg *carg);
static inline void put_nsproxy(struct nsproxy *ns)
{
Index: 2.6.24-rc5-mm1/include/linux/syscalls.h
===================================================================
--- 2.6.24-rc5-mm1.orig/include/linux/syscalls.h
+++ 2.6.24-rc5-mm1/include/linux/syscalls.h
@@ -585,7 +585,7 @@ asmlinkage long compat_sys_newfstatat(un
int flag);
asmlinkage long compat_sys_openat(unsigned int dfd, const char __user *filename,
int flags, int mode);
-asmlinkage long sys_unshare(unsigned long unshare_flags);
+asmlinkage long sys_unshare(unsigned long unshare_flags, int __user *flagptr);
asmlinkage long sys_splice(int fd_in, loff_t __user *off_in,
int fd_out, loff_t __user *off_out,
Index: 2.6.24-rc5-mm1/kernel/fork.c
===================================================================
--- 2.6.24-rc5-mm1.orig/kernel/fork.c
+++ 2.6.24-rc5-mm1/kernel/fork.c
@@ -1700,7 +1700,7 @@ static int unshare_semundo(unsigned long
* constructed. Here we are modifying the current, active,
* task_struct.
*/
-asmlinkage long sys_unshare(unsigned long unshare_flags)
+asmlinkage long sys_unshare(unsigned long unshare_flags, int __user *flagptr)
{
int err = 0;
struct fs_struct *fs, *new_fs = NULL;
@@ -1709,6 +1709,7 @@ asmlinkage long sys_unshare(unsigned lon
struct files_struct *fd, *new_fd = NULL;
struct sem_undo_list *new_ulist = NULL;
struct nsproxy *new_nsproxy = NULL;
+ struct long_clone_arg *carg = NULL;
check_unshare_flags(&unshare_flags);
@@ -1717,11 +1718,19 @@ asmlinkage long sys_unshare(unsigned lon
if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER|
- CLONE_NEWNET))
+ CLONE_NEWNET|CLONE_NEWCLONE))
goto bad_unshare_out;
+ if (unshare_flags & CLONE_NEWCLONE) {
+ carg = get_long_clone_arg(flagptr);
+ if (IS_ERR(carg)) {
+ err = PTR_ERR(carg);
+ goto bad_unshare_out;
+ }
+ }
+
if ((err = unshare_thread(unshare_flags)))
- goto bad_unshare_out;
+ goto bad_unshare_cleanup_carg;
if ((err = unshare_fs(unshare_flags, &new_fs)))
goto bad_unshare_cleanup_thread;
if ((err = unshare_sighand(unshare_flags, &new_sigh)))
@@ -1733,7 +1742,7 @@ asmlinkage long sys_unshare(unsigned lon
if ((err = unshare_semundo(unshare_flags, &new_ulist)))
goto bad_unshare_cleanup_fd;
if ((err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy,
- new_fs)))
+ new_fs, carg)))
goto bad_unshare_cleanup_semundo;
if (new_fs || new_mm || new_fd || new_ulist || new_nsproxy) {
@@ -1791,6 +1800,8 @@ bad_unshare_cleanup_fs:
put_fs_struct(new_fs);
bad_unshare_cleanup_thread:
+bad_unshare_cleanup_carg:
+ kfree(carg);
bad_unshare_out:
return err;
}
Index: 2.6.24-rc5-mm1/kernel/nsproxy.c
===================================================================
--- 2.6.24-rc5-mm1.orig/kernel/nsproxy.c
+++ 2.6.24-rc5-mm1/kernel/nsproxy.c
@@ -182,19 +182,20 @@ void free_nsproxy(struct nsproxy *ns)
* On success, returns the new nsproxy.
*/
int unshare_nsproxy_namespaces(unsigned long unshare_flags,
- struct nsproxy **new_nsp, struct fs_struct *new_fs)
+ struct nsproxy **new_nsp, struct fs_struct *new_fs,
+ struct long_clone_arg *carg)
{
int err = 0;
if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
- CLONE_NEWUSER | CLONE_NEWNET)))
+ CLONE_NEWUSER | CLONE_NEWNET | CLONE_NEWCLONE)))
return 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
*new_nsp = create_new_namespaces(unshare_flags, current,
- new_fs ? new_fs : current->fs, NULL);
+ new_fs ? new_fs : current->fs, carg);
if (IS_ERR(*new_nsp)) {
err = PTR_ERR(*new_nsp);
goto out;
_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
More information about the Devel
mailing list