[Devel] [patch 1/1] net namespace : veth management interface
Daniel Lezcano
dlezcano at fr.ibm.com
Mon Feb 19 05:37:45 PST 2007
From: Daniel Lezcano <dlezcano at fr.ibm.com>
The veth module has been modified to be managed from userspace via ioctl.
The temporary /proc/veth_ctl interface has been removed.
Refcounting has been added on the module.
The module registers a misc device to expose the ioctl interface.
The MAC address is now assigned with ifconfig <interface> hw ether <hwaddr> in
both the child and the parent namespaces.
Usage:
. load veth module
. retrieve the veth minor number from /proc/misc
. mknod /dev/net/veth c 10 <minor>
. unshare with bind_ns in order to assign an identifier
. from the parent namespace, use the vethctl program below to add/delete a veth pair:
. (add) vethctl -I <nsid> -v <parent_ifname> -i <child_ifname> -a
. (delete) vethctl -v <parent_ifname> -d
. assign mac address in the child namespace
. assign mac address in the parent namespace
vethctl.c:
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <net/if.h>
/*
 * ioctl interface of the veth control device.
 *
 * These definitions duplicate include/linux/veth.h from the kernel patch
 * and must be kept identical to it, otherwise the command numbers and the
 * argument layout seen by the driver will not match.
 *
 * NOTE(review): the third _IOW() argument is a pointer type, so the size
 * encoded in the command number is presumably that of a pointer rather
 * than of the structure - confirm before changing either side.
 */
#define VETH_IOC_MAGIC 0x1234
#define VETH_IOC_ADD _IOW(VETH_IOC_MAGIC, 0x1, struct veth_ioc_pair*)
#define VETH_IOC_DEL _IOW(VETH_IOC_MAGIC, 0x2, struct veth_ioc_pair*)
/* Path of the control device node; see the mknod step in the usage notes. */
const char *vethname = "/dev/net/veth";
/* Argument passed by pointer to both VETH_IOC_ADD and VETH_IOC_DEL. */
struct veth_ioc_pair {
char parent[IFNAMSIZ]; /* interface name in the parent namespace (-v) */
char child[IFNAMSIZ]; /* interface name in the child namespace (-i) */
int id; /* nsproxy identifier of the child namespace (-I) */
};
/*
 * Print the command-line help on stdout and terminate the program with
 * exit status 1.  This function never returns.
 *
 * name: program name shown in the synopsis line (normally argv[0]).
 */
static void usage(const char *name)
{
	/* Everything after the synopsis line is fixed text, emitted verbatim. */
	static const char *const help[] = {
		"\n",
		" -h this message\n",
		"\n",
		" -I <id> add pass-through device to nsproxy <id>\n",
		" -v parent interface name\n",
		" -i child interface name\n",
		" -a add the interface\n",
		" -d delete the interface\n",
		"\n",
		"(C) Copyright IBM Corp. 2007\n",
		"\n",
	};
	size_t line;

	printf("usage: %s [-h] [-I id] [-i <ifname>] [-v ifname] [-ad]\n", name);
	for (line = 0; line < sizeof(help) / sizeof(help[0]); line++)
		fputs(help[line], stdout);

	exit(1);
}
int main(int argc, char* argv[])
{
int fd;
struct veth_ioc_pair cmd;
char *veth = NULL;
char *eth = NULL;
char c;
int id = -1;
int add = -1;
while ((c = getopt(argc, argv, "adi:v:I:")) != EOF) {
switch (c) {
case 'I':if (optarg) id = atoi(optarg); break;
case 'i': eth = optarg; break;
case 'v': veth = optarg; break;
case 'a': add = 1; break;
case 'd': add = 0; break;
default: usage(argv[0]);
};
};
if (id == -1)
usage(argv[0]);
if (add == -1)
usage(argv[0]);
if (add) {
if (!veth || !eth)
usage(argv[0]);
} else {
if (!veth)
usage(argv[0]);
}
fd = open(vethname, 0, O_WRONLY);
if (fd == -1) {
perror("open");
return 1;
}
strncpy(cmd.parent, veth, sizeof(cmd.parent));
if (add)
strncpy(cmd.child, eth, sizeof(cmd.parent));
cmd.id = id;
if (ioctl(fd, add?VETH_IOC_ADD:VETH_IOC_DEL, &cmd, sizeof(cmd))) {
perror("ioctl");
return 1;
}
close(fd);
return 0;
}
Signed-off-by: Daniel Lezcano <dlezcano at fr.ibm.com>
---
drivers/net/veth.c | 361 ++++++++++++++++++++++++++++++++-------------------
include/linux/veth.h | 20 ++
kernel/nsproxy.c | 2
3 files changed, 255 insertions(+), 128 deletions(-)
Index: 2.6.20-lxc2/drivers/net/veth.c
===================================================================
--- 2.6.20-lxc2.orig/drivers/net/veth.c
+++ 2.6.20-lxc2/drivers/net/veth.c
@@ -13,6 +13,11 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/syscalls.h>
+#include <linux/capability.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/miscdevice.h>
+#include <linux/veth.h>
#include <net/dst.h>
#include <net/xfrm.h>
@@ -24,6 +29,8 @@
#define veth_from_netdev(dev) ((struct veth_struct *)(netdev_priv(dev)))
+static struct module *veth_module = THIS_MODULE;
+
/* ------------------------------------------------------------------- *
*
* Device functions
@@ -75,12 +82,22 @@
return 0;
}
-static int veth_open(struct net_device *dev)
+static inline int veth_mod_inc_use(void)
+{
+ return try_module_get(veth_module)?0:1;
+}
+
+static inline void veth_mod_dec_use(void)
+{
+ module_put(veth_module);
+}
+
+static int veth_dev_open(struct net_device *dev)
{
return 0;
}
-static int veth_close(struct net_device *dev)
+static int veth_dev_close(struct net_device *dev)
{
return 0;
}
@@ -95,14 +112,25 @@
return &veth_from_netdev(dev)->stats;
}
-int veth_init_dev(struct net_device *dev)
+static int veth_set_address(struct net_device *dev, void *p)
+{
+ struct sockaddr *sa = p;
+
+ if (!is_valid_ether_addr(sa->sa_data))
+ return -EADDRNOTAVAIL;
+
+ memcpy(dev->dev_addr, sa->sa_data, ETH_ALEN);
+ return 0;
+}
+
+static int veth_init_dev(struct net_device *dev)
{
dev->hard_start_xmit = veth_xmit;
- dev->open = veth_open;
- dev->stop = veth_close;
+ dev->open = veth_dev_open;
+ dev->stop = veth_dev_close;
dev->destructor = veth_destructor;
dev->get_stats = get_stats;
-
+ dev->set_mac_address = veth_set_address;
ether_setup(dev);
dev->tx_queue_len = 0;
@@ -114,6 +142,173 @@
dev->init = veth_init_dev;
}
+static int veth_add(struct veth_ioc_pair *veth_pair)
+{
+ struct net_namespace *child_ns;
+ struct net_namespace *parent_ns;
+ struct net_device *parent_dev;
+ struct net_device *child_dev;
+ struct nsproxy *nsproxy;
+ int err;
+
+ err = -ESRCH;
+ nsproxy = find_nsproxy_by_id(veth_pair->id);
+ if (!nsproxy)
+ goto out;
+
+ child_ns = nsproxy->net_ns;
+ put_nsproxy(nsproxy);
+ get_net_ns(child_ns);
+
+ parent_ns = current_net_ns;
+ get_net_ns(parent_ns);
+
+ err = -EINVAL;
+ if (parent_ns != child_ns->parent)
+ goto out_parent_net_ns;
+
+ err = -ENOMEM;
+ parent_dev = alloc_netdev(sizeof(struct veth_struct),
+ veth_pair->parent, veth_setup);
+ if (!parent_dev)
+ goto out_parent_net_ns;
+
+ push_net_ns(child_ns);
+ child_dev = alloc_netdev(sizeof(struct veth_struct),
+ veth_pair->child, veth_setup);
+ pop_net_ns(parent_ns);
+ if (!child_dev)
+ goto out_parent_dev;
+
+ veth_from_netdev(parent_dev)->pair = child_dev;
+ veth_from_netdev(child_dev)->pair = parent_dev;
+
+ rtnl_lock();
+
+ err = register_netdevice(parent_dev);
+ if (err)
+ goto out_parent_reg;
+
+ push_net_ns(child_ns);
+ err = register_netdevice(child_dev);
+ pop_net_ns(parent_ns);
+ if (err)
+ goto out_child_reg;
+
+ rtnl_unlock();
+
+ err = -EBUSY;
+ if (veth_mod_inc_use())
+ goto out_child_reg;
+
+ err = 0;
+
+out_parent_net_ns:
+ put_net_ns(parent_ns);
+ put_net_ns(child_ns);
+out:
+ return err;
+
+out_child_reg:
+ unregister_netdevice(parent_dev);
+out_parent_reg:
+ rtnl_unlock();
+ free_netdev(child_dev);
+out_parent_dev:
+ free_netdev(parent_dev);
+ goto out_parent_net_ns;
+}
+
+static int veth_del(struct veth_ioc_pair *veth_pair)
+{
+ struct net_device *child_dev;
+ struct net_namespace *parent_ns, *child_ns;
+ struct net_device *parent_dev;
+
+ parent_dev = dev_get_by_name(veth_pair->parent);
+ if (!parent_dev)
+ return -ENODEV;
+
+ rtnl_lock();
+
+ child_dev = veth_from_netdev(parent_dev)->pair;
+ get_net_ns(child_dev->net_ns);
+ child_ns = child_dev->net_ns;
+
+ dev_close(child_dev);
+ synchronize_net();
+
+ /*
+ * Now child_dev does not send or receive anything.
+ * This means child_dev->hard_start_xmit is not called anymore.
+ */
+ unregister_netdevice(parent_dev);
+ /*
+ * At this point child_dev has a dead pointer to parent_dev.
+ * But this pointer is not dereferenced.
+ */
+ parent_ns = push_net_ns(child_ns);
+ unregister_netdevice(child_dev);
+
+ dev_put(parent_dev);
+ rtnl_unlock();
+
+ pop_net_ns(parent_ns);
+ put_net_ns(child_ns);
+
+ veth_mod_dec_use();
+ return 0;
+}
+
+static int veth_open(struct inode *i, struct file *f)
+{
+ if (veth_mod_inc_use())
+ return -EBUSY;
+ return 0;
+}
+
+static int veth_release(struct inode *i, struct file *f)
+{
+ veth_mod_dec_use();
+ return 0;
+}
+
+static int veth_ioctl(struct inode *inode, struct file *file,
+ unsigned int cmd, unsigned long arg)
+{
+ struct veth_ioc_pair *veth_pair;
+ int err;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ veth_pair = kmalloc(sizeof(*veth_pair), GFP_KERNEL);
+ if (!veth_pair)
+ return -ENOMEM;
+
+ if (copy_from_user(veth_pair, (void*)arg, sizeof(*veth_pair))) {
+ kfree(veth_pair);
+ return -EFAULT;
+ }
+
+ switch (cmd) {
+ case VETH_IOC_ADD:
+ err = veth_add(veth_pair);
+ break;
+
+ case VETH_IOC_DEL:
+ err = veth_del(veth_pair);
+ break;
+
+ default:
+ err = -EINVAL;
+ }
+
+ kfree(veth_pair);
+
+ return err;
+}
+
static inline int is_veth_dev(struct net_device *dev)
{
return dev->init == veth_init_dev;
@@ -246,123 +441,6 @@
/* ------------------------------------------------------------------- *
*
- * Temporary interface to create veth devices
- *
- * ------------------------------------------------------------------- */
-
-#ifdef CONFIG_PROC_FS
-
-static int veth_debug_open(struct inode *inode, struct file *file)
-{
- return 0;
-}
-
-static char *parse_addr(char *s, char *addr)
-{
- int i, v;
-
- for (i = 0; i < ETH_ALEN; i++) {
- if (!isxdigit(*s))
- return NULL;
- *addr = 0;
- v = isdigit(*s) ? *s - '0' : toupper(*s) - 'A' + 10;
- s++;
- if (isxdigit(*s)) {
- *addr += v << 16;
- v = isdigit(*s) ? *s - '0' : toupper(*s) - 'A' + 10;
- s++;
- }
- *addr++ += v;
- if (i < ETH_ALEN - 1 && ispunct(*s))
- s++;
- }
- return s;
-}
-
-static ssize_t veth_debug_write(struct file *file, const char __user *user_buf,
- size_t size, loff_t *ppos)
-{
- char buf[128], *s, *parent_name, *child_name;
- char parent_addr[ETH_ALEN], child_addr[ETH_ALEN];
- struct net_namespace *parent_ns, *child_ns;
- int err;
-
- s = buf;
- err = -EINVAL;
- if (size >= sizeof(buf))
- goto out;
- err = -EFAULT;
- if (copy_from_user(buf, user_buf, size))
- goto out;
- buf[size] = 0;
-
- err = -EBADRQC;
- if (!strncmp(buf, "add ", 4)) {
- parent_name = buf + 4;
- if ((s = strchr(parent_name, ' ')) == NULL)
- goto out;
- *s = 0;
- if ((s = parse_addr(s + 1, parent_addr)) == NULL)
- goto out;
- if (!*s)
- goto out;
- child_name = s + 1;
- if ((s = strchr(child_name, ' ')) == NULL)
- goto out;
- *s = 0;
- if ((s = parse_addr(s + 1, child_addr)) == NULL)
- goto out;
-
- get_net_ns(current_net_ns);
- parent_ns = current_net_ns;
- if (*s == ' ') {
- unsigned int id;
- id = simple_strtoul(s + 1, &s, 0);
- err = sys_bind_ns(id, NS_ALL);
- } else
- err = sys_unshare(CLONE_NEWNET2);
- if (err)
- goto out;
- /* after bind_ns() or unshare_ns() namespace is changed */
- get_net_ns(current_net_ns);
- child_ns = current_net_ns;
- err = veth_entry_add(parent_name, parent_addr, parent_ns,
- child_name, child_addr, child_ns);
- if (err) {
- put_net_ns(child_ns);
- put_net_ns(parent_ns);
- } else
- err = size;
- }
-out:
- return err;
-}
-
-static struct file_operations veth_debug_ops = {
- .open = &veth_debug_open,
- .write = &veth_debug_write,
-};
-
-static int veth_debug_create(void)
-{
- proc_net_fops_create("veth_ctl", 0200, &veth_debug_ops);
- return 0;
-}
-
-static void veth_debug_remove(void)
-{
- proc_net_remove("veth_ctl");
-}
-
-#else
-
-static int veth_debug_create(void) { return -1; }
-static void veth_debug_remove(void) { }
-
-#endif
-
-/* ------------------------------------------------------------------- *
- *
* Information in proc
*
* ------------------------------------------------------------------- */
@@ -420,19 +498,46 @@
*
* ------------------------------------------------------------------- */
+static struct file_operations veth_fops = {
+ open: veth_open,
+ release: veth_release,
+ ioctl: veth_ioctl,
+};
+
+static struct miscdevice veth_miscdev =
+{
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "veth",
+ .fops = &veth_fops
+};
+
int __init veth_init(void)
{
- if (veth_debug_create())
- return -EINVAL;
- veth_proc_create();
- return 0;
+ int err;
+
+ err = veth_proc_create();
+ if (err)
+ goto out;
+
+ err = misc_register(&veth_miscdev);
+ if (err < 0)
+ goto out_veth_proc_create;
+
+ err = 0;
+out:
+ return err;
+
+out_veth_proc_create:
+ veth_proc_remove();
+ goto out;
}
void __exit veth_exit(void)
{
- veth_debug_remove();
veth_proc_remove();
veth_entry_del_all();
+
+ misc_deregister(&veth_miscdev);
}
module_init(veth_init)
Index: 2.6.20-lxc2/include/linux/veth.h
===================================================================
--- /dev/null
+++ 2.6.20-lxc2/include/linux/veth.h
@@ -0,0 +1,20 @@
+#ifndef _LINUX_VETH_H
+#define _LINUX_VETH_H
+
+#include <linux/if.h>
+
+/* Structure for ioctl */
+
+struct veth_ioc_pair {
+ char parent[IFNAMSIZ];
+ char child[IFNAMSIZ];
+ int id;
+};
+
+/* IOCTL commands */
+
+#define VETH_IOC_MAGIC 0x1234
+#define VETH_IOC_ADD _IOW(VETH_IOC_MAGIC, 0x1, struct veth_ioc_pair*)
+#define VETH_IOC_DEL _IOW(VETH_IOC_MAGIC, 0x2, struct veth_ioc_pair*)
+
+#endif /* _LINUX_VETH_H */
Index: 2.6.20-lxc2/kernel/nsproxy.c
===================================================================
--- 2.6.20-lxc2.orig/kernel/nsproxy.c
+++ 2.6.20-lxc2/kernel/nsproxy.c
@@ -186,6 +186,7 @@
put_net_ns(ns->net_ns);
kfree(ns);
}
+EXPORT_SYMBOL_GPL(free_nsproxy);
struct mnt_namespace *get_task_mnt_ns(struct task_struct *tsk)
{
@@ -233,6 +234,7 @@
return ns;
}
+EXPORT_SYMBOL_GPL(find_nsproxy_by_id);
static int bind_ns(int id, struct nsproxy *ns)
{
--
_______________________________________________
Containers mailing list
Containers at lists.osdl.org
https://lists.osdl.org/mailman/listinfo/containers
More information about the Devel
mailing list