[Debian] Re: lenny updates (networking)

Kir Kolyshkin kir at openvz.org
Mon Mar 9 19:54:04 EDT 2009


Kir Kolyshkin wrote:
> I am currently checking all the ~80 patches that are not in openvz 
> lenny kernel. Looks like most are really needed. Let me suggest some 
> in a few emails I will send as a reply to this one.
Here are some networking fixes. Networking changed a lot in 2.6.26 (net 
namespaces etc.), so some fixes came out later.


http://git.openvz.org/?p=linux-2.6.26-openvz;a=commitdiff;h=24cebf40278cb071ff8b5671b03c763f0f74b5ec
netns: add support for net namespace in igmp code
Fixes a networking lockup when handling multicast traffic. Backport 
from mainline.
OpenVZ bug #992 (http://bugzilla.openvz.org/992)
Could be an ABI breaker.
Attached as 0013*


http://git.openvz.org/?p=linux-2.6.26-openvz;a=commitdiff;h=849af42466bed078e6953a4eeeff28c81f64a983
[UB]: Double free for UDP socket
Found by internal testing. Not an ABI breaker.
Attached as 0015*


http://git.openvz.org/?p=linux-2.6.26-openvz;a=commitdiff;h=b6133ea5860a6c549065be5eaca57244ac8ccc92
Remove now irrelevant #warning about rtflush sysctls
Removes a compilation warning. Very trivial :)
Attached as 0030*


http://git.openvz.org/?p=linux-2.6.26-openvz;a=commitdiff;h=9baf6095c98f930e02769b09addbd4b5f18772d5
Simplify call __dev_change_net_namespace() by remove parameters.
Related to OpenVZ bug #1044, prerequisite to the next patch.
Attached as 0040*


http://git.openvz.org/?p=linux-2.6.26-openvz;a=commitdiff;h=35f41f111afc1a9f024153ac43d8d829a894fb2b
Adjust VE before call netdev_unregister_kobject/netdev_register_kobject
Fix for OpenVZ bug #1044 (http://bugzilla.openvz.org/1044)
Attached as 0041*


http://git.openvz.org/?p=linux-2.6.26-openvz;a=commitdiff;h=ce67d5b4cc85fa0c6a6d226d436276ab307ae041
iptables: setup init iptables mask before net initialization
Trivial fix for IPv6 iptables in container. Not an ABI breaker.
Attached as 0042*


http://git.openvz.org/?p=linux-2.6.26-openvz;a=commitdiff;h=fffc6ffba65ec0b12aeb89f2e4a448785298aa75
net: set ve context when init/exit method is called
Attached as 0043*

http://git.openvz.org/?p=linux-2.6.26-openvz;a=commitdiff;h=8a951e3f434541143a639dd529a504d343d28cc7
tun: mark tun/tap devices with NETIF_F_VIRTUAL flag
Fix for OpenVZ bug #1145 (http://bugzilla.openvz.org/1145)
Attached as 0062*

http://git.openvz.org/?p=linux-2.6.26-openvz;a=commitdiff;h=5c591aeb2a194a9554b0cf0bd3959d8c18fa5129
bridge: don't leak master device on brctl addif
Fix for OpenVZ bug #1145 (http://bugzilla.openvz.org/1145)
Attached as 0063*

http://git.openvz.org/?p=linux-2.6.26-openvz;a=commitdiff;h=c578262d8816d27ab5530696d7b5f1e102e3b977
net: NETIF_F_VIRTUAL intersects with NETIF_F_LRO
Fix for OpenVZ bug #1145 (http://bugzilla.openvz.org/1145)
Attached as 0064*

-------------- next part --------------
>From 24cebf40278cb071ff8b5671b03c763f0f74b5ec Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul at openvz.org>
Date: Thu, 4 Sep 2008 11:31:43 +0400
Subject: [PATCH] netns: add support for net namespace in igmp code

This is a port of two patches from Eric W. Biederman:
[PATCH] netns: Only route multicast trafic in init_net
[PATCH] netns: Teach the igmp code to handle multiple namespaces

Without them this code locked hard in processing incoming mc traffic
http://bugzilla.openvz.org/show_bug.cgi?id=992

Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
---
 net/ipv4/igmp.c |   86 ++++++++++++++++++------------------------------------
 1 files changed, 29 insertions(+), 57 deletions(-)

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 03a7004..22fd1c1 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -292,6 +292,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 	struct rtable *rt;
 	struct iphdr *pip;
 	struct igmpv3_report *pig;
+	struct net *net = dev_net(dev);
 
 	skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev), GFP_ATOMIC);
 	if (skb == NULL)
@@ -302,7 +303,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 				    .nl_u = { .ip4_u = {
 				    .daddr = IGMPV3_ALL_MCR } },
 				    .proto = IPPROTO_IGMP };
-		if (ip_route_output_key(&init_net, &rt, &fl)) {
+		if (ip_route_output_key(net, &rt, &fl)) {
 			kfree_skb(skb);
 			return NULL;
 		}
@@ -632,6 +633,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
 	struct igmphdr *ih;
 	struct rtable *rt;
 	struct net_device *dev = in_dev->dev;
+	struct net *net = dev_net(dev);
 	__be32	group = pmc ? pmc->multiaddr : 0;
 	__be32	dst;
 
@@ -646,7 +648,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
 		struct flowi fl = { .oif = dev->ifindex,
 				    .nl_u = { .ip4_u = { .daddr = dst } },
 				    .proto = IPPROTO_IGMP };
-		if (ip_route_output_key(&init_net, &rt, &fl))
+		if (ip_route_output_key(net, &rt, &fl))
 			return -1;
 	}
 	if (rt->rt_src == 0) {
@@ -1199,9 +1201,6 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
 
 	ASSERT_RTNL();
 
-	if (dev_net(in_dev->dev) != &init_net)
-		return;
-
 	for (im=in_dev->mc_list; im; im=im->next) {
 		if (im->multiaddr == addr) {
 			im->users++;
@@ -1281,9 +1280,6 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr)
 
 	ASSERT_RTNL();
 
-	if (dev_net(in_dev->dev) != &init_net)
-		return;
-
 	for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) {
 		if (i->multiaddr==addr) {
 			if (--i->users == 0) {
@@ -1311,9 +1307,6 @@ void ip_mc_down(struct in_device *in_dev)
 
 	ASSERT_RTNL();
 
-	if (dev_net(in_dev->dev) != &init_net)
-		return;
-
 	for (i=in_dev->mc_list; i; i=i->next)
 		igmp_group_dropped(i);
 
@@ -1334,9 +1327,6 @@ void ip_mc_init_dev(struct in_device *in_dev)
 {
 	ASSERT_RTNL();
 
-	if (dev_net(in_dev->dev) != &init_net)
-		return;
-
 	in_dev->mc_tomb = NULL;
 #ifdef CONFIG_IP_MULTICAST
 	in_dev->mr_gq_running = 0;
@@ -1360,9 +1350,6 @@ void ip_mc_up(struct in_device *in_dev)
 
 	ASSERT_RTNL();
 
-	if (dev_net(in_dev->dev) != &init_net)
-		return;
-
 	ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS);
 
 	for (i=in_dev->mc_list; i; i=i->next)
@@ -1379,9 +1366,6 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
 
 	ASSERT_RTNL();
 
-	if (dev_net(in_dev->dev) != &init_net)
-		return;
-
 	/* Deactivate timers */
 	ip_mc_down(in_dev);
 
@@ -1398,7 +1382,7 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
 	write_unlock_bh(&in_dev->mc_list_lock);
 }
 
-static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr)
+static struct in_device * ip_mc_find_dev(struct net *net, struct ip_mreqn *imr)
 {
 	struct flowi fl = { .nl_u = { .ip4_u =
 				      { .daddr = imr->imr_multiaddr.s_addr } } };
@@ -1407,19 +1391,19 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr)
 	struct in_device *idev = NULL;
 
 	if (imr->imr_ifindex) {
-		idev = inetdev_by_index(&init_net, imr->imr_ifindex);
+		idev = inetdev_by_index(net, imr->imr_ifindex);
 		if (idev)
 			__in_dev_put(idev);
 		return idev;
 	}
 	if (imr->imr_address.s_addr) {
-		dev = ip_dev_find(&init_net, imr->imr_address.s_addr);
+		dev = ip_dev_find(net, imr->imr_address.s_addr);
 		if (!dev)
 			return NULL;
 		dev_put(dev);
 	}
 
-	if (!dev && !ip_route_output_key(&init_net, &rt, &fl)) {
+	if (!dev && !ip_route_output_key(net, &rt, &fl)) {
 		dev = rt->u.dst.dev;
 		ip_rt_put(rt);
 	}
@@ -1757,18 +1741,16 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
 	struct ip_mc_socklist *iml=NULL, *i;
 	struct in_device *in_dev;
 	struct inet_sock *inet = inet_sk(sk);
+	struct net *net = sock_net(sk);
 	int ifindex;
 	int count = 0;
 
 	if (!ipv4_is_multicast(addr))
 		return -EINVAL;
 
-	if (sock_net(sk) != &init_net)
-		return -EPROTONOSUPPORT;
-
 	rtnl_lock();
 
-	in_dev = ip_mc_find_dev(imr);
+	in_dev = ip_mc_find_dev(net, imr);
 
 	if (!in_dev) {
 		iml = NULL;
@@ -1828,17 +1810,15 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
 int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
 {
 	struct inet_sock *inet = inet_sk(sk);
+	struct net *net = sock_net(sk);
 	struct ip_mc_socklist *iml, **imlp;
 	struct in_device *in_dev;
 	__be32 group = imr->imr_multiaddr.s_addr;
 	u32 ifindex;
 	int ret = -EADDRNOTAVAIL;
 
-	if (sock_net(sk) != &init_net)
-		return -EPROTONOSUPPORT;
-
 	rtnl_lock();
-	in_dev = ip_mc_find_dev(imr);
+	in_dev = ip_mc_find_dev(net, imr);
 	ifindex = imr->imr_ifindex;
 	for (imlp = &inet->mc_list; (iml = *imlp) != NULL; imlp = &iml->next) {
 		if (iml->multi.imr_multiaddr.s_addr != group)
@@ -1875,6 +1855,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
 	struct ip_mc_socklist *pmc;
 	struct in_device *in_dev = NULL;
 	struct inet_sock *inet = inet_sk(sk);
+	struct net *net = sock_net(sk);
 	struct ip_sf_socklist *psl;
 	int leavegroup = 0;
 	int i, j, rv;
@@ -1882,15 +1863,12 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
 	if (!ipv4_is_multicast(addr))
 		return -EINVAL;
 
-	if (sock_net(sk) != &init_net)
-		return -EPROTONOSUPPORT;
-
 	rtnl_lock();
 
 	imr.imr_multiaddr.s_addr = mreqs->imr_multiaddr;
 	imr.imr_address.s_addr = mreqs->imr_interface;
 	imr.imr_ifindex = ifindex;
-	in_dev = ip_mc_find_dev(&imr);
+	in_dev = ip_mc_find_dev(net, &imr);
 
 	if (!in_dev) {
 		err = -ENODEV;
@@ -2009,6 +1987,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
 	struct ip_mc_socklist *pmc;
 	struct in_device *in_dev;
 	struct inet_sock *inet = inet_sk(sk);
+	struct net *net = sock_net(sk);
 	struct ip_sf_socklist *newpsl, *psl;
 	int leavegroup = 0;
 
@@ -2018,15 +1997,12 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
 	    msf->imsf_fmode != MCAST_EXCLUDE)
 		return -EINVAL;
 
-	if (sock_net(sk) != &init_net)
-		return -EPROTONOSUPPORT;
-
 	rtnl_lock();
 
 	imr.imr_multiaddr.s_addr = msf->imsf_multiaddr;
 	imr.imr_address.s_addr = msf->imsf_interface;
 	imr.imr_ifindex = ifindex;
-	in_dev = ip_mc_find_dev(&imr);
+	in_dev = ip_mc_find_dev(net, &imr);
 
 	if (!in_dev) {
 		err = -ENODEV;
@@ -2096,20 +2072,18 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
 	struct ip_mc_socklist *pmc;
 	struct in_device *in_dev;
 	struct inet_sock *inet = inet_sk(sk);
+	struct net *net = sock_net(sk);
 	struct ip_sf_socklist *psl;
 
 	if (!ipv4_is_multicast(addr))
 		return -EINVAL;
 
-	if (sock_net(sk) != &init_net)
-		return -EPROTONOSUPPORT;
-
 	rtnl_lock();
 
 	imr.imr_multiaddr.s_addr = msf->imsf_multiaddr;
 	imr.imr_address.s_addr = msf->imsf_interface;
 	imr.imr_ifindex = 0;
-	in_dev = ip_mc_find_dev(&imr);
+	in_dev = ip_mc_find_dev(net, &imr);
 
 	if (!in_dev) {
 		err = -ENODEV;
@@ -2166,9 +2140,6 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
 	if (!ipv4_is_multicast(addr))
 		return -EINVAL;
 
-	if (sock_net(sk) != &init_net)
-		return -EPROTONOSUPPORT;
-
 	rtnl_lock();
 
 	err = -EADDRNOTAVAIL;
@@ -2253,15 +2224,12 @@ void ip_mc_drop_socket(struct sock *sk)
 	if (inet->mc_list == NULL)
 		return;
 
-	if (sock_net(sk) != &init_net)
-		return;
-
 	rtnl_lock();
 	while ((iml = inet->mc_list) != NULL) {
 		struct in_device *in_dev;
 		inet->mc_list = iml->next;
 
-		in_dev = inetdev_by_index(&init_net, iml->multi.imr_ifindex);
+		in_dev = inetdev_by_index(sock_net(sk), iml->multi.imr_ifindex);
 		(void) ip_mc_leave_src(sk, iml, in_dev);
 		if (in_dev != NULL) {
 			ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr);
@@ -2306,6 +2274,7 @@ int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 p
 
 #if defined(CONFIG_PROC_FS)
 struct igmp_mc_iter_state {
+	struct seq_net_private p;
 	struct net_device *dev;
 	struct in_device *in_dev;
 };
@@ -2316,9 +2285,10 @@ static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq)
 {
 	struct ip_mc_list *im = NULL;
 	struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
+	struct net *net = seq_file_net(seq);
 
 	state->in_dev = NULL;
-	for_each_netdev(get_exec_env()->ve_netns, state->dev) {
+	for_each_netdev(net, state->dev) {
 		struct in_device *in_dev;
 		in_dev = in_dev_get(state->dev);
 		if (!in_dev)
@@ -2439,7 +2409,7 @@ static const struct seq_operations igmp_mc_seq_ops = {
 
 static int igmp_mc_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open_private(file, &igmp_mc_seq_ops,
+	return seq_open_net(inode, file, &igmp_mc_seq_ops,
 			sizeof(struct igmp_mc_iter_state));
 }
 
@@ -2448,10 +2418,11 @@ static const struct file_operations igmp_mc_seq_fops = {
 	.open		=	igmp_mc_seq_open,
 	.read		=	seq_read,
 	.llseek		=	seq_lseek,
-	.release	=	seq_release_private,
+	.release	=	seq_release_net,
 };
 
 struct igmp_mcf_iter_state {
+	struct seq_net_private p;
 	struct net_device *dev;
 	struct in_device *idev;
 	struct ip_mc_list *im;
@@ -2464,10 +2435,11 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq)
 	struct ip_sf_list *psf = NULL;
 	struct ip_mc_list *im = NULL;
 	struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq);
+	struct net *net = seq_file_net(seq);
 
 	state->idev = NULL;
 	state->im = NULL;
-	for_each_netdev(get_exec_env()->ve_netns, state->dev) {
+	for_each_netdev(net, state->dev) {
 		struct in_device *idev;
 		idev = in_dev_get(state->dev);
 		if (unlikely(idev == NULL))
@@ -2598,7 +2570,7 @@ static const struct seq_operations igmp_mcf_seq_ops = {
 
 static int igmp_mcf_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open_private(file, &igmp_mcf_seq_ops,
+	return seq_open_net(inode, file, &igmp_mcf_seq_ops,
 			sizeof(struct igmp_mcf_iter_state));
 }
 
@@ -2607,7 +2579,7 @@ static const struct file_operations igmp_mcf_seq_fops = {
 	.open		=	igmp_mcf_seq_open,
 	.read		=	seq_read,
 	.llseek		=	seq_lseek,
-	.release	=	seq_release_private,
+	.release	=	seq_release_net,
 };
 
 static int igmp_net_init(struct net *net)
-- 
1.6.0.6

-------------- next part --------------
>From 849af42466bed078e6953a4eeeff28c81f64a983 Mon Sep 17 00:00:00 2001
From: Denis Lunev <den at openvz.org>
Date: Tue, 9 Sep 2008 17:55:51 +0400
Subject: [PATCH] [UB]: Double free for UDP socket

A socket residing in the UB space waiting queue could be released. In this
case ub_snd_wakeup running on another CPU could hold/release that socket,
effectively hitting a zero refcount a second time.

Signed-off-by: Denis V. Lunev <den at openvz.org>
Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
---
 net/socket.c |    3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/net/socket.c b/net/socket.c
index 58a9495..09d8fc5 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -518,6 +518,9 @@ const struct file_operations bad_sock_fops = {
 
 void sock_release(struct socket *sock)
 {
+	if (sock->sk)
+		ub_sock_sndqueuedel(sock->sk);
+
 	if (sock->ops) {
 		struct module *owner = sock->ops->owner;
 
-- 
1.6.0.6

-------------- next part --------------
>From b6133ea5860a6c549065be5eaca57244ac8ccc92 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul at openvz.org>
Date: Tue, 30 Sep 2008 19:00:39 +0400
Subject: [PATCH] Remove now irrelevant #warning about rtflush sysctls

Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
---
 net/ipv4/route.c |    1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d032f59..3c1b8bd 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2919,7 +2919,6 @@ void ip_rt_multicast_event(struct in_device *in_dev)
 }
 
 #ifdef CONFIG_SYSCTL
-#warning "Rework this shit via ro net sysctls"
 
 static int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write,
 					struct file *filp, void __user *buffer,
-- 
1.6.0.6

-------------- next part --------------
>From 9baf6095c98f930e02769b09addbd4b5f18772d5 Mon Sep 17 00:00:00 2001
From: Vitaliy Gusev <vgusev at openvz.org>
Date: Tue, 14 Oct 2008 19:18:57 +0400
Subject: [PATCH] Simplify call __dev_change_net_namespace() by remove parameters.

1. The source VE and destination VE do not need to be passed to
__dev_change_net_namespace(), as the src VE can be obtained from
dev->owner_env and the dst VE from net->owner_ve.

2. Destination VE that is passed to __dev_change_net_namespace()
was wrong, so this patch also fixes it.

Related to the bug http://bugzilla.openvz.org/show_bug.cgi?id=1044

Signed-off-by: Vitaliy Gusev <vgusev at openvz.org>
Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
---
 include/linux/netdevice.h |    1 -
 kernel/ve/vecalls.c       |    5 ++---
 net/core/dev.c            |   14 ++++++++------
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index bacc0a0..2e2acdb 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1175,7 +1175,6 @@ extern unsigned		dev_get_flags(const struct net_device *);
 extern int		dev_change_flags(struct net_device *, unsigned);
 extern int		dev_change_name(struct net_device *, char *);
 int __dev_change_net_namespace(struct net_device *, struct net *, const char *,
-			struct ve_struct *src_ve, struct ve_struct *dst_ve,
 			struct user_beancounter *exec_ub);
 extern int		dev_change_net_namespace(struct net_device *,
 						 struct net *, const char *);
diff --git a/kernel/ve/vecalls.c b/kernel/ve/vecalls.c
index 5aab66c..b04c19f 100644
--- a/kernel/ve/vecalls.c
+++ b/kernel/ve/vecalls.c
@@ -1805,8 +1805,7 @@ static int ve_dev_add(envid_t veid, char *dev_name)
 	if (dev == NULL)
 		goto out_unlock;
 
-	err = __dev_change_net_namespace(dev, dst_net, dev_name,
-					get_ve0(), dst_ve, get_exec_ub());
+	err = __dev_change_net_namespace(dev, dst_net, dev_name, get_exec_ub());
 out_unlock:
 	rtnl_unlock();
 	real_put_ve(dst_ve);
@@ -1840,7 +1839,7 @@ static int ve_dev_del(envid_t veid, char *dev_name)
 		goto out_unlock;
 
 	err = __dev_change_net_namespace(dev, &init_net, dev_name,
-				src_ve, get_ve0(), netdev_bc(dev)->owner_ub);
+					 netdev_bc(dev)->owner_ub);
 out_unlock:
 	rtnl_unlock();
 	real_put_ve(src_ve);
diff --git a/net/core/dev.c b/net/core/dev.c
index 26b529f..ce7e730 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4207,14 +4207,17 @@ EXPORT_SYMBOL(unregister_netdev);
  */
 
 int __dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat,
-		struct ve_struct *src_ve, struct ve_struct *dst_ve,
 		struct user_beancounter *exec_ub)
 {
 	char buf[IFNAMSIZ];
 	const char *destname;
 	int err;
-	struct ve_struct *cur_ve;
 	struct user_beancounter *tmp_ub;
+#ifdef CONFIG_VE
+	struct ve_struct *cur_ve = get_exec_env();
+	struct ve_struct *src_ve = dev->owner_env;
+	struct ve_struct *dst_ve = net->owner_ve;
+#endif
 
 	ASSERT_RTNL();
 
@@ -4278,7 +4281,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, const ch
 	/* Notify protocols, that we are about to destroy
 	   this device. They should clean all the things.
 	*/
-	cur_ve = set_exec_env(src_ve);
+	set_exec_env(src_ve);
 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
 	(void)set_exec_env(cur_ve);
 
@@ -4311,7 +4314,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, const ch
 	list_netdevice(dev);
 
 	/* Notify protocols, that a new device appeared. */
-	cur_ve = set_exec_env(dst_ve);
+	set_exec_env(dst_ve);
 	call_netdevice_notifiers(NETDEV_REGISTER, dev);
 	(void)set_exec_env(cur_ve);
 
@@ -4323,10 +4326,9 @@ out:
 
 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
 {
-	struct ve_struct *ve = get_exec_env();
 	struct user_beancounter *ub = get_exec_ub();
 
-	return __dev_change_net_namespace(dev, net, pat, ve, ve, ub);
+	return __dev_change_net_namespace(dev, net, pat, ub);
 }
 
 static int dev_cpu_callback(struct notifier_block *nfb,
-- 
1.6.0.6

-------------- next part --------------
>From 35f41f111afc1a9f024153ac43d8d829a894fb2b Mon Sep 17 00:00:00 2001
From: Vitaliy Gusev <vgusev at openvz.org>
Date: Tue, 14 Oct 2008 19:20:33 +0400
Subject: [PATCH] Adjust VE before call netdev_unregister_kobject/netdev_register_kobject

These function use visible_net_class.

http://bugzilla.openvz.org/show_bug.cgi?id=1044

Signed-off-by: Vitaliy Gusev <vgusev at openvz.org>
Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
---
 net/core/dev.c |    3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index ce7e730..246deda 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4306,8 +4306,11 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, const ch
 	}
 
 	/* Fixup kobjects */
+	set_exec_env(src_ve);
 	netdev_unregister_kobject(dev);
+	set_exec_env(dst_ve);
 	err = netdev_register_kobject(dev);
+	set_exec_env(cur_ve);
 	WARN_ON(err);
 
 	/* Add the device back in the hashes */
-- 
1.6.0.6

-------------- next part --------------
>From ce67d5b4cc85fa0c6a6d226d436276ab307ae041 Mon Sep 17 00:00:00 2001
From: Vitaliy Gusev <vgusev at openvz.org>
Date: Mon, 20 Oct 2008 15:38:43 +0400
Subject: [PATCH] iptables: setup init iptables mask before net initialization

Net initialization uses iptables init mask and checks
VE_IP_IPTABLES6, VE_IP_FILTER6, VE_IP_MANGLE6.
Thus without setup before net init, VE's ipv6 iptables
will not be initialized.

Signed-off-by: Vitaliy Gusev <vgusev at openvz.org>
Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
---
 kernel/ve/vecalls.c |   45 +++++++++++++++++++++++++--------------------
 1 files changed, 25 insertions(+), 20 deletions(-)

diff --git a/kernel/ve/vecalls.c b/kernel/ve/vecalls.c
index b04c19f..55d8b7b 100644
--- a/kernel/ve/vecalls.c
+++ b/kernel/ve/vecalls.c
@@ -939,27 +939,8 @@ EXPORT_SYMBOL(ve_move_task);
 static int do_ve_iptables(struct ve_struct *ve, __u64 init_mask,
 		int init_or_cleanup)
 {
-	int err;
+	int err = 0;
 
-	/* Remove when userspace will start supplying IPv6-related bits. */
-	init_mask &= ~VE_IP_IPTABLES6;
-	init_mask &= ~VE_IP_FILTER6;
-	init_mask &= ~VE_IP_MANGLE6;
-	init_mask &= ~VE_IP_IPTABLE_NAT_MOD;
-	init_mask &= ~VE_NF_CONNTRACK_MOD;
-	if ((init_mask & VE_IP_IPTABLES) == VE_IP_IPTABLES)
-		init_mask |= VE_IP_IPTABLES6;
-	if ((init_mask & VE_IP_FILTER) == VE_IP_FILTER)
-		init_mask |= VE_IP_FILTER6;
-	if ((init_mask & VE_IP_MANGLE) == VE_IP_MANGLE)
-		init_mask |= VE_IP_MANGLE6;
-	if ((init_mask & VE_IP_NAT) == VE_IP_NAT)
-		init_mask |= VE_IP_IPTABLE_NAT;
-
-	if ((init_mask & VE_IP_CONNTRACK) == VE_IP_CONNTRACK)
-		init_mask |= VE_NF_CONNTRACK;
-
-	err = 0;
 	if (!init_or_cleanup)
 		goto cleanup;
 
@@ -1026,6 +1007,29 @@ static inline void fini_ve_iptables(struct ve_struct *ve, __u64 init_mask)
 	(void)do_ve_iptables(ve, init_mask, 0);
 }
 
+static __u64 setup_iptables_mask(__u64 init_mask)
+{
+	/* Remove when userspace will start supplying IPv6-related bits. */
+	init_mask &= ~VE_IP_IPTABLES6;
+	init_mask &= ~VE_IP_FILTER6;
+	init_mask &= ~VE_IP_MANGLE6;
+	init_mask &= ~VE_IP_IPTABLE_NAT_MOD;
+	init_mask &= ~VE_NF_CONNTRACK_MOD;
+	if ((init_mask & VE_IP_IPTABLES) == VE_IP_IPTABLES)
+		init_mask |= VE_IP_IPTABLES6;
+	if ((init_mask & VE_IP_FILTER) == VE_IP_FILTER)
+		init_mask |= VE_IP_FILTER6;
+	if ((init_mask & VE_IP_MANGLE) == VE_IP_MANGLE)
+		init_mask |= VE_IP_MANGLE6;
+	if ((init_mask & VE_IP_NAT) == VE_IP_NAT)
+		init_mask |= VE_IP_IPTABLE_NAT;
+
+	if ((init_mask & VE_IP_CONNTRACK) == VE_IP_CONNTRACK)
+		init_mask |= VE_NF_CONNTRACK;
+
+	return init_mask;
+}
+
 #else
 #define init_ve_iptables(x, y)	(0)
 #define fini_ve_iptables(x, y)	do { } while (0)
@@ -1162,6 +1166,7 @@ static int do_env_create(envid_t veid, unsigned int flags, u32 class_id,
 	/* Set up ipt_mask as it will be used during
 	 * net namespace initialization
 	 */
+	init_mask = setup_iptables_mask(init_mask);
 	ve->ipt_mask = init_mask;
 #endif
 
-- 
1.6.0.6

-------------- next part --------------
>From fffc6ffba65ec0b12aeb89f2e4a448785298aa75 Mon Sep 17 00:00:00 2001
From: Vitaliy Gusev <vgusev at openvz.org>
Date: Fri, 31 Oct 2008 16:48:47 +0300
Subject: [PATCH] net: set ve context when init/exit method is called

Both pernet init and exit methods are called:
    - from VE context when VE is created;
    - from VE0 context if module registers pernet operations

This difference in approaches leads to many nasty things, since the
init callback can actually be called with the wrong exec_env.

Unify both approaches.

Signed-off-by: Vitaliy Gusev <vgusev at openvz.org>
Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
---
 net/core/net_namespace.c |   20 +++++++++++++++++++-
 1 files changed, 19 insertions(+), 1 deletions(-)

diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 4ccdf17..523eba0 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -232,6 +232,16 @@ static int __init net_ns_init(void)
 pure_initcall(net_ns_init);
 
 #ifdef CONFIG_NET_NS
+
+#include <linux/netdevice.h>
+
+static inline void set_net_context(struct net *net)
+{
+	set_exec_env(net->owner_ve);
+	if (net->loopback_dev)
+		set_exec_ub(netdev_bc(net->loopback_dev)->exec_ub);
+}
+
 static int register_pernet_operations(struct list_head *list,
 				      struct pernet_operations *ops)
 {
@@ -241,7 +251,9 @@ static int register_pernet_operations(struct list_head *list,
 	list_add_tail(&ops->list, list);
 	if (ops->init) {
 		for_each_net(net) {
+			set_net_context(net);
 			error = ops->init(net);
+			set_net_context(&init_net);
 			if (error)
 				goto out_undo;
 		}
@@ -255,7 +267,10 @@ out_undo:
 		for_each_net(undo_net) {
 			if (undo_net == net)
 				goto undone;
+
+			set_net_context(undo_net);
 			ops->exit(undo_net);
+			set_net_context(&init_net);
 		}
 	}
 undone:
@@ -268,8 +283,11 @@ static void unregister_pernet_operations(struct pernet_operations *ops)
 
 	list_del(&ops->list);
 	if (ops->exit)
-		for_each_net(net)
+		for_each_net(net) {
+			set_net_context(net);
 			ops->exit(net);
+			set_net_context(&init_net);
+		}
 }
 
 #else
-- 
1.6.0.6

-------------- next part --------------
>From 8a951e3f434541143a639dd529a504d343d28cc7 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul at openvz.org>
Date: Wed, 14 Jan 2009 18:22:14 +0300
Subject: [PATCH] tun: mark tun/tap devices with NETIF_F_VIRTUAL flag

This flag is not only a "don't register me in CTs" sign, but
also a "can be a bridge master device" one.

Need it back.

http://bugzilla.openvz.org/show_bug.cgi?id=1145

Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
---
 drivers/net/tun.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index d9a5222..54e7c44 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -468,7 +468,7 @@ void tun_setup(struct net_device *dev)
 	dev->stop = tun_net_close;
 	dev->ethtool_ops = &tun_ethtool_ops;
 	dev->destructor = free_netdev;
-	dev->features |= NETIF_F_NETNS_LOCAL;
+	dev->features |= NETIF_F_NETNS_LOCAL | NETIF_F_VIRTUAL;
 }
 EXPORT_SYMBOL(tun_setup);
 
-- 
1.6.0.6

-------------- next part --------------
>From 5c591aeb2a194a9554b0cf0bd3959d8c18fa5129 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul at openvz.org>
Date: Wed, 14 Jan 2009 18:23:02 +0300
Subject: [PATCH] bridge: don't leak master device on brctl addif

If we add a second ethernet device to bridge the former one leaks.

http://bugzilla.openvz.org/show_bug.cgi?id=1145

Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
---
 net/bridge/br_if.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 3dac8fc..4588ddc 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -406,7 +406,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 	if ((dev->flags & IFF_UP) && netif_carrier_ok(dev) &&
 	    (br->dev->flags & IFF_UP))
 		br_stp_enable_port(p);
-	if (!(dev->features & NETIF_F_VIRTUAL)) {
+	if (!(dev->features & NETIF_F_VIRTUAL) && !br->master_dev) {
 		dev_hold(dev);
 		br->master_dev = dev;
 	}
-- 
1.6.0.6

-------------- next part --------------
>From c578262d8816d27ab5530696d7b5f1e102e3b977 Mon Sep 17 00:00:00 2001
From: Denis V. Lunev <den at openvz.org>
Date: Wed, 1 Oct 2008 12:06:39 +0400
Subject: [PATCH] net: NETIF_F_VIRTUAL intersects with NETIF_F_LRO

Fortunately, this is not a part of user/kernel interface
[xemul picked 2.6.27's 4826fea3]

Signed-off-by: Denis V. Lunev <den at openvz.org>
Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
---
 include/linux/netdevice.h |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2e2acdb..0a4fb43 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -525,8 +525,8 @@ struct net_device
 #define NETIF_F_LRO		32768	/* large receive offload */
 
 	/* Segmentation offload features */
-#define NETIF_F_GSO_SHIFT	16
-#define NETIF_F_GSO_MASK	0xffff0000
+#define NETIF_F_GSO_SHIFT	20
+#define NETIF_F_GSO_MASK	0xfff00000
 #define NETIF_F_TSO		(SKB_GSO_TCPV4 << NETIF_F_GSO_SHIFT)
 #define NETIF_F_UFO		(SKB_GSO_UDP << NETIF_F_GSO_SHIFT)
 #define NETIF_F_GSO_ROBUST	(SKB_GSO_DODGY << NETIF_F_GSO_SHIFT)
-- 
1.6.0.6



More information about the Debian mailing list