[Devel] [PATCH rh7] ve/kobj: Send events per VE instead of all net-namespaces broadcasting

Andrew Vagin avagin at odin.com
Wed Jun 3 07:34:51 PDT 2015


On Wed, Jun 03, 2015 at 03:55:44PM +0300, Cyrill Gorcunov wrote:
> Currently uevents are broadcast to all net-namespaces present
> in the system, which causes problems when C/R'ing systemd-based
> containers (the netlink socket sees data from the node and we can't dump
> until the data is read). So let's broadcast events not per
> net-namespace but per-VE. For this purpose, add a @_uevent_sock_list
> list to the VE instance and gather the uevent sockets there.
> 
> n.b.: In pcs6 we already have virtualized uevents so no problem there.
> 
> Signed-off-by: Cyrill Gorcunov <gorcunov at virtuozzo.com>
> CC: Andrey Vagin <avagin at virtuozzo.com>
> CC: Vladimir Davydov <vdavydov at virtuozzo.com>
> CC: Konstantin Khorenko <khorenko at virtuozzo.com>
> CC: Pavel Emelyanov <xemul at virtuozzo.com>
> ---
>  include/linux/ve.h       |    1 +
>  kernel/ve/ve.c           |    2 ++
>  lib/kobject_uevent.c     |   16 +++++++++++++++-
>  net/core/net_namespace.c |    3 +++
>  4 files changed, 21 insertions(+), 1 deletion(-)
> 
> Index: linux-pcs7.git/include/linux/ve.h
> ===================================================================
> --- linux-pcs7.git.orig/include/linux/ve.h
> +++ linux-pcs7.git/include/linux/ve.h
> @@ -121,6 +121,7 @@ struct ve_struct {
>  	int			fsync_enable;
>  
>  	u64			_uevent_seqnum;
> +	struct list_head	_uevent_sock_list;
>  	struct nsproxy __rcu	*ve_ns;
>  	struct cred		*init_cred;
>  	struct net		*ve_netns;
> Index: linux-pcs7.git/kernel/ve/ve.c
> ===================================================================
> --- linux-pcs7.git.orig/kernel/ve/ve.c
> +++ linux-pcs7.git/kernel/ve/ve.c
> @@ -66,6 +66,7 @@ static DEFINE_PER_CPU(struct kstat_lat_p
>  struct ve_struct ve0 = {
>  	.ve_name		= "0",
>  	.start_jiffies		= INITIAL_JIFFIES,
> +	._uevent_sock_list	= { &ve0._uevent_sock_list, &ve0._uevent_sock_list },
>  	RCU_POINTER_INITIALIZER(ve_ns, &init_nsproxy),
>  	.ve_netns		= &init_net,
>  	.is_running		= 1,
> @@ -713,6 +714,7 @@ do_init:
>  	INIT_LIST_HEAD(&ve->devices);
>  	INIT_LIST_HEAD(&ve->ve_list);
>  	INIT_LIST_HEAD(&ve->devmnt_list);
> +	INIT_LIST_HEAD(&ve->_uevent_sock_list);
>  	mutex_init(&ve->devmnt_mutex);
>  	kmapset_init_key(&ve->ve_sysfs_perms);
>  
> Index: linux-pcs7.git/lib/kobject_uevent.c
> ===================================================================
> --- linux-pcs7.git.orig/lib/kobject_uevent.c
> +++ linux-pcs7.git/lib/kobject_uevent.c
> @@ -26,7 +26,7 @@
>  #include <linux/netlink.h>
>  #include <net/sock.h>
>  #include <net/net_namespace.h>
> -
> +#include <linux/ve.h>
>  
>  u64 uevent_seqnum;
>  char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH;
> @@ -35,8 +35,10 @@ struct uevent_sock {
>  	struct list_head list;
>  	struct sock *sk;
>  };
> +#ifndef CONFIG_VE
>  static LIST_HEAD(uevent_sock_list);
#else
#define uevent_sock_list (get_exec_env()->_uevent_sock_list)

and remove the next ifdef..endif in this file

What do you think about doing it this way?

>  #endif
> +#endif
>  
>  /* This lock protects uevent_seqnum and uevent_sock_list */
>  static DEFINE_MUTEX(uevent_sock_mutex);
> @@ -254,7 +256,11 @@ int kobject_uevent_env_one(struct kobjec
>  
>  #if defined(CONFIG_NET)
>  	/* send netlink message */
> +#ifdef CONFIG_VE
> +	list_for_each_entry(ue_sk, &get_exec_env()->_uevent_sock_list, list) {
> +#else
>  	list_for_each_entry(ue_sk, &uevent_sock_list, list) {
> +#endif
>  		struct sock *uevent_sock = ue_sk->sk;
>  		struct sk_buff *skb;
>  		size_t len;
> @@ -396,7 +402,11 @@ static int uevent_net_init(struct net *n
>  		return -ENODEV;
>  	}
>  	mutex_lock(&uevent_sock_mutex);
> +#ifdef CONFIG_VE
> +	list_add_tail(&ue_sk->list, &net->owner_ve->_uevent_sock_list);
> +#else
>  	list_add_tail(&ue_sk->list, &uevent_sock_list);
> +#endif
>  	mutex_unlock(&uevent_sock_mutex);
>  	return 0;
>  }
> @@ -406,7 +416,11 @@ static void uevent_net_exit(struct net *
>  	struct uevent_sock *ue_sk;
>  
>  	mutex_lock(&uevent_sock_mutex);
> +#ifdef CONFIG_VE
> +	list_for_each_entry(ue_sk, &net->owner_ve->_uevent_sock_list, list) {

A container can have more than one network namespace, so we need to do this
only if net->owner_ve->ve_netns == net

> +#else
>  	list_for_each_entry(ue_sk, &uevent_sock_list, list) {
> +#endif
>  		if (sock_net(ue_sk->sk) == net)
>  			goto found;
>  	}
> Index: linux-pcs7.git/net/core/net_namespace.c
> ===================================================================
> --- linux-pcs7.git.orig/net/core/net_namespace.c
> +++ linux-pcs7.git/net/core/net_namespace.c
> @@ -33,6 +33,9 @@ EXPORT_SYMBOL_GPL(net_namespace_list);
>  
>  struct net init_net = {
>  	.dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
> +#ifdef CONFIG_VE
> +	.owner_ve = &ve0,
> +#endif
>  };
>  EXPORT_SYMBOL(init_net);
>  



More information about the Devel mailing list