[Devel] [PATCH RHEL7 COMMIT] ve/net/tcp: Kill tcp_v4_kill_ve_sockets()

Konstantin Khorenko khorenko at virtuozzo.com
Thu Jun 4 06:45:11 PDT 2015


The commit is pushed to "branch-rh7-3.10.0-123.1.2-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-123.1.2.vz7.5.9
------>
commit cf611b271f142d8cb1656d8b9fb8cdbc9d690e66
Author: Kirill Tkhai <ktkhai at odin.com>
Date:   Thu Jun 4 17:45:11 2015 +0400

    ve/net/tcp: Kill tcp_v4_kill_ve_sockets()
    
    This is a leftover from earlier versions of PCS,
    and we do not need that functionality in 3.10.
    
    It was the reason of the kernel panic in 2.6.32:
    https://jira.sw.ru/browse/PSBM-33755, in the test
    of forced CT destroying.
    
    Also, tcp_v4_kill_ve_sockets() looks as the only
    user of synchronize_net(), so we're not need this
    function too (the only net hook fini_venet_acct_ip6_stat()
    from the chain doesn't need it). Besides this, there
    will be one more synchronize_rcu() later in
    ve_drop_context().
    
    ktkhai: how the patch fixes the bug:
    
    tcp_v4_kill_ve_sockets() is called when all processes are killed,
    and their sockets have been already replaced with time wait sockets.
    
    So, when we are iterating through chain list:
    
    	sk_nulls_for_each(sk, node, &head[i].chain)
    
    we can't find TCP sockets there. Every sk is of type inet_timewait_sock.
    Yes, they both are linked in the same table.
    
    struct sock (tcp_sock) and inet_timewait_sock are not a child and a parent:
    
    struct sock {                           struct inet_timewait_sock {
       struct sock_common __sk_common;         struct sock_common __tw_common;
       socket_lock_t          sk_lock;         int                 tw_timeout;
    
    They are different.
    
    So, when we are doing bh_lock_sock(sk) in tcp_v4_kill_ve_sockets(), i.e.
    
    #define bh_lock_sock(__sk)      spin_lock(&((__sk)->sk_lock.slock))
    
    we are trying to manipulate with tw_timeout, which is not zero, and can't
    acquire the spinlock.
    
    https://jira.sw.ru/browse/PSBM-33926
    
    Signed-off-by: Kirill Tkhai <ktkhai at odin.com>
    Acked-by: Andrew Vagin <avagin at odin.com>
---
 include/linux/ve_proto.h |  5 +--
 kernel/ve/ve.c           |  5 +--
 net/ipv4/tcp_ipv4.c      | 88 ------------------------------------------------
 3 files changed, 2 insertions(+), 96 deletions(-)

diff --git a/include/linux/ve_proto.h b/include/linux/ve_proto.h
index 60d50df..0f5898e 100644
--- a/include/linux/ve_proto.h
+++ b/include/linux/ve_proto.h
@@ -45,12 +45,9 @@ typedef void (*ve_seq_print_t)(struct seq_file *, struct ve_struct *);
 void vzmon_register_veaddr_print_cb(ve_seq_print_t);
 void vzmon_unregister_veaddr_print_cb(ve_seq_print_t);
 
-#ifdef CONFIG_INET
-void tcp_v4_kill_ve_sockets(struct ve_struct *envid);
-#ifdef CONFIG_VE_NETDEV
+#if defined(CONFIG_INET) && defined(CONFIG_VE_NETDEV)
 int venet_init(void);
 #endif
-#endif
 
 extern struct list_head ve_list_head;
 #define for_each_ve(ve)	list_for_each_entry((ve), &ve_list_head, ve_list)
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 8a04cc3..e699784 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -621,10 +621,7 @@ void ve_exit_ns(struct pid_namespace *pid_ns)
 
 	down_write(&ve->op_sem);
 	ve_hook_iterate_fini(VE_SS_CHAIN, ve);
-#ifdef CONFIG_INET
-	tcp_v4_kill_ve_sockets(ve);
-	synchronize_net();
-#endif
+
 	ve_list_del(ve);
 	ve_drop_context(ve);
 	up_write(&ve->op_sem);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ad4520c..a8ef57a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2812,91 +2812,3 @@ void __init tcp_v4_init(void)
 	if (register_pernet_subsys(&tcp_sk_ops))
 		panic("Failed to create the TCP control socket.\n");
 }
-
-#ifdef CONFIG_VE
-static void tcp_kill_ve_onesk(struct sock *sk)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	/* Check the assumed state of the socket. */
-	if (!sock_flag(sk, SOCK_DEAD)) {
-		printk(KERN_WARNING "Killing sk: dead %d, state %d, "
-			"wrseq %u unseq %u, wrqu %d.\n",
-			sock_flag(sk, SOCK_DEAD), sk->sk_state,
-			tp->write_seq, tp->snd_una,
-			!skb_queue_empty(&sk->sk_write_queue));
-		sk->sk_err = ECONNRESET;
-		sk->sk_error_report(sk);
-	}
-
-	tcp_send_active_reset(sk, GFP_ATOMIC);
-	switch (sk->sk_state) {
-		case TCP_FIN_WAIT1:
-		case TCP_CLOSING:
-			/* In these 2 states the peer may want us to retransmit
-			 * some data and/or FIN.  Entering "resetting mode"
-			 * instead.
-			 */
-			tcp_time_wait(sk, TCP_CLOSE, 0);
-			break;
-		case TCP_FIN_WAIT2:
-			/* By some reason the socket may stay in this state
-			 * without turning into a TW bucket.  Fix it.
-			 */
-			tcp_time_wait(sk, TCP_FIN_WAIT2, 0);
-			break;
-		default:
-			/* Just jump into CLOSED state. */
-			tcp_done(sk);
-			break;
-	}
-}
-
-void tcp_v4_kill_ve_sockets(struct ve_struct *envid)
-{
-	struct inet_ehash_bucket *head;
-	int i, retry;
-
-	/* alive */
-again:
-	retry = 0;
-	local_bh_disable();
-	head = tcp_hashinfo.ehash;
-	for (i = 0; i < tcp_hashinfo.ehash_mask; i++) {
-		struct sock *sk;
-		struct hlist_nulls_node *node;
-		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, i);
-more_work:
-		spin_lock(lock);
-		sk_nulls_for_each(sk, node, &head[i].chain) {
-			if (sock_net(sk)->owner_ve == envid) {
-				sock_hold(sk);
-				spin_unlock(lock);
-
-				bh_lock_sock(sk);
-				if (sock_owned_by_user(sk)) {
-					retry = 1;
-					bh_unlock_sock(sk);
-					sock_put(sk);
-					goto enable_bh;
-				}
-				/* sk might have disappeared from the hash before
-				 * we got the lock */
-				if (sk->sk_state != TCP_CLOSE)
-					tcp_kill_ve_onesk(sk);
-				bh_unlock_sock(sk);
-				sock_put(sk);
-				goto more_work;
-			}
-		}
-		spin_unlock(lock);
-	}
-enable_bh:
-	local_bh_enable();
-	if (retry) {
-		schedule_timeout_interruptible(HZ);
-		goto again;
-	}
-}
-EXPORT_SYMBOL(tcp_v4_kill_ve_sockets);
-#endif



More information about the Devel mailing list