[Devel] [PATCH] ve: Kill tcp_v4_kill_ve_sockets()
Vasily Averin
vvs at virtuozzo.com
Mon Jun 1 05:16:16 PDT 2015
On 01.06.2015 14:31, Kirill Tkhai wrote:
> В Пт, 29/05/2015 в 17:20 +0300, Andrew Vagin пишет:
>> Acked-by: Andrew Vagin <avagin at odin.com>
>>
>> I'm agree that we need to remove this function, but I don't know how it
>> fixes the bug.
>
> tcp_v4_kill_ve_sockets() is called when all processes are killed,
> and their sockets have been already replaced with time wait sockets.
>
> So, when we are iterating through chain list:
>
> sk_nulls_for_each(sk, node, &head[i].chain)
>
> we can't find TCP sockets there. Every sk is of type inet_timewait_sock.
> Yes, they both are linked in the same table.
>
> struct sock (tcp_sock) and inet_timewait_sock are not a child and a parent:
>
> struct sock { struct inet_timewait_sock {
> struct sock_common __sk_common; struct sock_common __tw_common;
> socket_lock_t sk_lock; int tw_timeout;
>
> They are different.
>
> So, when we are doing bh_lock_sock(sk) in tcp_v4_kill_ve_sockets(), i.e.
>
> #define bh_lock_sock(__sk) spin_lock(&((__sk)->sk_lock.slock))
>
> we are trying to manipulate with tw_timeout, which is not zero, and can't
> acquire the spinlock.
it's nice theory, but I still do not understand how is it related to observed crash?
I did not found such process in crash dump of PSBM-33755.
Kernel was crashed in another function, not on access to spinlock.
and it was happen in 50 seconds after stop of CT 106.
How can you explain it?
>> On Fri, May 29, 2015 at 04:53:39PM +0300, Kirill Tkhai wrote:
>>> This is a leftover from earlier versions of PCS,
>>> and we do not need that functionality in 3.10.
>>>
>>> It was the reason of the kernel panic in 2.6.32:
>>> https://jira.sw.ru/browse/PSBM-33755, in the test
>>> of forced CT destroying.
>>>
>>> Also, tcp_v4_kill_ve_sockets() looks as the only
>>> user of synchronize_net(), so we're not need this
>>> function too (the only net hook fini_venet_acct_ip6_stat()
>>> from the chain doesn't need it). Besides this, there
>>> will be one more synchronize_rcu() later in
>>> ve_drop_context().
>>>
>>> Signed-off-by: Kirill Tkhai <ktkhai at odin.com>
>>> ---
>>> include/linux/ve_proto.h | 5 +--
>>> kernel/ve/ve.c | 5 +--
>>> net/ipv4/tcp_ipv4.c | 88 ----------------------------------------------
>>> 3 files changed, 2 insertions(+), 96 deletions(-)
>>>
>>> diff --git a/include/linux/ve_proto.h b/include/linux/ve_proto.h
>>> index d491ab0..e77d6f9 100644
>>> --- a/include/linux/ve_proto.h
>>> +++ b/include/linux/ve_proto.h
>>> @@ -50,12 +50,9 @@ typedef void (*ve_seq_print_t)(struct seq_file *, struct ve_struct *);
>>> void vzmon_register_veaddr_print_cb(ve_seq_print_t);
>>> void vzmon_unregister_veaddr_print_cb(ve_seq_print_t);
>>>
>>> -#ifdef CONFIG_INET
>>> -void tcp_v4_kill_ve_sockets(struct ve_struct *envid);
>>> -#ifdef CONFIG_VE_NETDEV
>>> +#if defined(CONFIG_INET) && defined(CONFIG_VE_NETDEV)
>>> int venet_init(void);
>>> #endif
>>> -#endif
>>>
>>> extern struct list_head ve_list_head;
>>> #define for_each_ve(ve) list_for_each_entry((ve), &ve_list_head, ve_list)
>>> diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
>>> index 55b7d86..3128fdb 100644
>>> --- a/kernel/ve/ve.c
>>> +++ b/kernel/ve/ve.c
>>> @@ -647,10 +647,7 @@ void ve_exit_ns(struct pid_namespace *pid_ns)
>>>
>>> down_write(&ve->op_sem);
>>> ve_hook_iterate_fini(VE_SS_CHAIN, ve);
>>> -#ifdef CONFIG_INET
>>> - tcp_v4_kill_ve_sockets(ve);
>>> - synchronize_net();
>>> -#endif
>>> +
>>> ve_list_del(ve);
>>> ve_drop_context(ve);
>>> up_write(&ve->op_sem);
>>> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
>>> index ad4520c..a8ef57a 100644
>>> --- a/net/ipv4/tcp_ipv4.c
>>> +++ b/net/ipv4/tcp_ipv4.c
>>> @@ -2812,91 +2812,3 @@ void __init tcp_v4_init(void)
>>> if (register_pernet_subsys(&tcp_sk_ops))
>>> panic("Failed to create the TCP control socket.\n");
>>> }
>>> -
>>> -#ifdef CONFIG_VE
>>> -static void tcp_kill_ve_onesk(struct sock *sk)
>>> -{
>>> - struct tcp_sock *tp = tcp_sk(sk);
>>> -
>>> - /* Check the assumed state of the socket. */
>>> - if (!sock_flag(sk, SOCK_DEAD)) {
>>> - printk(KERN_WARNING "Killing sk: dead %d, state %d, "
>>> - "wrseq %u unseq %u, wrqu %d.\n",
>>> - sock_flag(sk, SOCK_DEAD), sk->sk_state,
>>> - tp->write_seq, tp->snd_una,
>>> - !skb_queue_empty(&sk->sk_write_queue));
>>> - sk->sk_err = ECONNRESET;
>>> - sk->sk_error_report(sk);
>>> - }
>>> -
>>> - tcp_send_active_reset(sk, GFP_ATOMIC);
>>> - switch (sk->sk_state) {
>>> - case TCP_FIN_WAIT1:
>>> - case TCP_CLOSING:
>>> - /* In these 2 states the peer may want us to retransmit
>>> - * some data and/or FIN. Entering "resetting mode"
>>> - * instead.
>>> - */
>>> - tcp_time_wait(sk, TCP_CLOSE, 0);
>>> - break;
>>> - case TCP_FIN_WAIT2:
>>> - /* By some reason the socket may stay in this state
>>> - * without turning into a TW bucket. Fix it.
>>> - */
>>> - tcp_time_wait(sk, TCP_FIN_WAIT2, 0);
>>> - break;
>>> - default:
>>> - /* Just jump into CLOSED state. */
>>> - tcp_done(sk);
>>> - break;
>>> - }
>>> -}
>>> -
>>> -void tcp_v4_kill_ve_sockets(struct ve_struct *envid)
>>> -{
>>> - struct inet_ehash_bucket *head;
>>> - int i, retry;
>>> -
>>> - /* alive */
>>> -again:
>>> - retry = 0;
>>> - local_bh_disable();
>>> - head = tcp_hashinfo.ehash;
>>> - for (i = 0; i < tcp_hashinfo.ehash_mask; i++) {
>>> - struct sock *sk;
>>> - struct hlist_nulls_node *node;
>>> - spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, i);
>>> -more_work:
>>> - spin_lock(lock);
>>> - sk_nulls_for_each(sk, node, &head[i].chain) {
>>> - if (sock_net(sk)->owner_ve == envid) {
>>> - sock_hold(sk);
>>> - spin_unlock(lock);
>>> -
>>> - bh_lock_sock(sk);
>>> - if (sock_owned_by_user(sk)) {
>>> - retry = 1;
>>> - bh_unlock_sock(sk);
>>> - sock_put(sk);
>>> - goto enable_bh;
>>> - }
>>> - /* sk might have disappeared from the hash before
>>> - * we got the lock */
>>> - if (sk->sk_state != TCP_CLOSE)
>>> - tcp_kill_ve_onesk(sk);
>>> - bh_unlock_sock(sk);
>>> - sock_put(sk);
>>> - goto more_work;
>>> - }
>>> - }
>>> - spin_unlock(lock);
>>> - }
>>> -enable_bh:
>>> - local_bh_enable();
>>> - if (retry) {
>>> - schedule_timeout_interruptible(HZ);
>>> - goto again;
>>> - }
>>> -}
>>> -EXPORT_SYMBOL(tcp_v4_kill_ve_sockets);
>>> -#endif
>>>
>
>
> _______________________________________________
> Devel mailing list
> Devel at openvz.org
> https://lists.openvz.org/mailman/listinfo/devel
>
>
More information about the Devel
mailing list