[Devel] [PATCH v4] nfs: protect callback execution against per-net callback thread shutdown

Wed Nov 8 17:25:11 MSK 2017

On 08.11.2017 17:07, Stanislav Kinsburskiy wrote:
> From: Stanislav Kinsburskiy <skinsbursky at parallels.com>
> 
> Here is the race:
> 
> CPU #0				CPU#1
> 
> cleanup_mnt			nfs41_callback_svc (get xprt from the list)
> nfs_callback_down		...
> ...				...
> svc_close_net			...
> ...				...
> svc_xprt_free			...
> svc_bc_sock_free		bc_svc_process
> kfree(xprt)			svc_process_common
> 				rqstp->rq_xprt->xpt_ops (use after free)
> 
> The problem is that per-net SUNRPC transports shutdown is done regardless
> current callback execution. This is a race leading to transport use-after-free
> in callback handler.
> This patch fixes it in stright-forward way. I.e. it protects callback
> execution with the same mutex used for per-net data creation and destruction.
> Hopefully, it won't slow down NFS client significantly.
> 
> https://jira.sw.ru/browse/PSBM-75751
> 
> v4: use another mutex to protect callback execution agains per-net transports
> shutdown.
> This guarantees, that transports won't be destroyed by shutdown callback while
> execution is in progress and vice versa.
> 
> v3: Fix mutex deadlock, when shutdown callback waits for thread to exit (with
> mutex taken), while thread wait for the mutex to take.
> The idea is to simply check if thread has to exit, if mutex lock has failed.
> This is a busy loop, but it shouldn't happend often and for long.
> 
> Signed-off-by: Stanislav Kinsburskiy <skinsbursky at parallels.com>
> ---
>  fs/nfs/callback.c |   16 ++++++++++++++++
>  1 file changed, 16 insertions(+)
> 
> diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
> index 0beb275..4d7979b 100644
> --- a/fs/nfs/callback.c
> +++ b/fs/nfs/callback.c
> @@ -99,6 +99,8 @@ nfs4_callback_up(struct svc_serv *serv)
>  }
>  
>  #if defined(CONFIG_NFS_V4_1)
> +static DEFINE_MUTEX(nfs41_callback_mutex);
> +
>  /*
>   * The callback service for NFSv4.1 callbacks
>   */
> @@ -117,6 +119,12 @@ nfs41_callback_svc(void *vrqstp)
>  		if (try_to_freeze())
>  			continue;
>  
> +		mutex_lock(&nfs41_callback_mutex);
> +		if (kthread_should_stop()) {
> +			mutex_unlock(&nfs41_callback_mutex);
> +			return 0;
> +		}
> +
>  		prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE);
>  		spin_lock_bh(&serv->sv_cb_lock);
>  		if (!list_empty(&serv->sv_cb_list)) {
> @@ -129,8 +137,10 @@ nfs41_callback_svc(void *vrqstp)
>  			error = bc_svc_process(serv, req, rqstp);
>  			dprintk("bc_svc_process() returned w/ error code= %d\n",
>  				error);
> +			mutex_unlock(&nfs41_callback_mutex);
>  		} else {
>  			spin_unlock_bh(&serv->sv_cb_lock);
> +			mutex_unlock(&nfs41_callback_mutex);
>  			schedule();
>  			finish_wait(&serv->sv_cb_waitq, &wq);
>  		}
> @@ -377,7 +387,13 @@ void nfs_callback_down(int minorversion, struct net *net)
>  	struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion];
>  
>  	mutex_lock(&nfs_callback_mutex);
> +#if defined(CONFIG_NFS_V4_1)
> +	mutex_lock(&nfs41_callback_mutex);
> +	nfs_callback_down_net(minorversion, cb_info->serv, net);
> +	mutex_unlock(&nfs41_callback_mutex);

It seems we also should flush queue here.

> +#else
>  	nfs_callback_down_net(minorversion, cb_info->serv, net);
> +#endif
>  	cb_info->users--;
>  	if (cb_info->users == 0 && cb_info->task != NULL) {
>  		kthread_stop(cb_info->task);
>