[Devel] [PATCH RHEL7 COMMIT] ms/unix: Show number of pending scm files of receive queue in fdinfo

Konstantin Khorenko khorenko at virtuozzo.com
Thu Dec 26 16:58:46 MSK 2019


The commit is pushed to "branch-rh7-3.10.0-1062.7.1.vz7.130.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1062.7.1.vz7.130.5
------>
commit 56a318575ebf8cef4b677f9e20b9590848fcff85
Author: Kirill Tkhai <ktkhai at virtuozzo.com>
Date:   Thu Dec 26 16:58:44 2019 +0300

    ms/unix: Show number of pending scm files of receive queue in fdinfo
    
    Unix sockets are like a black box. You never know what is stored there:
    there may be a file descriptor holding a mount or a block device,
    or there may be whole universes with namespaces, sockets with receive
    queues full of sockets etc.
    
    The patch adds a little debug information: it accounts the number of files
    (not recursively) that are in the receive queue of a unix socket. Sometimes
    this is useful to determine that a socket should be investigated, or which
    task should be killed to put the reference counter on a resource.
    
    Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
    Signed-off-by: David S. Miller <davem at davemloft.net>
    
    =====================
    Patchset description:
    
    This patchset exposes the information whether a socket
    contains SCM-passed fds or not:
    
    $cat /proc/[pid]/fdinfo/[unix_sk_fd] | grep scm_fds
    scm_fds: 1
    
    This will sometimes be useful to check who holds a reference
    counter on a resource and prevents its destruction. Say, someone
    opens ploop and places its fd into a unix socket.
    
    Kirill Tkhai (2):
          net: Allow to show socket-specific information in /proc/[pid]/fdinfo/[fd]
          unix: Show number of pending scm files of receive queue in fdinfo
---
 include/net/af_unix.h |  5 +++++
 net/unix/af_unix.c    | 54 +++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 55 insertions(+), 4 deletions(-)

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 4f975b0e6fae5..33e5b5b55f4a2 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -38,6 +38,10 @@ struct unix_skb_parms {
 	u32			consumed;
 };
 
+struct scm_stat {
+	u32 nr_fds;
+};
+
 #define UNIXCB(skb) 	(*(struct unix_skb_parms *)&((skb)->cb))
 #define UNIXSID(skb)	(&UNIXCB((skb)).secid)
 
@@ -64,6 +68,7 @@ struct unix_sock {
 #define UNIX_GC_MAYBE_CYCLE	1
 	struct socket_wq	peer_wq;
 	wait_queue_t		peer_wake;
+	struct scm_stat		scm_stat;
 };
 #define unix_sk(__sk) ((struct unix_sock *)__sk)
 
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 5c77a03350397..8223c5bf9de1f 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -664,6 +664,16 @@ static int unix_set_peek_off(struct sock *sk, int val)
 	return 0;
 }
 
+static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+	struct unix_sock *u;
+
+	if (sk) {
+		u = unix_sk(sock->sk);
+		seq_printf(m, "scm_fds: %u\n", READ_ONCE(u->scm_stat.nr_fds));
+	}
+}
 
 static const struct proto_ops unix_stream_ops = {
 	.family =	PF_UNIX,
@@ -686,6 +696,7 @@ static const struct proto_ops unix_stream_ops = {
 	.sendpage =	unix_stream_sendpage,
 	.splice_read =	unix_stream_splice_read,
 	.set_peek_off =	unix_set_peek_off,
+	.show_fdinfo =	unix_show_fdinfo,
 };
 
 static const struct proto_ops unix_dgram_ops = {
@@ -708,6 +719,7 @@ static const struct proto_ops unix_dgram_ops = {
 	.mmap =		sock_no_mmap,
 	.sendpage =	sock_no_sendpage,
 	.set_peek_off =	unix_set_peek_off,
+	.show_fdinfo =	unix_show_fdinfo,
 };
 
 static const struct proto_ops unix_seqpacket_ops = {
@@ -730,6 +742,7 @@ static const struct proto_ops unix_seqpacket_ops = {
 	.mmap =		sock_no_mmap,
 	.sendpage =	sock_no_sendpage,
 	.set_peek_off =	unix_set_peek_off,
+	.show_fdinfo =	unix_show_fdinfo,
 };
 
 static struct proto unix_proto = {
@@ -777,6 +790,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock)
 	mutex_init(&u->bindlock); /* single task binding lock */
 	init_waitqueue_head(&u->peer_wait);
 	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
+	memset(&u->scm_stat, 0, sizeof(struct scm_stat));
 	unix_insert_socket(unix_sockets_unbound(sk), sk);
 out:
 	if (sk == NULL)
@@ -1619,6 +1633,28 @@ static bool unix_skb_scm_eq(struct sk_buff *skb,
 	       gid_eq(u->gid, scm->creds.gid);
 }
 
+static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
+{
+	struct scm_fp_list *fp = UNIXCB(skb).fp;
+	struct unix_sock *u = unix_sk(sk);
+
+	lockdep_assert_held(&sk->sk_receive_queue.lock);
+
+	if (unlikely(fp && fp->count))
+		u->scm_stat.nr_fds += fp->count;
+}
+
+static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
+{
+	struct scm_fp_list *fp = UNIXCB(skb).fp;
+	struct unix_sock *u = unix_sk(sk);
+
+	lockdep_assert_held(&sk->sk_receive_queue.lock);
+
+	if (unlikely(fp && fp->count))
+		u->scm_stat.nr_fds -= fp->count;
+}
+
 /*
  *	Send AF_UNIX data.
  */
@@ -1810,7 +1846,10 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	if (sock_flag(other, SOCK_RCVTSTAMP))
 		__net_timestamp(skb);
 	maybe_add_creds(skb, sock, other);
-	skb_queue_tail(&other->sk_receive_queue, skb);
+	spin_lock(&other->sk_receive_queue.lock);
+	scm_stat_add(other, skb);
+	__skb_queue_tail(&other->sk_receive_queue, skb);
+	spin_unlock(&other->sk_receive_queue.lock);
 	if (max_level > unix_sk(other)->recursion_level)
 		unix_sk(other)->recursion_level = max_level;
 	unix_state_unlock(other);
@@ -1920,7 +1959,10 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 			goto pipe_err_free;
 
 		maybe_add_creds(skb, sock, other);
-		skb_queue_tail(&other->sk_receive_queue, skb);
+		spin_lock(&other->sk_receive_queue.lock);
+		scm_stat_add(other, skb);
+		__skb_queue_tail(&other->sk_receive_queue, skb);
+		spin_unlock(&other->sk_receive_queue.lock);
 		if (max_level > unix_sk(other)->recursion_level)
 			unix_sk(other)->recursion_level = max_level;
 		unix_state_unlock(other);
@@ -2131,7 +2173,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 	skip = sk_peek_offset(sk, flags);
 
-	skb = __skb_recv_datagram(sk, flags, NULL, &peeked, &skip, &err);
+	skb = __skb_recv_datagram(sk, flags, scm_stat_del, &peeked, &skip, &err);
 	if (!skb) {
 		unix_state_lock(sk);
 		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
@@ -2424,8 +2466,12 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
 
 			sk_peek_offset_bwd(sk, chunk);
 
-			if (UNIXCB(skb).fp)
+			if (UNIXCB(skb).fp) {
+				spin_lock(&sk->sk_receive_queue.lock);
+				scm_stat_del(sk, skb);
+				spin_unlock(&sk->sk_receive_queue.lock);
 				unix_detach_fds(siocb->scm, skb);
+			}
 
 			if (unix_skb_len(skb))
 				break;



More information about the Devel mailing list