[Devel] [PATCH 7/7] packet: Pre-account maximum socket buffer into cg memory

Pavel Emelyanov xemul at parallels.com
Fri May 29 07:19:15 PDT 2015


Packet sockets have incoming queue of packets that is only limited
with per-socket wmem buffer. Strictly speaking we should sum up
all the queues and charge them into kmem once new packet arrives,
but this will result in huge patch. Since there's typically quite
a few of packet sockets in container (tcpdump) we can just forward
charge the maximum socket rmem size into cg memory upon socket
creation.

Signed-off-by: Pavel Emelyanov <xemul at parallels.com>

---
 include/linux/memcontrol.h |  2 ++
 mm/memcontrol.c            | 20 +++++++++++++
 net/packet/af_packet.c     | 74 +++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 95 insertions(+), 1 deletion(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index d250663..9daaec2 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -412,6 +412,8 @@ struct sock;
 #if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
 void sock_update_memcg(struct sock *sk);
 void sock_release_memcg(struct sock *sk);
+struct mem_cgroup *sock_get_current_memcg(void);
+void sock_put_memcg(struct mem_cgroup *);
 #else
 static inline void sock_update_memcg(struct sock *sk)
 {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b498237..0e67616 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -554,6 +554,20 @@ void sock_update_memcg(struct sock *sk)
 }
 EXPORT_SYMBOL(sock_update_memcg);
 
+struct mem_cgroup *sock_get_current_memcg(void)
+{
+	struct mem_cgroup *cg;
+
+	rcu_read_lock();
+	cg = mem_cgroup_from_task(current);
+	if (mem_cgroup_is_root(cg) || !css_tryget(&cg->css))
+		cg = NULL;
+	rcu_read_unlock();
+
+	return cg;
+}
+EXPORT_SYMBOL(sock_get_current_memcg);
+
 void sock_release_memcg(struct sock *sk)
 {
 	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
@@ -564,6 +578,12 @@ void sock_release_memcg(struct sock *sk)
 	}
 }
 
+void sock_put_memcg(struct mem_cgroup *cg)
+{
+	css_put(&cg->css);
+}
+EXPORT_SYMBOL(sock_put_memcg);
+
 struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg)
 {
 	if (!memcg || mem_cgroup_is_root(memcg))
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index e8b5a0d..5a13dfa 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2420,6 +2420,67 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
 		return packet_snd(sock, msg, len);
 }
 
+#ifdef CONFIG_MEMCG_KMEM
+struct packet_sk_charge {
+	struct mem_cgroup	*memcg;
+	unsigned long		amt;
+};
+
+static struct cg_proto *packet_sk_charge(void)
+{
+	struct packet_sk_charge *psc;
+
+	if (!mem_cgroup_sockets_enabled)
+		return NULL;
+
+	psc = kmalloc(sizeof(*psc), GFP_KERNEL);
+	if (!psc)
+		return ERR_PTR(-ENOMEM);
+
+	psc->memcg = sock_get_current_memcg();
+	if (psc->memcg) {
+		/*
+		 * Forcedly charge the maximum amount of data this socket
+		 * may have. It's typically not huge and packet sockets are
+		 * rare guests in containers, so we don't disturb the memory
+		 * consumption much.
+		 */
+		psc->amt = sysctl_rmem_max;
+		memcg_charge_kmem_bytes(psc->memcg, psc->amt);
+	} else {
+		kfree(psc);
+		psc = NULL;
+	}
+	rcu_read_unlock();
+
+	/*
+	 * The sk->sk_cgrp is not used for packet sockets,
+	 * so we'll just put the smaller structure into it.
+	 */
+	return (struct cg_proto *)psc;
+}
+
+static void packet_sk_uncharge(struct cg_proto *cg)
+{
+	struct packet_sk_charge *psc = (struct packet_sk_charge *)cg;
+
+	if (psc) {
+		memcg_uncharge_kmem_bytes(psc->memcg, psc->amt);
+		sock_put_memcg(psc->memcg);
+		kfree(psc);
+	}
+}
+#else
+static struct cg_proto *packet_sk_charge(void)
+{
+	return NULL;
+}
+
+static void packet_sk_uncharge(struct sock *sk)
+{
+}
+#endif
+
 /*
  *	Close a PACKET socket. This is fairly simple. We immediately go
  *	to 'closed' state and remove our protocol entry in the device list.
@@ -2469,6 +2530,8 @@ static int packet_release(struct socket *sock)
 	}
 
 	fanout_release(sk);
+	packet_sk_uncharge(sk->sk_cgrp);
+	sk->sk_cgrp = NULL;
 
 	synchronize_net();
 	/*
@@ -2603,6 +2666,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
 {
 	struct sock *sk;
 	struct packet_sock *po;
+	struct cg_proto *cg;
 	__be16 proto = (__force __be16)protocol; /* weird, but documented */
 	int err;
 
@@ -2613,11 +2677,14 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
 		return -ESOCKTNOSUPPORT;
 
 	sock->state = SS_UNCONNECTED;
+	cg = packet_sk_charge();
+	if (IS_ERR(cg))
+		goto out;
 
 	err = -ENOBUFS;
 	sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
 	if (sk == NULL)
-		goto out;
+		goto outu;
 
 	sock->ops = &packet_ops;
 	if (sock->type == SOCK_PACKET)
@@ -2660,7 +2727,12 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
 	sock_prot_inuse_add(net, &packet_proto, 1);
 	preempt_enable();
 
+	sk->sk_cgrp = cg;
+
 	return 0;
+
+outu:
+	packet_sk_uncharge(cg);
 out:
 	return err;
 }
-- 
1.8.3.1





More information about the Devel mailing list