[Devel] [PATCH 7/7] packet: Pre-account maximum socket buffer into cg memory (v3)

Pavel Emelyanov xemul at parallels.com
Thu Jun 4 05:00:52 PDT 2015


Packet sockets have an incoming queue of packets that is limited only
by the per-socket rmem buffer. Strictly speaking we should sum up
all the queues and charge them into kmem once a new packet arrives,
but this would result in a huge patch. Since there are typically only
a few packet sockets in a container (e.g. tcpdump), we can just
pre-charge the maximum socket rmem size into cg memory upon socket
creation.

changes:
* v2: renamed sock_get_current_memcg
* removed sock_put_memcg
* fixed stub prototype
* removed lost rcu_read_unlock
* v3: kmem charge/uncharge routines name changed in patch #2

Signed-off-by: Pavel Emelyanov <xemul at parallels.com>

---
 include/linux/memcontrol.h |  1 +
 mm/memcontrol.c            | 14 +++++++++
 net/packet/af_packet.c     | 73 +++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 87 insertions(+), 1 deletion(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 9481484..d4baa29 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -82,6 +82,7 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg)
 extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page);
 extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
 extern struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm);
+extern struct mem_cgroup *try_get_mem_cgroup_from_current(void);
 
 extern struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg);
 extern struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index fa6b2e6..21e0649 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -534,6 +534,20 @@ void sock_update_memcg(struct sock *sk)
 }
 EXPORT_SYMBOL(sock_update_memcg);
 
+/* Pin current's memcg; NULL if it is the root or css_tryget() fails. */
+struct mem_cgroup *try_get_mem_cgroup_from_current(void)
+{
+	struct mem_cgroup *task_cg, *pinned = NULL;
+
+	rcu_read_lock();
+	task_cg = mem_cgroup_from_task(current);
+	if (!mem_cgroup_is_root(task_cg) && css_tryget(&task_cg->css))
+		pinned = task_cg;
+	rcu_read_unlock();
+	return pinned;
+}
+EXPORT_SYMBOL(try_get_mem_cgroup_from_current);
+
 void sock_release_memcg(struct sock *sk)
 {
 	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index e8b5a0d..b9f09eb 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2420,6 +2420,66 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
 		return packet_snd(sock, msg, len);
 }
 
+#ifdef CONFIG_MEMCG_KMEM
+struct packet_sk_charge {
+	struct mem_cgroup	*memcg;	/* cgroup the charge was made to */
+	unsigned long		amt;	/* amount charged, from sysctl_rmem_max */
+};
+
+/*
+ * Pre-charge sysctl_rmem_max to current's non-root memcg. Returns the charge
+ * record (cast to cg_proto), NULL if nothing was charged or ERR_PTR(-ENOMEM).
+ */
+static struct cg_proto *packet_sk_charge(void)
+{
+	struct packet_sk_charge *psc;
+	struct mem_cgroup *memcg;
+
+	if (!mem_cgroup_sockets_enabled)
+		return NULL;
+
+	/* Resolve the memcg first: with none to charge, allocate nothing. */
+	memcg = try_get_mem_cgroup_from_current();
+	if (!memcg)
+		return NULL;
+
+	psc = kmalloc(sizeof(*psc), GFP_KERNEL);
+	if (!psc) {
+		css_put(mem_cgroup_css(memcg));	/* drop the ref taken above */
+		return ERR_PTR(-ENOMEM);
+	}
+	/*
+	 * Forcibly charge the maximum this socket may hold: it is typically
+	 * not huge and packet sockets are rare guests in containers.
+	 */
+	psc->memcg = memcg;
+	psc->amt = sysctl_rmem_max;
+	memcg_charge_kmem_nofail(memcg, psc->amt);
+	/* sk->sk_cgrp is not used for packet sockets, so park psc there. */
+	return (struct cg_proto *)psc;
+}
+
+static void packet_sk_uncharge(struct cg_proto *cg)
+{
+	struct packet_sk_charge *charge = (struct packet_sk_charge *)cg;
+
+	if (!charge)	/* nothing was charged at creation */
+		return;
+	memcg_uncharge_kmem(charge->memcg, charge->amt);
+	css_put(mem_cgroup_css(charge->memcg));	/* ref held since charge time */
+	kfree(charge);
+}
+#else
+static struct cg_proto *packet_sk_charge(void)
+{
+	return NULL;	/* CONFIG_MEMCG_KMEM is off: nothing is charged */
+}
+/* Nothing is charged when CONFIG_MEMCG_KMEM is off, so nothing to undo. */
+static void packet_sk_uncharge(struct cg_proto *cg)
+{
+}
+#endif
+
 /*
  *	Close a PACKET socket. This is fairly simple. We immediately go
  *	to 'closed' state and remove our protocol entry in the device list.
@@ -2469,6 +2529,8 @@ static int packet_release(struct socket *sock)
 	}
 
 	fanout_release(sk);
+	packet_sk_uncharge(sk->sk_cgrp);
+	sk->sk_cgrp = NULL;
 
 	synchronize_net();
 	/*
@@ -2603,6 +2665,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
 {
 	struct sock *sk;
 	struct packet_sock *po;
+	struct cg_proto *cg;
 	__be16 proto = (__force __be16)protocol; /* weird, but documented */
 	int err;
 
@@ -2613,11 +2676,14 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
 		return -ESOCKTNOSUPPORT;
 
 	sock->state = SS_UNCONNECTED;
+	cg = packet_sk_charge();
+	if (IS_ERR(cg))
+		return PTR_ERR(cg);	/* err is not set yet; nothing to undo */
 
 	err = -ENOBUFS;
 	sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
 	if (sk == NULL)
-		goto out;
+		goto outu;
 
 	sock->ops = &packet_ops;
 	if (sock->type == SOCK_PACKET)
@@ -2660,7 +2726,12 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
 	sock_prot_inuse_add(net, &packet_proto, 1);
 	preempt_enable();
 
+	sk->sk_cgrp = cg;
+
 	return 0;
+
+outu:
+	packet_sk_uncharge(cg);
 out:
 	return err;
 }
-- 
1.8.3.1





More information about the Devel mailing list