[Devel] [PATCH RHEL7 COMMIT] net/packet: Pre-account maximum socket buffer into cg memory (v3)

Konstantin Khorenko khorenko at virtuozzo.com
Fri Jun 5 12:56:12 PDT 2015


The commit is pushed to "branch-rh7-3.10.0-123.1.2-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-123.1.2.vz7.5.10
------>
commit b3b3c0a723404801036b9e936f19d0667b105e8c
Author: Pavel Emelyanov <xemul at parallels.com>
Date:   Fri Jun 5 23:56:12 2015 +0400

    net/packet: Pre-account maximum socket buffer into cg memory (v3)
    
    Packet sockets have an incoming queue of packets that is limited only
    by the per-socket rmem buffer. Strictly speaking, we should sum up
    all the queues and charge them to kmem whenever a new packet arrives,
    but that would result in a huge patch. Since there are typically very
    few packet sockets in a container (e.g. tcpdump), we can simply
    charge the maximum socket rmem size to the cgroup's memory up front,
    at socket creation.
    
    changes:
    * v2: renamed sock_get_current_memcg
    * removed sock_put_memcg
    * fixed stub prototype
    * removed lost rcu_read_unlock
    * v3: kmem charge/uncharge routines name changed in patch #2
    
    https://jira.sw.ru/browse/PSBM-33584
    
    Signed-off-by: Pavel Emelyanov <xemul at parallels.com>
---
 include/linux/memcontrol.h |  1 +
 mm/memcontrol.c            | 14 +++++++++
 net/packet/af_packet.c     | 73 +++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 87 insertions(+), 1 deletion(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 2169fcf..2e2bb2a 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -82,6 +82,7 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg)
 extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page);
 extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
 extern struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm);
+extern struct mem_cgroup *try_get_mem_cgroup_from_current(void);
 
 extern struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg);
 extern struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1d3a733..f570e7c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -554,6 +554,20 @@ void sock_update_memcg(struct sock *sk)
 }
 EXPORT_SYMBOL(sock_update_memcg);
 
+struct mem_cgroup *try_get_mem_cgroup_from_current(void)
+{
+	struct mem_cgroup *cg;
+
+	rcu_read_lock();	/* the lookup and css_tryget() below run under RCU */
+	cg = mem_cgroup_from_task(current);
+	if (mem_cgroup_is_root(cg) || !css_tryget(&cg->css))
+		cg = NULL;	/* root memcg, or css_tryget() failed */
+	rcu_read_unlock();
+
+	return cg;	/* NULL, or a memcg on which css_tryget() succeeded */
+}
+EXPORT_SYMBOL(try_get_mem_cgroup_from_current);
+
 void sock_release_memcg(struct sock *sk)
 {
 	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index e8b5a0d..b9f09eb 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2420,6 +2420,66 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
 		return packet_snd(sock, msg, len);
 }
 
+#ifdef CONFIG_MEMCG_KMEM
+struct packet_sk_charge {	/* per-socket record of the kmem pre-charge */
+	struct mem_cgroup	*memcg;	/* memcg the charge was made against */
+	unsigned long		amt;	/* amount charged: sysctl_rmem_max at creation */
+};
+
+static struct cg_proto *packet_sk_charge(void)
+{
+	struct packet_sk_charge *psc;
+
+	if (!mem_cgroup_sockets_enabled)
+		return NULL;	/* nothing charged */
+
+	psc = kmalloc(sizeof(*psc), GFP_KERNEL);
+	if (!psc)
+		return ERR_PTR(-ENOMEM);	/* the only error this returns */
+
+	psc->memcg = try_get_mem_cgroup_from_current();
+	if (psc->memcg) {
+		/*
+		 * Forcibly charge the maximum amount of data this socket
+		 * may hold. It's typically not huge and packet sockets are
+		 * rare guests in containers, so this doesn't disturb memory
+		 * consumption much.
+		 */
+		psc->amt = sysctl_rmem_max;	/* remembered for the later uncharge */
+		memcg_charge_kmem_nofail(psc->memcg, psc->amt);
+	} else {
+		kfree(psc);
+		psc = NULL;	/* no memcg to charge: return NULL, nothing charged */
+	}
+
+	/*
+	 * sk->sk_cgrp is not otherwise used for packet sockets, so the
+	 * smaller packet_sk_charge is stored there, cast to cg_proto.
+	 */
+	return (struct cg_proto *)psc;
+}
+
+static void packet_sk_uncharge(struct cg_proto *cg)
+{
+	struct packet_sk_charge *psc = (struct packet_sk_charge *)cg;	/* undo the cast done in packet_sk_charge() */
+
+	if (psc) {	/* NULL means nothing was charged */
+		memcg_uncharge_kmem(psc->memcg, psc->amt);	/* same amount that was charged */
+		css_put(mem_cgroup_css(psc->memcg));	/* pairs with the css_tryget() at charge time */
+		kfree(psc);
+	}
+}
+#else
+static struct cg_proto *packet_sk_charge(void)
+{
+	return NULL;	/* !CONFIG_MEMCG_KMEM stub: nothing is charged */
+}
+
+static void packet_sk_uncharge(struct cg_proto *cg)
+{	/* !CONFIG_MEMCG_KMEM stub: nothing was charged, nothing to undo */
+}
+#endif
+
 /*
  *	Close a PACKET socket. This is fairly simple. We immediately go
  *	to 'closed' state and remove our protocol entry in the device list.
@@ -2469,6 +2529,8 @@ static int packet_release(struct socket *sock)
 	}
 
 	fanout_release(sk);
+	packet_sk_uncharge(sk->sk_cgrp);
+	sk->sk_cgrp = NULL;
 
 	synchronize_net();
 	/*
@@ -2603,6 +2665,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
 {
 	struct sock *sk;
 	struct packet_sock *po;
+	struct cg_proto *cg;
 	__be16 proto = (__force __be16)protocol; /* weird, but documented */
 	int err;
 
@@ -2613,11 +2676,17 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
 		return -ESOCKTNOSUPPORT;
 
 	sock->state = SS_UNCONNECTED;
+	cg = packet_sk_charge();
+	if (IS_ERR(cg)) {
+		/* err is still unset here, so hand back the real error */
+		err = PTR_ERR(cg);
+		goto out;
+	}
 
 	err = -ENOBUFS;
 	sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
 	if (sk == NULL)
-		goto out;
+		goto outu;
 
 	sock->ops = &packet_ops;
 	if (sock->type == SOCK_PACKET)
@@ -2660,7 +2726,12 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
 	sock_prot_inuse_add(net, &packet_proto, 1);
 	preempt_enable();
 
+	sk->sk_cgrp = cg;
+
 	return 0;
+
+outu:
+	packet_sk_uncharge(cg);
 out:
 	return err;
 }



More information about the Devel mailing list