[Devel] [PATCH RHEL7 COMMIT] tcp: Charge socket buffers into cg memory (v3)

Konstantin Khorenko khorenko at virtuozzo.com
Fri Jun 5 12:55:35 PDT 2015


The commit is pushed to "branch-rh7-3.10.0-123.1.2-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-123.1.2.vz7.5.10
------>
commit b89a0b604eaf92c97ea922a88ccd758bb73fe99b
Author: Pavel Emelyanov <xemul at parallels.com>
Date:   Fri Jun 5 23:55:35 2015 +0400

    tcp: Charge socket buffers into cg memory (v3)
    
    TCP code already has internal memory management for both incoming
    and outgoing traffic. Outgoing packets are already auto-accounted
    into kmem (and into cg memory), while incoming traffic is not
    accounted into kmem. This management is already per-cg thanks to
    Glauber's work some time ago.
    
    So the TCP mm fix is: take the existing TCP mem accounting code
    and add/subtract those numbers to/from cg memory. To avoid double
    accounting (via TCP hooks and via slub/buddy), sk_allocation has
    the __GFP_NOACCOUNT flag set.
    
    changes:
    * v2: renamed memcg_(unr)charge_kmem routines
    * v3: charge socket mem into kmem too
    
    https://jira.sw.ru/browse/PSBM-33584
    
    khorenko@ changes:
    - make memcg_uncharge_kmem() global, as it is used outside of memcontrol.c
    
    Signed-off-by: Pavel Emelyanov <xemul at parallels.com>
---
 include/linux/memcontrol.h |  3 +++
 include/net/sock.h         |  2 ++
 mm/memcontrol.c            | 12 +++++++++++-
 net/ipv4/tcp.c             |  5 +++++
 net/ipv4/tcp_input.c       |  2 +-
 net/ipv4/tcp_output.c      |  6 +++---
 6 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index d434d6c..2169fcf 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -487,6 +487,9 @@ void __memcg_kmem_put_cache(struct kmem_cache *cachep);
 
 struct mem_cgroup *__mem_cgroup_from_kmem(void *ptr);
 
+void memcg_charge_kmem_nofail(struct mem_cgroup *memcg, u64 size);
+void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size);
+
 /**
  * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
  * @gfp: the gfp allocation flags.
diff --git a/include/net/sock.h b/include/net/sock.h
index 0688f4e..d6b7ba2 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1195,6 +1195,7 @@ static inline void memcg_memory_allocated_add(struct cg_proto *prot,
 	struct res_counter *fail;
 	int ret;
 
+	memcg_charge_kmem_nofail(prot->memcg, amt << PAGE_SHIFT);
 	ret = res_counter_charge_nofail(prot->memory_allocated,
 					amt << PAGE_SHIFT, &fail);
 	if (ret < 0)
@@ -1205,6 +1206,7 @@ static inline void memcg_memory_allocated_sub(struct cg_proto *prot,
 					      unsigned long amt)
 {
 	res_counter_uncharge(prot->memory_allocated, amt << PAGE_SHIFT);
+	memcg_uncharge_kmem(prot->memcg, amt << PAGE_SHIFT);
 }
 
 static inline u64 memcg_memory_allocated_read(struct cg_proto *prot)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9292893..99eb092 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3214,7 +3214,17 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
 	return ret;
 }
 
-static void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size)
+void memcg_charge_kmem_nofail(struct mem_cgroup *memcg, u64 size)
+{
+	struct res_counter *fail_res;
+
+	res_counter_charge_nofail(&memcg->kmem, size, &fail_res);
+	res_counter_charge_nofail(&memcg->res, size, &fail_res);
+	if (do_swap_account)
+		res_counter_uncharge(&memcg->memsw, size);
+}
+
+void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size)
 {
 	res_counter_uncharge(&memcg->res, size);
 	if (do_swap_account)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e641406..8cbf0f5 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -413,6 +413,11 @@ void tcp_init_sock(struct sock *sk)
 
 	sk->sk_write_space = sk_stream_write_space;
 	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
+	/*
+	 * TCP memory is accounted via cg_proto and there's
+	 * no need in additional kmem charging via slub
+	 */
+	sk->sk_allocation |= __GFP_NOACCOUNT;
 
 	icsk->icsk_sync_mss = tcp_sync_mss;
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e0a231e..fa94a5a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4541,7 +4541,7 @@ restart:
 			return;
 		if (end - start < copy)
 			copy = end - start;
-		nskb = alloc_skb(copy + header, GFP_ATOMIC);
+		nskb = alloc_skb(copy + header, GFP_ATOMIC|__GFP_NOACCOUNT);
 		if (!nskb)
 			return;
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 13d440b..a217305 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1061,7 +1061,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
 		return -ENOMEM;
 
 	/* Get a new skb... force flag on. */
-	buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC);
+	buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC|__GFP_NOACCOUNT);
 	if (buff == NULL)
 		return -ENOMEM; /* We'll just try again later. */
 
@@ -1548,7 +1548,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 	if (skb->len != skb->data_len)
 		return tcp_fragment(sk, skb, len, mss_now);
 
-	buff = sk_stream_alloc_skb(sk, 0, gfp);
+	buff = sk_stream_alloc_skb(sk, 0, gfp|__GFP_NOACCOUNT);
 	if (unlikely(buff == NULL))
 		return -ENOMEM;
 
@@ -1718,7 +1718,7 @@ static int tcp_mtu_probe(struct sock *sk)
 	}
 
 	/* We're allowed to probe.  Build it now. */
-	if ((nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC)) == NULL)
+	if ((nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC|__GFP_NOACCOUNT)) == NULL)
 		return -1;
 	sk->sk_wmem_queued += nskb->truesize;
 	sk_mem_charge(sk, nskb->truesize);



More information about the Devel mailing list