[Devel] [PATCH 2/7] tcp: Charge socket buffers into cg memory (v2)
Pavel Emelyanov
xemul at parallels.com
Wed Jun 3 06:34:21 PDT 2015
The TCP code already has internal memory management for both incoming and
outgoing traffic. Outgoing packets are additionally auto-accounted into
kmem (and thus into cg memory), while incoming traffic is not accounted
into kmem at all. This TCP-level management is already per-cg, thanks to
Glauber's work some time ago.

So the TCP mm fix is: take the existing TCP memory accounting code and
add/sub those numbers into cg memory as well. To avoid double accounting
(once via the TCP hooks and again via slub/buddy), sk_allocation is set
to include __GFP_NOACCOUNT.
v2 changes:
* renamed the memcg_(un)charge_kmem routines
Signed-off-by: Pavel Emelyanov <xemul at parallels.com>
---
 include/linux/memcontrol.h |  2 ++
 include/net/sock.h         |  2 ++
 mm/memcontrol.c            | 25 +++++++++++++++++++++++--
 net/ipv4/tcp.c             |  5 +++++
 net/ipv4/tcp_input.c       |  2 +-
 net/ipv4/tcp_output.c      |  6 +++---
 6 files changed, 36 insertions(+), 6 deletions(-)
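
The core of the change is the nofail charge/uncharge pairing on the
socket-buffer path: cg memory is always charged, even past its limit
(network paths cannot afford to fail here), and uncharged by the same
amount when the buffers are freed. To illustrate the semantics, here is
a toy userspace model of that pairing; the simplified res_counter below
is a stand-in for the kernel's struct res_counter, not actual kernel
code:

/* Toy model of the nofail charge/uncharge pairing -- userspace
 * illustration only; res_counter here is a simplified stand-in
 * for the kernel's struct res_counter.
 */
#include <stdio.h>

struct res_counter {
	long usage;	/* bytes currently charged */
	long limit;	/* configured limit */
};

/* Always succeeds; usage may temporarily exceed the limit,
 * which is what makes it usable on atomic network paths.
 */
static int res_counter_charge_nofail(struct res_counter *c, long size)
{
	c->usage += size;
	return c->usage > c->limit ? -1 : 0;	/* report, don't fail */
}

static void res_counter_uncharge(struct res_counter *c, long size)
{
	c->usage -= size;
}

int main(void)
{
	struct res_counter mem = { .usage = 0, .limit = 4096 };

	/* memcg_memory_allocated_add() side: charge cg memory */
	int over = res_counter_charge_nofail(&mem, 8192);
	printf("charged: usage=%ld over_limit=%d\n", mem.usage, over);

	/* memcg_memory_allocated_sub() side: uncharge the same amount */
	res_counter_uncharge(&mem, 8192);
	printf("uncharged: usage=%ld\n", mem.usage);
	return 0;
}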
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 5507be5..7077044 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -470,6 +470,8 @@ struct mem_cgroup *__mem_cgroup_from_kmem(void *ptr);
 
 int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size);
 void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size);
+void memcg_charge_kmem_nofail(struct mem_cgroup *memcg, u64 size);
+void __memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size);
 
 /**
  * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
diff --git a/include/net/sock.h b/include/net/sock.h
index 0688f4e..2eb414f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1195,6 +1195,7 @@ static inline void memcg_memory_allocated_add(struct cg_proto *prot,
 	struct res_counter *fail;
 	int ret;
 
+	memcg_charge_kmem_nofail(prot->memcg, amt << PAGE_SHIFT);
 	ret = res_counter_charge_nofail(prot->memory_allocated,
 					amt << PAGE_SHIFT, &fail);
 	if (ret < 0)
@@ -1205,6 +1206,7 @@ static inline void memcg_memory_allocated_sub(struct cg_proto *prot,
 					      unsigned long amt)
 {
 	res_counter_uncharge(prot->memory_allocated, amt << PAGE_SHIFT);
+	__memcg_uncharge_kmem(prot->memcg, amt << PAGE_SHIFT);
 }
 
 static inline u64 memcg_memory_allocated_read(struct cg_proto *prot)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9dda309..d38868c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -498,7 +498,6 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
 
 /* Writing them here to avoid exposing memcg's inner layout */
 #if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
-
 void sock_update_memcg(struct sock *sk)
 {
 	if (mem_cgroup_sockets_enabled) {
@@ -3039,11 +3038,33 @@ int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
 	return ret;
 }
 
-void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size)
+void memcg_charge_kmem_nofail(struct mem_cgroup *memcg, u64 size)
 {
+	struct res_counter *fail_res;
+
+	/*
+	 * FIXME -- strictly speaking, this value should _also_
+	 * be charged into the kmem counter. But since res_counter_charge
+	 * is sub-optimal (takes locks) AND we do not care much
+	 * about kmem limits (at least for now) we can just directly
+	 * charge into the mem counter.
+	 */
+	res_counter_charge_nofail(&memcg->res, size, &fail_res);
+	if (do_swap_account)
+		res_counter_charge_nofail(&memcg->memsw, size, &fail_res);
+}
+
+void __memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size)
+{
+	/* FIXME -- also uncharge from the kmem counter */
 	res_counter_uncharge(&memcg->res, size);
 	if (do_swap_account)
 		res_counter_uncharge(&memcg->memsw, size);
+}
+
+void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size)
+{
+	__memcg_uncharge_kmem(memcg, size);
 
 	/* Not down to 0 */
 	if (res_counter_uncharge(&memcg->kmem, size))
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e641406..8cbf0f5 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -413,6 +413,11 @@ void tcp_init_sock(struct sock *sk)
 	sk->sk_write_space = sk_stream_write_space;
 	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
 
+	/*
+	 * TCP memory is accounted via cg_proto, so there is
+	 * no need for additional kmem charging via slub.
+	 */
+	sk->sk_allocation |= __GFP_NOACCOUNT;
 	icsk->icsk_sync_mss = tcp_sync_mss;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e0a231e..fa94a5a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4541,7 +4541,7 @@ restart:
 			return;
 		if (end - start < copy)
 			copy = end - start;
-		nskb = alloc_skb(copy + header, GFP_ATOMIC);
+		nskb = alloc_skb(copy + header, GFP_ATOMIC|__GFP_NOACCOUNT);
 		if (!nskb)
 			return;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 13d440b..a217305 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1061,7 +1061,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
 		return -ENOMEM;
 
 	/* Get a new skb... force flag on. */
-	buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC);
+	buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC|__GFP_NOACCOUNT);
 	if (buff == NULL)
 		return -ENOMEM; /* We'll just try again later. */
@@ -1548,7 +1548,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 	if (skb->len != skb->data_len)
 		return tcp_fragment(sk, skb, len, mss_now);
 
-	buff = sk_stream_alloc_skb(sk, 0, gfp);
+	buff = sk_stream_alloc_skb(sk, 0, gfp|__GFP_NOACCOUNT);
 	if (unlikely(buff == NULL))
 		return -ENOMEM;
@@ -1718,7 +1718,7 @@ static int tcp_mtu_probe(struct sock *sk)
 	}
 
 	/* We're allowed to probe. Build it now. */
-	if ((nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC)) == NULL)
+	if ((nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC|__GFP_NOACCOUNT)) == NULL)
 		return -1;
 	sk->sk_wmem_queued += nskb->truesize;
 	sk_mem_charge(sk, nskb->truesize);
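
A note on why flipping __GFP_NOACCOUNT is sufficient: the generic kmem
accounting path bails out early on flagged allocations, so only the
explicit cg_proto charge added above gets recorded. A toy userspace
sketch of that bypass follows (the flag value, counters, and helper
names below are made up for illustration, they are not the kernel's):

/* Toy model of the __GFP_NOACCOUNT bypass -- userspace illustration
 * only; both charge helpers and the flag value are invented for the
 * sketch.
 */
#include <stdio.h>

#define __GFP_NOACCOUNT	0x1u	/* stand-in for the real gfp flag */

static long kmem_usage;	/* generic kmem counter (slub/buddy path) */
static long tcp_usage;	/* explicit cg_proto (tcp) counter */

/* Generic allocator-side accounting: skips flagged allocations */
static void kmem_charge(unsigned int gfp, long size)
{
	if (gfp & __GFP_NOACCOUNT)
		return;	/* socket set the flag: do not account here */
	kmem_usage += size;
}

/* Protocol-side accounting: the hooks this patch adds */
static void tcp_charge(long size)
{
	tcp_usage += size;
}

int main(void)
{
	/* An skb allocated with sk->sk_allocation | __GFP_NOACCOUNT:
	 * only the explicit tcp charge is recorded, not both.
	 */
	kmem_charge(__GFP_NOACCOUNT, 2048);
	tcp_charge(2048);
	printf("kmem=%ld tcp=%ld -> no double accounting\n",
	       kmem_usage, tcp_usage);
	return 0;
}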
--
1.8.3.1