[Devel] [PATCH 2/7] tcp: Charge socket buffers into cg memory

Vladimir Davydov vdavydov at parallels.com
Mon Jun 1 07:50:32 PDT 2015


On Fri, May 29, 2015 at 05:18:01PM +0300, Pavel Emelyanov wrote:
> TCP code already has internal memory management for both -- in
> and out traffic. The outgoing packets are also already auto
> accounted into kmem (and into cg memory), incoming traffic is
> not accounted into kmem. And this management is already per-cg
> thanks to Glauber work some time ago.
> 
> So TCP mm fix is -- take existing TCP mem accounting code and
> add/sub those numbers into cg memory. To avoid double accounting
> (via TCP hooks and via slub/buddy) the sk_allocation is set to
> be __GFP_NOACCOUNT.
> 
> Signed-off-by: Pavel Emelyanov <xemul at parallels.com>
> 
> ---
>  include/linux/memcontrol.h |  3 +++
>  include/net/sock.h         |  2 ++
>  mm/memcontrol.c            | 21 ++++++++++++++++++++-
>  net/ipv4/tcp.c             |  5 +++++
>  net/ipv4/tcp_input.c       |  2 +-
>  net/ipv4/tcp_output.c      |  6 +++---
>  6 files changed, 34 insertions(+), 5 deletions(-)
> 
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 5507be5..d250663 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -421,6 +421,9 @@ static inline void sock_release_memcg(struct sock *sk)
>  }
>  #endif /* CONFIG_INET && CONFIG_MEMCG_KMEM */
>  
> +void memcg_charge_kmem_bytes(struct mem_cgroup *mcg, unsigned long bytes);
> +void memcg_uncharge_kmem_bytes(struct mem_cgroup *mcg, unsigned long bytes);
> +
>  #ifdef CONFIG_MEMCG_KMEM
>  extern struct static_key memcg_kmem_enabled_key;
>  
> diff --git a/include/net/sock.h b/include/net/sock.h
> index 0688f4e..6adab3c 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -1195,6 +1195,7 @@ static inline void memcg_memory_allocated_add(struct cg_proto *prot,
>  	struct res_counter *fail;
>  	int ret;
>  
> +	memcg_charge_kmem_bytes(prot->memcg, amt << PAGE_SHIFT);
>  	ret = res_counter_charge_nofail(prot->memory_allocated,
>  					amt << PAGE_SHIFT, &fail);
>  	if (ret < 0)
> @@ -1205,6 +1206,7 @@ static inline void memcg_memory_allocated_sub(struct cg_proto *prot,
>  					      unsigned long amt)
>  {
>  	res_counter_uncharge(prot->memory_allocated, amt << PAGE_SHIFT);
> +	memcg_uncharge_kmem_bytes(prot->memcg, amt << PAGE_SHIFT);
>  }
>  
>  static inline u64 memcg_memory_allocated_read(struct cg_proto *prot)
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 9dda309..d8c305f 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -496,9 +496,28 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
>  	return (memcg == root_mem_cgroup);
>  }
>  
> +void memcg_charge_kmem_bytes(struct mem_cgroup *cg, unsigned long bytes)

Please name this function memcg_charge_kmem_nofail. And use memcg for
instances of mem_cgroup, please. The code is already messy, we'd better
avoid making it even more inconsistent.

> +{
> +	struct res_counter *fail_res;
> +
> +	/*
> +	 * FIXME -- strictly speaking, this value should _also_
> +	 * be charged into kmem counter. But since res_counter_charge
> +	 * is sub-optimal (takes locks) AND we do not care much
> +	 * about kmem limits (at least for now) we can just directly
> +	 * charge into mem counter.
> +	 */
> +	res_counter_charge_nofail(&cg->res, bytes, &fail_res);

You must also charge cg->memsw if do_swap_account is true.

And I don't think charging kmem will make any difference. OTOH not
charging it may result in mem_cgroup_reparent_charges looping forever.

> +}
> +
> +void memcg_uncharge_kmem_bytes(struct mem_cgroup *cg, unsigned long bytes)
> +{
> +	/* FIXME -- uncharge also in kmem counter */
> +	res_counter_uncharge(&cg->res, bytes);

You don't check whether the css reference should be dropped. Please use
memcg_uncharge_kmem, which does.

> +}
> +
>  /* Writing them here to avoid exposing memcg's inner layout */
>  #if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
> -

Don't add extra hunks where they are not necessary, please.

>  void sock_update_memcg(struct sock *sk)
>  {
>  	if (mem_cgroup_sockets_enabled) {
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index e641406..8cbf0f5 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -413,6 +413,11 @@ void tcp_init_sock(struct sock *sk)
>  
>  	sk->sk_write_space = sk_stream_write_space;
>  	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
> +	/*
> +	 * TCP memory is accounted via cg_proto and there's
> +	 * no need in additional kmem charging via slub
> +	 */
> +	sk->sk_allocation |= __GFP_NOACCOUNT;
>  
>  	icsk->icsk_sync_mss = tcp_sync_mss;
>  
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index e0a231e..fa94a5a 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -4541,7 +4541,7 @@ restart:
>  			return;
>  		if (end - start < copy)
>  			copy = end - start;
> -		nskb = alloc_skb(copy + header, GFP_ATOMIC);
> +		nskb = alloc_skb(copy + header, GFP_ATOMIC|__GFP_NOACCOUNT);
>  		if (!nskb)
>  			return;
>  
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index 13d440b..a217305 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -1061,7 +1061,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
>  		return -ENOMEM;
>  
>  	/* Get a new skb... force flag on. */
> -	buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC);
> +	buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC|__GFP_NOACCOUNT);
>  	if (buff == NULL)
>  		return -ENOMEM; /* We'll just try again later. */
>  
> @@ -1548,7 +1548,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
>  	if (skb->len != skb->data_len)
>  		return tcp_fragment(sk, skb, len, mss_now);
>  
> -	buff = sk_stream_alloc_skb(sk, 0, gfp);
> +	buff = sk_stream_alloc_skb(sk, 0, gfp|__GFP_NOACCOUNT);
>  	if (unlikely(buff == NULL))
>  		return -ENOMEM;
>  
> @@ -1718,7 +1718,7 @@ static int tcp_mtu_probe(struct sock *sk)
>  	}
>  
>  	/* We're allowed to probe.  Build it now. */
> -	if ((nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC)) == NULL)
> +	if ((nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC|__GFP_NOACCOUNT)) == NULL)
>  		return -1;
>  	sk->sk_wmem_queued += nskb->truesize;
>  	sk_mem_charge(sk, nskb->truesize);

What about tcp_send_ack, tcp_xmit_probe_skb, and tcp_send_active_reset?



More information about the Devel mailing list