[Devel] [PATCH rh7] net: packet: rework rx/tx ring pages accounting

Vladimir Davydov vdavydov at virtuozzo.com
Thu Jun 2 07:10:36 PDT 2016


To account tx/rx ring pages to kmemcg, we allocate them with
__GFP_ACCOUNT. After commit 1265d3474391 ("mm: charge/uncharge kmemcg
from generic page allocator paths"), this implies that these pages have
PAGE_KMEMCG_MAPCOUNT_VALUE stored in page->_mapcount. This is incorrect,
because these pages are supposed to be mapped to userspace: the special
_mapcount value (note "mapcount:-255" below) triggers a "Bad page map"
report when the mapping is torn down:

  BUG: Bad page map in process packet_sock_mma  pte:8000000241837025 pmd:2428aa067
  page:ffffea0009060dc0 count:2 mapcount:-255 mapping:          (null) index:0x0
  page flags: 0x2fffff00000004(referenced)
  page dumped because: bad pte
  addr:00007f16c9a8c000 vm_flags:18100073 anon_vma:          (null) mapping:ffff880210caed80 index:0
  vma->vm_ops->fault:           (null)
  vma->vm_file->f_op->mmap: sock_mmap+0x0/0x20
  CPU: 2 PID: 6141 Comm: packet_sock_mma ve: e7eccd35-3ea1-4dc1-9a04-dba948120299 Not tainted 3.10.0-327.18.2.vz7.14.10 #1 14.10
  Hardware name: DEPO Computers To Be Filled By O.E.M./H67DE3, BIOS L1.60c 07/14/2011
  ffffea0009060dc0 000000007be30e48 ffff88024235ba68 ffffffff81633548
  ffff88024235bab0 ffffffff811a908f 8000000241837025 0000000000000000
  ffff8802428aa460 ffffea0009060dc0 00007f16c9a8c000 ffff88024235bc20
  Call Trace:
  [<ffffffff81633548>] dump_stack+0x19/0x1b
  [<ffffffff811a908f>] print_bad_pte+0x1af/0x250
  [<ffffffff811aabeb>] unmap_page_range+0x76b/0x870
  [<ffffffff811aad71>] unmap_single_vma+0x81/0xf0
  [<ffffffff811ac1d9>] unmap_vmas+0x49/0x90
  [<ffffffff811b59dc>] exit_mmap+0xac/0x1a0
  [<ffffffff810784db>] mmput+0x6b/0x140
  [<ffffffff81081d5c>] do_exit+0x2ac/0xb10
  [<ffffffff812f5a36>] ? plist_del+0x46/0x70
  [<ffffffff810f28c2>] ? __unqueue_futex+0x32/0x70
  [<ffffffff810f391d>] ? futex_wait+0x11d/0x280
  [<ffffffff8108263f>] do_group_exit+0x3f/0xa0
  [<ffffffff81093980>] get_signal_to_deliver+0x1d0/0x6d0
  [<ffffffff81014367>] do_signal+0x57/0x6c0
  [<ffffffff810f5aeb>] ? do_futex+0x15b/0x600
  [<ffffffff81014a2f>] do_notify_resume+0x5f/0xb0
  [<ffffffff816440fd>] int_signal+0x12/0x17

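To fix that, stop allocating the ring pages with __GFP_ACCOUNT (and
vzalloc_account()) and instead charge them explicitly, using
memcg_charge_kmem(), to the cgroup the packet socket is accounted to
(via ->sk_cgrp). The charge is dropped with memcg_uncharge_kmem() if
the ring allocation fails and when the ring is freed.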

https://jira.sw.ru/browse/PSBM-47873

Signed-off-by: Vladimir Davydov <vdavydov at virtuozzo.com>
---
 net/packet/af_packet.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index ecb5464c5622..2a1b15a85928 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3712,7 +3712,7 @@ static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
 static char *alloc_one_pg_vec_page(unsigned long order)
 {
 	char *buffer = NULL;
-	gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_COMP |
+	gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
 			  __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
 
 	buffer = (char *) __get_free_pages(gfp_flags, order);
@@ -3723,7 +3723,7 @@ static char *alloc_one_pg_vec_page(unsigned long order)
 	/*
 	 * __get_free_pages failed, fall back to vmalloc
 	 */
-	buffer = vzalloc_account((1 << order) * PAGE_SIZE);
+	buffer = vzalloc((1 << order) * PAGE_SIZE);
 
 	if (buffer)
 		return buffer;
@@ -3770,6 +3770,7 @@ out_free_pgvec:
 static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 		int closing, int tx_ring)
 {
+	struct packet_sk_charge *psc = (struct packet_sk_charge *)sk->sk_cgrp;
 	struct pgv *pg_vec = NULL;
 	struct packet_sock *po = pkt_sk(sk);
 	int was_running, order = 0;
@@ -3839,9 +3840,16 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 
 		err = -ENOMEM;
 		order = get_order(req->tp_block_size);
+		if (psc && memcg_charge_kmem(psc->memcg, GFP_KERNEL,
+				(PAGE_SIZE << order) * req->tp_block_nr))
+			goto out;
 		pg_vec = alloc_pg_vec(req, order);
-		if (unlikely(!pg_vec))
+		if (unlikely(!pg_vec)) {
+			if (psc)
+				memcg_uncharge_kmem(psc->memcg,
+					(PAGE_SIZE << order) * req->tp_block_nr);
 			goto out;
+		}
 		switch (po->tp_version) {
 		case TPACKET_V3:
 		/* Transmit path is not supported. We checked
@@ -3912,8 +3920,12 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 	}
 	release_sock(sk);
 
-	if (pg_vec)
+	if (pg_vec) {
+		if (psc)
+			memcg_uncharge_kmem(psc->memcg,
+				(PAGE_SIZE << order) * req->tp_block_nr);
 		free_pg_vec(pg_vec, order, req->tp_block_nr);
+	}
 out:
 	return err;
 }
-- 
2.1.4



More information about the Devel mailing list