[Devel] [PATCH RHEL7 COMMIT] net: packet: rework rx/tx ring pages accounting

Konstantin Khorenko khorenko at virtuozzo.com
Thu Jun 2 08:06:29 PDT 2016


The commit is pushed to "branch-rh7-3.10.0-327.18.2.vz7.14.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-327.18.2.vz7.14.10
------>
commit 2edb1b3f01b5445d2a7a4fa951cad7f3e1d8a485
Author: Vladimir Davydov <vdavydov at virtuozzo.com>
Date:   Thu Jun 2 19:06:29 2016 +0400

    net: packet: rework rx/tx ring pages accounting
    
    To account tx/rx ring pages to kmemcg, we allocate them with
    __GFP_ACCOUNT. After commit 1265d3474391 ("mm: charge/uncharge kmemcg
    from generic page allocator paths") this implies that these pages have
    PAGE_KMEMCG_MAPCOUNT_VALUE stored in page->_mapcount. This is incorrect,
    because these pages are supposed to be mapped to userspace, and the
    unexpected mapcount is reported as a bad page map when they are unmapped:
    
      BUG: Bad page map in process packet_sock_mma  pte:8000000241837025 pmd:2428aa067
      page:ffffea0009060dc0 count:2 mapcount:-255 mapping:          (null) index:0x0
      page flags: 0x2fffff00000004(referenced)
      page dumped because: bad pte
      addr:00007f16c9a8c000 vm_flags:18100073 anon_vma:          (null) mapping:ffff880210caed80 index:0
      vma->vm_ops->fault:           (null)
      vma->vm_file->f_op->mmap: sock_mmap+0x0/0x20
      CPU: 2 PID: 6141 Comm: packet_sock_mma ve: e7eccd35-3ea1-4dc1-9a04-dba948120299 Not tainted 3.10.0-327.18.2.vz7.14.10 #1 14.10
      Hardware name: DEPO Computers To Be Filled By O.E.M./H67DE3, BIOS L1.60c 07/14/2011
      ffffea0009060dc0 000000007be30e48 ffff88024235ba68 ffffffff81633548
      ffff88024235bab0 ffffffff811a908f 8000000241837025 0000000000000000
      ffff8802428aa460 ffffea0009060dc0 00007f16c9a8c000 ffff88024235bc20
      Call Trace:
      [<ffffffff81633548>] dump_stack+0x19/0x1b
      [<ffffffff811a908f>] print_bad_pte+0x1af/0x250
      [<ffffffff811aabeb>] unmap_page_range+0x76b/0x870
      [<ffffffff811aad71>] unmap_single_vma+0x81/0xf0
      [<ffffffff811ac1d9>] unmap_vmas+0x49/0x90
      [<ffffffff811b59dc>] exit_mmap+0xac/0x1a0
      [<ffffffff810784db>] mmput+0x6b/0x140
      [<ffffffff81081d5c>] do_exit+0x2ac/0xb10
      [<ffffffff812f5a36>] ? plist_del+0x46/0x70
      [<ffffffff810f28c2>] ? __unqueue_futex+0x32/0x70
      [<ffffffff810f391d>] ? futex_wait+0x11d/0x280
      [<ffffffff8108263f>] do_group_exit+0x3f/0xa0
      [<ffffffff81093980>] get_signal_to_deliver+0x1d0/0x6d0
      [<ffffffff81014367>] do_signal+0x57/0x6c0
      [<ffffffff810f5aeb>] ? do_futex+0x15b/0x600
      [<ffffffff81014a2f>] do_notify_resume+0x5f/0xb0
      [<ffffffff816440fd>] int_signal+0x12/0x17
    
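    To fix that, allocate these pages without __GFP_ACCOUNT and charge them
    explicitly with memcg_charge_kmem() to the cgroup the packet socket is
    accounted to (via ->sk_cgrp), uncharging with memcg_uncharge_kmem() when
    the ring is freed or its allocation fails.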
    
    https://jira.sw.ru/browse/PSBM-47873
    
    Signed-off-by: Vladimir Davydov <vdavydov at virtuozzo.com>
---
 net/packet/af_packet.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index ecb5464..2a1b15a 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3712,7 +3712,7 @@ static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
 static char *alloc_one_pg_vec_page(unsigned long order)
 {
 	char *buffer = NULL;
-	gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_COMP |
+	gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
 			  __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
 
 	buffer = (char *) __get_free_pages(gfp_flags, order);
@@ -3723,7 +3723,7 @@ static char *alloc_one_pg_vec_page(unsigned long order)
 	/*
 	 * __get_free_pages failed, fall back to vmalloc
 	 */
-	buffer = vzalloc_account((1 << order) * PAGE_SIZE);
+	buffer = vzalloc((1 << order) * PAGE_SIZE);
 
 	if (buffer)
 		return buffer;
@@ -3770,6 +3770,7 @@ out_free_pgvec:
 static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 		int closing, int tx_ring)
 {
+	struct packet_sk_charge *psc = (struct packet_sk_charge *)sk->sk_cgrp;
 	struct pgv *pg_vec = NULL;
 	struct packet_sock *po = pkt_sk(sk);
 	int was_running, order = 0;
@@ -3839,9 +3840,16 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 
 		err = -ENOMEM;
 		order = get_order(req->tp_block_size);
+		if (psc && memcg_charge_kmem(psc->memcg, GFP_KERNEL,
+				(PAGE_SIZE << order) * req->tp_block_nr))
+			goto out;
 		pg_vec = alloc_pg_vec(req, order);
-		if (unlikely(!pg_vec))
+		if (unlikely(!pg_vec)) {
+			if (psc)
+				memcg_uncharge_kmem(psc->memcg,
+					(PAGE_SIZE << order) * req->tp_block_nr);
 			goto out;
+		}
 		switch (po->tp_version) {
 		case TPACKET_V3:
 		/* Transmit path is not supported. We checked
@@ -3912,8 +3920,12 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 	}
 	release_sock(sk);
 
-	if (pg_vec)
+	if (pg_vec) {
+		if (psc)
+			memcg_uncharge_kmem(psc->memcg,
+				(PAGE_SIZE << order) * req->tp_block_nr);
 		free_pg_vec(pg_vec, order, req->tp_block_nr);
+	}
 out:
 	return err;
 }


More information about the Devel mailing list