[Devel] [PATCH RHEL7 COMMIT] ploop: store exec_ub in ploop request and use it while processing requests

Konstantin Khorenko khorenko at virtuozzo.com
Mon Aug 27 17:52:55 MSK 2018


The commit is pushed to "branch-rh7-3.10.0-862.11.6.vz7.71.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-862.11.6.vz7.71.4
------>
commit ae4abe579715359ec3f4de889b516e76f222e970
Author: Konstantin Khorenko <khorenko at virtuozzo.com>
Date:   Thu Aug 2 17:11:17 2018 +0300

    ploop: store exec_ub in ploop request and use it while processing requests
    
    ploop_req_state_process() is intended to set up proper exec_ub
    while processing ploop requests, but it uses preq->ioc->ioc_ub which is
    almost always incorrect.
    
    ploop requests are created in kworker which is run in host:
    
    kworker/u16:5   319 [005]   841.012218:   probe:ploop_grab_iocontext: (ffffffffc0004542)
                        45b3 ploop_make_request (/lib/modules/3.10.0/kernel/drivers/block/ploop/ploop.ko)
                      5345bb generic_make_request ([kernel.kallsyms])
                      534843 submit_bio ([kernel.kallsyms])
                       12185 ext4_io_submit ([ext4])
                       123e9 ext4_bio_write_page ([ext4])
                        82bd mpage_submit_page ([ext4])
                        8568 mpage_map_and_submit_buffers ([ext4])
                        dff5 ext4_writepages ([ext4])
                      3b0221 do_writepages ([kernel.kallsyms])
                      460dba __writeback_single_inode ([kernel.kallsyms])
                      4613b5 writeback_sb_inodes ([kernel.kallsyms])
                      4617b2 __writeback_inodes_wb ([kernel.kallsyms])
                      461a43 wb_writeback ([kernel.kallsyms])
                      462376 bdi_writeback_workfn ([kernel.kallsyms])
                      2b11d5 process_one_work ([kernel.kallsyms])
                      2b237e worker_thread ([kernel.kallsyms])
                      2b9781 kthread ([kernel.kallsyms])
                      90ebf7 ret_from_fork ([kernel.kallsyms])
    
    "kworker/u16:5" has exec_ub == ub0
    and ploop_make_request() stores io_context from "current" == kworker in bio and
    sets flag BIO_BDEV_REUSED:
    
    queue:
            BUG_ON (bio->bi_bdev != plo->bdev && bio_sectors(bio));
            if (bio->bi_bdev == plo->bdev) {
                    BUG_ON (test_bit(BIO_BDEV_REUSED, &bio->bi_flags));
                    ploop_grab_iocontext(bio);
            }
    ==========
    static void ploop_grab_iocontext(struct bio *bio)
    {
            struct io_context **ioc_pp = (struct io_context **)(&bio->bi_bdev);
            if (current->io_context) {
                    ioc_task_link(current->io_context);
                    *ioc_pp = current->io_context;
    ==========
    Meanwhile ploop_thread() processes bios:
    
      ploop_thread
       process_pending_bios
        ploop_bio_queue
         get a free ploop request
         if BIO_BDEV_REUSED is set to bio, preq->ioc = saved io_context (with ub0)
    
    ploop_bio_queue(struct ploop_device * plo, struct bio * bio,
                    struct list_head *drop_list, int account_blockable)
    {
            if (test_bit(BIO_BDEV_REUSED, &bio->bi_flags)) {
                        preq->ioc = (struct io_context *)(bio->bi_bdev);
                        bio->bi_bdev = plo->bdev;
                        clear_bit(BIO_BDEV_REUSED, &bio->bi_flags);
    
    => now preq->ioc contains exec_ub == ub0
    later ploop_thread() -> ploop_req_state_process() executes
    set_exec_ub(preq->ioc->ioc_ub)          // == ub0
    
    This patch adds preq::preq_ub field to "ploop_request" and sets it up properly
    on ploop request creation.
    
    Q: Why do we have proper current->exec_ub on ploop request creation time?
    A: See calltrace above - it goes through __writeback_single_inode which
       sets proper exec_ub.
    
    __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
    {
    ...
            ub = rcu_dereference(inode->i_mapping->dirtied_ub);
    ...
            ub = set_exec_ub(ub);
            ret = __do_writeback_single_inode(inode, wbc);
            put_beancounter(set_exec_ub(ub));
    ...
    
    https://jira.sw.ru/browse/PSBM-86910
    
    Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
    
    v2 changes:
     - get correct ub in ploop_req_state_process()
---
 drivers/block/ploop/dev.c   | 45 ++++++++++++++++++++++++++++++++++++++-------
 include/linux/ploop/ploop.h |  2 ++
 2 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index c6094144c7a5..a71f28476eca 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -22,6 +22,7 @@
 
 #include <trace/events/block.h>
 
+#include <bc/beancounter.h>
 #include <linux/ploop/ploop.h>
 #include "ploop_events.h"
 #include "freeblks.h"
@@ -241,6 +242,10 @@ void ploop_preq_drop(struct ploop_device * plo, struct list_head *drop_list)
 			put_io_context_active(preq->ioc);
 			preq->ioc = NULL;
 		}
+#ifdef CONFIG_BEANCOUNTERS
+		put_beancounter(preq->preq_ub);
+		preq->preq_ub = NULL;
+#endif
 
 		BUG_ON (test_bit(PLOOP_REQ_ZERO, &preq->state));
 		ploop_test_and_clear_blockable(plo, preq);
@@ -562,6 +567,14 @@ ploop_bio_queue(struct ploop_device * plo, struct bio * bio,
 		preq->ioc = NULL;
 	}
 
+#ifdef CONFIG_BEANCOUNTERS
+	/* Proper exec_ub is set by __writeback_single_inode().
+	 * preq->ioc->ioc_ub does not fit here because we may be in "kworker"
+	 * and preq->ioc is a link to current->io_context.
+	 */
+	preq->preq_ub = get_beancounter(get_exec_ub());
+#endif
+
 	if (unlikely(bio->bi_rw & REQ_SYNC))
 		__set_bit(PLOOP_REQ_SYNC, &preq->state);
 	if (unlikely(bio == plo->bio_sync)) {
@@ -1324,6 +1337,7 @@ static void ploop_complete_request(struct ploop_request * preq)
 	struct ploop_device * plo = preq->plo;
 	int nr_completed = 0;
 	struct io_context *ioc;
+	struct user_beancounter *ub;
 
 	trace_complete_request(preq);
 
@@ -1417,6 +1431,10 @@ static void ploop_complete_request(struct ploop_request * preq)
 
 	ioc = preq->ioc;
 	preq->ioc = NULL;
+#ifdef CONFIG_BEANCOUNTERS
+	ub = preq->preq_ub;
+	preq->preq_ub = NULL;
+#endif
 
 	plo->active_reqs--;
 
@@ -1451,6 +1469,9 @@ static void ploop_complete_request(struct ploop_request * preq)
 		atomic_dec(&ioc->nr_tasks);
 		put_io_context_active(ioc);
 	}
+#ifdef CONFIG_BEANCOUNTERS
+	put_beancounter(ub);
+#endif
 }
 
 void ploop_fail_request(struct ploop_request * preq, int err)
@@ -2513,7 +2534,7 @@ static void ploop_req_state_process(struct ploop_request * preq)
 	struct io_context * saved_ioc = NULL;
 	int release_ioc = 0;
 #ifdef CONFIG_BEANCOUNTERS
-	struct user_beancounter * uninitialized_var(saved_ub);
+	struct user_beancounter *saved_ub;
 #endif
 
 	trace_req_state_process(preq);
@@ -2521,12 +2542,13 @@ static void ploop_req_state_process(struct ploop_request * preq)
 	if (preq->ioc) {
 		saved_ioc = current->io_context;
 		current->io_context = preq->ioc;
-#ifdef CONFIG_BEANCOUNTERS
-		saved_ub = set_exec_ub(preq->ioc->ioc_ub);
-#endif
 		atomic_long_inc(&preq->ioc->refcount);
 		release_ioc = 1;
 	}
+#ifdef CONFIG_BEANCOUNTERS
+	get_beancounter(preq->preq_ub);
+	saved_ub = set_exec_ub(preq->preq_ub);
+#endif
 
 	if (preq->eng_state != PLOOP_E_COMPLETE &&
 	    test_bit(PLOOP_REQ_SYNC, &preq->state))
@@ -2586,6 +2608,10 @@ static void ploop_req_state_process(struct ploop_request * preq)
 			preq->eng_state = PLOOP_E_COMPLETE;
 			preq->error = -EOPNOTSUPP;
 			ploop_complete_io_state(preq);
+#ifdef CONFIG_BEANCOUNTERS
+			saved_ub = set_exec_ub(saved_ub);
+			put_beancounter(saved_ub);
+#endif
 			return;
 		}
 
@@ -2861,11 +2887,12 @@ static void ploop_req_state_process(struct ploop_request * preq)
 	if (release_ioc) {
 		struct io_context * ioc = current->io_context;
 		current->io_context = saved_ioc;
-#ifdef CONFIG_BEANCOUNTERS
-		set_exec_ub(saved_ub);
-#endif
 		put_io_context(ioc);
 	}
+#ifdef CONFIG_BEANCOUNTERS
+	saved_ub = set_exec_ub(saved_ub);
+	put_beancounter(saved_ub);
+#endif
 }
 
 static void ploop_wait(struct ploop_device * plo, int once, struct blk_plug *plug)
@@ -3372,6 +3399,7 @@ void ploop_quiesce(struct ploop_device * plo)
 	preq->state = (1 << PLOOP_REQ_SYNC) | (1 << PLOOP_REQ_BARRIER);
 	preq->error = 0;
 	preq->tstamp = jiffies;
+	preq->preq_ub = get_beancounter(get_exec_ub());
 
 	init_completion(&qcomp);
 	init_completion(&plo->relax_comp);
@@ -3669,6 +3697,7 @@ static void ploop_merge_process(struct ploop_device * plo)
 		preq->tstamp = jiffies;
 		preq->iblock = 0;
 		preq->prealloc_size = 0;
+		preq->preq_ub = get_beancounter(get_exec_ub());
 
 		atomic_inc(&plo->maintenance_cnt);
 
@@ -4292,6 +4321,7 @@ static void ploop_relocate(struct ploop_device * plo, int grow_stage)
 	preq->tstamp = jiffies;
 	preq->iblock = (reloc_type == PLOOP_REQ_RELOC_A) ? 0 : plo->grow_start;
 	preq->prealloc_size = 0;
+	preq->preq_ub = get_beancounter(get_exec_ub());
 
 	atomic_inc(&plo->maintenance_cnt);
 
@@ -4617,6 +4647,7 @@ static void ploop_relocblks_process(struct ploop_device *plo)
 		preq->tstamp = jiffies;
 		preq->iblock = 0;
 		preq->prealloc_size = 0;
+		preq->preq_ub = get_beancounter(get_exec_ub());
 
 		atomic_inc(&plo->maintenance_cnt);
 
diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
index fd683f79956d..fbab9fbeb9ef 100644
--- a/include/linux/ploop/ploop.h
+++ b/include/linux/ploop/ploop.h
@@ -618,6 +618,8 @@ struct ploop_request
 	/* if the engine starts operation on particular io, let's finish
 	 * the operation on the same io (see io.ops->post_submit) */
 	struct ploop_io	       *eng_io;
+
+	struct user_beancounter *preq_ub;
 };
 
 static inline struct ploop_delta * ploop_top_delta(struct ploop_device * plo)


More information about the Devel mailing list