[Devel] [PATCH vz7 1/2] ploop: store exec_ub in ploop request and use it while processing requests

Kirill Tkhai ktkhai at virtuozzo.com
Fri Aug 3 15:12:16 MSK 2018


Good job, Konstantin!

Reviewed-by: Kirill Tkhai <ktkhai at virtuozzo.com>

On 02.08.2018 17:11, Konstantin Khorenko wrote:
> ploop_req_state_process() is intended to set up proper exec_ub
> while processing ploop requests, but it uses preq->ioc->ioc_ub which is
> almost always incorrect.
> 
> ploop requests are created in kworker which is run in host:
> 
> kworker/u16:5   319 [005]   841.012218:   probe:ploop_grab_iocontext: (ffffffffc0004542)
>                     45b3 ploop_make_request (/lib/modules/3.10.0/kernel/drivers/block/ploop/ploop.ko)
>                   5345bb generic_make_request ([kernel.kallsyms])
>                   534843 submit_bio ([kernel.kallsyms])
>                    12185 ext4_io_submit ([ext4])
>                    123e9 ext4_bio_write_page ([ext4])
>                     82bd mpage_submit_page ([ext4])
>                     8568 mpage_map_and_submit_buffers ([ext4])
>                     dff5 ext4_writepages ([ext4])
>                   3b0221 do_writepages ([kernel.kallsyms])
>                   460dba __writeback_single_inode ([kernel.kallsyms])
>                   4613b5 writeback_sb_inodes ([kernel.kallsyms])
>                   4617b2 __writeback_inodes_wb ([kernel.kallsyms])
>                   461a43 wb_writeback ([kernel.kallsyms])
>                   462376 bdi_writeback_workfn ([kernel.kallsyms])
>                   2b11d5 process_one_work ([kernel.kallsyms])
>                   2b237e worker_thread ([kernel.kallsyms])
>                   2b9781 kthread ([kernel.kallsyms])
>                   90ebf7 ret_from_fork ([kernel.kallsyms])
> 
> "kworker/u16:5" has exec_ub == ub0
> and ploop_make_request() stores io_context from "current" == kworker in bio and
> sets flag BIO_BDEV_REUSED:
> 
> queue:
>         BUG_ON (bio->bi_bdev != plo->bdev && bio_sectors(bio));
>         if (bio->bi_bdev == plo->bdev) {
>                 BUG_ON (test_bit(BIO_BDEV_REUSED, &bio->bi_flags));
>                 ploop_grab_iocontext(bio);
>         }
> ==========
> static void ploop_grab_iocontext(struct bio *bio)
> {
>         struct io_context **ioc_pp = (struct io_context **)(&bio->bi_bdev);
>         if (current->io_context) {
>                 ioc_task_link(current->io_context);
>                 *ioc_pp = current->io_context;
> ==========
> Meanwhile ploop_thread() processes bios:
> 
>   ploop_thread
>    process_pending_bios
>     ploop_bio_queue
>      get a free ploop request
>      if BIO_BDEV_REUSED is set to bio, preq->ioc = saved io_context (with ub0)
> 
> ploop_bio_queue(struct ploop_device * plo, struct bio * bio,
>                 struct list_head *drop_list, int account_blockable)
> {
>         if (test_bit(BIO_BDEV_REUSED, &bio->bi_flags)) {
>                     preq->ioc = (struct io_context *)(bio->bi_bdev);
>                     bio->bi_bdev = plo->bdev;
>                     clear_bit(BIO_BDEV_REUSED, &bio->bi_flags);
> 
> => now preq->ioc contains exec_ub == ub0
> later ploop_thread() -> ploop_req_state_process() executes
> set_exec_ub(preq->ioc->ioc_ub)		// == ub0
> 
> This patch adds preq::preq_ub field to "ploop_request" and sets it up properly
> on ploop request creation.
> 
> Q: Why do we have proper current->exec_ub on ploop request creation time?
> A: See calltrace above - it goes through __writeback_single_inode which
>    sets proper exec_ub.
> 
> __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
> {
> ...
>         ub = rcu_dereference(inode->i_mapping->dirtied_ub);
> ...
>         ub = set_exec_ub(ub);
>         ret = __do_writeback_single_inode(inode, wbc);
>         put_beancounter(set_exec_ub(ub));
> ...
> 
> https://jira.sw.ru/browse/PSBM-86910
> 
> Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
> ---
>  drivers/block/ploop/dev.c   | 44 +++++++++++++++++++++++++++++++++++++-------
>  include/linux/ploop/ploop.h |  2 ++
>  2 files changed, 39 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
> index 533ddc965af2..2b69580da32b 100644
> --- a/drivers/block/ploop/dev.c
> +++ b/drivers/block/ploop/dev.c
> @@ -22,6 +22,7 @@
>  
>  #include <trace/events/block.h>
>  
> +#include <bc/beancounter.h>
>  #include <linux/ploop/ploop.h>
>  #include "ploop_events.h"
>  #include "freeblks.h"
> @@ -241,6 +242,10 @@ void ploop_preq_drop(struct ploop_device * plo, struct list_head *drop_list)
>  			put_io_context_active(preq->ioc);
>  			preq->ioc = NULL;
>  		}
> +#ifdef CONFIG_BEANCOUNTERS
> +		put_beancounter(preq->preq_ub);
> +		preq->preq_ub = NULL;
> +#endif
>  
>  		BUG_ON (test_bit(PLOOP_REQ_ZERO, &preq->state));
>  		ploop_test_and_clear_blockable(plo, preq);
> @@ -562,6 +567,14 @@ ploop_bio_queue(struct ploop_device * plo, struct bio * bio,
>  		preq->ioc = NULL;
>  	}
>  
> +#ifdef CONFIG_BEANCOUNTERS
> +	/* Proper exec_ub is set by __writeback_single_inode().
> +	 * preq->ioc->ioc_ub does not fit here because we may be in "kworker"
> +	 * and preq->ioc is a link to current->io_context.
> +	 */
> +	preq->preq_ub = get_beancounter(get_exec_ub());
> +#endif
> +
>  	if (unlikely(bio->bi_rw & REQ_SYNC))
>  		__set_bit(PLOOP_REQ_SYNC, &preq->state);
>  	if (unlikely(bio == plo->bio_sync)) {
> @@ -1324,6 +1337,7 @@ static void ploop_complete_request(struct ploop_request * preq)
>  	struct ploop_device * plo = preq->plo;
>  	int nr_completed = 0;
>  	struct io_context *ioc;
> +	struct user_beancounter *ub;
>  
>  	trace_complete_request(preq);
>  
> @@ -1417,6 +1431,10 @@ static void ploop_complete_request(struct ploop_request * preq)
>  
>  	ioc = preq->ioc;
>  	preq->ioc = NULL;
> +#ifdef CONFIG_BEANCOUNTERS
> +	ub = preq->preq_ub;
> +	preq->preq_ub = NULL;
> +#endif
>  
>  	plo->active_reqs--;
>  
> @@ -1451,6 +1469,9 @@ static void ploop_complete_request(struct ploop_request * preq)
>  		atomic_dec(&ioc->nr_tasks);
>  		put_io_context_active(ioc);
>  	}
> +#ifdef CONFIG_BEANCOUNTERS
> +	put_beancounter(ub);
> +#endif
>  }
>  
>  void ploop_fail_request(struct ploop_request * preq, int err)
> @@ -2513,7 +2534,7 @@ static void ploop_req_state_process(struct ploop_request * preq)
>  	struct io_context * saved_ioc = NULL;
>  	int release_ioc = 0;
>  #ifdef CONFIG_BEANCOUNTERS
> -	struct user_beancounter * uninitialized_var(saved_ub);
> +	struct user_beancounter *saved_ub;
>  #endif
>  
>  	trace_req_state_process(preq);
> @@ -2521,12 +2542,12 @@ static void ploop_req_state_process(struct ploop_request * preq)
>  	if (preq->ioc) {
>  		saved_ioc = current->io_context;
>  		current->io_context = preq->ioc;
> -#ifdef CONFIG_BEANCOUNTERS
> -		saved_ub = set_exec_ub(preq->ioc->ioc_ub);
> -#endif
>  		atomic_long_inc(&preq->ioc->refcount);
>  		release_ioc = 1;
>  	}
> +#ifdef CONFIG_BEANCOUNTERS
> +	saved_ub = get_beancounter(set_exec_ub(preq->preq_ub));
> +#endif
>  
>  	if (preq->eng_state != PLOOP_E_COMPLETE &&
>  	    test_bit(PLOOP_REQ_SYNC, &preq->state))
> @@ -2586,6 +2607,10 @@ static void ploop_req_state_process(struct ploop_request * preq)
>  			preq->eng_state = PLOOP_E_COMPLETE;
>  			preq->error = -EOPNOTSUPP;
>  			ploop_complete_io_state(preq);
> +#ifdef CONFIG_BEANCOUNTERS
> +			saved_ub = set_exec_ub(saved_ub);
> +			put_beancounter(saved_ub);
> +#endif
>  			return;
>  		}
>  
> @@ -2861,11 +2886,12 @@ static void ploop_req_state_process(struct ploop_request * preq)
>  	if (release_ioc) {
>  		struct io_context * ioc = current->io_context;
>  		current->io_context = saved_ioc;
> -#ifdef CONFIG_BEANCOUNTERS
> -		set_exec_ub(saved_ub);
> -#endif
>  		put_io_context(ioc);
>  	}
> +#ifdef CONFIG_BEANCOUNTERS
> +	saved_ub = set_exec_ub(saved_ub);
> +	put_beancounter(saved_ub);
> +#endif
>  }
>  
>  static void ploop_wait(struct ploop_device * plo, int once, struct blk_plug *plug)
> @@ -3372,6 +3398,7 @@ void ploop_quiesce(struct ploop_device * plo)
>  	preq->state = (1 << PLOOP_REQ_SYNC) | (1 << PLOOP_REQ_BARRIER);
>  	preq->error = 0;
>  	preq->tstamp = jiffies;
> +	preq->preq_ub = get_beancounter(get_exec_ub());
>  
>  	init_completion(&qcomp);
>  	init_completion(&plo->relax_comp);
> @@ -3669,6 +3696,7 @@ static void ploop_merge_process(struct ploop_device * plo)
>  		preq->tstamp = jiffies;
>  		preq->iblock = 0;
>  		preq->prealloc_size = 0;
> +		preq->preq_ub = get_beancounter(get_exec_ub());
>  
>  		atomic_inc(&plo->maintenance_cnt);
>  
> @@ -4297,6 +4325,7 @@ static void ploop_relocate(struct ploop_device * plo, int grow_stage)
>  	preq->tstamp = jiffies;
>  	preq->iblock = (reloc_type == PLOOP_REQ_RELOC_A) ? 0 : plo->grow_start;
>  	preq->prealloc_size = 0;
> +	preq->preq_ub = get_beancounter(get_exec_ub());
>  
>  	atomic_inc(&plo->maintenance_cnt);
>  
> @@ -4622,6 +4651,7 @@ static void ploop_relocblks_process(struct ploop_device *plo)
>  		preq->tstamp = jiffies;
>  		preq->iblock = 0;
>  		preq->prealloc_size = 0;
> +		preq->preq_ub = get_beancounter(get_exec_ub());
>  
>  		atomic_inc(&plo->maintenance_cnt);
>  
> diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
> index fd683f79956d..fbab9fbeb9ef 100644
> --- a/include/linux/ploop/ploop.h
> +++ b/include/linux/ploop/ploop.h
> @@ -618,6 +618,8 @@ struct ploop_request
>  	/* if the engine starts operation on particular io, let's finish
>  	 * the operation on the same io (see io.ops->post_submit) */
>  	struct ploop_io	       *eng_io;
> +
> +	struct user_beancounter *preq_ub;
>  };
>  
>  static inline struct ploop_delta * ploop_top_delta(struct ploop_device * plo)
> 


More information about the Devel mailing list