[Devel] [PATCH RHEL7 COMMIT] ms/block: get rid of struct blk_issue_stat

Konstantin Khorenko khorenko at virtuozzo.com
Fri Oct 25 13:20:26 MSK 2019


The commit is pushed to "branch-rh7-3.10.0-1062.1.2.vz7.114.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1062.1.2.vz7.114.9
------>
commit 5890478e4fe3c304bfa3ef40255cc2cb8fef08d4
Author: Omar Sandoval <osandov at fb.com>
Date:   Fri Oct 25 13:20:24 2019 +0300

    ms/block: get rid of struct blk_issue_stat
    
    struct blk_issue_stat squashes three things into one u64:
    
    - The time the driver started working on a request
    - The original size of the request (for the io.low controller)
    - Flags for writeback throttling
    
    It turns out that on x86_64, we have a 4 byte hole in struct request
    which we can fill with the non-timestamp fields from blk_issue_stat,
    simplifying things quite a bit.
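    
    As a standalone illustration of the layout argument above (a minimal
    sketch with hypothetical field names, not the real struct request
    definition), the following program prints 16 for both structs on
    x86_64, showing that moving the small fields into the padding hole
    costs no space:
    
        #include <stdio.h>
        #include <stdint.h>
        
        /* Illustrative only. On x86_64 the 4-byte int forces 4 bytes
         * of padding before the 8-byte-aligned u64. */
        struct before {
                int tag;                        /* 4 bytes */
                /* 4-byte hole */
                uint64_t io_start_time_ns;      /* 8-byte aligned */
        };
        
        struct after {
                int tag;                        /* 4 bytes */
                unsigned short wbt_flags;       /* reuses 2 bytes of the hole */
                /* 2-byte hole */
                uint64_t io_start_time_ns;      /* still 8-byte aligned */
        };
        
        int main(void)
        {
                /* Both print 16: the new fields are free, space-wise. */
                printf("before: %zu bytes\n", sizeof(struct before));
                printf("after:  %zu bytes\n", sizeof(struct after));
                return 0;
        }
    
    The holes in the real struct request can be inspected with pahole.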
    
    Signed-off-by: Omar Sandoval <osandov at fb.com>
    Signed-off-by: Jens Axboe <axboe at kernel.dk>
    
    https://jira.sw.ru/browse/PSBM-96243
    
    (cherry picked from commit 544ccc8dc904db55d4576c27a1eb66a888ffacea)
    
    Hunks for wbt go to ("blk-wbt: add general throttling mechanism").
    Hunks for throtl_size are skipped.
    
    Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
    
    =====================
    Patchset description:
    
    block: backport writeback throttling
    
    We have a problem: if we run a heavy write load on one cpu
    simultaneously with short direct reads on another cpu, the latter
    stall significantly. Writeback throttling looks like a solution for
    these reads, as it decreases the priority of long-running writeback.
    
    Running a simple dd experiment, we see that read latency decreases
    once the wbt patches are applied:
    https://docs.google.com/spreadsheets/d/1HLtepwFL_N5zm0JcTqMtJoYnf-b6Slwld8DDgL0gNDI
    
    We also ran vconsolidate on a custom kernel with these patches.
    Though it does not show any performance improvement (likely because
    this test does not produce a high rate of writeback), it does not
    crash or fail the test.
    
    https://jira.sw.ru/browse/PSBM-96243
    
    Jens Axboe (6):
      block: add REQ_BACKGROUND
      writeback: add wbc_to_write_flags()
      writeback: mark background writeback as such
      writeback: track if we're sleeping on progress in
        balance_dirty_pages()
      blk-wbt: add general throttling mechanism
      block: hook up writeback throttling
    
    Omar Sandoval (1):
      block: get rid of struct blk_issue_stat
    
    Pavel Tikhomirov (3):
      x86/asm: remove the unused get_limit() method
      block: enable CONFIG_BLK_WBT*
      blk-wbt: increase maximum queue depth to increase performance of writes
---
 block/blk-core.c          |  2 +-
 block/blk-mq.c            |  5 +++--
 block/blk-stat.c          |  7 ++-----
 block/blk-stat.h          | 24 ------------------------
 block/kyber-iosched.c     |  6 +++---
 include/linux/blk-mq.h    |  1 -
 include/linux/blk_types.h |  4 ----
 include/linux/blkdev.h    | 19 +++++++++++++------
 8 files changed, 22 insertions(+), 46 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 13be6e4e0f53..264e476e4e0e 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2771,7 +2771,7 @@ void blk_start_request(struct request *req)
 	/* blk-stat isn't used on non-mq now, so disable it until it is needed */
 #if 0
 	if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) {
-		blk_stat_set_issue_time(&req->issue_stat);
+		req->io_start_time_ns = ktime_get_ns();
 		req->cmd_flags |= REQ_STATS;
 	}
 #endif
diff --git a/block/blk-mq.c b/block/blk-mq.c
index fb66815acc5a..501ad1020669 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -288,10 +288,11 @@ void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
 	rq->rq_disk = NULL;
 	rq->part = NULL;
 	rq->start_time = jiffies;
+	rq->io_start_time_ns = 0;
 #ifdef CONFIG_BLK_CGROUP
 	rq->rl = NULL;
 	set_start_time_ns(rq);
-	rq->io_start_time_ns = 0;
+	rq->cgroup_io_start_time_ns = 0;
 #endif
 	rq->nr_phys_segments = 0;
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
@@ -635,7 +636,7 @@ void blk_mq_start_request(struct request *rq)
 		rq->next_rq->resid_len = blk_rq_bytes(rq->next_rq);
 
 	if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
-		blk_stat_set_issue_time(&rq_aux(rq)->issue_stat);
+		rq->io_start_time_ns = ktime_get_ns();
 		rq->cmd_flags |= REQ_STATS;
 	}
 
diff --git a/block/blk-stat.c b/block/blk-stat.c
index d1bdfa182a21..9b1505efe287 100644
--- a/block/blk-stat.c
+++ b/block/blk-stat.c
@@ -90,11 +90,8 @@ void blk_stat_add(struct request *rq)
 	int bucket;
 	s64 now, value;
 
-	now = __blk_stat_time(ktime_to_ns(ktime_get()));
-	if (now < blk_stat_time(&rq_aux(rq)->issue_stat))
-		return;
-
-	value = now - blk_stat_time(&rq_aux(rq)->issue_stat);
+	now = ktime_get_ns();
+	value = (now >= rq->io_start_time_ns) ? now - rq->io_start_time_ns : 0;
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(cb, &q->stats->callbacks, list) {
diff --git a/block/blk-stat.h b/block/blk-stat.h
index 7417805588c0..0331a8ca97db 100644
--- a/block/blk-stat.h
+++ b/block/blk-stat.h
@@ -7,14 +7,6 @@
 #include <linux/rcupdate.h>
 #include <linux/timer.h>
 
-/*
- * Upper 3 bits can be used elsewhere
- */
-#define BLK_STAT_RES_BITS	3
-#define BLK_STAT_SHIFT		(64 - BLK_STAT_RES_BITS)
-#define BLK_STAT_TIME_MASK	((1ULL << BLK_STAT_SHIFT) - 1)
-#define BLK_STAT_MASK		~BLK_STAT_TIME_MASK
-
 /**
  * struct blk_stat_callback - Block statistics callback.
  *
@@ -74,22 +66,6 @@ void blk_free_queue_stats(struct blk_queue_stats *);
 
 void blk_stat_add(struct request *);
 
-static inline void blk_stat_set_issue_time(struct blk_issue_stat *stat)
-{
-	stat->time = ((stat->time & BLK_STAT_MASK) |
-		      (ktime_to_ns(ktime_get()) & BLK_STAT_TIME_MASK));
-}
-
-static inline u64 __blk_stat_time(u64 time)
-{
-	return time & BLK_STAT_TIME_MASK;
-}
-
-static inline u64 blk_stat_time(struct blk_issue_stat *stat)
-{
-	return __blk_stat_time(stat->time);
-}
-
 /*
  * blk_stat_rq_ddir() - Bucket callback function for the request data direction.
  * @rq: Request.
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index 9eae0540ea98..d4a6b82aeac4 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -504,11 +504,11 @@ static void kyber_completed_request(struct request *rq)
 	if (blk_stat_is_active(kqd->cb))
 		return;
 
-	now = __blk_stat_time(ktime_to_ns(ktime_get()));
-	if (now < blk_stat_time(&rq_aux(rq)->issue_stat))
+	now = ktime_get_ns();
+	if (now < rq->io_start_time_ns)
 		return;
 
-	latency = now - blk_stat_time(&rq_aux(rq)->issue_stat);
+	latency = now - rq->io_start_time_ns;
 
 	if (latency > target)
 		blk_stat_activate_msecs(kqd->cb, 10);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index ef5dbea983c7..433c5a76ce8b 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -134,7 +134,6 @@ struct blk_mq_queue_data {
  */
 struct request_aux {
 	int internal_tag;
-	struct blk_issue_stat issue_stat;
 };
 
 /* None of these function pointers are covered by RHEL kABI */
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index aecb3b0e21a6..240e33c474f4 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -328,10 +328,6 @@ static inline bool blk_path_error(int error)
 	return true;
 }
 
-struct blk_issue_stat {
-	u64 time;
-};
-
 struct blk_rq_stat {
 	s64 mean;
 	u64 min;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 70b26c967a16..6ea3889d82e8 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -186,10 +186,17 @@ struct request {
 	struct gendisk *rq_disk;
 	struct hd_struct *part;
 	unsigned long start_time;
+	/* Time that I/O was submitted to the device. */
+	u64 io_start_time_ns;
+
+#ifdef CONFIG_BLK_WBT
+	unsigned short wbt_flags;
+#endif
+
 #ifdef CONFIG_BLK_CGROUP
 	struct request_list *rl;		/* rl this rq is alloced from */
-	unsigned long long start_time_ns;
-	unsigned long long io_start_time_ns;    /* when passed to hardware */
+	unsigned long long cgroup_start_time_ns;
+	unsigned long long cgroup_io_start_time_ns;    /* when passed to hardware */
 #endif
 	/* Number of scatter-gather DMA addr+len pairs after
 	 * physical address coalescing is performed.
@@ -1670,25 +1677,25 @@ int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned lo
 static inline void set_start_time_ns(struct request *req)
 {
 	preempt_disable();
-	req->start_time_ns = sched_clock();
+	req->cgroup_start_time_ns = sched_clock();
 	preempt_enable();
 }
 
 static inline void set_io_start_time_ns(struct request *req)
 {
 	preempt_disable();
-	req->io_start_time_ns = sched_clock();
+	req->cgroup_io_start_time_ns = sched_clock();
 	preempt_enable();
 }
 
 static inline uint64_t rq_start_time_ns(struct request *req)
 {
-        return req->start_time_ns;
+	return req->cgroup_start_time_ns;
 }
 
 static inline uint64_t rq_io_start_time_ns(struct request *req)
 {
-        return req->io_start_time_ns;
+	return req->cgroup_io_start_time_ns;
 }
 #else
 static inline void set_start_time_ns(struct request *req) {}


