[Devel] [vzlin-dev] [PATCH vz7] md: propagate QUEUE_FLAG_SG_GAPS
Vasily Averin
vvs at virtuozzo.com
Thu Dec 8 22:12:42 PST 2016
how about RHEL7.3 ?
if they are affected too -- it makes sense to report them.
On 12/09/2016 08:35 AM, Maxim Patlasov wrote:
> Mainline is not affected. (they re-worked the whole engine completely so the problem vanished auto-magically)
>
> On 12/08/2016 09:21 PM, Vasily Averin wrote:
>> Maxim,
>> will you sent this to mainline too ?
>>
>> thank you,
>> Vasily Averin
>>
>> On 12/09/2016 08:17 AM, Maxim Patlasov wrote:
>>> NVMe disk driver disables sg gaps by setting the flag in its queue.
>>> md-raids must propagate the flag to its queue as well. Otherwise, an upper
>>> user will get OK from bio_add_page() even if a gap exists, and that bio
>>> with a gap will crash NVMe driver who doesn't expect gaps.
>>> (see bvec_gap_to_prev for the definotion of gap)
>>>
>>> https://jira.sw.ru/browse/PSBM-56838
>>>
>>> Signed-off-by: Maxim Patlasov <mpatlasov at virtuozzo.com>
>>> ---
>>> drivers/md/linear.c | 9 +++++++++
>>> drivers/md/raid0.c | 8 ++++++++
>>> drivers/md/raid1.c | 11 +++++++++++
>>> drivers/md/raid10.c | 12 ++++++++++++
>>> drivers/md/raid5.c | 10 ++++++++++
>>> include/linux/blkdev.h | 1 +
>>> 6 files changed, 51 insertions(+)
>>>
>>> diff --git a/drivers/md/linear.c b/drivers/md/linear.c
>>> index 3310b59..e57b8ff 100644
>>> --- a/drivers/md/linear.c
>>> +++ b/drivers/md/linear.c
>>> @@ -128,6 +128,7 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
>>> struct md_rdev *rdev;
>>> int i, cnt;
>>> bool discard_supported = false;
>>> + bool sg_gaps_disabled = false;
>>> conf = kzalloc (sizeof (*conf) + raid_disks*sizeof(struct dev_info),
>>> GFP_KERNEL);
>>> @@ -163,6 +164,9 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
>>> if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
>>> discard_supported = true;
>>> +
>>> + if (blk_queue_sg_gaps(bdev_get_queue(rdev->bdev)))
>>> + sg_gaps_disabled = true;
>>> }
>>> if (cnt != raid_disks) {
>>> printk(KERN_ERR "md/linear:%s: not enough drives present. Aborting!\n",
>>> @@ -175,6 +179,11 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
>>> else
>>> queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
>>> + if (!sg_gaps_disabled)
>>> + queue_flag_clear_unlocked(QUEUE_FLAG_SG_GAPS, mddev->queue);
>>> + else
>>> + queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS, mddev->queue);
>>> +
>>> /*
>>> * Here we calculate the device offsets.
>>> */
>>> diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
>>> index 100ef23..2b77d0f 100644
>>> --- a/drivers/md/raid0.c
>>> +++ b/drivers/md/raid0.c
>>> @@ -435,6 +435,7 @@ static int raid0_run(struct mddev *mddev)
>>> if (mddev->queue) {
>>> struct md_rdev *rdev;
>>> bool discard_supported = false;
>>> + bool sg_gaps_disabled = false;
>>> blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
>>> blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors);
>>> @@ -449,6 +450,8 @@ static int raid0_run(struct mddev *mddev)
>>> rdev->data_offset << 9);
>>> if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
>>> discard_supported = true;
>>> + if (blk_queue_sg_gaps(bdev_get_queue(rdev->bdev)))
>>> + sg_gaps_disabled = true;
>>> }
>>> /* Unfortunately, some devices have awful discard performance,
>>> @@ -470,6 +473,11 @@ static int raid0_run(struct mddev *mddev)
>>> queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
>>> else
>>> queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
>>> +
>>> + if (!sg_gaps_disabled)
>>> + queue_flag_clear_unlocked(QUEUE_FLAG_SG_GAPS, mddev->queue);
>>> + else
>>> + queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS, mddev->queue);
>>> }
>>> /* calculate array device size */
>>> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
>>> index b45b64c..a1763b5 100644
>>> --- a/drivers/md/raid1.c
>>> +++ b/drivers/md/raid1.c
>>> @@ -1663,6 +1663,8 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
>>> md_integrity_add_rdev(rdev, mddev);
>>> if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
>>> queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
>>> + if (mddev->queue && blk_queue_sg_gaps(bdev_get_queue(rdev->bdev)))
>>> + queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS, mddev->queue);
>>> print_conf(conf);
>>> return err;
>>> }
>>> @@ -2905,6 +2907,7 @@ static int run(struct mddev *mddev)
>>> struct md_rdev *rdev;
>>> int ret;
>>> bool discard_supported = false;
>>> + bool sg_gaps_disabled = false;
>>> if (mddev->level != 1) {
>>> printk(KERN_ERR "md/raid1:%s: raid level not set to mirroring (%d)\n",
>>> @@ -2939,6 +2942,8 @@ static int run(struct mddev *mddev)
>>> rdev->data_offset << 9);
>>> if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
>>> discard_supported = true;
>>> + if (blk_queue_sg_gaps(bdev_get_queue(rdev->bdev)))
>>> + sg_gaps_disabled = true;
>>> }
>>> mddev->degraded = 0;
>>> @@ -2976,6 +2981,12 @@ static int run(struct mddev *mddev)
>>> else
>>> queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
>>> mddev->queue);
>>> + if (sg_gaps_disabled)
>>> + queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS,
>>> + mddev->queue);
>>> + else
>>> + queue_flag_clear_unlocked(QUEUE_FLAG_SG_GAPS,
>>> + mddev->queue);
>>> }
>>> ret = md_integrity_register(mddev);
>>> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
>>> index f0c7f3b..52f8d73 100644
>>> --- a/drivers/md/raid10.c
>>> +++ b/drivers/md/raid10.c
>>> @@ -1865,6 +1865,8 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
>>> md_integrity_add_rdev(rdev, mddev);
>>> if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
>>> queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
>>> + if (mddev->queue && blk_queue_sg_gaps(bdev_get_queue(rdev->bdev)))
>>> + queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS, mddev->queue);
>>> print_conf(conf);
>>> return err;
>>> @@ -3650,6 +3652,7 @@ static int run(struct mddev *mddev)
>>> sector_t min_offset_diff = 0;
>>> int first = 1;
>>> bool discard_supported = false;
>>> + bool sg_gaps_disabled = false;
>>> if (mddev->private == NULL) {
>>> conf = setup_conf(mddev);
>>> @@ -3717,6 +3720,9 @@ static int run(struct mddev *mddev)
>>> if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
>>> discard_supported = true;
>>> +
>>> + if (blk_queue_sg_gaps(bdev_get_queue(rdev->bdev)))
>>> + sg_gaps_disabled = true;
>>> }
>>> if (mddev->queue) {
>>> @@ -3726,6 +3732,12 @@ static int run(struct mddev *mddev)
>>> else
>>> queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
>>> mddev->queue);
>>> + if (sg_gaps_disabled)
>>> + queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS,
>>> + mddev->queue);
>>> + else
>>> + queue_flag_clear_unlocked(QUEUE_FLAG_SG_GAPS,
>>> + mddev->queue);
>>> }
>>> /* need to check that every block has at least one working mirror */
>>> if (!enough(conf, -1)) {
>>> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
>>> index 50902ad1..862b86f 100644
>>> --- a/drivers/md/raid5.c
>>> +++ b/drivers/md/raid5.c
>>> @@ -6814,6 +6814,7 @@ static int run(struct mddev *mddev)
>>> if (mddev->queue) {
>>> int chunk_size;
>>> bool discard_supported = true;
>>> + bool sg_gaps_disabled = false;
>>> /* read-ahead size must cover two whole stripes, which
>>> * is 2 * (datadisks) * chunksize where 'n' is the
>>> * number of raid devices
>>> @@ -6878,6 +6879,8 @@ static int run(struct mddev *mddev)
>>> }
>>> discard_supported = false;
>>> }
>>> + if (blk_queue_sg_gaps(bdev_get_queue(rdev->bdev)))
>>> + sg_gaps_disabled = true;
>>> }
>>> if (discard_supported &&
>>> @@ -6888,6 +6891,13 @@ static int run(struct mddev *mddev)
>>> else
>>> queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
>>> mddev->queue);
>>> +
>>> + if (sg_gaps_disabled)
>>> + queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS,
>>> + mddev->queue);
>>> + else
>>> + queue_flag_clear_unlocked(QUEUE_FLAG_SG_GAPS,
>>> + mddev->queue);
>>> }
>>> return 0;
>>> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>>> index 8b400c6..e3752b1 100644
>>> --- a/include/linux/blkdev.h
>>> +++ b/include/linux/blkdev.h
>>> @@ -639,6 +639,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
>>> #define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags)
>>> #define blk_queue_secdiscard(q) (blk_queue_discard(q) && \
>>> test_bit(QUEUE_FLAG_SECDISCARD, &(q)->queue_flags))
>>> +#define blk_queue_sg_gaps(q) test_bit(QUEUE_FLAG_SG_GAPS, &(q)->queue_flags)
>>> #define blk_noretry_request(rq) \
>>> ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
>>>
>>>
>
>
More information about the Devel
mailing list