[Devel] [vzlin-dev] [PATCH vz7] md: propagate QUEUE_FLAG_SG_GAPS

Maxim Patlasov mpatlasov at virtuozzo.com
Fri Dec 9 13:11:28 PST 2016


It's fixed in RHEL7.3 since 3.10.0-514.el7. So all our newer kernels 
(3.10.0-514.vz7.27.*) are not affected.


On 12/08/2016 10:12 PM, Vasily Averin wrote:
> how about RHEL7.3 ?
> if they are affected too -- it makes sense to report them.
>
> On 12/09/2016 08:35 AM, Maxim Patlasov wrote:
>> Mainline is not affected. (they re-worked the whole engine completely so the problem vanished auto-magically)
>>
>> On 12/08/2016 09:21 PM, Vasily Averin wrote:
>>> Maxim,
>>> will you sent this to mainline too ?
>>>
>>> thank you,
>>>      Vasily Averin
>>>
>>> On 12/09/2016 08:17 AM, Maxim Patlasov wrote:
>>>> NVMe disk driver disables sg gaps by setting the flag in its queue.
>>>> md-raids must propagate the flag to its queue as well. Otherwise, an upper
>>>> user will get OK from bio_add_page() even if a gap exists, and that bio
>>>> with a gap will crash NVMe driver who doesn't expect gaps.
>>>> (see bvec_gap_to_prev for the definotion of gap)
>>>>
>>>> https://jira.sw.ru/browse/PSBM-56838
>>>>
>>>> Signed-off-by: Maxim Patlasov <mpatlasov at virtuozzo.com>
>>>> ---
>>>>    drivers/md/linear.c    |    9 +++++++++
>>>>    drivers/md/raid0.c     |    8 ++++++++
>>>>    drivers/md/raid1.c     |   11 +++++++++++
>>>>    drivers/md/raid10.c    |   12 ++++++++++++
>>>>    drivers/md/raid5.c     |   10 ++++++++++
>>>>    include/linux/blkdev.h |    1 +
>>>>    6 files changed, 51 insertions(+)
>>>>
>>>> diff --git a/drivers/md/linear.c b/drivers/md/linear.c
>>>> index 3310b59..e57b8ff 100644
>>>> --- a/drivers/md/linear.c
>>>> +++ b/drivers/md/linear.c
>>>> @@ -128,6 +128,7 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
>>>>        struct md_rdev *rdev;
>>>>        int i, cnt;
>>>>        bool discard_supported = false;
>>>> +    bool sg_gaps_disabled = false;
>>>>          conf = kzalloc (sizeof (*conf) + raid_disks*sizeof(struct dev_info),
>>>>                GFP_KERNEL);
>>>> @@ -163,6 +164,9 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
>>>>              if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
>>>>                discard_supported = true;
>>>> +
>>>> +        if (blk_queue_sg_gaps(bdev_get_queue(rdev->bdev)))
>>>> +            sg_gaps_disabled = true;
>>>>        }
>>>>        if (cnt != raid_disks) {
>>>>            printk(KERN_ERR "md/linear:%s: not enough drives present. Aborting!\n",
>>>> @@ -175,6 +179,11 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
>>>>        else
>>>>            queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
>>>>    +    if (!sg_gaps_disabled)
>>>> +        queue_flag_clear_unlocked(QUEUE_FLAG_SG_GAPS, mddev->queue);
>>>> +    else
>>>> +        queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS, mddev->queue);
>>>> +
>>>>        /*
>>>>         * Here we calculate the device offsets.
>>>>         */
>>>> diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
>>>> index 100ef23..2b77d0f 100644
>>>> --- a/drivers/md/raid0.c
>>>> +++ b/drivers/md/raid0.c
>>>> @@ -435,6 +435,7 @@ static int raid0_run(struct mddev *mddev)
>>>>        if (mddev->queue) {
>>>>            struct md_rdev *rdev;
>>>>            bool discard_supported = false;
>>>> +        bool sg_gaps_disabled = false;
>>>>              blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
>>>>            blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors);
>>>> @@ -449,6 +450,8 @@ static int raid0_run(struct mddev *mddev)
>>>>                          rdev->data_offset << 9);
>>>>                if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
>>>>                    discard_supported = true;
>>>> +            if (blk_queue_sg_gaps(bdev_get_queue(rdev->bdev)))
>>>> +                sg_gaps_disabled = true;
>>>>            }
>>>>              /* Unfortunately, some devices have awful discard performance,
>>>> @@ -470,6 +473,11 @@ static int raid0_run(struct mddev *mddev)
>>>>                queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
>>>>            else
>>>>                queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
>>>> +
>>>> +        if (!sg_gaps_disabled)
>>>> +            queue_flag_clear_unlocked(QUEUE_FLAG_SG_GAPS, mddev->queue);
>>>> +        else
>>>> +            queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS, mddev->queue);
>>>>        }
>>>>          /* calculate array device size */
>>>> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
>>>> index b45b64c..a1763b5 100644
>>>> --- a/drivers/md/raid1.c
>>>> +++ b/drivers/md/raid1.c
>>>> @@ -1663,6 +1663,8 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
>>>>        md_integrity_add_rdev(rdev, mddev);
>>>>        if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
>>>>            queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
>>>> +    if (mddev->queue && blk_queue_sg_gaps(bdev_get_queue(rdev->bdev)))
>>>> +        queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS, mddev->queue);
>>>>        print_conf(conf);
>>>>        return err;
>>>>    }
>>>> @@ -2905,6 +2907,7 @@ static int run(struct mddev *mddev)
>>>>        struct md_rdev *rdev;
>>>>        int ret;
>>>>        bool discard_supported = false;
>>>> +    bool sg_gaps_disabled = false;
>>>>          if (mddev->level != 1) {
>>>>            printk(KERN_ERR "md/raid1:%s: raid level not set to mirroring (%d)\n",
>>>> @@ -2939,6 +2942,8 @@ static int run(struct mddev *mddev)
>>>>                      rdev->data_offset << 9);
>>>>            if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
>>>>                discard_supported = true;
>>>> +        if (blk_queue_sg_gaps(bdev_get_queue(rdev->bdev)))
>>>> +            sg_gaps_disabled = true;
>>>>        }
>>>>          mddev->degraded = 0;
>>>> @@ -2976,6 +2981,12 @@ static int run(struct mddev *mddev)
>>>>            else
>>>>                queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
>>>>                              mddev->queue);
>>>> +        if (sg_gaps_disabled)
>>>> +            queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS,
>>>> +                        mddev->queue);
>>>> +        else
>>>> +            queue_flag_clear_unlocked(QUEUE_FLAG_SG_GAPS,
>>>> +                          mddev->queue);
>>>>        }
>>>>          ret =  md_integrity_register(mddev);
>>>> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
>>>> index f0c7f3b..52f8d73 100644
>>>> --- a/drivers/md/raid10.c
>>>> +++ b/drivers/md/raid10.c
>>>> @@ -1865,6 +1865,8 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
>>>>        md_integrity_add_rdev(rdev, mddev);
>>>>        if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
>>>>            queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
>>>> +    if (mddev->queue && blk_queue_sg_gaps(bdev_get_queue(rdev->bdev)))
>>>> +        queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS, mddev->queue);
>>>>          print_conf(conf);
>>>>        return err;
>>>> @@ -3650,6 +3652,7 @@ static int run(struct mddev *mddev)
>>>>        sector_t min_offset_diff = 0;
>>>>        int first = 1;
>>>>        bool discard_supported = false;
>>>> +    bool sg_gaps_disabled = false;
>>>>          if (mddev->private == NULL) {
>>>>            conf = setup_conf(mddev);
>>>> @@ -3717,6 +3720,9 @@ static int run(struct mddev *mddev)
>>>>              if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
>>>>                discard_supported = true;
>>>> +
>>>> +        if (blk_queue_sg_gaps(bdev_get_queue(rdev->bdev)))
>>>> +            sg_gaps_disabled = true;
>>>>        }
>>>>          if (mddev->queue) {
>>>> @@ -3726,6 +3732,12 @@ static int run(struct mddev *mddev)
>>>>            else
>>>>                queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
>>>>                              mddev->queue);
>>>> +        if (sg_gaps_disabled)
>>>> +            queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS,
>>>> +                        mddev->queue);
>>>> +        else
>>>> +            queue_flag_clear_unlocked(QUEUE_FLAG_SG_GAPS,
>>>> +                          mddev->queue);
>>>>        }
>>>>        /* need to check that every block has at least one working mirror */
>>>>        if (!enough(conf, -1)) {
>>>> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
>>>> index 50902ad1..862b86f 100644
>>>> --- a/drivers/md/raid5.c
>>>> +++ b/drivers/md/raid5.c
>>>> @@ -6814,6 +6814,7 @@ static int run(struct mddev *mddev)
>>>>        if (mddev->queue) {
>>>>            int chunk_size;
>>>>            bool discard_supported = true;
>>>> +        bool sg_gaps_disabled = false;
>>>>            /* read-ahead size must cover two whole stripes, which
>>>>             * is 2 * (datadisks) * chunksize where 'n' is the
>>>>             * number of raid devices
>>>> @@ -6878,6 +6879,8 @@ static int run(struct mddev *mddev)
>>>>                    }
>>>>                    discard_supported = false;
>>>>                }
>>>> +            if (blk_queue_sg_gaps(bdev_get_queue(rdev->bdev)))
>>>> +                sg_gaps_disabled = true;
>>>>            }
>>>>              if (discard_supported &&
>>>> @@ -6888,6 +6891,13 @@ static int run(struct mddev *mddev)
>>>>            else
>>>>                queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
>>>>                            mddev->queue);
>>>> +
>>>> +        if (sg_gaps_disabled)
>>>> +            queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS,
>>>> +                        mddev->queue);
>>>> +        else
>>>> +            queue_flag_clear_unlocked(QUEUE_FLAG_SG_GAPS,
>>>> +                        mddev->queue);
>>>>        }
>>>>          return 0;
>>>> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>>>> index 8b400c6..e3752b1 100644
>>>> --- a/include/linux/blkdev.h
>>>> +++ b/include/linux/blkdev.h
>>>> @@ -639,6 +639,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
>>>>    #define blk_queue_discard(q)    test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags)
>>>>    #define blk_queue_secdiscard(q)    (blk_queue_discard(q) && \
>>>>        test_bit(QUEUE_FLAG_SECDISCARD, &(q)->queue_flags))
>>>> +#define blk_queue_sg_gaps(q)    test_bit(QUEUE_FLAG_SG_GAPS, &(q)->queue_flags)
>>>>      #define blk_noretry_request(rq) \
>>>>        ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
>>>>
>>>>
>>



More information about the Devel mailing list