[Devel] [PATCH vz9] ploop: add interface to request write out BAT

Andrey Zhadchenko andrey.zhadchenko at virtuozzo.com
Mon Jun 24 10:39:19 MSK 2024



On 6/21/24 17:36, Alexander Atanasov wrote:
> On 21.06.24 14:10, Andrey Zhadchenko wrote:
>>
>>
>> On 5/20/24 13:49, Alexander Atanasov wrote:
>>> It is possible that BAT is corrupted on disk but
>>> we have a good copy of it in RAM. Add dmsetup
>>> message to write it out to file write_bat_to_file.
>>> This is very basic interface and it must be used
>>> as a last resort.
> ^^^^^^^^^^^^^^^^^^^^
> 
> There are even more posibilities this function to fail.
> But as a last resort means we have a major bug and try to do best we 
> can. The user of this interface must ensure that device is atleast
> suspended and there are no operations running on the device.
> 

Even if it is some last resort, kernel crashes should be avoided, so we 
will not accidentally explode live node with few vms.

> 
> 
>>>
>>> https://virtuozzo.atlassian.net/browse/PSBM-156501
>>> Signed-off-by: Alexander Atanasov <alexander.atanasov at virtuozzo.com>
>>> ---
>>>   drivers/md/dm-ploop-cmd.c | 48 +++++++++++++++++++++++++++++++++++++++
>>>   drivers/md/dm-ploop-map.c |  4 ++--
>>>   drivers/md/dm-ploop.h     |  3 +++
>>>   3 files changed, 53 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/md/dm-ploop-cmd.c b/drivers/md/dm-ploop-cmd.c
>>> index 88519f590148b..71f854c509f82 100644
>>> --- a/drivers/md/dm-ploop-cmd.c
>>> +++ b/drivers/md/dm-ploop-cmd.c
>>> @@ -280,6 +280,45 @@ static void ploop_make_md_wb(struct ploop 
>>> *ploop, struct md_page *md)
>>>       write_unlock_irq(&ploop->bat_rwlock);
>>>   }
>>> +/* write all md to disk */
>>> +static u32 ploop_rewrite_bat_from_memory(struct ploop *ploop)
>>> +{
>>> +    struct rb_node *node;
>>> +    struct md_page *md;
>>> +    struct ploop_index_wb *piwb;
>>> +    blk_status_t bi_status;
>>> +    DECLARE_COMPLETION_ONSTACK(comp);
>>> +    int err;
>>> +
>>> +    ploop_suspend_submitting_pios(ploop);
>> 1. It may fail
>> 2. Deeper in ploop_inflight_bios_ref_switch() there is the following 
>> comment:
>>   * It is called from kwork only, so this can't be executed in parallel.
>> Could it affect us somehow?
>>
>>> +    ploop_for_each_md_page(ploop, md, node) {
>>> +        /* we have the page and want to write so prepare bat update */
>>> +        err = ploop_prepare_bat_update(ploop, md, PIWB_TYPE_ALLOC);
>> I see that suspend_submitting_pios() waits for all in-flight IO. But I 
>> am not sure that writeback is included, could you clarify it?
>> ploop_prepare_bat_update() does no checks, so if there is existing 
>> writeback we will probably explode
>>> +        /* does wb_init ploop_index_wb_init */
>>> +        if (err) {
>>> +            /* most likely no memory, notify user he may retry */
>>> +            printk(KERN_ERR "MD prepare WB failed: %d\n", err);
>>> +        }
>>> +        /* mark md for writeback */
>>> +        ploop_make_md_wb(ploop, md);
>>> +        /* piwb is prepared in md->piwb */
>>> +        piwb = md->piwb;
>>> +        init_completion(&comp);
>>> +        piwb->comp = &comp;
>>> +        piwb->comp_bi_status = &bi_status;
>>> +        /* Write index to disk */
>>> +        ploop_index_wb_submit(ploop, piwb);
>>> +        wait_for_completion(&comp);
>>> +        /* piwb is freed on completion */
>>> +        if (bi_status) {
>>> +            printk(KERN_ERR "MD WB failed: %d\n", 
>>> blk_status_to_errno(bi_status));
>> PL_ERR would probably be a bit better
>>> +        }
>>> +    }
>>> +    ploop_resume_submitting_pios(ploop);
>>> +
>>> +    return 0;
>>> +}
>>> +
>>>   static int ploop_grow_relocate_cluster(struct ploop *ploop,
>>>                          struct ploop_cmd *cmd)
>>>   {
>>> @@ -1086,6 +1125,11 @@ static int 
>>> ploop_check_delta_before_flip(struct ploop *ploop, struct file *file)
>>>       return ret;
>>>   }
>>> +static int ploop_write_bat_to_file(struct ploop *ploop)
>> Why not just call ploop_rewrite_bat_from_memory() instead? This 
>> function seems to be excessive
>>> +{
>>> +    return ploop_rewrite_bat_from_memory(ploop);
>>> +}
>>> +
>>>   static int ploop_flip_upper_deltas(struct ploop *ploop)
>>>   {
>>>       struct file *file;
>>> @@ -1197,6 +1241,10 @@ int ploop_message(struct dm_target *ti, 
>>> unsigned int argc, char **argv,
>>>           if (argc != 1)
>>>               goto unlock;
>>>           ret = ploop_flip_upper_deltas(ploop);
>>> +    } else if (!strcmp(argv[0], "write_bat_to_file")) {
>>> +        if (argc != 1)
>>> +            goto unlock;
>>> +        ret = ploop_write_bat_to_file(ploop);
>>>       } else {
>>>           ret = -ENOTSUPP;
>>>       }
>>> diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
>>> index ea04b3e53567e..7e16005f1e5c9 100644
>>> --- a/drivers/md/dm-ploop-map.c
>>> +++ b/drivers/md/dm-ploop-map.c
>>> @@ -813,7 +813,7 @@ static void 
>>> ploop_advance_local_after_bat_wb(struct ploop *ploop,
>>>           ploop_dispatch_pios(ploop, NULL, &list);
>>>   }
>>> -static void ploop_free_piwb(struct ploop_index_wb *piwb)
>>> +void ploop_free_piwb(struct ploop_index_wb *piwb)
>>>   {
>>>       ploop_free_pio(piwb->ploop, piwb->pio);
>>>       put_page(piwb->bat_page);
>>> @@ -889,7 +889,7 @@ static void ploop_bat_write_complete(struct 
>>> ploop_index_wb *piwb,
>>>       ploop_put_piwb(piwb);
>>>   }
>>> -static int ploop_prepare_bat_update(struct ploop *ploop, struct 
>>> md_page *md,
>>> +int ploop_prepare_bat_update(struct ploop *ploop, struct md_page *md,
>>>                       enum piwb_type type)
>>>   {
>>>       u32 i, off, last, *bat_entries;
>>> diff --git a/drivers/md/dm-ploop.h b/drivers/md/dm-ploop.h
>>> index db36687c81696..67dbf8abd4e41 100644
>>> --- a/drivers/md/dm-ploop.h
>>> +++ b/drivers/md/dm-ploop.h
>>> @@ -559,6 +559,8 @@ extern void ploop_free_md_page(struct md_page *md);
>>>   extern void ploop_free_md_pages_tree(struct rb_root *root);
>>>   extern bool ploop_try_update_bat_entry(struct ploop *ploop, u32 clu,
>>>                          u8 level, u32 dst_clu);
>>> +extern int ploop_prepare_bat_update(struct ploop *ploop, struct 
>>> md_page *md,
>>> +                    enum piwb_type type);
>>>   extern int ploop_add_delta(struct ploop *ploop, u32 level,
>>>                  struct file *file, bool is_raw);
>>> @@ -597,6 +599,7 @@ extern int ploop_read_delta_metadata(struct ploop 
>>> *ploop, struct file *file,
>>>                        struct rb_root *md_root, u32 *delta_nr_be);
>>>   extern void ploop_index_wb_init(struct ploop_index_wb *piwb,
>>>                   struct ploop *ploop);
>>> +extern void ploop_free_piwb(struct ploop_index_wb *piwb);
>>>   extern void ploop_call_rw_iter(struct file *file, loff_t pos, 
>>> unsigned rw,
>>>                      struct iov_iter *iter, struct pio *pio);
>>>   extern void ploop_enospc_timer(struct timer_list *timer);
> 
Best regards,
Andrey


More information about the Devel mailing list