[Devel] [PATCH vz9] ploop: add interface to request write out BAT
Andrey Zhadchenko
andrey.zhadchenko at virtuozzo.com
Mon Jun 24 10:39:19 MSK 2024
On 6/21/24 17:36, Alexander Atanasov wrote:
> On 21.06.24 14:10, Andrey Zhadchenko wrote:
>>
>>
>> On 5/20/24 13:49, Alexander Atanasov wrote:
>>> It is possible that BAT is corrupted on disk but
>>> we have a good copy of it in RAM. Add dmsetup
>>> message to write it out to file write_bat_to_file.
>>> This is very basic interface and it must be used
>>> as a last resort.
> ^^^^^^^^^^^^^^^^^^^^
>
> There are even more posibilities this function to fail.
> But as a last resort means we have a major bug and try to do best we
> can. The user of this interface must ensure that device is atleast
> suspended and there are no operations running on the device.
>
Even if it is some last resort, kernel crashes should be avoided, so we
will not accidentally explode live node with few vms.
>
>
>>>
>>> https://virtuozzo.atlassian.net/browse/PSBM-156501
>>> Signed-off-by: Alexander Atanasov <alexander.atanasov at virtuozzo.com>
>>> ---
>>> drivers/md/dm-ploop-cmd.c | 48 +++++++++++++++++++++++++++++++++++++++
>>> drivers/md/dm-ploop-map.c | 4 ++--
>>> drivers/md/dm-ploop.h | 3 +++
>>> 3 files changed, 53 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/md/dm-ploop-cmd.c b/drivers/md/dm-ploop-cmd.c
>>> index 88519f590148b..71f854c509f82 100644
>>> --- a/drivers/md/dm-ploop-cmd.c
>>> +++ b/drivers/md/dm-ploop-cmd.c
>>> @@ -280,6 +280,45 @@ static void ploop_make_md_wb(struct ploop
>>> *ploop, struct md_page *md)
>>> write_unlock_irq(&ploop->bat_rwlock);
>>> }
>>> +/* write all md to disk */
>>> +static u32 ploop_rewrite_bat_from_memory(struct ploop *ploop)
>>> +{
>>> + struct rb_node *node;
>>> + struct md_page *md;
>>> + struct ploop_index_wb *piwb;
>>> + blk_status_t bi_status;
>>> + DECLARE_COMPLETION_ONSTACK(comp);
>>> + int err;
>>> +
>>> + ploop_suspend_submitting_pios(ploop);
>> 1. It may fail
>> 2. Deeper in ploop_inflight_bios_ref_switch() there is the following
>> comment:
>> * It is called from kwork only, so this can't be executed in parallel.
>> Could it affect us somehow?
>>
>>> + ploop_for_each_md_page(ploop, md, node) {
>>> + /* we have the page and want to write so prepare bat update */
>>> + err = ploop_prepare_bat_update(ploop, md, PIWB_TYPE_ALLOC);
>> I see that suspend_submitting_pios() waits for all in-flight IO. But I
>> am not sure that writeback is included, could you clarify it?
>> ploop_prepare_bat_update() does no checks, so if there is existing
>> writeback we will probably explode
>>> + /* does wb_init ploop_index_wb_init */
>>> + if (err) {
>>> + /* most likely no memory, notify user he may retry */
>>> + printk(KERN_ERR "MD prepare WB failed: %d\n", err);
>>> + }
>>> + /* mark md for writeback */
>>> + ploop_make_md_wb(ploop, md);
>>> + /* piwb is prepared in md->piwb */
>>> + piwb = md->piwb;
>>> + init_completion(&comp);
>>> + piwb->comp = ∁
>>> + piwb->comp_bi_status = &bi_status;
>>> + /* Write index to disk */
>>> + ploop_index_wb_submit(ploop, piwb);
>>> + wait_for_completion(&comp);
>>> + /* piwb is freed on completion */
>>> + if (bi_status) {
>>> + printk(KERN_ERR "MD WB failed: %d\n",
>>> blk_status_to_errno(bi_status));
>> PL_ERR would probably be a bit better
>>> + }
>>> + }
>>> + ploop_resume_submitting_pios(ploop);
>>> +
>>> + return 0;
>>> +}
>>> +
>>> static int ploop_grow_relocate_cluster(struct ploop *ploop,
>>> struct ploop_cmd *cmd)
>>> {
>>> @@ -1086,6 +1125,11 @@ static int
>>> ploop_check_delta_before_flip(struct ploop *ploop, struct file *file)
>>> return ret;
>>> }
>>> +static int ploop_write_bat_to_file(struct ploop *ploop)
>> Why not just call ploop_rewrite_bat_from_memory() instead? This
>> function seems to be excessive
>>> +{
>>> + return ploop_rewrite_bat_from_memory(ploop);
>>> +}
>>> +
>>> static int ploop_flip_upper_deltas(struct ploop *ploop)
>>> {
>>> struct file *file;
>>> @@ -1197,6 +1241,10 @@ int ploop_message(struct dm_target *ti,
>>> unsigned int argc, char **argv,
>>> if (argc != 1)
>>> goto unlock;
>>> ret = ploop_flip_upper_deltas(ploop);
>>> + } else if (!strcmp(argv[0], "write_bat_to_file")) {
>>> + if (argc != 1)
>>> + goto unlock;
>>> + ret = ploop_write_bat_to_file(ploop);
>>> } else {
>>> ret = -ENOTSUPP;
>>> }
>>> diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
>>> index ea04b3e53567e..7e16005f1e5c9 100644
>>> --- a/drivers/md/dm-ploop-map.c
>>> +++ b/drivers/md/dm-ploop-map.c
>>> @@ -813,7 +813,7 @@ static void
>>> ploop_advance_local_after_bat_wb(struct ploop *ploop,
>>> ploop_dispatch_pios(ploop, NULL, &list);
>>> }
>>> -static void ploop_free_piwb(struct ploop_index_wb *piwb)
>>> +void ploop_free_piwb(struct ploop_index_wb *piwb)
>>> {
>>> ploop_free_pio(piwb->ploop, piwb->pio);
>>> put_page(piwb->bat_page);
>>> @@ -889,7 +889,7 @@ static void ploop_bat_write_complete(struct
>>> ploop_index_wb *piwb,
>>> ploop_put_piwb(piwb);
>>> }
>>> -static int ploop_prepare_bat_update(struct ploop *ploop, struct
>>> md_page *md,
>>> +int ploop_prepare_bat_update(struct ploop *ploop, struct md_page *md,
>>> enum piwb_type type)
>>> {
>>> u32 i, off, last, *bat_entries;
>>> diff --git a/drivers/md/dm-ploop.h b/drivers/md/dm-ploop.h
>>> index db36687c81696..67dbf8abd4e41 100644
>>> --- a/drivers/md/dm-ploop.h
>>> +++ b/drivers/md/dm-ploop.h
>>> @@ -559,6 +559,8 @@ extern void ploop_free_md_page(struct md_page *md);
>>> extern void ploop_free_md_pages_tree(struct rb_root *root);
>>> extern bool ploop_try_update_bat_entry(struct ploop *ploop, u32 clu,
>>> u8 level, u32 dst_clu);
>>> +extern int ploop_prepare_bat_update(struct ploop *ploop, struct
>>> md_page *md,
>>> + enum piwb_type type);
>>> extern int ploop_add_delta(struct ploop *ploop, u32 level,
>>> struct file *file, bool is_raw);
>>> @@ -597,6 +599,7 @@ extern int ploop_read_delta_metadata(struct ploop
>>> *ploop, struct file *file,
>>> struct rb_root *md_root, u32 *delta_nr_be);
>>> extern void ploop_index_wb_init(struct ploop_index_wb *piwb,
>>> struct ploop *ploop);
>>> +extern void ploop_free_piwb(struct ploop_index_wb *piwb);
>>> extern void ploop_call_rw_iter(struct file *file, loff_t pos,
>>> unsigned rw,
>>> struct iov_iter *iter, struct pio *pio);
>>> extern void ploop_enospc_timer(struct timer_list *timer);
>
Best regards,
Andrey
More information about the Devel
mailing list