[Devel] [RFC PATCH vz9 v6 37/62] dm-ploop: convert md page rw lock to spin lock

Alexander Atanasov alexander.atanasov at virtuozzo.com
Mon Dec 16 10:02:06 MSK 2024


On 13.12.24 15:09, Andrey Zhadchenko wrote:
> 
> 
> On 12/5/24 22:56, Alexander Atanasov wrote:
>> Prepare locking for threads. Some operations require to lock bat data
>> (see next patch) and in some cases we need to lock  md page while hodling
>> bat spin lock but we can not take sleeping lock while holding a spin 
>> lock.
>> To address this use a spin lock for md pages instead of rwlock.
> 
> As far as I know, rwlock is a spinlock subtype. So this should not be 
> the problem.
> If you insist on spinlocking, please do merge this patch to the one that 
> implements md_lock


Checking Documentation/locking/locktypes.rst again, you are right -
both spinlock and rwlock are converted to sleeping locks, i missed
that spinlock was converted too.

I observed that writers failed to take the lock, starved in favour of 
readers, despit queued lock and so on. When holding the spin lock,
and trying to take a write lock it caused a lockup. That is the main 
reason of the change. It might be worth to retest this thou.

>>
>> https://virtuozzo.atlassian.net/browse/VSTOR-91821
>> Signed-off-by: Alexander Atanasov <alexander.atanasov at virtuozzo.com>
>> ---
>>   drivers/md/dm-ploop-bat.c |  6 +++---
>>   drivers/md/dm-ploop-cmd.c | 24 ++++++++++++------------
>>   drivers/md/dm-ploop-map.c | 23 +++++++++++------------
>>   drivers/md/dm-ploop.h     |  6 +++---
>>   4 files changed, 29 insertions(+), 30 deletions(-)
>>
>> diff --git a/drivers/md/dm-ploop-bat.c b/drivers/md/dm-ploop-bat.c
>> index a6202720927f..efdc82a59abf 100644
>> --- a/drivers/md/dm-ploop-bat.c
>> +++ b/drivers/md/dm-ploop-bat.c
>> @@ -88,7 +88,7 @@ static struct md_page *ploop_alloc_md_page(u32 id)
>>       md->page = page;
>>       md->kmpage = kmap(page);
>>       md->id = id;
>> -    rwlock_init(&md->lock);
>> +    spin_lock_init(&md->md_lock);
>>       return md;
>>   err_page:
>>       kfree(levels);
>> @@ -143,13 +143,13 @@ bool ploop_try_update_bat_entry(struct ploop 
>> *ploop, u32 clu, u8 level, u32 dst_
>>       clu = ploop_bat_clu_idx_in_page(clu); /* relative offset */
>> -    write_lock_irqsave(&md->lock, flags);
>> +    spin_lock_irqsave(&md->md_lock, flags);
>>       if (READ_ONCE(md->bat_levels[clu]) == level) {
>>           bat_entries = md->kmpage;
>>           WRITE_ONCE(bat_entries[clu], dst_clu);
>>           ret = true;
>>       }
>> -    write_unlock_irqrestore(&md->lock, flags);
>> +    spin_unlock_irqrestore(&md->md_lock, flags);
>>       return ret;
>>   }
>> diff --git a/drivers/md/dm-ploop-cmd.c b/drivers/md/dm-ploop-cmd.c
>> index d4408cba5f0d..017250328ea3 100644
>> --- a/drivers/md/dm-ploop-cmd.c
>> +++ b/drivers/md/dm-ploop-cmd.c
>> @@ -46,7 +46,7 @@ static void ploop_advance_holes_bitmap(struct ploop 
>> *ploop,
>>           ploop_init_be_iter(ploop, md->id, &i, &end);
>>           bat_entries = md->kmpage;
>> -        read_lock_irqsave(&md->lock, flags);
>> +        spin_lock(&md->md_lock);
>>           for (; i <= end; i++) {
>>               if (!ploop_md_page_cluster_is_in_top_delta(ploop, md, i))
>>                   continue;
>> @@ -57,7 +57,7 @@ static void ploop_advance_holes_bitmap(struct ploop 
>> *ploop,
>>                   ploop_hole_clear_bit(dst_clu, ploop);
>>               }
>>           }
>> -        read_unlock_irqrestore(&md->lock, flags);
>> +        spin_unlock(&md->md_lock);
>>       }
>>       write_unlock_irq(&ploop->bat_rwlock);
>>   }
>> @@ -169,7 +169,7 @@ static u32 ploop_find_bat_entry(struct ploop 
>> *ploop, u32 dst_clu, bool *is_locke
>>           ploop_init_be_iter(ploop, md->id, &i, &end);
>>           bat_entries = md->kmpage;
>> -        read_lock_irqsave(&md->lock, flags);
>> +        spin_lock_irqsave(&md->md_lock, flags);
>>           for (; i <= end; i++) {
>>               if (READ_ONCE(bat_entries[i]) != dst_clu)
>>                   continue;
>> @@ -178,7 +178,7 @@ static u32 ploop_find_bat_entry(struct ploop 
>> *ploop, u32 dst_clu, bool *is_locke
>>                   break;
>>               }
>>           }
>> -        read_unlock_irqrestore(&md->lock, flags);
>> +        spin_unlock_irqrestore(&md->md_lock, flags);
>>           if (clu != UINT_MAX)
>>               break;
>>       }
>> @@ -727,8 +727,8 @@ static void notify_delta_merged(struct ploop 
>> *ploop, u8 level,
>>           bat_entries = md->kmpage;
>>           d_bat_entries = d_md->kmpage;
>> -        write_lock_irqsave(&md->lock, flags);
>> -        write_lock(&d_md->lock);
>> +        spin_lock_irqsave(&md->md_lock, flags);
>> +        spin_lock(&d_md->md_lock);
>>           for (; i <= end; i++) {
>>               clu = ploop_page_clu_idx_to_bat_clu(md->id, i);
>> @@ -763,8 +763,8 @@ static void notify_delta_merged(struct ploop 
>> *ploop, u8 level,
>>                   WRITE_ONCE(md->bat_levels[i], level);
>>           }
>> -        write_unlock(&d_md->lock);
>> -        write_unlock_irqrestore(&md->lock, flags);
>> +        spin_unlock(&d_md->md_lock);
>> +        spin_unlock_irqrestore(&md->md_lock, flags);
>>           if (stop)
>>               break;
>> @@ -1055,8 +1055,8 @@ static int ploop_check_delta_before_flip(struct 
>> ploop *ploop, struct file *file)
>>           init_be_iter(nr_be, md->id, &i, &end);
>>           d_bat_entries = d_md->kmpage;
>> -        read_lock_irqsave(&md->lock, flags);
>> -        read_lock(&d_md->lock);
>> +        spin_lock_irqsave(&md->md_lock, flags);
>> +        spin_lock(&d_md->md_lock);
>>           for (; i <= end; i++) {
>>               if (ploop_md_page_cluster_is_in_top_delta(ploop, md, i) &&
>>                   d_bat_entries[i] != BAT_ENTRY_NONE) {
>> @@ -1065,8 +1065,8 @@ static int ploop_check_delta_before_flip(struct 
>> ploop *ploop, struct file *file)
>>                   goto unmap;
>>               }
>>           }
>> -        read_unlock(&d_md->lock);
>> -        read_unlock_irqrestore(&md->lock, flags);
>> +        spin_unlock(&d_md->md_lock);
>> +        spin_unlock_irqrestore(&md->md_lock, flags);
>>           clu = ploop_page_clu_idx_to_bat_clu(md->id, i);
>>           if (clu == nr_be - 1) {
>> diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
>> index 3a741bb9d2db..2a6ff9e2996b 100644
>> --- a/drivers/md/dm-ploop-map.c
>> +++ b/drivers/md/dm-ploop-map.c
>> @@ -387,13 +387,13 @@ static bool ploop_delay_if_md_busy(struct ploop 
>> *ploop, struct md_page *md,
>>       WARN_ON_ONCE(!list_empty(&pio->list));
>>       /* lock protects piwb */
>> -    read_lock_irqsave(&md->lock, flags);
>> +    spin_lock_irqsave(&md->md_lock, flags);
>>       piwb = md->piwb;
>>       if (piwb && (piwb->type != type || test_bit(MD_WRITEBACK, 
>> &md->status))) {
>>           llist_add((struct llist_node *)(&pio->list), &md->wait_llist);
>>           busy = true;
>>       }
>> -    read_unlock_irqrestore(&md->lock, flags);
>> +    spin_unlock_irqrestore(&md->md_lock, flags);
>>       return busy;
>>   }
>> @@ -807,7 +807,7 @@ static void 
>> ploop_advance_local_after_bat_wb(struct ploop *ploop,
>>       dst_clu = piwb->kmpage;
>> -    write_lock_irqsave(&md->lock, flags);
>> +    spin_lock(&md->md_lock);
>>       if (piwb->type == PIWB_TYPE_ALLOC)
>>           goto skip_apply;
>> @@ -832,7 +832,7 @@ static void 
>> ploop_advance_local_after_bat_wb(struct ploop *ploop,
>>       WARN_ON_ONCE(!test_bit(MD_WRITEBACK, &md->status));
>>       clear_bit(MD_WRITEBACK, &md->status);
>>       md->piwb = NULL;
>> -    write_unlock_irqrestore(&md->lock, flags);
>> +    spin_unlock(&md->md_lock);
>>       wait_llist_pending = llist_del_all(&md->wait_llist);
>>       if (wait_llist_pending) {
>> @@ -957,15 +957,14 @@ static int ploop_prepare_bat_update(struct ploop 
>> *ploop, struct md_page *md,
>>       bat_entries = md->kmpage;
>> -    write_lock_irq(&md->lock);
>> +    spin_lock_irq(&md->md_lock);
>> +    WARN_ON_ONCE(md->piwb);
>>       md->piwb = piwb;
>>       piwb->md = md;
>> -    write_unlock_irq(&md->lock);
>>       piwb->page_id = page_id;
>>       to = piwb->kmpage;
>> -    read_lock_irq(&md->lock);
>>       memcpy((void *)to, bat_entries, PAGE_SIZE);
>>       /* Absolute number of first index in page (negative for page#0) */
>> @@ -990,7 +989,7 @@ static int ploop_prepare_bat_update(struct ploop 
>> *ploop, struct md_page *md,
>>           if (clu == BAT_ENTRY_NONE || level < ploop_top_level(ploop))
>>               to[i] = 0;
>>       }
>> -    read_unlock_irq(&md->lock);
>> +    spin_unlock_irq(&md->md_lock);
>>       if (is_last_page) {
>>       /* Fill tail of page with 0 */
>> @@ -1010,10 +1009,10 @@ void ploop_break_bat_update(struct ploop 
>> *ploop, struct md_page *md)
>>       struct ploop_index_wb *piwb;
>>       unsigned long flags;
>> -    write_lock_irqsave(&md->lock, flags);
>> +    spin_lock_irqsave(&md->md_lock, flags);
>>       piwb = md->piwb;
>>       md->piwb = NULL;
>> -    write_unlock_irqrestore(&md->lock, flags);
>> +    spin_unlock_irqrestore(&md->md_lock, flags);
>>       ploop_free_piwb(piwb);
>>   }
>> @@ -1440,11 +1439,11 @@ static void ploop_submit_cow_index_wb(struct 
>> ploop_cow *cow)
>>       WARN_ON(to[clu]);
>>       WRITE_ONCE(to[clu], cow->dst_clu);
>> -    write_lock_irqsave(&md->lock, flags);
>> +    spin_lock_irqsave(&md->md_lock, flags);
>>       to = md->kmpage;
>>       WRITE_ONCE(to[clu], cow->dst_clu);
>>       WRITE_ONCE(md->bat_levels[clu], ploop_top_level(ploop));
>> -    write_unlock_irqrestore(&md->lock, flags);
>> +    spin_unlock_irqrestore(&md->md_lock, flags);
>>       ploop_md_up_prio(ploop, md);
>> diff --git a/drivers/md/dm-ploop.h b/drivers/md/dm-ploop.h
>> index 3c632eab37d7..42a1b4759d75 100644
>> --- a/drivers/md/dm-ploop.h
>> +++ b/drivers/md/dm-ploop.h
>> @@ -128,7 +128,7 @@ struct md_page {
>>       struct list_head wb_link;
>>       struct ploop_index_wb *piwb;
>> -    rwlock_t lock;
>> +    spinlock_t md_lock;
>>       unsigned long dirty_timeout;
>>   };
>> @@ -447,7 +447,7 @@ static inline u32 ploop_bat_entries(struct ploop 
>> *ploop, u32 clu,
>>       /* Cluster index related to the page[page_id] start */
>>       clu = ploop_bat_clu_idx_in_page(clu);
>> -    read_lock_irqsave(&md->lock, flags);
>> +    spin_lock_irqsave(&md->md_lock, flags);
>>       if (bat_level)
>>           *bat_level = READ_ONCE(md->bat_levels[clu]);
>>       if (md_ret)
>> @@ -455,7 +455,7 @@ static inline u32 ploop_bat_entries(struct ploop 
>> *ploop, u32 clu,
>>       bat_entries = md->kmpage;
>>       dst_clu = READ_ONCE(bat_entries[clu]);
>> -    read_unlock_irqrestore(&md->lock, flags);
>> +    spin_unlock_irqrestore(&md->md_lock, flags);
>>       return dst_clu;
>>   }

-- 
Regards,
Alexander Atanasov



More information about the Devel mailing list