[Devel] [PATCH RHEL9 COMMIT] dm-qcow2: Close race between R1/R2 reading and relocate_refcount_table()

Konstantin Khorenko khorenko at virtuozzo.com
Thu Mar 24 19:28:48 MSK 2022


The commit is pushed to "branch-rh9-5.14.0-42.vz9.14.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh9-5.14.0-42.vz9.14.4
------>
commit 33d32a7f58e3e960c6375cb3d0ce9fa720d2ce41
Author: Kirill Tkhai <ktkhai at virtuozzo.com>
Date:   Wed Feb 23 00:57:32 2022 +0300

    dm-qcow2: Close race between R1/R2 reading and relocate_refcount_table()
    
    Currently this is the only possible race between revert_clusters_alloc()
    and relocate_refcount_table(), since the rest of the R1/R2 reading
    is being made from the main kwork, as relocate_refcount_table() does.
    
    Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
    Feature: dm-qcow2: block device over QCOW2 files driver
---
 drivers/md/dm-qcow2-map.c | 37 ++++++++++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/drivers/md/dm-qcow2-map.c b/drivers/md/dm-qcow2-map.c
index a48421c7c83b..422db36d8d72 100644
--- a/drivers/md/dm-qcow2-map.c
+++ b/drivers/md/dm-qcow2-map.c
@@ -1871,28 +1871,37 @@ static loff_t parse_l2(struct qcow2 *qcow2, struct qcow2_map *map,
 /*
  * This may be called with @qio == NULL, in case of we sure
  * that R1/R2 are already cached and up to date.
+ * Returned R1 is *unstable* if we are not in main kwork,
+ * since relocate_refcount_table() may move it right after
+ * md_pages_lock release.
  */
 static int __handle_r1r2_maps(struct qcow2 *qcow2, loff_t pos, struct qio **qio,
 			   struct qcow2_map_item *r1, struct qcow2_map_item *r2)
 {
-	int ret;
-
+	int ret = -EIO;
+	/*
+	 * We hold the lock while dereferencing both of R1 and R2
+	 * to close the race with relocate_refcount_table().
+	 */
+	spin_lock_irq(&qcow2->md_pages_lock);
 	if (calc_refcounters_map(qcow2, pos, r1, r2) < 0)
-		return -EIO;
+		goto unlock;
 
 	/* Check R1 table */
-	ret = handle_md_page(qcow2, r1->page_id, qio, &r1->md);
+	ret = __handle_md_page(qcow2, r1->page_id, qio, &r1->md);
 	if (ret <= 0)
-		return ret;
+		goto unlock;
 
 	ret = calc_r2_page_id(qcow2, r1, r2);
 	if (ret < 0)
-		return ret;
+		goto unlock;
 
-	ret = handle_md_page(qcow2, r2->page_id, qio, &r2->md);
+	/* Check R2 table */
+	ret = __handle_md_page(qcow2, r2->page_id, qio, &r2->md);
+unlock:
+	spin_unlock_irq(&qcow2->md_pages_lock);
 	if (ret <= 0)
 		return ret;
-
 	/*
 	 * XXX: we do not care about R1 or R2 may be under writeback,
 	 * since the most actual version of them is cached in memory.
@@ -2286,6 +2295,11 @@ static int relocate_refcount_table(struct qcow2 *qcow2, struct qio **qio)
 		goto err_free_r2_pages;
 	}
 
+	/*
+	 * __handle_r1r2_maps() wants to get consistent values:
+	 * refcount_table_offset must match correct md pages.
+	 */
+	spin_lock_irq(&qcow2->md_pages_lock);
 	/* Update cached values */
 	qcow2->hdr.refcount_table_offset = pos;
 	qcow2->hdr.refcount_table_clusters = clus;
@@ -2295,14 +2309,15 @@ static int relocate_refcount_table(struct qcow2 *qcow2, struct qio **qio)
 	index = old_pos / PAGE_SIZE;
 	delta = (pos - old_pos) / PAGE_SIZE;
 	for (i = 0; i < nr_pages; i++, index++) {
-		spin_lock_irq(&qcow2->md_pages_lock);
 		md = md_page_renumber(qcow2, index, index + delta);
 		if (!WARN_ON_ONCE(!md))
 			md_make_dirty(qcow2, md, true);
-		spin_unlock_irq(&qcow2->md_pages_lock);
 		if (!md)
-			goto err_free_r2_pages;
+			break; /* goto err_free_r2_pages */
 	}
+	spin_unlock_irq(&qcow2->md_pages_lock);
+	if (i != nr_pages)
+		goto err_free_r2_pages;
 
 	/* Connect new R2 to new R1 */
 	for (i = end; i < r2_end; i += clu_size) {


More information about the Devel mailing list