[Devel] [PATCH RH7] ploop: Provide interface to pass freezing fs during snapshot

Kirill Tkhai ktkhai at virtuozzo.com
Tue Sep 15 12:40:26 MSK 2020


Userspace passes PLOOP_FLAG_FS_SYNC flag, when it wants
to sync filesystem on snapshot. We need that because
of ext4 may start transaction in one snapshot, and then
the transaction metadata will actually be written in another
snapshot.

So, later, if we jump to old snapshot (snapshot-switch),
ext4 mount code sees that transaction is in progress,
and it tries to replay journal (it tries to write metadata
to appropriate places like it sees it has to be according
to journal).

This is a reason of several problems such as impossibility
to jump to old snapshot in RO. But not only that.

To workaround that, we have PLOOP_FLAG_FS_SYNC, but it does
not work with any configurations of device-mapper stack.
Device mapper is supported by autodetect
(see ploop_set_dm_crypt_bdev()) This autodetect supports
only one configuration:

ploop->dm_crypt->fs,

while now we have another:

ploop->dm_crypt->linear->fs.

Instead of introducing new crutches in the autodetect,
we better introduce a generic interface, which allows
userspace to pass fd of a mountpoint we want to sync
via the same ioctl. So, we won't need more crutches
in autodetect, if configuration will change in the future.
Userspace will just pass appropriate fd, while kernel
remains the same.

We reuse second chunk of struct ploop_ctl for fd of
mountpoint we want to sync/freeze. In case we want to freeze
several mount points in the future, we may reuse more
chunks for that.

(
	We could introduce another interface, say,

	struct ploop_snapshot
	{
		struct ploop_ctl ctl;
		struct ploop_ctl_chunk chunk;
		int mountpoints_fds[0]
	}

	but it requires a new flag in struct ploop_ctl::pctl_flags,
	and it looks worse in general.
)

Userspace will able to detect, whether kernel supports
passing mountpoint fd by a test request with ctl.pctl_chunks == 0.
Old kernel returns -EINVAL, new kernel -ENOTSUPP.

https://jira.sw.ru/browse/PSBM-105630

Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
---
 drivers/block/ploop/dev.c |   64 +++++++++++++++++++++++++++++++++++++++------
 1 file changed, 56 insertions(+), 8 deletions(-)

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index 113394d..24e69e0 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -3643,14 +3643,46 @@ void ploop_relax(struct ploop_device * plo)
 }
 EXPORT_SYMBOL(ploop_relax);
 
+static int get_bdev_from_fd(int fd, struct block_device **bdev)
+{
+	struct file *file = fget(fd);
+	struct inode *inode;
+	int ret = -ENODEV;
+
+	if (!file)
+		return -ENODEV;
+
+	inode = file_inode(file);
+	if (!inode)
+		goto fput;
+
+	*bdev = inode->i_sb->s_bdev;
+	if (*bdev) {
+		bdgrab(*bdev);
+		ret = 0;
+	}
+fput:
+	fput(file);
+	return ret;
+}
+
 /* search disk for first partition bdev with mounted fs and freeze it */
 static struct super_block *find_and_freeze_bdev(struct ploop_device *plo,
-						struct block_device ** bdev_pp)
+						int sync_fd,
+						struct block_device **bdev_pp)
 {
 	struct super_block  * sb   = NULL;
 	struct block_device * bdev = NULL;
 	struct gendisk *disk = plo->disk;
-	int i;
+	int i, ret;
+
+	if (sync_fd != -1) {
+		ret = get_bdev_from_fd(sync_fd, &bdev);
+		if (ret)
+			return ERR_PTR(ret);
+		sb = freeze_bdev(bdev);
+		goto out;
+	}
 
 	bdev = ploop_get_dm_crypt_bdev(plo);
 	if (bdev) {
@@ -3683,12 +3715,12 @@ static struct super_block *find_and_freeze_bdev(struct ploop_device *plo,
 static int ploop_snapshot(struct ploop_device * plo, unsigned long arg,
 			  struct block_device * bdev)
 {
-	int err;
 	struct ploop_ctl ctl;
-	struct ploop_ctl_chunk chunk;
+	struct ploop_ctl_chunk chunk, __user *uchunks;
 	struct ploop_delta * delta, * top_delta;
 	struct ploop_snapdata snapdata;
 	struct super_block * sb;
+	int err, sync_fd = -1;
 
 	if (list_empty(&plo->map.delta_list))
 		return -ENOENT;
@@ -3700,10 +3732,26 @@ static int ploop_snapshot(struct ploop_device * plo, unsigned long arg,
 
 	if (copy_from_user(&ctl, (void*)arg, sizeof(struct ploop_ctl)))
 		return -EFAULT;
+	uchunks = (void *)((void*)arg + sizeof(struct ploop_ctl));
+	if (ctl.pctl_chunks == 2) {
+		/*
+		 * We reused second chunk for fd of mountpoint to sync.
+		 * In case of we need to support more mountpoints,
+		 * we may reuse more chunks (chunks below #0 are mountpoints).
+		 */
+		if (copy_from_user(&chunk, &uchunks[1],
+				   sizeof(struct ploop_ctl_chunk)))
+			return -EFAULT;
+		if (chunk.pctl_type || chunk.pctl_flags || chunk.pctl_offset ||
+		    chunk.pctl_start || chunk.pctl_len)
+			return -EINVAL;
+		/* The rest of fields are ignored */
+		sync_fd = chunk.pctl_fd;
+		ctl.pctl_chunks = 1;
+	}
 	if (ctl.pctl_chunks != 1)
-		return -EINVAL;
-	if (copy_from_user(&chunk, (void*)arg + sizeof(struct ploop_ctl),
-			   sizeof(struct ploop_ctl_chunk)))
+		return -ENOTSUPP;
+	if (copy_from_user(&chunk, uchunks, sizeof(struct ploop_ctl_chunk)))
 		return -EFAULT;
 
 	delta = init_delta(plo, &ctl, -1);
@@ -3739,7 +3787,7 @@ static int ploop_snapshot(struct ploop_device * plo, unsigned long arg,
 		/* freeze_bdev() may trigger ploop_bd_full() */
 		plo->maintenance_type = PLOOP_MNTN_SNAPSHOT;
 		mutex_unlock(&plo->ctl_mutex);
-		sb = find_and_freeze_bdev(plo, &bdev);
+		sb = find_and_freeze_bdev(plo, sync_fd, &bdev);
 		mutex_lock(&plo->ctl_mutex);
 		plo->maintenance_type = PLOOP_MNTN_OFF;
 		if (IS_ERR(sb)) {



More information about the Devel mailing list