[Devel] [PATCH 1/2] fuse: add a new async operation to unmap regions

Dmitry Monakhov dmonakhov at openvz.org
Wed Feb 7 12:44:35 MSK 2018


Andrei Vagin <avagin at virtuozzo.com> writes:

> On Tue, Feb 06, 2018 at 11:49:30PM +0300, Konstantin Khorenko wrote:
>> Andrey, this seems to be a feature and it should be tested.
>> 
>> Please post here a jira id with the feature description, QA task, etc.
>
> 1. Feature
>
> Add support for discard requests via hole punching for plain ploops
> https://pmc.acronis.com/browse/VSTOR-6962
>
> 2. Description
>
> When ploop receives a discard request, it calls fallocate() to punch a
> hole in the ploop image file. This allows useless data to be dropped
> from the storage.
>
> 4. Testing
>
> [root at localhost ploop]# cat test/ploop-fdiscard.sh
> set -e -x
>
> path=$1
> mkdir -p $path
> ploop init $path/root -s 1G -f raw --sparse -t none
> out=$(ploop mount $path/DiskDescriptor.xml)
> echo $out
> dev=$(echo $out | sed "s/.*dev=\(\S*\).*/\1/")
> echo $dev
> filefrag -sv $path/root
> dd if=/dev/urandom of=$dev bs=1M count=1
> dd if=/dev/urandom of=$dev bs=1M count=1 seek=512
> fout1="$(filefrag -sv $path/root | wc -l)"
> filefrag -sv $path/root
> blkdiscard -l 1M -o 512M $dev
> filefrag -sv $path/root
> fout2="$(filefrag -sv $path/root | wc -l)"
> if [ "$fout1" -le "$fout2" ]; then
> 	echo FAIL
> 	exit 1
> fi
> blkdiscard $dev
> filefrag -sv $path/root
> fout3="$(filefrag -sv $path/root | wc -l)"
> if [ "$fout2" -le "$fout3" ]; then
> 	echo FAIL
> 	exit 1
> fi
> ploop umount -d $dev
> rm -rf $path
>
> 5. Known issues
>
> Works only for raw images on a fuse file system (vstorage)
>
> 7. Feature owner
> Andrei Vagin (avagin@)
>
>
>> 
>> And whom to review?
>
> Dima, could you review this patch set?
Ack, with a minor request.
This is a good moment to add a stress test for rw-io vs discard
via fio. I can imagine two types of tests:
1) a simple read/write/trim stress test
2) an integrity test via trimwrite, followed by read verification
>
>> 
>> --
>> Best regards,
>> 
>> Konstantin Khorenko,
>> Virtuozzo Linux Kernel Team
>> 
>> On 02/06/2018 03:25 AM, Andrei Vagin wrote:
>> > The fuse interface allows any operation to be run asynchronously,
>> > because the kernel redirects all operations to a user daemon and then
>> > waits for an answer.
>> > 
>> > In ploop, we want to handle discard requests via fallocate, and
>> > the simplest way to do this is to run fallocate(FALLOC_FL_PUNCH_HOLE)
>> > asynchronously, like the write command.
>> > 
>> > This patch adds a new async command IOCB_CMD_UNMAP_ITER, which sends
>> > fallocate(FALLOC_FL_PUNCH_HOLE) to a fuse user daemon.
>> > 
>> > Signed-off-by: Andrei Vagin <avagin at openvz.org>
>> > ---
>> >  fs/aio.c                     |  1 +
>> >  fs/fuse/file.c               | 63 ++++++++++++++++++++++++++++++++++++++------
>> >  fs/fuse/fuse_i.h             |  3 +++
>> >  include/uapi/linux/aio_abi.h |  1 +
>> >  4 files changed, 60 insertions(+), 8 deletions(-)
>> > 
>> > diff --git a/fs/aio.c b/fs/aio.c
>> > index 3a6a9b0..cdc7558 100644
>> > --- a/fs/aio.c
>> > +++ b/fs/aio.c
>> > @@ -1492,6 +1492,7 @@ rw_common:
>> >  		ret = aio_read_iter(req);
>> >  		break;
>> > 
>> > +	case IOCB_CMD_UNMAP_ITER:
>> >  	case IOCB_CMD_WRITE_ITER:
>> >  		ret = aio_write_iter(req);
>> >  		break;
>> > diff --git a/fs/fuse/file.c b/fs/fuse/file.c
>> > index 877c41f..83ea9da 100644
>> > --- a/fs/fuse/file.c
>> > +++ b/fs/fuse/file.c
>> > @@ -920,6 +920,19 @@ static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_req *req)
>> >  	if (!req->bvec)
>> >  		fuse_release_user_pages(req, !io->write);
>> > 
>> > +	if (req->in.h.opcode == FUSE_FALLOCATE) {
>> > +		if (req->out.h.error)
>> > +			printk("fuse_aio_complete_req: request (fallocate fh=0x%llx "
>> > +			       "offset=%lld length=%lld mode=%x) completed with err=%d\n",
>> > +			       req->misc.fallocate.in.fh,
>> > +			       req->misc.fallocate.in.offset,
>> > +			       req->misc.fallocate.in.length,
>> > +			       req->misc.fallocate.in.mode,
>> > +			       req->out.h.error);
>> > +		fuse_aio_complete(io, req->out.h.error, -1);
>> > +		return;
>> > +	}
>> > +
>> >  	if (io->write) {
>> >  		if (req->misc.write.in.size != req->misc.write.out.size)
>> >  			pos = req->misc.write.in.offset - io->offset +
>> > @@ -1322,6 +1335,33 @@ static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff,
>> >  	req->out.args[0].value = outarg;
>> >  }
>> > 
>> > +static size_t fuse_send_unmap(struct fuse_req *req, struct fuse_io_priv *io,
>> > +			      loff_t pos, size_t count, fl_owner_t owner)
>> > +{
>> > +	struct file *file = io->file;
>> > +	struct fuse_file *ff = file->private_data;
>> > +	struct fuse_conn *fc = ff->fc;
>> > +	struct fuse_fallocate_in *inarg = &req->misc.fallocate.in;
>> > +
>> > +	inarg->fh = ff->fh;
>> > +	inarg->offset = pos;
>> > +	inarg->length = count;
>> > +	inarg->mode = FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE;
>> > +	req->in.h.opcode = FUSE_FALLOCATE;
>> > +	req->in.h.nodeid = ff->nodeid;
>> > +	req->in.numargs = 1;
>> > +	req->in.args[0].size = sizeof(struct fuse_fallocate_in);
>> > +	req->in.args[0].value = inarg;
>> > +
>> > +	fuse_account_request(fc, count);
>> > +
>> > +	if (io->async)
>> > +		return fuse_async_req_send(fc, req, count, io);
>> > +
>> > +	fuse_request_send(fc, req);
>> > +	return count;
>> > +}
>> > +
>> >  static size_t fuse_send_write(struct fuse_req *req, struct fuse_io_priv *io,
>> >  			      loff_t pos, size_t count, fl_owner_t owner)
>> >  {
>> > @@ -3455,7 +3495,7 @@ static ssize_t fuse_direct_IO_bvec(int rw, struct kiocb *iocb,
>> >  			req->bvec = bvec;
>> >  		}
>> > 
>> > -		if (filled + bvec->bv_len <= nmax) {
>> > +		if (bvec_len && filled + bvec->bv_len <= nmax) {
>> >  			filled += bvec->bv_len;
>> >  			req->num_bvecs++;
>> >  			bvec++;
>> > @@ -3465,14 +3505,21 @@ static ssize_t fuse_direct_IO_bvec(int rw, struct kiocb *iocb,
>> >  				continue;
>> >  		}
>> > 
>> > -		BUG_ON(!filled);
>> > 
>> > -		if (rw == WRITE)
>> > -			nres = fuse_send_write(req, io, pos,
>> > -					filled, NULL);
>> > -		else
>> > -			nres = fuse_send_read(req, io, pos,
>> > -					filled, NULL);
>> > +		if (iocb->ki_opcode == IOCB_CMD_UNMAP_ITER) {
>> > +			req->in.argbvec = 0;
>> > +			nres = fuse_send_unmap(req, io, pos,
>> > +					iocb->ki_nbytes, NULL);
>> > +			filled = nres;
>> > +		} else {
>> > +			BUG_ON(!filled);
>> > +			if (rw == WRITE)
>> > +				nres = fuse_send_write(req, io, pos,
>> > +						filled, NULL);
>> > +			else
>> > +				nres = fuse_send_read(req, io, pos,
>> > +						filled, NULL);
>> > +		}
>> > 
>> >  		BUG_ON(nres != filled);
>> >  		fuse_put_request(fc, req);
>> > diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
>> > index 2da4520..b2edcf9 100644
>> > --- a/fs/fuse/fuse_i.h
>> > +++ b/fs/fuse/fuse_i.h
>> > @@ -364,6 +364,9 @@ struct fuse_req {
>> >  			struct fuse_write_in in;
>> >  			struct fuse_write_out out;
>> >  		} write;
>> > +		struct {
>> > +			struct fuse_fallocate_in in;
>> > +		} fallocate;
>> >  		struct fuse_notify_retrieve_in retrieve_in;
>> >  		struct fuse_lk_in lk_in;
>> >  	} misc;
>> > diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h
>> > index 22ce4bd..ea2c346 100644
>> > --- a/include/uapi/linux/aio_abi.h
>> > +++ b/include/uapi/linux/aio_abi.h
>> > @@ -46,6 +46,7 @@ enum {
>> >  	IOCB_CMD_PWRITEV = 8,
>> >  	IOCB_CMD_READ_ITER = 9,
>> >  	IOCB_CMD_WRITE_ITER = 10,
>> > +	IOCB_CMD_UNMAP_ITER = 11,
>> >  };
>> > 
>> >  /*
>> > 


More information about the Devel mailing list