[Devel] [PATCH RHEL7 COMMIT] fuse: ioctl(FIEMAP) support

Konstantin Khorenko khorenko at virtuozzo.com
Mon Jul 10 15:13:04 MSK 2017


Are you going to send this patch to mainline as well?

--
Best regards,

Konstantin Khorenko,
Virtuozzo Linux Kernel Team

On 07/10/2017 03:10 PM, Konstantin Khorenko wrote:
> The commit is pushed to "branch-rh7-3.10.0-514.26.1.vz7.33.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
> after rh7-3.10.0-514.26.1.vz7.33.2
> ------>
> commit 1b2b3f9f3414309e2b412d417e6553f8ca1b362c
> Author: Maxim Patlasov <mpatlasov at virtuozzo.com>
> Date:   Mon Jul 10 16:10:30 2017 +0400
>
>     fuse: ioctl(FIEMAP) support
>
>     It is quite tricky due to necessity to split very large
>     extent sets.
>
>     https://jira.sw.ru/browse/PSBM-68226
>
>     Signed-off-by: Alexey Kuznetsov <kuznet at virtuozzo.com>
>     Signed-off-by: Maxim Patlasov <mpatlasov at virtuozzo.com>
> ---
>  fs/fuse/dir.c    |   1 +
>  fs/fuse/file.c   | 227 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  fs/fuse/fuse_i.h |   6 ++
>  3 files changed, 234 insertions(+)
>
> diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
> index f5ee0f6..1d41bfe 100644
> --- a/fs/fuse/dir.c
> +++ b/fs/fuse/dir.c
> @@ -2099,6 +2099,7 @@ static const struct inode_operations fuse_common_inode_operations = {
>  	.listxattr	= fuse_listxattr,
>  	.removexattr	= fuse_removexattr,
>  	.update_time	= fuse_update_time,
> +	.fiemap		= fuse_fiemap,
>  };
>
>  static const struct inode_operations fuse_symlink_inode_operations = {
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index 52666b8..f362d10 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -3627,6 +3627,233 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
>  	return err;
>  }
>
> +static void copy_fiemap_extent(struct fiemap_extent * le, struct page ** pages, int index)
> +{
> +	struct page * page;
> +	void * addr;
> +	unsigned int linear_off = index * sizeof (struct fiemap_extent);
> +
> +	page = pages[linear_off / PAGE_SIZE];
> +	addr = kmap_atomic(page);
> +	if (((linear_off + sizeof(struct fiemap_extent) - 1) / PAGE_SIZE) == (linear_off / PAGE_SIZE)) {
> +		memcpy(le, addr + (linear_off % PAGE_SIZE), sizeof(struct fiemap_extent));
> +	} else {
> +		int split = PAGE_SIZE - (linear_off % PAGE_SIZE);
> +		memcpy(le, addr + (linear_off % PAGE_SIZE), split);
> +		kunmap_atomic(addr);
> +		page = pages[(linear_off / PAGE_SIZE) + 1];
> +		addr = kmap_atomic(page);
> +		memcpy((void *)le + split, addr, sizeof(struct fiemap_extent) - split);
> +	}
> +	kunmap_atomic(addr);
> +}
> +
> +static int fuse_request_fiemap(struct inode *inode, u32 cur_max,
> +			       __u64 * start_p, __u64 * len_p, int * last_p, struct fiemap_extent_info * dest)
> +{
> +	struct fuse_conn *fc = get_fuse_conn(inode);
> +	struct fuse_inode *fi = get_fuse_inode(inode);
> +	struct fuse_req *req;
> +	struct fuse_ioctl_in inarg;
> +	struct fuse_ioctl_out outarg;
> +	struct fiemap ifiemap;
> +	struct fiemap ofiemap;
> +	int err;
> +	int npages = 0;
> +	int allocated = 0;
> +
> +	err = 0;
> +	spin_lock(&fc->lock);
> +	if (!list_empty(&fi->write_files)) {
> +		struct fuse_file *ff = list_entry(fi->write_files.next, struct fuse_file, write_entry);
> +		inarg.fh = ff->fh;
> +	} else if (!list_empty(&fi->rw_files)) {
> +		struct fuse_file *ff = list_entry(fi->rw_files.next, struct fuse_file, rw_entry);
> +		inarg.fh = ff->fh;
> +	} else {
> +		err = -EINVAL;
> +	}
> +	spin_unlock(&fc->lock);
> +	if (err)
> +		return err;
> +
> +	inarg.cmd = FS_IOC_FIEMAP;
> +	inarg.arg = 0;
> +	inarg.flags = 0;
> +
> +	ifiemap.fm_start = *start_p;
> +	ifiemap.fm_length = *len_p;
> +	ifiemap.fm_flags = dest->fi_flags;
> +	ifiemap.fm_mapped_extents = 0;
> +	ifiemap.fm_extent_count = cur_max;
> +	ifiemap.fm_reserved = 0;
> +
> +	if (cur_max)
> +		npages = (cur_max*sizeof(struct fiemap_extent) + PAGE_SIZE - 1) / PAGE_SIZE;
> +
> +	req = fuse_get_req(fc, npages);
> +	if (IS_ERR(req))
> +		return PTR_ERR(req);
> +
> +	req->in.h.opcode = FUSE_IOCTL;
> +	req->in.h.nodeid = get_node_id(inode);
> +
> +	req->in.numargs = 2;
> +	req->in.args[0].size = sizeof(inarg);
> +	req->in.args[0].value = &inarg;
> +	req->in.args[1].size = sizeof(ifiemap);
> +	req->in.args[1].value = &ifiemap;
> +
> +	req->out.numargs = npages ? 3 : 2;
> +	req->out.args[0].size = sizeof(outarg);
> +	req->out.args[0].value = &outarg;
> +	req->out.args[1].size = sizeof(ofiemap);
> +	req->out.args[1].value = &ofiemap;
> +	if (npages) {
> +		req->out.args[2].size = npages*PAGE_SIZE;
> +		req->out.argvar = 1;
> +		req->out.argpages = 1;
> +		req->num_pages = npages;
> +
> +		err = -ENOMEM;
> +		for (allocated = 0; allocated < npages; allocated++) {
> +			req->pages[allocated] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
> +			if (!req->pages[allocated])
> +				goto out;
> +			req->page_descs[allocated].length = PAGE_SIZE;
> +		}
> +	}
> +
> +	fuse_request_send(fc, req);
> +	err = req->out.h.error;
> +	if (err)
> +		goto out;
> +
> +	if (cur_max == 0) {
> +		dest->fi_extents_mapped += ofiemap.fm_mapped_extents;
> +		goto out;
> +	}
> +	if (ofiemap.fm_mapped_extents == 0) {
> +		/* No extents means all the range is a hole */
> +		*start_p += *len_p;
> +		*len_p = 0;
> +	} else {
> +		struct fiemap_extent fe;
> +		u64 next_start;
> +		int i;
> +
> +		if (ofiemap.fm_mapped_extents > cur_max) {
> +			err = -EIO;
> +			goto out;
> +		}
> +
> +		for (i = 0; i < ofiemap.fm_mapped_extents; i++) {
> +			copy_fiemap_extent(&fe, req->pages, i);
> +			err = fiemap_fill_next_extent(dest, fe.fe_logical,
> +						      fe.fe_physical, fe.fe_length, fe.fe_flags);
> +			if (err == 1) {
> +				*last_p = 1;
> +				err = 0;
> +				goto out;
> +			}
> +			if (err)
> +				goto out;
> +		}
> +
> +		if (fe.fe_flags & FIEMAP_EXTENT_LAST)
> +			*last_p = 1;
> +		next_start = fe.fe_logical + fe.fe_length;
> +		if (next_start >= *start_p + *len_p)
> +			*len_p = 0;
> +		else
> +			*len_p = *start_p + *len_p - next_start;
> +		*start_p = next_start;
> +	}
> +
> +out:
> +	while (--allocated >= 0) {
> +		__free_page(req->pages[allocated]);
> +		req->pages[allocated] = NULL;
> +	}
> +	fuse_put_request(fc, req);
> +	return err;
> +}
> +
> +int fuse_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
> +		__u64 start, __u64 len)
> +{
> +	struct fuse_conn *fc = get_fuse_conn(inode);
> +	int err = 0;
> +
> +	if (is_bad_inode(inode))
> +		return -EIO;
> +
> +	if (fc->no_fiemap)
> +		return -EOPNOTSUPP;
> +
> +	/* It is possible to implement, but implementation is going to be
> +	 * very chumbersome. Ww have to get fiemap from user space daemon,
> +	 * and then on each hole page-by-page we have to scan page cache
> +	 * for dirty and writeback pages and fuse queue for "hidden" writeback
> +	 * pages, merging all the results. It is doable and would give some
> +	 * satisfaction from completed work :-), but still it does not have
> +	 * any practical sense. Current coreutils use FIEMAP_FLAG_SYNC and
> +	 * apparently are not going to fix this, switching to SEEK_DATA instead.
> +	 * So, until the first user appears...
> +	 *
> +	 * Also, we can force FIEMAP_FLAG_SYNC... But for now I think it is better
> +	 * to fail to catch possible users
> +	 */
> +	if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC)) {
> +		if (printk_ratelimit())
> +			printk(KERN_DEBUG "fuse fiemap w/o sync %s[%u]\n",
> +			       current->comm, current->pid);
> +		return -EOPNOTSUPP;
> +	}
> +
> +	mutex_lock(&inode->i_mutex);
> +
> +	fuse_sync_writes(inode);
> +
> +	if (fieinfo->fi_extents_max == 0) {
> +		err = fuse_request_fiemap(inode, 0, &start, &len, NULL, fieinfo);
> +		goto out;
> +	}
> +
> +	for (;;) {
> +		int res;
> +		int last = 0;
> +		unsigned int npages;
> +		u32 cur_max = fieinfo->fi_extents_max - fieinfo->fi_extents_mapped;
> +
> +		if (cur_max == 0)
> +			break;
> +
> +		npages = (cur_max*sizeof(struct fiemap_extent) + PAGE_SIZE - 1) / PAGE_SIZE;
> +		if (npages > FUSE_MAX_PAGES_PER_REQ) {
> +			npages = FUSE_MAX_PAGES_PER_REQ;
> +			cur_max = (npages * PAGE_SIZE) / sizeof(struct fiemap_extent);
> +		}
> +
> +		res = fuse_request_fiemap(inode, cur_max, &start, &len, &last, fieinfo);
> +		if (res < 0)
> +			goto out;
> +
> +		if (len == 0 || last)
> +			break;
> +	}
> +
> +out:
> +	mutex_unlock(&inode->i_mutex);
> +
> +	if (err == -ENOSYS || err == -ENOIOCTLCMD || err == -ENOTTY) {
> +		fc->no_fiemap = 1;
> +		err = -EOPNOTSUPP;
> +	}
> +	return err;
> +}
> +
> +
>  static const struct file_operations fuse_file_operations = {
>  	.llseek		= fuse_file_llseek,
>  	.read		= do_sync_read,
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index e8e1800..203b028 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -654,6 +654,9 @@ struct fuse_conn {
>  	/** Handle wrong FUSE_NOTIFY_INVAL_FILES from old fused */
>  	unsigned compat_inval_files:1;
>
> +	/** No ioctl(FIEMAP) */
> +	unsigned no_fiemap:1;
> +
>  	/** The number of requests waiting for completion */
>  	atomic_t num_waiting;
>
> @@ -1011,4 +1014,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
>
>  void fuse_set_initialized(struct fuse_conn *fc);
>
> +int fuse_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
> +		__u64 start, __u64 len);
> +
>  #endif /* _FS_FUSE_I_H */
> .
>


More information about the Devel mailing list