[Devel] [PATCH 09/14] fuse: Implement writepages and write_begin/write_end callbacks - v2

Maxim V. Patlasov mpatlasov at parallels.com
Wed Mar 27 05:39:09 PDT 2013


Hi Miklos,

01/30/2013 03:08 AM, Miklos Szeredi пишет:
> On Fri, Jan 25, 2013 at 7:25 PM, Maxim V. Patlasov
> <MPatlasov at parallels.com> wrote:
>> The .writepages one is required to make each writeback request carry more than
>> one page on it.
>>
>> Changed in v2:
>>   - fixed fuse_prepare_write() to avoid reads beyond EOF
>>   - fixed fuse_prepare_write() to zero uninitialized part of page
>>
>> Original patch by: Pavel Emelyanov <xemul at openvz.org>
>> Signed-off-by: Maxim V. Patlasov <MPatlasov at parallels.com>
>> ---
>>   fs/fuse/file.c |  282 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>   1 files changed, 281 insertions(+), 1 deletions(-)
>>
>> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
>> index 496e74c..3b4dc98 100644
>> --- a/fs/fuse/file.c
>> +++ b/fs/fuse/file.c
>> @@ -722,7 +722,10 @@ static void fuse_send_readpages(struct fuse_req *req, struct file *file)
>>
>>   struct fuse_fill_data {
>>          struct fuse_req *req;
>> -       struct file *file;
>> +       union {
>> +               struct file *file;
>> +               struct fuse_file *ff;
>> +       };
>>          struct inode *inode;
>>          unsigned nr_pages;
>>   };
>> @@ -1530,6 +1533,280 @@ static int fuse_writepage(struct page *page, struct writeback_control *wbc)
>>          return err;
>>   }
>>
>> +static int fuse_send_writepages(struct fuse_fill_data *data)
>> +{
>> +       int i, all_ok = 1;
>> +       struct fuse_req *req = data->req;
>> +       struct inode *inode = data->inode;
>> +       struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info;
>> +       struct fuse_conn *fc = get_fuse_conn(inode);
>> +       struct fuse_inode *fi = get_fuse_inode(inode);
>> +       loff_t off = -1;
>> +
>> +       if (!data->ff)
>> +               data->ff = fuse_write_file(fc, fi);
>> +
>> +       if (!data->ff) {
>> +               for (i = 0; i < req->num_pages; i++)
>> +                       end_page_writeback(req->pages[i]);
>> +               return -EIO;
>> +       }
>> +
>> +       req->inode = inode;
>> +       req->misc.write.in.offset = page_offset(req->pages[0]);
>> +
>> +       spin_lock(&fc->lock);
>> +       list_add(&req->writepages_entry, &fi->writepages);
>> +       spin_unlock(&fc->lock);
>> +
>> +       for (i = 0; i < req->num_pages; i++) {
>> +               struct page *page = req->pages[i];
>> +               struct page *tmp_page;
>> +
>> +               tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
>> +               if (tmp_page) {
>> +                       copy_highpage(tmp_page, page);
>> +                       inc_bdi_stat(bdi, BDI_WRITEBACK);
>> +                       inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
>> +               } else
>> +                       all_ok = 0;
>> +               req->pages[i] = tmp_page;
>> +               if (i == 0)
>> +                       off = page_offset(page);
>> +
>> +               end_page_writeback(page);
>> +       }
>> +
>> +       if (!all_ok) {
>> +               for (i = 0; i < req->num_pages; i++) {
>> +                       struct page *page = req->pages[i];
>> +                       if (page) {
>> +                               dec_bdi_stat(bdi, BDI_WRITEBACK);
>> +                               dec_zone_page_state(page, NR_WRITEBACK_TEMP);
>> +                               __free_page(page);
>> +                               req->pages[i] = NULL;
>> +                       }
>> +               }
>> +
>> +               spin_lock(&fc->lock);
>> +               list_del(&req->writepages_entry);
>> +               wake_up(&fi->page_waitq);
>> +               spin_unlock(&fc->lock);
>> +               return -ENOMEM;
>> +       }
>> +
>> +       req->ff = fuse_file_get(data->ff);
>> +       fuse_write_fill(req, data->ff, off, 0);
>> +
>> +       req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
>> +       req->in.argpages = 1;
>> +       fuse_page_descs_length_init(req, 0, req->num_pages);
>> +       req->end = fuse_writepage_end;
>> +
>> +       spin_lock(&fc->lock);
>> +       list_add_tail(&req->list, &fi->queued_writes);
>> +       fuse_flush_writepages(data->inode);
>> +       spin_unlock(&fc->lock);
>> +
>> +       return 0;
>> +}
>> +
>> +static int fuse_writepages_fill(struct page *page,
>> +               struct writeback_control *wbc, void *_data)
>> +{
>> +       struct fuse_fill_data *data = _data;
>> +       struct fuse_req *req = data->req;
>> +       struct inode *inode = data->inode;
>> +       struct fuse_conn *fc = get_fuse_conn(inode);
>> +
>> +       if (fuse_page_is_writeback(inode, page->index)) {
>> +               if (wbc->sync_mode != WB_SYNC_ALL) {
>> +                       redirty_page_for_writepage(wbc, page);
>> +                       unlock_page(page);
>> +                       return 0;
>> +               }
>> +               fuse_wait_on_page_writeback(inode, page->index);
>> +       }
>> +
>> +       if (req->num_pages &&
>> +           (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
>> +            (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_write ||
>> +            req->pages[req->num_pages - 1]->index + 1 != page->index)) {
>> +               int err;
>> +
>> +               err = fuse_send_writepages(data);
>> +               if (err) {
>> +                       unlock_page(page);
>> +                       return err;
>> +               }
>> +
>> +               data->req = req =
>> +                       fuse_request_alloc_nofs(FUSE_MAX_PAGES_PER_REQ);
>> +               if (!req) {
>> +                       unlock_page(page);
>> +                       return -ENOMEM;
>> +               }
>> +       }
>> +
>> +       req->pages[req->num_pages] = page;
>> +       req->num_pages++;
>> +
>> +       if (test_set_page_writeback(page))
>> +               BUG();
>> +
>> +       unlock_page(page);
>> +
>> +       return 0;
>> +}
>> +
>> +static int fuse_writepages(struct address_space *mapping,
>> +                          struct writeback_control *wbc)
>> +{
>> +       struct inode *inode = mapping->host;
>> +       struct fuse_conn *fc = get_fuse_conn(inode);
>> +       struct fuse_fill_data data;
>> +       int err;
>> +
>> +       if (!fc->writeback_cache)
>> +               return generic_writepages(mapping, wbc);
>> +
>> +       err = -EIO;
>> +       if (is_bad_inode(inode))
>> +               goto out;
>> +
>> +       data.ff = NULL;
>> +       data.inode = inode;
>> +       data.req = fuse_request_alloc_nofs(FUSE_MAX_PAGES_PER_REQ);
>> +       err = -ENOMEM;
>> +       if (!data.req)
>> +               goto out_put;
>> +
>> +       err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data);
>> +       if (data.req) {
>> +               if (!err && data.req->num_pages) {
>> +                       err = fuse_send_writepages(&data);
>> +                       if (err)
>> +                               fuse_put_request(fc, data.req);
>> +               } else
>> +                       fuse_put_request(fc, data.req);
>> +       }
>> +out_put:
>> +       if (data.ff)
>> +               fuse_file_put(data.ff, false);
>> +out:
>> +       return err;
>> +}
>> +
>> +/*
>> + * Determine the number of bytes of data the page contains
>> + */
>> +static inline unsigned fuse_page_length(struct page *page)
>> +{
>> +       loff_t i_size = i_size_read(page_file_mapping(page)->host);
>> +
>> +       if (i_size > 0) {
>> +               pgoff_t page_index = page_file_index(page);
>> +               pgoff_t end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
>> +               if (page_index < end_index)
>> +                       return PAGE_CACHE_SIZE;
>> +               if (page_index == end_index)
>> +                       return ((i_size - 1) & ~PAGE_CACHE_MASK) + 1;
>> +       }
>> +       return 0;
>> +}
>> +
>> +static int fuse_prepare_write(struct fuse_conn *fc, struct file *file,
>> +               struct page *page, loff_t pos, unsigned len)
>> +{
>> +       struct fuse_req *req;
>> +       unsigned num_read;
>> +       unsigned page_len;
>> +       int err;
>> +
>> +       if (PageUptodate(page) || (len == PAGE_CACHE_SIZE))
>> +               return 0;
>> +
>> +       page_len = fuse_page_length(page);
>> +       if (!page_len) {
>> +               zero_user(page, 0, PAGE_CACHE_SIZE);
>> +               return 0;
>> +       }
>> +
>> +       /*
>> +        * Page writeback can extend beyond the lifetime of the
>> +        * page-cache page, so make sure we read a properly synced
>> +        * page.
>> +        */
>> +       fuse_wait_on_page_writeback(page->mapping->host, page->index);
>> +
>> +       req = fuse_get_req(fc, 1);
>> +       err = PTR_ERR(req);
>> +       if (IS_ERR(req))
>> +               goto out;
>> +
>> +       req->out.page_zeroing = 1;
>> +       req->out.argpages = 1;
>> +       req->num_pages = 1;
>> +       req->pages[0] = page;
>> +       req->page_descs[0].offset = 0;
>> +       req->page_descs[0].length = PAGE_SIZE;
>> +       num_read = fuse_send_read(req, file, page_offset(page), page_len, NULL);
>> +       err = req->out.h.error;
>> +       fuse_put_request(fc, req);
>> +out:
>> +       if (err) {
>> +               unlock_page(page);
>> +               page_cache_release(page);
>> +       } else if (num_read != PAGE_CACHE_SIZE) {
>> +               zero_user_segment(page, num_read, PAGE_CACHE_SIZE);
>> +       }
>> +       return err;
>> +}
> Lots of duplication between this and fuse_readpage().  Can some of
> that be sanely shared?

Yes, I'll factor out the common part. Thanks for finding.

>
>> +
>> +static int fuse_write_begin(struct file *file, struct address_space *mapping,
>> +               loff_t pos, unsigned len, unsigned flags,
>> +               struct page **pagep, void **fsdata)
>> +{
>> +       pgoff_t index = pos >> PAGE_CACHE_SHIFT;
>> +       struct fuse_conn *fc = get_fuse_conn(file->f_dentry->d_inode);
>> +
>> +       BUG_ON(!fc->writeback_cache);
>> +
>> +       *pagep = grab_cache_page_write_begin(mapping, index, flags);
>> +       if (!*pagep)
>> +               return -ENOMEM;
>> +
>> +       return fuse_prepare_write(fc, file, *pagep, pos, len);
>> +}
>> +
> One interesting aspect of this writeback-cache case is the handling of
> "disk full" errors.  The write-begin function should make sure that
> space is reserved on disk for the write, but this patchset doesn't do
> that.  It's not a huge issue, but maybe worth thinking about (and a
> comment above the fuse_write_begin() function).

Good point! I agree that write-begin may be improved in this direction. 
I'll add a comment not to forget about it in the future.

Thanks,
Maxim

>
>> +static int fuse_commit_write(struct file *file, struct page *page,
>> +               unsigned from, unsigned to)
>> +{
>> +       struct inode *inode = page->mapping->host;
>> +       loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
>> +
>> +       if (!PageUptodate(page))
>> +               SetPageUptodate(page);
>> +
>> +       fuse_write_update_size(inode, pos);
>> +       set_page_dirty(page);
>> +       return 0;
>> +}
>> +
>> +static int fuse_write_end(struct file *file, struct address_space *mapping,
>> +               loff_t pos, unsigned len, unsigned copied,
>> +               struct page *page, void *fsdata)
>> +{
>> +       unsigned from = pos & (PAGE_CACHE_SIZE - 1);
>> +
>> +       fuse_commit_write(file, page, from, from+copied);
>> +
>> +       unlock_page(page);
>> +       page_cache_release(page);
>> +
>> +       return copied;
>> +}
>> +
>>   static int fuse_launder_page(struct page *page)
>>   {
>>          int err = 0;
>> @@ -2436,11 +2713,14 @@ static const struct file_operations fuse_direct_io_file_operations = {
>>   static const struct address_space_operations fuse_file_aops  = {
>>          .readpage       = fuse_readpage,
>>          .writepage      = fuse_writepage,
>> +       .writepages     = fuse_writepages,
>>          .launder_page   = fuse_launder_page,
>>          .readpages      = fuse_readpages,
>>          .set_page_dirty = __set_page_dirty_nobuffers,
>>          .bmap           = fuse_bmap,
>>          .direct_IO      = fuse_direct_IO,
>> +       .write_begin    = fuse_write_begin,
>> +       .write_end      = fuse_write_end,
>>   };
>>
>>   void fuse_init_file_inode(struct inode *inode)
>>




More information about the Devel mailing list