[Devel] [vzlin-dev] [PATCH vz7] fuse: optimize writepages search
Kirill Tkhai
ktkhai at virtuozzo.com
Fri Apr 7 07:28:10 PDT 2017
Everything looks good for me except two moments. Please see comments below.
On 07.04.2017 06:35, Maxim Patlasov wrote:
> The patch re-works fi->writepages replacing list with rb-tree.
> This improves performance because kernel fuse iterates through
> fi->writepages for each writeback page and typical number of
> entries is about 800 (for 100MB of fuse writeback).
>
> Before patch:
>
> # dd if=/dev/zero of=/pcs2860/fil bs=1M count=10240
> 10240+0 records in
> 10240+0 records out
> 10737418240 bytes (11 GB) copied, 41.3473 s, 260 MB/s
>
> # vmstat 10
> 2 1 0 57445400 40416 6323676 0 0 33 374743 8633 19210 1 8 88 3 0
>
> # perf top
> 29.86% [kernel] [k] _raw_spin_lock
> 26.62% [fuse] [k] fuse_page_is_writeback
>
> After patch:
>
> # dd if=/dev/zero of=/pcs2860/fil bs=1M count=10240
> 10240+0 records in
> 10240+0 records out
> 10737418240 bytes (11 GB) copied, 21.4954 s, 500 MB/s
>
> # vmstat 10
> 2 9 0 53676040 31744 10265984 0 0 64 854790 10956 48387 1 6 88 6 0
>
> # perf top
> 23.55% [kernel] [k] copy_user_enhanced_fast_string
> 9.87% [kernel] [k] __memcpy
> 3.10% [kernel] [k] _raw_spin_lock
>
> https://jira.sw.ru/browse/PSBM-59254
>
> Signed-off-by: Maxim Patlasov <mpatlasov at virtuozzo.com>
> ---
> fs/fuse/file.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++--------
> fs/fuse/fuse_i.h | 4 +--
> fs/fuse/inode.c | 2 +
> 3 files changed, 73 insertions(+), 16 deletions(-)
>
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index 41ed6f0..eab5030 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -435,7 +435,7 @@ static int fuse_release(struct inode *inode, struct file *file)
> * enqueued on this file and it is not going to get new one: it is closing.
> */
> if (!ff->fc->close_wait)
> - wait_event(fi->page_waitq, list_empty_careful(&fi->writepages));
> + wait_event(fi->page_waitq, RB_EMPTY_ROOT(&fi->writepages));
> else
> wait_event(fi->page_waitq, atomic_read(&ff->count) == 1);
>
> @@ -501,17 +501,30 @@ static bool fuse_range_is_writeback(struct inode *inode, pgoff_t idx_from,
> {
> struct fuse_conn *fc = get_fuse_conn(inode);
> struct fuse_inode *fi = get_fuse_inode(inode);
> - struct fuse_req *req;
> bool found = false;
> + struct rb_node *n;
>
> spin_lock(&fc->lock);
> - list_for_each_entry(req, &fi->writepages, writepages_entry) {
> +
> + n = fi->writepages.rb_node;
> + if (!n) {
> + spin_unlock(&fc->lock);
> + return false;
> + }
You don't need this "if" branch, because this case is handled in "while".
> +
> + while (n) {
> + struct fuse_req *req;
> pgoff_t curr_index;
>
> + req = rb_entry(n, struct fuse_req, writepages_entry);
> BUG_ON(req->inode != inode);
> curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
> - if (!(idx_from >= curr_index + req->num_pages ||
> - idx_to < curr_index)) {
> +
> + if (idx_from >= curr_index + req->num_pages)
> + n = n->rb_right;
> + else if (idx_to < curr_index)
> + n = n->rb_left;
> + else {
> found = true;
> break;
> }
> @@ -531,17 +544,31 @@ static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
> {
> struct fuse_conn *fc = get_fuse_conn(inode);
> struct fuse_inode *fi = get_fuse_inode(inode);
> - struct fuse_req *req;
> bool found = false;
> + struct rb_node *n;
>
> spin_lock(&fc->lock);
> - list_for_each_entry(req, &fi->writepages, writepages_entry) {
> +
> + n = fi->writepages.rb_node;
> + if (!n) {
> + spin_unlock(&fc->lock);
> + return false;
> + }
The same as above.
> +
> +
> + while (n) {
> + struct fuse_req *req;
> pgoff_t curr_index;
>
> + req = rb_entry(n, struct fuse_req, writepages_entry);
> BUG_ON(req->inode != inode);
> curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
> - if (curr_index <= index &&
> - index < curr_index + req->num_pages) {
> +
> + if (index >= curr_index + req->num_pages)
> + n = n->rb_right;
> + else if (index < curr_index)
> + n = n->rb_left;
> + else {
> found = true;
> break;
> }
> @@ -1868,7 +1895,7 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
> struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info;
> int i;
>
> - list_del(&req->writepages_entry);
> + rb_erase(&req->writepages_entry, &fi->writepages);
> if (fc->writeback_cache || fc->close_wait)
> __fuse_file_put(req->ff);
> for (i = 0; i < req->num_pages; i++) {
> @@ -1964,6 +1991,36 @@ static struct fuse_file *fuse_write_file(struct fuse_conn *fc,
> return ff;
> }
>
> +static int tree_insert(struct rb_root *root, struct fuse_req *ins_req)
> +{
> + unsigned i = ins_req->num_pages - 1;
> + pgoff_t idx_from = ins_req->pages[0]->index;
> + pgoff_t idx_to = ins_req->pages[i]->index;
> + struct rb_node **p = &root->rb_node;
> + struct rb_node *parent = NULL;
> +
> + while (*p) {
> + struct fuse_req *req;
> + pgoff_t curr_index;
> +
> + parent = *p;
> + req = rb_entry(parent, struct fuse_req, writepages_entry);
> + BUG_ON(req->inode != ins_req->inode);
> + curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
> +
> + if (idx_from >= curr_index + req->num_pages)
> + p = &(*p)->rb_right;
> + else if (idx_to < curr_index)
> + p = &(*p)->rb_left;
> + else
> + BUG();
> + }
> +
> + rb_link_node(&ins_req->writepages_entry, parent, p);
> + rb_insert_color(&ins_req->writepages_entry, root);
> + return 0;
> +}
> +
> static int fuse_writepage_locked(struct page *page,
> struct writeback_control *wbc,
> struct fuse_file **ff_pp)
> @@ -2034,7 +2091,7 @@ static int fuse_writepage_locked(struct page *page,
> inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
>
> spin_lock(&fc->lock);
> - list_add(&req->writepages_entry, &fi->writepages);
> + tree_insert(&fi->writepages, req);
> list_add_tail(&req->list, &fi->queued_writes);
> fuse_flush_writepages(inode);
> spin_unlock(&fc->lock);
> @@ -2110,7 +2167,7 @@ static int fuse_send_writepages(struct fuse_fill_data *data)
> req->misc.write.in.offset = page_offset(req->pages[0]);
>
> spin_lock(&fc->lock);
> - list_add(&req->writepages_entry, &fi->writepages);
> + tree_insert(&fi->writepages, req);
> spin_unlock(&fc->lock);
>
> for (i = 0; i < req->num_pages; i++) {
> @@ -2143,7 +2200,7 @@ static int fuse_send_writepages(struct fuse_fill_data *data)
> }
>
> spin_lock(&fc->lock);
> - list_del(&req->writepages_entry);
> + rb_erase(&req->writepages_entry, &fi->writepages);
> wake_up(&fi->page_waitq);
> spin_unlock(&fc->lock);
>
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index fefa8ff..4601571 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -117,7 +117,7 @@ struct fuse_inode {
> wait_queue_head_t page_waitq;
>
> /** List of writepage requestst (pending or sent) */
> - struct list_head writepages;
> + struct rb_root writepages;
>
> /** Miscellaneous bits describing inode state */
> unsigned long state;
> @@ -404,7 +404,7 @@ struct fuse_req {
> struct fuse_io_priv *io;
>
> /** Link on fi->writepages */
> - struct list_head writepages_entry;
> + struct rb_node writepages_entry;
>
> /** Request completion callback */
> void (*end)(struct fuse_conn *, struct fuse_req *);
> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> index ddd858c..0da2623 100644
> --- a/fs/fuse/inode.c
> +++ b/fs/fuse/inode.c
> @@ -101,7 +101,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
> INIT_LIST_HEAD(&fi->write_files);
> INIT_LIST_HEAD(&fi->rw_files);
> INIT_LIST_HEAD(&fi->queued_writes);
> - INIT_LIST_HEAD(&fi->writepages);
> + fi->writepages = RB_ROOT;
> init_waitqueue_head(&fi->page_waitq);
> fi->forget = fuse_alloc_forget();
> if (!fi->forget) {
>
More information about the Devel
mailing list