[Devel] [PATCH rh7 17/30] new helper: iov_iter_get_pages()
Kirill Tkhai
ktkhai at virtuozzo.com
Wed May 20 19:04:56 MSK 2020
From: Al Viro <viro at zeniv.linux.org.uk>
ms commit 7b2c99d15559
iov_iter_get_pages(iter, pages, maxsize, &start) grabs references pinning
the pages that hold up to maxsize bytes of (contiguous) data from iter. It
returns the number of bytes grabbed or a negative error code. On success, the
requested area begins at offset start in pages[0] and runs through pages[1], etc.
Less than the requested amount may be returned - either because the contiguous
area at the beginning of the iterator is smaller than requested, or because
the kernel failed to pin that many pages.
fs/direct-io.c is switched over to using iov_iter_get_pages().
Signed-off-by: Al Viro <viro at zeniv.linux.org.uk>
---
fs/direct-io.c | 110 ++++++++++++++++++----------------------------------
include/linux/fs.h | 3 +
mm/iov-iter.c | 27 +++++++++++++
3 files changed, 67 insertions(+), 73 deletions(-)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 39b76e93aaea..17e5938a4511 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -105,19 +105,14 @@ struct dio_submit {
sector_t cur_page_block; /* Where it starts */
loff_t cur_page_fs_offset; /* Offset in file */
- /*
- * Page fetching state. These variables belong to dio_refill_pages().
- */
- int curr_page; /* changes */
- int total_pages; /* doesn't change */
- unsigned long curr_user_address;/* changes */
-
+ struct iov_iter *iter;
/*
* Page queue. These variables belong to dio_refill_pages() and
* dio_get_page().
*/
unsigned head; /* next page to process */
unsigned tail; /* last valid page + 1 */
+ size_t from, to;
};
/* dio_state communicated between submission path and end_io */
@@ -170,15 +165,10 @@ static inline unsigned dio_pages_present(struct dio_submit *sdio)
*/
static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
{
- int ret;
- int nr_pages;
+ ssize_t ret;
- nr_pages = min(sdio->total_pages - sdio->curr_page, DIO_PAGES);
- ret = get_user_pages_fast(
- sdio->curr_user_address, /* Where from? */
- nr_pages, /* How many pages? */
- dio->rw == READ, /* Write to memory? */
- &dio->pages[0]); /* Put results here */
+ ret = iov_iter_get_pages(sdio->iter, dio->pages, DIO_PAGES * PAGE_SIZE,
+ &sdio->from, dio->rw);
if (ret < 0 && sdio->blocks_available && (dio->rw & WRITE)) {
struct page *page = ZERO_PAGE(0);
@@ -193,18 +183,19 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
dio->pages[0] = page;
sdio->head = 0;
sdio->tail = 1;
- ret = 0;
- goto out;
+ sdio->from = 0;
+ sdio->to = PAGE_SIZE;
+ return 0;
}
if (ret >= 0) {
- sdio->curr_user_address += ret * PAGE_SIZE;
- sdio->curr_page += ret;
+ iov_iter_advance(sdio->iter, ret);
+ ret += sdio->from;
sdio->head = 0;
- sdio->tail = ret;
- ret = 0;
+ sdio->tail = (ret + PAGE_SIZE - 1) / PAGE_SIZE;
+ sdio->to = ((ret - 1) & (PAGE_SIZE - 1)) + 1;
+ return 0;
}
-out:
return ret;
}
@@ -215,8 +206,9 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
* L1 cache.
*/
static inline struct page *dio_get_page(struct dio *dio,
- struct dio_submit *sdio)
+ struct dio_submit *sdio, size_t *from, size_t *to)
{
+ int n;
if (dio_pages_present(sdio) == 0) {
int ret;
@@ -225,7 +217,10 @@ static inline struct page *dio_get_page(struct dio *dio,
return ERR_PTR(ret);
BUG_ON(dio_pages_present(sdio) == 0);
}
- return dio->pages[sdio->head++];
+ n = sdio->head++;
+ *from = n ? 0 : sdio->from;
+ *to = (n == sdio->tail - 1) ? sdio->to : PAGE_SIZE;
+ return dio->pages[n];
}
static void dio_iodone_helper(struct dio *dio, loff_t offset,
@@ -500,8 +495,8 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
*/
static inline void dio_cleanup(struct dio *dio, struct dio_submit *sdio)
{
- while (dio_pages_present(sdio))
- page_cache_release(dio_get_page(dio, sdio));
+ while (sdio->head < sdio->tail)
+ page_cache_release(dio->pages[sdio->head++]);
}
/*
@@ -997,23 +992,18 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
struct buffer_head *map_bh)
{
const unsigned blkbits = sdio->blkbits;
- const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
- struct page *page;
- unsigned block_in_page;
int ret = 0;
- /* The I/O can start at any block offset within the first page */
- block_in_page = sdio->first_block_in_page;
-
while (sdio->block_in_file < sdio->final_block_in_request) {
- page = dio_get_page(dio, sdio);
+ struct page *page;
+ size_t from, to;
+ page = dio_get_page(dio, sdio, &from, &to);
if (IS_ERR(page)) {
ret = PTR_ERR(page);
goto out;
}
- while (block_in_page < blocks_per_page) {
- unsigned offset_in_page = block_in_page << blkbits;
+ while (from < to) {
unsigned this_chunk_bytes; /* # of bytes mapped */
unsigned this_chunk_blocks; /* # of blocks */
unsigned u;
@@ -1084,10 +1074,9 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
page_cache_release(page);
goto out;
}
- zero_user(page, block_in_page << blkbits,
- 1 << blkbits);
+ zero_user(page, from, 1 << blkbits);
sdio->block_in_file++;
- block_in_page++;
+ from += 1 << blkbits;
dio->result += 1 << blkbits;
goto next_block;
}
@@ -1105,7 +1094,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
* can add to this page
*/
this_chunk_blocks = sdio->blocks_available;
- u = (PAGE_SIZE - offset_in_page) >> blkbits;
+ u = (to - from) >> blkbits;
if (this_chunk_blocks > u)
this_chunk_blocks = u;
u = sdio->final_block_in_request - sdio->block_in_file;
@@ -1117,7 +1106,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
if (this_chunk_blocks == sdio->blocks_available)
sdio->boundary = buffer_boundary(map_bh);
ret = submit_page_section(dio, sdio, page,
- offset_in_page,
+ from,
this_chunk_bytes,
sdio->next_block_for_io,
map_bh);
@@ -1128,9 +1117,9 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
sdio->next_block_for_io += this_chunk_blocks;
sdio->block_in_file += this_chunk_blocks;
- block_in_page += this_chunk_blocks;
+ from += this_chunk_bytes;
+ dio->result += this_chunk_bytes;
sdio->blocks_available -= this_chunk_blocks;
- dio->result += this_chunk_blocks << blkbits;
next_block:
BUG_ON(sdio->block_in_file > sdio->final_block_in_request);
if (sdio->block_in_file == sdio->final_block_in_request)
@@ -1139,7 +1128,6 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
/* Drop the ref which was taken in get_user_pages() */
page_cache_release(page);
- block_in_page = 0;
}
out:
return ret;
@@ -1207,7 +1195,6 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
struct dio *dio;
struct dio_submit sdio = { 0, };
unsigned long user_addr;
- size_t bytes;
struct buffer_head map_bh = { 0, };
struct blk_plug plug;
unsigned long align = offset | iov_iter_alignment(iter);
@@ -1342,6 +1329,10 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
spin_lock_init(&dio->bio_lock);
dio->refcount = 1;
+ sdio.iter = iter;
+ sdio.final_block_in_request =
+ (offset + iov_iter_count(iter)) >> blkbits;
+
/*
* In case of non-aligned buffers, we may need 2 more
* pages since we need to zero out first and last block.
@@ -1358,34 +1349,9 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
blk_start_plug(&plug);
- for (seg = 0; seg < nr_segs; seg++) {
- user_addr = (unsigned long)iov[seg].iov_base;
- sdio.size += bytes = iov[seg].iov_len;
-
- /* Index into the first page of the first block */
- sdio.first_block_in_page = (user_addr & ~PAGE_MASK) >> blkbits;
- sdio.final_block_in_request = sdio.block_in_file +
- (bytes >> blkbits);
- /* Page fetching state */
- sdio.head = 0;
- sdio.tail = 0;
- sdio.curr_page = 0;
-
- sdio.total_pages = 0;
- if (user_addr & (PAGE_SIZE-1)) {
- sdio.total_pages++;
- bytes -= PAGE_SIZE - (user_addr & (PAGE_SIZE - 1));
- }
- sdio.total_pages += (bytes + PAGE_SIZE - 1) / PAGE_SIZE;
- sdio.curr_user_address = user_addr;
-
- retval = do_direct_IO(dio, &sdio, &map_bh);
-
- if (retval) {
- dio_cleanup(dio, &sdio);
- break;
- }
- } /* end iovec loop */
+ retval = do_direct_IO(dio, &sdio, &map_bh);
+ if (retval)
+ dio_cleanup(dio, &sdio);
if (retval == -ENOTBLK) {
/*
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 63481fb49967..70d57756b83f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3652,7 +3652,8 @@ static inline iop_dentry_open_t get_dentry_open_iop(struct inode *inode)
}
extern unsigned long iov_iter_alignment(struct iov_iter *i);
-
+ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
+ size_t maxsize, size_t *start, int rw);
extern bool path_noexec(const struct path *path);
#endif /* _LINUX_FS_H */
diff --git a/mm/iov-iter.c b/mm/iov-iter.c
index 59e26d833ac4..bf627cf009a6 100644
--- a/mm/iov-iter.c
+++ b/mm/iov-iter.c
@@ -911,3 +911,30 @@ unsigned long iov_iter_alignment(struct iov_iter *i)
return alignment_iovec(i);
}
EXPORT_SYMBOL(iov_iter_alignment);
+
+ssize_t iov_iter_get_pages(struct iov_iter *i,
+ struct page **pages, size_t maxsize,
+ size_t *start, int rw)
+{
+ size_t offset = i->iov_offset;
+ const struct iovec *iov = iov_iter_iovec(i);
+ size_t len;
+ unsigned long addr;
+ int n;
+ int res;
+
+ len = iov->iov_len - offset;
+ if (len > i->count)
+ len = i->count;
+ if (len > maxsize)
+ len = maxsize;
+ addr = (unsigned long)iov->iov_base + offset;
+ len += *start = addr & (PAGE_SIZE - 1);
+ addr &= ~(PAGE_SIZE - 1);
+ n = (len + PAGE_SIZE - 1) / PAGE_SIZE;
+ res = get_user_pages_fast(addr, n, (rw & WRITE) != WRITE, pages);
+ if (unlikely(res < 0))
+ return res;
+ return (res == n ? len : res * PAGE_SIZE) - *start;
+}
+EXPORT_SYMBOL(iov_iter_get_pages);
More information about the Devel
mailing list