[Devel] [PATCH rh7 v2 1/3] fs/cleancache: fix data invalidation in the cleancache during direct_io
Konstantin Khorenko
khorenko at virtuozzo.com
Thu Apr 13 00:39:36 PDT 2017
--
Best regards,
Konstantin Khorenko,
Virtuozzo Linux Kernel Team
On 04/13/2017 09:51 AM, Dmitry Monakhov wrote:
> Andrey Ryabinin <aryabinin at virtuozzo.com> writes:
>
>> Currently some direct_io fs hooks call invalidate_inode_pages2_range()
>> only if mapping->nrpages is not zero. So if nrpages is zero, data in
>> the cleancache is not invalidated, and the next buffered read may get
>> stale data from the cleancache.
>
>>
>> Fix this by calling invalidate_inode_pages2_range() regardless of the
>> nrpages value. If nrpages is zero, bail out of invalidate_inode_pages2_range()
>> only after cleancache_invalidate_inode(), so that the cleancache is
>> invalidated while pointless page cache lookups are still avoided.
> BTW, can we please make tcache pluggable? Then anyone who does not want
> fancy caching features can simply disable it, as we do with pfcache.
tcache/tswap already have a disable/enable tweak (vz7: patch "Subject: [PATCH rh7] tcache/tswap: enable by default").

tcache/tswap can be disabled/enabled at runtime with the following commands:
  echo 'N' > /sys/module/{tcache,tswap}/parameters/active
  echo 'Y' > /sys/module/{tcache,tswap}/parameters/active

Per-cgroup, on the fly: echo 1 > @memory.disable_cleancache

To disable tcache at boot time, use the "tcache.enabled=0" kernel option.
>
>>
>> https://jira.sw.ru/browse/PSBM-63908
>> Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
>> ---
>> fs/9p/vfs_file.c | 4 ++--
>> fs/nfs/direct.c | 16 ++++++----------
>> fs/nfs/inode.c | 7 ++++---
>> fs/xfs/xfs_file.c | 30 ++++++++++++++----------------
>> mm/filemap.c | 28 ++++++++++++----------------
>> mm/truncate.c | 4 ++++
>> 6 files changed, 42 insertions(+), 47 deletions(-)
>>
>> diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
>> index 7da03f8..afe0036 100644
>> --- a/fs/9p/vfs_file.c
>> +++ b/fs/9p/vfs_file.c
>> @@ -482,7 +482,7 @@ v9fs_file_write_internal(struct inode *inode, struct p9_fid *fid,
>> if (invalidate && (total > 0)) {
>> pg_start = origin >> PAGE_CACHE_SHIFT;
>> pg_end = (origin + total - 1) >> PAGE_CACHE_SHIFT;
>> - if (inode->i_mapping && inode->i_mapping->nrpages)
>> + if (inode->i_mapping)
>> invalidate_inode_pages2_range(inode->i_mapping,
>> pg_start, pg_end);
>> *offset += total;
>> @@ -688,7 +688,7 @@ v9fs_direct_write(struct file *filp, const char __user * data,
>> * about to write. We do this *before* the write so that if we fail
>> * here we fall back to buffered write
>> */
>> - if (mapping->nrpages) {
>> + {
>> pgoff_t pg_start = offset >> PAGE_CACHE_SHIFT;
>> pgoff_t pg_end = (offset + count - 1) >> PAGE_CACHE_SHIFT;
>>
>> diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
>> index ab96f01..9bbbb63 100644
>> --- a/fs/nfs/direct.c
>> +++ b/fs/nfs/direct.c
>> @@ -1132,12 +1132,10 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
>> if (result)
>> goto out_unlock;
>>
>> - if (mapping->nrpages) {
>> - result = invalidate_inode_pages2_range(mapping,
>> - pos >> PAGE_CACHE_SHIFT, end);
>> - if (result)
>> - goto out_unlock;
>> - }
>> + result = invalidate_inode_pages2_range(mapping,
>> + pos >> PAGE_CACHE_SHIFT, end);
>> + if (result)
>> + goto out_unlock;
>>
>> task_io_account_write(count);
>>
>> @@ -1161,10 +1159,8 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
>>
>> result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio);
>>
>> - if (mapping->nrpages) {
>> - invalidate_inode_pages2_range(mapping,
>> - pos >> PAGE_CACHE_SHIFT, end);
>> - }
>> + invalidate_inode_pages2_range(mapping,
>> + pos >> PAGE_CACHE_SHIFT, end);
>>
>> mutex_unlock(&inode->i_mutex);
>>
>> diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
>> index 8c06aed..779b05c 100644
>> --- a/fs/nfs/inode.c
>> +++ b/fs/nfs/inode.c
>> @@ -1065,10 +1065,11 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map
>> if (ret < 0)
>> return ret;
>> }
>> - ret = invalidate_inode_pages2(mapping);
>> - if (ret < 0)
>> - return ret;
>> }
>> + ret = invalidate_inode_pages2(mapping);
>> + if (ret < 0)
>> + return ret;
>> +
>> if (S_ISDIR(inode->i_mode)) {
>> spin_lock(&inode->i_lock);
>> memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
>> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
>> index 9a2193b..0b7a35b 100644
>> --- a/fs/xfs/xfs_file.c
>> +++ b/fs/xfs/xfs_file.c
>> @@ -346,7 +346,7 @@ xfs_file_aio_read(
>> * serialisation.
>> */
>> xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
>> - if ((ioflags & XFS_IO_ISDIRECT) && inode->i_mapping->nrpages) {
>> + if ((ioflags & XFS_IO_ISDIRECT)) {
>> xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
>> xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
>>
>> @@ -361,22 +361,20 @@ xfs_file_aio_read(
>> * flush and reduce the chances of repeated iolock cycles going
>> * forward.
>> */
>> - if (inode->i_mapping->nrpages) {
>> - ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
>> - if (ret) {
>> - xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
>> - return ret;
>> - }
>> -
>> - /*
>> - * Invalidate whole pages. This can return an error if
>> - * we fail to invalidate a page, but this should never
>> - * happen on XFS. Warn if it does fail.
>> - */
>> - ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping);
>> - WARN_ON_ONCE(ret);
>> - ret = 0;
>> + ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
>> + if (ret) {
>> + xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
>> + return ret;
>> }
>> +
>> + /*
>> + * Invalidate whole pages. This can return an error if
>> + * we fail to invalidate a page, but this should never
>> + * happen on XFS. Warn if it does fail.
>> + */
>> + ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping);
>> + WARN_ON_ONCE(ret);
>> + ret = 0;
>> xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
>> }
>>
>> diff --git a/mm/filemap.c b/mm/filemap.c
>> index 16517c6..8e608fc 100644
>> --- a/mm/filemap.c
>> +++ b/mm/filemap.c
>> @@ -2598,18 +2598,16 @@ generic_file_direct_write_iter(struct kiocb *iocb, struct iov_iter *iter,
>> * about to write. We do this *before* the write so that we can return
>> * without clobbering -EIOCBQUEUED from ->direct_IO().
>> */
>> - if (mapping->nrpages) {
>> - written = invalidate_inode_pages2_range(mapping,
>> - pos >> PAGE_CACHE_SHIFT, end);
>> - /*
>> - * If a page can not be invalidated, return 0 to fall back
>> - * to buffered write.
>> - */
>> - if (written) {
>> - if (written == -EBUSY)
>> - return 0;
>> - goto out;
>> - }
>> + written = invalidate_inode_pages2_range(mapping,
>> + pos >> PAGE_CACHE_SHIFT, end);
>> + /*
>> + * If a page can not be invalidated, return 0 to fall back
>> + * to buffered write.
>> + */
>> + if (written) {
>> + if (written == -EBUSY)
>> + return 0;
>> + goto out;
>> }
>>
>> written = mapping_direct_IO(mapping, WRITE, iocb, iter, pos);
>> @@ -2622,10 +2620,8 @@ generic_file_direct_write_iter(struct kiocb *iocb, struct iov_iter *iter,
>> * so we don't support it 100%. If this invalidation
>> * fails, tough, the write still worked...
>> */
>> - if (mapping->nrpages) {
>> - invalidate_inode_pages2_range(mapping,
>> - pos >> PAGE_CACHE_SHIFT, end);
>> - }
>> + invalidate_inode_pages2_range(mapping,
>> + pos >> PAGE_CACHE_SHIFT, end);
>>
>> if (written > 0) {
>> pos += written;
>> diff --git a/mm/truncate.c b/mm/truncate.c
>> index f460e67..ce4b1d8 100644
>> --- a/mm/truncate.c
>> +++ b/mm/truncate.c
>> @@ -628,6 +628,10 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
>> int did_range_unmap = 0;
>>
>> cleancache_invalidate_inode(mapping);
>> +
>> + if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
>> + return 0;
>> +
>> pagevec_init(&pvec, 0);
>> index = start;
>> while (index <= end && __pagevec_lookup(&pvec, mapping, index,
>> --
>> 2.10.2
> .
>