[Devel] [vzlin-dev] [PATCH vz7] fuse: trust server file size unless opened

Maxim Patlasov mpatlasov at virtuozzo.com
Thu Dec 15 12:42:31 PST 2016


On 12/15/2016 04:35 AM, Dmitry Monakhov wrote:

> Maxim Patlasov <mpatlasov at virtuozzo.com> writes:
>
>> Before the patch, the only way to pick up updated file size from server (in a
>> scenario when local inode was created earlier, then the file was updated
>> from another node) was in fuse_open_common():
>>
>>> 		atomic_inc(&fi->num_openers);
>>>
>>> 		if (atomic_read(&fi->num_openers) == 1) {
>>> 			err = fuse_getattr_size(inode, file, &size);
>>> 			...
>>> 			spin_lock(&fc->lock);
>>> 			i_size_write(inode, size);
>>> 			spin_unlock(&fc->lock);
>>> 		}
>> This is correct, but someone may ask about i_size w/o open, e.g.: ls -l foo.
>> The patch ensures that every time the server reports us some file size, if no
>> open-s happened yet (num_openers=0), fuse stores that server size in local
>> inode->i_size. This resolves the following problem:
>>
>> # pstorage-mount -c test -l /var/log/f1.log /pcs1
>> # pstorage-mount -c test -l /var/log/f2.log /pcs2
>>
>> # date > /pcs1/foo; ls -l /pcs1/foo /pcs2/foo
>> -rwx------ 1 root root 29 Dec 14 16:31 /pcs1/foo
>> -rwx------ 1 root root 29 Dec 14 16:31 /pcs2/foo
>>
>> # date >> /pcs1/foo; ls -l /pcs1/foo /pcs2/foo
>> -rwx------ 1 root root 58 Dec 14 16:31 /pcs1/foo
>> -rwx------ 1 root root 29 Dec 14 16:31 /pcs2/foo
>>
>> https://jira.sw.ru/browse/PSBM-57047
>>
>> Signed-off-by: Maxim Patlasov <mpatlasov at virtuozzo.com>
> Ok. But IMHO fi->num_openers is redundant it protects special metadata,
> but thre are other cases where we may get client/server mdata out of sync.

Yeah, fi->num_openers is a nasty hack. I'd like to implement an 
universal solution for that problem (determining when to trust server 
about particular mdata), but I don't know one. Historically, all these 
hacks are the price we pay for an optimization: avoidance of immediate 
flush of data and mdata on each buffered write.

>
>> ---
>>   fs/fuse/file.c   |   12 +++++++++++-
>>   fs/fuse/fuse_i.h |    3 +++
>>   fs/fuse/inode.c  |    4 +++-
>>   3 files changed, 17 insertions(+), 2 deletions(-)
>>
>> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
>> index 9cad8c5..62967d2 100644
>> --- a/fs/fuse/file.c
>> +++ b/fs/fuse/file.c
>> @@ -296,12 +296,20 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
>>   		u64 size;
>>   
>>   		mutex_lock(&inode->i_mutex);
>> +
>> +		spin_lock(&fc->lock);
>>   		atomic_inc(&fi->num_openers);
>>   
>>   		if (atomic_read(&fi->num_openers) == 1) {
>> +			fi->i_size_unstable = 1;
>> +			spin_unlock(&fc->lock);
>>   			err = fuse_getattr_size(inode, file, &size);
>>   			if (err) {
>> +				spin_lock(&fc->lock);
>>   				atomic_dec(&fi->num_openers);
>> +				fi->i_size_unstable = 0;
>> +				spin_unlock(&fc->lock);
>> +
>>   				mutex_unlock(&inode->i_mutex);
>>   				fuse_release_common(file, FUSE_RELEASE);
>>   				return err;
>> @@ -309,8 +317,10 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
>>   
>>   			spin_lock(&fc->lock);
>>   			i_size_write(inode, size);
>> +			fi->i_size_unstable = 0;
>> +			spin_unlock(&fc->lock);
>> +		} else
>>   			spin_unlock(&fc->lock);
>> -		}
>>   
>>   		mutex_unlock(&inode->i_mutex);
>>   	}
>> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
>> index 1d24bf6..22eb9c9 100644
>> --- a/fs/fuse/fuse_i.h
>> +++ b/fs/fuse/fuse_i.h
>> @@ -124,6 +124,9 @@ struct fuse_inode {
>>   
>>   	/** Mostly to detect very first open */
>>   	atomic_t num_openers;
>> +
>> +	/** Even though num_openers>0, trust server i_size */
>> +	int i_size_unstable;
>>   };
>>   
>>   /** FUSE inode state bits */
>> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
>> index 5ccecae..f606deb 100644
>> --- a/fs/fuse/inode.c
>> +++ b/fs/fuse/inode.c
>> @@ -97,6 +97,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
>>   	fi->writectr = 0;
>>   	fi->orig_ino = 0;
>>   	fi->state = 0;
>> +	fi->i_size_unstable = 0;
>>   	INIT_LIST_HEAD(&fi->write_files);
>>   	INIT_LIST_HEAD(&fi->rw_files);
>>   	INIT_LIST_HEAD(&fi->queued_writes);
>> @@ -226,7 +227,8 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
>>   	 * extend local i_size without keeping userspace server in sync. So,
>>   	 * attr->size coming from server can be stale. We cannot trust it.
>>   	 */
>> -	if (!is_wb || !S_ISREG(inode->i_mode))
>> +	if (!is_wb || !S_ISREG(inode->i_mode) ||
>> +	    !atomic_read(&fi->num_openers) || fi->i_size_unstable)
>>   		i_size_write(inode, attr->size);
>>   	spin_unlock(&fc->lock);
>>   



More information about the Devel mailing list