[Devel] [PATCH rh7] memcg: allow to disable cleancache per memcg

Kirill Tkhai ktkhai at odin.com
Thu Oct 8 08:41:40 PDT 2015


Add CC

On 08.10.2015 18:39, Kirill Tkhai wrote:
> 
> 
> On 25.09.2015 16:10, Vladimir Davydov wrote:
>> Currently, it is only possible to disable tcache system-wide, but it
>> might be useful to disable it for a particular container while leaving
>> it enabled for others, e.g. in case the container is known to generate
>> tons of used-once page cache (backup).
>>
>> This patch introduces memory.disable_cleancache per memcg knob. Writing
>> 1 to it disables cleancache for the corresponding cgroup. Cleancache may
>> be re-enabled by writing 0 to it.
>>
>> https://jira.sw.ru/browse/PSBM-34163
>>
>> Signed-off-by: Vladimir Davydov <vdavydov at parallels.com>
>> ---
>>  include/linux/memcontrol.h |  6 +++++
>>  mm/cleancache.c            | 10 ++++++--
>>  mm/memcontrol.c            | 58 ++++++++++++++++++++++++++++++++++++++++++++++
>>  3 files changed, 72 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
>> index ac3f16f0ee28..85a3b2182c9d 100644
>> --- a/include/linux/memcontrol.h
>> +++ b/include/linux/memcontrol.h
>> @@ -117,6 +117,7 @@ void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
>>   */
>>  int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec);
>>  bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg);
>> +bool mem_cgroup_cleancache_disabled(struct page *page);
>>  int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);
>>  unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list);
>>  void mem_cgroup_update_lru_size(struct lruvec *, enum lru_list, int);
>> @@ -333,6 +334,11 @@ static inline bool mem_cgroup_low(struct mem_cgroup *root,
>>  	return false;
>>  }
>>  
>> +static inline bool mem_cgroup_cleancache_disabled(struct page *page)
>> +{
>> +	return false;
>> +}
>> +
>>  static inline unsigned long
>>  mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
>>  {
>> diff --git a/mm/cleancache.c b/mm/cleancache.c
>> index a3308bcaa116..16fc1d7c90ae 100644
>> --- a/mm/cleancache.c
>> +++ b/mm/cleancache.c
>> @@ -15,6 +15,7 @@
>>  #include <linux/fs.h>
>>  #include <linux/exportfs.h>
>>  #include <linux/mm.h>
>> +#include <linux/memcontrol.h>
>>  #include <linux/debugfs.h>
>>  #include <linux/cleancache.h>
>>  
>> @@ -227,8 +228,13 @@ void __cleancache_put_page(struct page *page)
>>  	pool_id = page->mapping->host->i_sb->cleancache_poolid;
>>  	if (pool_id >= 0 &&
>>  		cleancache_get_key(page->mapping->host, &key) >= 0) {
>> -		cleancache_ops->put_page(pool_id, key, page->index, page);
>> -		cleancache_puts++;
>> +		if (!mem_cgroup_cleancache_disabled(page)) {
>> +			cleancache_ops->put_page(pool_id, key,
>> +						 page->index, page);
>> +			cleancache_puts++;
>> +		} else
>> +			cleancache_ops->invalidate_page(pool_id, key,
>> +							page->index);
>>  	}
>>  }
>>  EXPORT_SYMBOL(__cleancache_put_page);
>> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
>> index 00cc66db5052..30782b37e2c2 100644
>> --- a/mm/memcontrol.c
>> +++ b/mm/memcontrol.c
>> @@ -326,6 +326,10 @@ struct mem_cgroup {
>>  	/* For oom notifier event fd */
>>  	struct list_head oom_notify;
>>  
>> +#ifdef CONFIG_CLEANCACHE
>> +	bool cleancache_disabled;
>> +#endif
> 

This is not a good place to put 1-byte variable. Above and below are
unsigned long variables, so this lose 7 byte just by design.
 
I advise you to put it together with any another bool variable.
 
>> +
>>  	/*
>>  	 * Should we move charges of a task when a task is moved into this
>>  	 * mem_cgroup ? And what type of charges should we move ?
>> @@ -1577,6 +1581,37 @@ bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg)
>>  	return true;
>>  }
>>  
>> +#ifdef CONFIG_CLEANCACHE
>> +bool mem_cgroup_cleancache_disabled(struct page *page)
>> +{
>> +	struct mem_cgroup *memcg = NULL;
>> +	struct page_cgroup *pc;
>> +	bool ret = false;
>> +
>> +	if (mem_cgroup_disabled())
>> +		goto out;
>> +
>> +	pc = lookup_page_cgroup(page);
>> +	if (!PageCgroupUsed(pc))
>> +		goto out;
>> +
>> +	lock_page_cgroup(pc);
>> +	if (!PageCgroupUsed(pc))
>> +		goto out_unlock;
>> +
>> +	for (memcg = pc->mem_cgroup; memcg; memcg = parent_mem_cgroup(memcg)) {
>> +		if (memcg->cleancache_disabled) {
>> +			ret = true;
>> +			break;
>> +		}
>> +	}
>> +out_unlock:
>> +	unlock_page_cgroup(pc);
>> +out:
>> +	return ret;
>> +}
> 
I really dislike the design. This code is likely case and it should be as lightweight
as possible. But the patch complicates it instead of making that on unlikely path.
 
Can't we in mem_cgroup_disable_cleancache_write() propagate mem_cgroup::cleancache value
to all of our children? Yeah, we will have to have a possibility to differ manually and
propagated values, but an implementation of a couple of flags instead of bool value should
not be difficult here.
> 
>> +#endif
>> +
>>  static bool __mem_cgroup_below_oom_guarantee(struct mem_cgroup *root,
>>  					     struct mem_cgroup *memcg)
>>  {
>> @@ -5292,6 +5327,21 @@ static int mem_cgroup_oom_guarantee_write(struct cgroup *cont,
>>  	return 0;
>>  }
>>  
>> +#ifdef CONFIG_CLEANCACHE
>> +static u64 mem_cgroup_disable_cleancache_read(struct cgroup *cgrp,
>> +					      struct cftype *cft)
>> +{
>> +	return mem_cgroup_from_cont(cgrp)->cleancache_disabled;
>> +}
>> +
>> +static int mem_cgroup_disable_cleancache_write(struct cgroup *cgrp,
>> +					       struct cftype *cft, u64 val)
>> +{
>> +	mem_cgroup_from_cont(cgrp)->cleancache_disabled = !!val;
>> +	return 0;
>> +}
>> +#endif
>> +
>>  static void memcg_get_hierarchical_limit(struct mem_cgroup *memcg,
>>  		unsigned long long *mem_limit, unsigned long long *memsw_limit)
>>  {
>> @@ -6226,6 +6276,14 @@ static struct cftype mem_cgroup_files[] = {
>>  		.read_seq_string = memcg_numa_stat_show,
>>  	},
>>  #endif
>> +#ifdef CONFIG_CLEANCACHE
>> +	{
>> +		.name = "disable_cleancache",
>> +		.flags = CFTYPE_NOT_ON_ROOT,
>> +		.read_u64 = mem_cgroup_disable_cleancache_read,
>> +		.write_u64 = mem_cgroup_disable_cleancache_write,
>> +	},
>> +#endif
>>  #ifdef CONFIG_MEMCG_KMEM
>>  	{
>>  		.name = "kmem.limit_in_bytes",
>>
> 
Regards,
Kirill




More information about the Devel mailing list