[Devel] [PATCH RHEL v2] mm: Reduce access frequency to shrinker_rwsem during shrink_slab
Vasily Averin
vvs at virtuozzo.com
Fri Aug 21 10:15:51 MSK 2020
Andrey,
I'm waiting for your approval here.
On 8/20/20 5:51 PM, Valeriy Vdovin wrote:
> Bug https://jira.sw.ru/browse/PSBM-99181 has introduced a problem: when
> the kernel has opened NFS delegations and NFS server is not accessible
> at the time when NFS shrinker is called, the whole shrinker list
> execution gets stuck until NFS server is back. Being a problem in itself
> it also introduces bigger problem - during that hang, the shrinker_rwsem
> also gets locked, consequently no new mounts can be done at that time
> because new superblock tries to register it's own shrinker and also gets
> stuck at aquiring shrinker_rwsem.
>
> Commit 9e9e35d050955648449498827deb2d43be0564e1 is a workaround for that
> problem. It is known that during signle shrinker execution we do not
> actually need to hold shrinker_rwsem so we release and reacqiure the
> rwsem for each shrinker in the list.
>
> Because of this workaround shrink_slab function now experiences a major
> slowdown, because shrinker_rwsem gets accessed for each shrinker in the
> list twice. On an idle fresh-booted system shrinker_list could be
> iterated up to 1600 times a second, although originally the problem was
> local to only one NFS shrinker.
>
> This patch fixes commit 9e9e35d050955648449498827deb2d43be0564e1 in a
> way that before calling for up_read for shrinker_rwsem, we check that
> this is really an NFS shrinker by checking NFS magic in superblock, if
> it is accessible from shrinker.
>
> https://jira.sw.ru/browse/PSBM-99181
>
> Co-authored-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
> Signed-off-by: Valeriy Vdovin <valeriy.vdovin at virtuozzo.com>
>
> Changes:
> v2: Added missing 'rwsem_is_contented' check
> ---
> fs/super.c | 2 +-
> mm/vmscan.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++---------------
> 2 files changed, 50 insertions(+), 17 deletions(-)
>
> diff --git a/fs/super.c b/fs/super.c
> index f131d14..1cf377a 100644
> --- a/fs/super.c
> +++ b/fs/super.c
> @@ -80,7 +80,7 @@ EXPORT_SYMBOL(dcache_is_low);
> * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we
> * take a passive reference to the superblock to avoid this from occurring.
> */
> -static unsigned long super_cache_scan(struct shrinker *shrink,
> +unsigned long super_cache_scan(struct shrinker *shrink,
> struct shrink_control *sc)
> {
> struct super_block *sb;
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index d7082d2..4fa86e7 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -453,6 +453,20 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
> return freed;
> }
>
> +unsigned long super_cache_scan(struct shrinker *shrink,
> + struct shrink_control *sc);
> +
> +static inline bool is_nfs_shrinker(struct shrinker *shrinker)
> +{
> + struct super_block *sb = container_of(shrinker,
> + struct super_block, s_shrink);
> +
> + if (shrinker->scan_objects == &super_cache_scan)
> + return sb->s_magic == NFS_SUPER_MAGIC;
> +
> + return false;
> +}
> +
> struct shrinker *get_shrinker(struct shrinker *shrinker)
> {
> /*
> @@ -511,6 +525,7 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
> .memcg = memcg,
> };
> struct shrinker *shrinker;
> + bool is_nfs;
>
> shrinker = idr_find(&shrinker_idr, i);
> if (unlikely(!shrinker)) {
> @@ -518,6 +533,8 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
> continue;
> }
>
> + is_nfs = is_nfs_shrinker(shrinker);
> +
> /*
> * Take a refcnt on a shrinker so that it can't be freed or
> * removed from shrinker_idr (and shrinker_list). These way we
> @@ -527,10 +544,16 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
> * take too much time to finish (e.g. on nfs). And holding
> * global shrinker_rwsem can block registring and unregistring
> * of shrinkers.
> + *
> + * The up_read logic should only be executed for nfs shrinker
> + * path, because it has proven to hang. For others it should be
> + * skipped to reduce performance penalties.
> */
> - if(!get_shrinker(shrinker))
> - continue;
> - up_read(&shrinker_rwsem);
> + if (is_nfs) {
> + if (!get_shrinker(shrinker))
> + continue;
> + up_read(&shrinker_rwsem);
> + }
>
> ret = do_shrink_slab(&sc, shrinker, priority);
> if (ret == SHRINK_EMPTY) {
> @@ -565,14 +588,18 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
> * memcg_expand_one_shrinker_map if new shrinkers
> * were registred in the meanwhile.
> */
> - if (!down_read_trylock(&shrinker_rwsem)) {
> - freed = freed ? : 1;
> + if (is_nfs) {
> + if (!down_read_trylock(&shrinker_rwsem)) {
> + freed = freed ? : 1;
> + put_shrinker(shrinker);
> + return freed;
> + }
> put_shrinker(shrinker);
> - return freed;
> + map = memcg_nid_shrinker_map(memcg, nid);
> + nr_max = min(shrinker_nr_max, map->nr_max);
> + } else if (rwsem_is_contended(&shrinker_rwsem)) {
> + break;
> }
> - put_shrinker(shrinker);
> - map = memcg_nid_shrinker_map(memcg, nid);
> - nr_max = min(shrinker_nr_max, map->nr_max);
> }
> unlock:
> up_read(&shrinker_rwsem);
> @@ -648,13 +675,17 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
> .for_drop_caches = for_drop_caches,
> };
>
> + bool is_nfs = is_nfs_shrinker(shrinker);
> +
> if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
> sc.nid = 0;
>
> /* See comment in shrink_slab_memcg. */
> - if(!get_shrinker(shrinker))
> - continue;
> - up_read(&shrinker_rwsem);
> + if (is_nfs) {
> + if (!get_shrinker(shrinker))
> + continue;
> + up_read(&shrinker_rwsem);
> + }
>
> ret = do_shrink_slab(&sc, shrinker, priority);
> if (ret == SHRINK_EMPTY)
> @@ -665,12 +696,14 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
> * We can safely continue traverse of the shrinker_list as
> * our shrinker is still on the list due to refcount.
> */
> - if (!down_read_trylock(&shrinker_rwsem)) {
> - freed = freed ? : 1;
> + if (is_nfs) {
> + if (!down_read_trylock(&shrinker_rwsem)) {
> + freed = freed ? : 1;
> + put_shrinker(shrinker);
> + goto out;
> + }
> put_shrinker(shrinker);
> - goto out;
> }
> - put_shrinker(shrinker);
> }
>
> up_read(&shrinker_rwsem);
>
More information about the Devel
mailing list