[Devel] [RFC PATCH v2 1/1] selftests: cgroup: test page cache limiting feature

Dmitry Sepp dmitry.sepp at virtuozzo.com
Fri Nov 14 15:51:57 MSK 2025


The tests validate the memory.cache.max and memory.cache.current cgroup
files: that the configured limit actually bounds the amount of page cache
used, and that the page cache usage is accounted correctly.

The NUMA migration test makes use of a supplementary program, touch_pages,
that maps the test file and accesses it with a random pattern.

https://virtuozzo.atlassian.net/browse/VSTOR-112174

Signed-off-by: Dmitry Sepp <dmitry.sepp at virtuozzo.com>

Feature: mm: Memory cgroup page cache limit
Signed-off-by: Dmitry Sepp <dmitry.sepp at virtuozzo.com>
---
 tools/testing/selftests/cgroup/Makefile      |   4 +-
 tools/testing/selftests/cgroup/test_cache.sh | 255 +++++++++++++++++++
 tools/testing/selftests/cgroup/touch_pages.c |  97 +++++++
 3 files changed, 355 insertions(+), 1 deletion(-)
 create mode 100755 tools/testing/selftests/cgroup/test_cache.sh
 create mode 100644 tools/testing/selftests/cgroup/touch_pages.c

diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile
index 1b897152bab6..05c6f6785ff0 100644
--- a/tools/testing/selftests/cgroup/Makefile
+++ b/tools/testing/selftests/cgroup/Makefile
@@ -5,7 +5,8 @@ all: ${HELPER_PROGS}
 
 TEST_FILES     := with_stress.sh
 TEST_PROGS     := test_stress.sh test_cpuset_prs.sh test_cpuset_v1_hp.sh
-TEST_GEN_FILES := wait_inotify
+TEST_PROGS     += test_cache.sh
+TEST_GEN_FILES := wait_inotify touch_pages
 # Keep the lists lexicographically sorted
 TEST_GEN_PROGS  = test_core
 TEST_GEN_PROGS += test_cpu
@@ -32,3 +33,4 @@ $(OUTPUT)/test_kmem: cgroup_util.c
 $(OUTPUT)/test_memcontrol: cgroup_util.c
 $(OUTPUT)/test_pids: cgroup_util.c
 $(OUTPUT)/test_zswap: cgroup_util.c
+$(OUTPUT)/touch_pages: LDLIBS += -lz
diff --git a/tools/testing/selftests/cgroup/test_cache.sh b/tools/testing/selftests/cgroup/test_cache.sh
new file mode 100755
index 000000000000..1206cff17d78
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_cache.sh
@@ -0,0 +1,255 @@
+#!/bin/bash
+#
+# Copyright (c) 2025 Virtuozzo International GmbH. All rights reserved.
+#
+# Output directory handling is derived from tools/testing/selftests/bpf/vmtest.sh
+#
+
+# shellcheck disable=SC2329
+
+skip_test() { # $1: human-readable reason why the test cannot run here
+	echo "$1"
+	echo "Test SKIPPED"
+	exit 4 # ksft_skip: the kselftest exit code for a skipped test
+}
+
+[[ $(id -u) -eq 0 ]] || skip_test "Test must be run as root!"
+# Select by filesystem type and take the first hit only: a plain grep also
+cgroup2=$(awk '$3 == "cgroup2" { print $2; exit }' /proc/mounts) # matches paths
+[[ -n "$cgroup2" ]] || skip_test "cgroup v2 isn't mounted"
+# The test file is created in $PWD, so refuse to run when that is on tmpfs.
+fstype=$(findmnt -n -o FSTYPE -T "$PWD")
+[[ "$fstype" != "tmpfs" ]] || skip_test "$PWD is on tmpfs"
+
+is_rel_path()
+{
+	# Succeed unless $1 starts with "/" (an empty string counts as relative).
+	case "$1" in /*) return 1 ;; esac
+	return 0
+}
+
+out_dir="kselftest/cgroup"
+# Locate the touch_pages helper binary relative to the kernel build output.
+# O takes precedence over KBUILD_OUTPUT; with neither set, $PWD is used.
+if [[ "${O:=""}" != "" ]]; then
+	if is_rel_path "${O}"; then
+		O="$(realpath "${PWD}/${O}")" # turn a relative O into an absolute path
+	fi
+	touch_pages="${O}/${out_dir}/touch_pages"
+elif [[ "${KBUILD_OUTPUT:=""}" != "" ]]; then
+	if is_rel_path "${KBUILD_OUTPUT}"; then
+		KBUILD_OUTPUT="$(realpath "${PWD}/${KBUILD_OUTPUT}")" # same for KBUILD_OUTPUT
+	fi
+	touch_pages="${KBUILD_OUTPUT}/${out_dir}/touch_pages"
+else
+	touch_pages="./touch_pages" # no build directory given: look in $PWD
+fi
+
+bin_file="./testfile.bin"
+group0_max=$((2**25))
+group1_max=$((2**22))
+ts=$(date +%s%N)
+group0="kselftest0-$ts"
+group1="kselftest1-$ts"
+
+dd if=/dev/urandom of="$bin_file" bs=1M count=1000 2>/dev/null
+sync
+
+mkdir -p "$cgroup2"/{"$group0","$group1"}
+
+cache_get_current() {
+	# Print the contents of memory.cache.current for cgroup $1.
+	cat "$cgroup2/$1/memory.cache.current"
+}
+
+cache_get_max() {
+	# Print the contents of memory.cache.max for cgroup $1.
+	cat "$cgroup2/$1/memory.cache.max"
+}
+
+cache_set_max() {
+	# Write $2 (callers pass a number or "max") to cgroup $1's memory.cache.max.
+	local max_file="$cgroup2/$1/memory.cache.max"
+	echo "$2" > "$max_file"
+}
+
+group_set() {
+	# Move process $2 into cgroup $1; an empty $1 addresses the cgroup2 root.
+	local procs_file="$cgroup2/$1/cgroup.procs"
+	echo "$2" > "$procs_file"
+}
+
+check_usage() {
+	# Verify that cgroup $1 keeps its page cache within 15% of memory.cache.max.
+	# Returns 0 on success; on failure prints a diagnostic to stderr, returns 1.
+	local group="$1"
+	local caller=${FUNCNAME[1]}
+	local cache_curr cache_max deviation upper lower
+	cache_curr=$(cache_get_current "$group")
+	cache_max=$(cache_get_max "$group")
+	# The window is two-sided: usage far below the limit is reported as well.
+	deviation=$((cache_max * 15 / 100))
+	upper=$((cache_max + deviation))
+	lower=$((cache_max - deviation))
+	if [[ $cache_curr -gt $upper || $cache_curr -lt $lower ]]; then
+		>&2 echo -n "$caller: Failed to limit cache usage: "
+		>&2 echo "current=$cache_curr max=$cache_max"
+		return 1
+	fi
+	return 0
+}
+
+check_accounting() {
+	# Verify that cgroup $1 accounts at least as much page cache as fincore
+	# reports resident for $bin_file. Returns 0 if so, else 1 with a diagnostic.
+	local group="$1" caller=${FUNCNAME[1]} res cache_curr
+	cache_curr=$(cache_get_current "$group")
+	res=$(fincore -brno res "$bin_file")
+	# An empty $res (fincore missing or failed) must not pass as "0 bytes".
+	if [[ -z $res || $cache_curr -lt $res ]]; then
+		>&2 echo -n "$caller: Incorrect cache accounting: "
+		>&2 echo "cgroup=$cache_curr res=$res"
+		return 1
+	fi
+	return 0
+}
+
+limit_before_read() {
+	# Scenario: the cache limit is already in place when the file is read.
+	# Returns the exit status of the subshell: 0 on success, 1 on failure.
+	echo 3 > /proc/sys/vm/drop_caches
+	cache_set_max "$group0" "$group0_max"
+	(
+		# The work runs in a subshell, so it is the subshell's own pid
+		# ($BASHPID) that gets moved into the test cgroup.
+		local self=$BASHPID
+		local status=0
+		group_set "$group0" "$self"
+
+		cat "$bin_file" > /dev/null
+		sleep 2
+		# Accounting is only checked once the usage check has passed.
+		if ! check_usage "$group0" || ! check_accounting "$group0"; then
+			status=1
+		fi
+		# Leave the cgroup (empty name = cgroup2 root) before exiting.
+		group_set "" "$self"
+		exit "$status"
+	)
+	local result=$?
+	cache_set_max "$group0" "max"
+	return "$result"
+}
+
+limit_after_read() {
+	# Scenario: the file is read first and the cache limit is written afterwards.
+	# Returns the exit status of the subshell: 0 on success, 1 on failure.
+	echo 3 > /proc/sys/vm/drop_caches
+	(
+		# The subshell's own pid ($BASHPID) is what joins the test cgroup.
+		local self=$BASHPID
+		local status=1
+		group_set "$group0" "$self"
+		cat "$bin_file" > /dev/null
+		sleep 2
+		# Accounting is checked before the limit is written; usage is checked after.
+		if check_accounting "$group0"; then
+			cache_set_max "$group0" "$group0_max"
+			if check_usage "$group0"; then
+				status=0
+			fi
+		fi
+		group_set "" "$self"
+		exit "$status"
+	)
+	local result=$?
+	cache_set_max "$group0" "max"
+	return "$result"
+}
+
+cgroup_migrate() {
+	# Scenario: read the file in group0, then move the reader to group1 and
+	# check group1's usage and accounting against its own, smaller limit.
+	# Returns the exit status of the subshell: 0 on success, 1 on failure.
+	echo 3 > /proc/sys/vm/drop_caches
+	cache_set_max "$group0" "$group0_max"
+	cache_set_max "$group1" "$group1_max"
+	(
+		local self=$BASHPID
+		local status=0
+		group_set "$group0" "$self"
+		cat "$bin_file" > /dev/null
+		sleep 2
+		group_set "$group1" "$self"
+		sleep 2
+		if ! check_usage "$group1" || ! check_accounting "$group1"; then
+			status=1
+		fi
+		# Leave the cgroup (empty name = cgroup2 root) before exiting.
+		group_set "" "$self"
+		exit "$status"
+	)
+	local result=$?
+	# Lift both limits: group1's was set above as well and must not linger.
+	cache_set_max "$group0" "max"
+	cache_set_max "$group1" "max"
+	return "$result"
+}
+
+numa_migrate() {
+	# Run touch_pages under the group0 limit and migrate its pages from node 0
+	# to node 1. Returns 0 on success, 1 on failure, 4 (skip) with < 2 nodes.
+	local nodes
+	nodes=$(numactl -H | head -1 | cut -d ' ' -f 2)
+	if [[ $nodes -lt 2 ]]; then
+		>&2 echo "${FUNCNAME[0]}:At least 2 nodes are required, skipping!"
+		return 4
+	fi
+	cache_set_max "$group0" "$group0_max"
+	(
+		local self=$BASHPID status=0
+		group_set "$group0" "$self"
+		numactl --membind=0 "$touch_pages" "$bin_file" &
+		local pid=$!
+		sleep 1
+		migratepages "$pid" 0 1
+		sleep 1
+		if ! check_usage "$group0" || ! check_accounting "$group0"; then
+			status=1
+		fi
+		# Stop and reap the helper so it no longer occupies group0 at rmdir time.
+		kill "$pid" 2>/dev/null; wait "$pid" 2>/dev/null
+		group_set "" "$self"
+		exit "$status"
+	)
+	local result=$?
+	cache_set_max "$group0" "max"
+	return "$result"
+}
+
+cache_tests=(
+	limit_before_read
+	limit_after_read
+	cgroup_migrate
+	numa_migrate
+)
+
+result=0
+for test in "${cache_tests[@]}"; do
+	$test
+	test_result=$?
+	if [[ $test_result -eq 0 ]]; then
+		echo -n "ok "
+	elif [[ $test_result -eq 4 ]]; then
+		echo -n "skip "
+	else
+		echo -n "not ok "
+		result=1
+	fi
+	echo "$test"
+done
+
+rmdir "$cgroup2"/{"$group0","$group1"}
+rm -f "$bin_file" # remove the 1000 MiB test file created during setup
+
+exit $result
diff --git a/tools/testing/selftests/cgroup/touch_pages.c b/tools/testing/selftests/cgroup/touch_pages.c
new file mode 100644
index 000000000000..05bd5427ba1c
--- /dev/null
+++ b/tools/testing/selftests/cgroup/touch_pages.c
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2025 Virtuozzo International GmbH. All rights reserved.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <unistd.h>
+#include <zlib.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <linux/prctl.h>
+
+/*
+ * Map the file given as argv[1] and issue random single-byte loads and stores
+ * to it forever, keeping a CRC32 of the data in a trailing 32-bit word and
+ * re-checking it each iteration. Returns non-zero on setup failure or mismatch.
+ */
+int main(int argc, char *argv[])
+{
+	int fd, ret;
+	uint8_t *base;
+	volatile uint8_t *ptr;
+	struct stat st;
+	uint32_t crc, *crc_ptr;
+	size_t file_size, data_size;
+
+	if (argc < 2) {
+		fprintf(stderr, "no input file provided\n");
+		return -1;
+	}
+
+	fd = open(argv[1], O_RDWR);
+	if (fd < 0) {
+		perror("failed to open input file");
+		return -1;
+	}
+
+	ret = fstat(fd, &st);
+	if (ret < 0) {
+		perror("failed to stat input file");
+		goto err_close;
+	}
+	/* Need room for at least one data word plus the trailing CRC word. */
+	if (st.st_size < (off_t)(2 * sizeof(uint32_t))) {
+		fprintf(stderr, "input file is too small\n");
+		ret = -1;
+		goto err_close;
+	}
+	file_size = st.st_size;
+
+	base = mmap(NULL, file_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	if (base == MAP_FAILED) {
+		perror("failed to map input file");
+		ret = -1;
+		goto err_close;
+	}
+
+	/* Ask for SIGTERM when the parent dies, so this loop cannot outlive it. */
+	ret = prctl(PR_SET_PDEATHSIG, SIGTERM);
+	if (ret < 0) {
+		perror("failed to set the parent-death signal");
+		goto err_unmap;
+	}
+	srand(getpid());
+	/* Round down so the CRC word is aligned whatever the file size is. */
+	data_size = (file_size - sizeof(uint32_t)) & ~(sizeof(uint32_t) - 1);
+	crc_ptr = (uint32_t *)(base + data_size);
+	*crc_ptr = crc32_z(0, base, data_size);
+	while (1) {
+		crc = crc32_z(0, base, data_size);
+		if (crc != *crc_ptr) {
+			fprintf(stderr, "crc32 mismatch: calc=%08x read=%08x\n",
+				crc, *crc_ptr);
+			ret = -1;
+			goto err_unmap;
+		}
+		ptr = base + rand() % data_size;
+		if (rand() % 2) {
+			/* A store changes the data, so refresh the saved CRC. */
+			*ptr = rand() % (UINT8_MAX + 1);
+			*crc_ptr = crc32_z(0, base, data_size);
+		} else {
+			(void)*ptr;
+		}
+	}
+
+err_unmap:
+	munmap(base, file_size);
+err_close:
+	close(fd);
+	return ret;
+}
-- 
2.51.0



More information about the Devel mailing list