[Devel] [RFC PATCH v2 1/1] selftests: cgroup: test page cache limiting feature
Dmitry Sepp
dmitry.sepp at virtuozzo.com
Fri Nov 14 15:51:57 MSK 2025
The tests validate the memory.cache.max and memory.cache.current cgroup
files: that the amount of page cache used is limited and that it is
accounted correctly. The NUMA migration test makes use of a supplementary
program, touch_pages, that maps the test file and accesses it with a random pattern.
https://virtuozzo.atlassian.net/browse/VSTOR-112174
Signed-off-by: Dmitry Sepp <dmitry.sepp at virtuozzo.com>
Feature: mm: Memory cgroup page cache limit
Signed-off-by: Dmitry Sepp <dmitry.sepp at virtuozzo.com>
---
tools/testing/selftests/cgroup/Makefile | 4 +-
tools/testing/selftests/cgroup/test_cache.sh | 255 +++++++++++++++++++
tools/testing/selftests/cgroup/touch_pages.c | 97 +++++++
3 files changed, 355 insertions(+), 1 deletion(-)
create mode 100755 tools/testing/selftests/cgroup/test_cache.sh
create mode 100644 tools/testing/selftests/cgroup/touch_pages.c
diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile
index 1b897152bab6..05c6f6785ff0 100644
--- a/tools/testing/selftests/cgroup/Makefile
+++ b/tools/testing/selftests/cgroup/Makefile
@@ -5,7 +5,8 @@ all: ${HELPER_PROGS}
TEST_FILES := with_stress.sh
TEST_PROGS := test_stress.sh test_cpuset_prs.sh test_cpuset_v1_hp.sh
-TEST_GEN_FILES := wait_inotify
+TEST_PROGS += test_cache.sh
+TEST_GEN_FILES := wait_inotify touch_pages
# Keep the lists lexicographically sorted
TEST_GEN_PROGS = test_core
TEST_GEN_PROGS += test_cpu
@@ -32,3 +33,4 @@ $(OUTPUT)/test_kmem: cgroup_util.c
$(OUTPUT)/test_memcontrol: cgroup_util.c
$(OUTPUT)/test_pids: cgroup_util.c
$(OUTPUT)/test_zswap: cgroup_util.c
+$(OUTPUT)/touch_pages: LDLIBS += -lz
diff --git a/tools/testing/selftests/cgroup/test_cache.sh b/tools/testing/selftests/cgroup/test_cache.sh
new file mode 100755
index 000000000000..1206cff17d78
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_cache.sh
@@ -0,0 +1,255 @@
+#!/bin/bash
+#
+# Copyright (c) 2025 Virtuozzo International GmbH. All rights reserved.
+#
+# Output directory handling is derived from tools/testing/selftests/bpf/vmtest.sh
+#
+
+# shellcheck disable=SC2329
+
+skip_test() { # Print the reason given in "$1", report the skip and exit.
+ echo "$1"
+ echo "Test SKIPPED"
+ exit 4 # ksft_skip
+}
+
+[[ $(id -u) -eq 0 ]] || skip_test "Test must be run as root!"
+# Match on the filesystem-type field and take only the first cgroup2 mount:
+# several mount points would otherwise be joined into one multi-line path.
+cgroup2=$(awk '$3 == "cgroup2" { print $2; exit }' /proc/mounts)
+[[ -n "$cgroup2" ]] || skip_test "cgroup v2 isn't mounted"
+fstype=$(findmnt -n -o FSTYPE -T "$PWD")
+[[ "$fstype" != "tmpfs" ]] || skip_test "$PWD is on tmpfs"
+
+is_rel_path()
+{
+ local path="$1"
+
+ [[ ${path:0:1} != "/" ]]
+}
+
+out_dir="kselftest/cgroup"
+# Locate the touch_pages helper binary. When the kernel is built out of tree
+# it is looked up below the build directory: O takes precedence over
+# KBUILD_OUTPUT. With neither set it is expected in the current directory.
+if [[ -n "${O:=""}" ]]; then
+	# A relative build directory is resolved against the current directory.
+	is_rel_path "${O}" && O="$(realpath "${PWD}/${O}")"
+	touch_pages="${O}/${out_dir}/touch_pages"
+elif [[ -n "${KBUILD_OUTPUT:=""}" ]]; then
+	# Same resolution for KBUILD_OUTPUT.
+	is_rel_path "${KBUILD_OUTPUT}" &&
+		KBUILD_OUTPUT="$(realpath "${PWD}/${KBUILD_OUTPUT}")"
+	touch_pages="${KBUILD_OUTPUT}/${out_dir}/touch_pages"
+else
+	touch_pages="./touch_pages"
+fi
+
+bin_file="./testfile.bin"
+group0_max=$((1 << 25)) # 32 MiB, written to memory.cache.max of group0
+group1_max=$((1 << 22)) # 4 MiB, written to memory.cache.max of group1
+ts=$(date +%s%N) # timestamp suffix shared by both cgroup names
+group0="kselftest0-$ts"
+group1="kselftest1-$ts"
+
+# Create a 1000 MiB file of random data, flush it, then create both cgroups.
+dd of="$bin_file" if=/dev/urandom count=1000 bs=1M 2>/dev/null
+sync
+mkdir -p "$cgroup2/$group0" "$cgroup2/$group1"
+
+# Print the contents of memory.cache.current for cgroup "$1".
+cache_get_current() {
+	cat "$cgroup2/$1/memory.cache.current"
+}
+
+# Print the contents of memory.cache.max for cgroup "$1".
+cache_get_max() {
+	cat "$cgroup2/$1/memory.cache.max"
+}
+
+cache_set_max() {
+ local group="$1"
+ local value="$2"
+ echo "$value" > "$cgroup2/$group/memory.cache.max"
+}
+
+# Write PID "$2" to cgroup.procs of cgroup "$1"; an empty "$1" is the mount root.
+group_set() {
+	local procs_file="$cgroup2/$1/cgroup.procs"
+	echo "$2" > "$procs_file"
+}
+
+check_usage() {
+ local cache_curr
+ local deviation
+ local upper
+ local lower
+ local group="$1"
+ local caller=${FUNCNAME[1]}
+ cache_curr=$(cache_get_current "$group")
+ cache_max=$(cache_get_max "$group")
+ deviation=$((cache_max * 15 / 100))
+ upper=$((cache_max + deviation))
+ lower=$((cache_max - deviation))
+ if [[ $cache_curr -gt $upper || $cache_curr -lt $lower ]]; then
+ >&2 echo -n "$caller: Failed to limit cache usage: "
+ >&2 echo "current=$cache_curr max=$cache_max"
+ return 1
+ fi
+ return 0
+}
+
+# Verify that cgroup "$1" accounts for at least as much cache as fincore
+# reports resident for the test file; complain on stderr and return 1 if not.
+check_accounting() {
+	local group="$1" caller=${FUNCNAME[1]}
+	local cache_curr res
+	cache_curr=$(cache_get_current "$group")
+	res=$(fincore -brno res "$bin_file")
+	# fincore -brno res: the RES column only, in bytes, raw and without header.
+	if [[ $cache_curr -lt $res ]]; then
+		echo "$caller: Incorrect cache accounting: cgroup=$cache_curr res=$res" >&2
+		return 1
+	fi
+	return 0
+}
+
+limit_before_read() {
+ echo 3 > /proc/sys/vm/drop_caches
+ cache_set_max "$group0" "$group0_max"
+ (
+ local cache_curr
+ local res
+ local self=$BASHPID
+ group_set "$group0" "$self"
+
+ cat "$bin_file" > /dev/null
+ sleep 2
+ if ! check_usage "$group0"; then
+ group_set "" "$self"
+ exit 1
+ fi
+ if ! check_accounting "$group0"; then
+ group_set "" "$self"
+ exit 1
+ fi
+ group_set "" "$self"
+ exit 0
+ )
+ local result=$?
+ cache_set_max "$group0" "max"
+ return "$result"
+}
+
+limit_after_read() {
+ echo 3 > /proc/sys/vm/drop_caches
+ (
+ local cache_curr
+ local res
+ local self=$BASHPID
+ group_set "$group0" "$self"
+ cat "$bin_file" > /dev/null
+ sleep 2
+ if ! check_accounting "$group0"; then
+ group_set "" "$self"
+ exit 1
+ fi
+ cache_set_max "$group0" "$group0_max"
+ if ! check_usage "$group0"; then
+ group_set "" "$self"
+ exit 1
+ fi
+ group_set "" "$self"
+ exit 0
+ )
+ local result=$?
+ cache_set_max "$group0" "max"
+ return "$result"
+}
+
+cgroup_migrate() {
+ echo 3 > /proc/sys/vm/drop_caches
+ cache_set_max "$group0" "$group0_max"
+ cache_set_max "$group1" "$group1_max"
+ (
+ local cache_curr
+ local res
+ local self=$BASHPID
+ group_set "$group0" "$self"
+ cat "$bin_file" > /dev/null
+ sleep 2
+ group_set "$group1" "$self"
+ sleep 2
+ if ! check_usage "$group1"; then
+ group_set "" "$self"
+ exit 1
+ fi
+ if ! check_accounting "$group1"; then
+ group_set "" "$self"
+ exit 1
+ fi
+ group_set "" "$self"
+ exit 0
+ )
+ local result=$?
+ cache_set_max "$group0" "max"
+ return "$result"
+}
+
+numa_migrate() {
+ nodes=$(numactl -H | head -1 | cut -d ' ' -f 2)
+ if [[ $nodes -lt 2 ]]; then
+ >&2 echo "${FUNCNAME[0]}:At least 2 nodes are required, skipping!"
+ return 4
+ fi
+ cache_set_max "$group0" "$group0_max"
+ (
+ local self=$BASHPID
+ group_set "$group0" "$self"
+ numactl --membind=0 "$touch_pages" "$bin_file" &
+ pid=$!
+ sleep 1
+ migratepages "$pid" 0 1
+ sleep 1
+ if ! check_usage "$group0"; then
+ group_set "" "$self"
+ exit 1
+ fi
+ if ! check_accounting "$group0"; then
+ group_set "" "$self"
+ exit 1
+ fi
+ group_set "" "$self"
+ exit 0
+ )
+ local result=$?
+ cache_set_max "$group0" "max"
+ return "$result"
+}
+
+cache_tests=( # names of the test functions, in the order they are run
+ limit_before_read
+ limit_after_read
+ cgroup_migrate
+ numa_migrate
+)
+
+# Run every test in turn and print one "ok", "skip" or "not ok" line for it.
+# result becomes 1 as soon as any test reports a status other than 0 or 4.
+result=0
+for test in "${cache_tests[@]}"; do
+	$test
+	test_result=$?
+	case "$test_result" in
+	0) echo -n "ok " ;;
+	4) echo -n "skip " ;;
+	*) echo -n "not ok "
+	   result=1 ;;
+	esac
+	echo "$test"
+done
+
+rmdir "$cgroup2"/{"$group0","$group1"}
+# Remove the 1000 MiB data file that was created with dd at startup.
+rm -f "$bin_file"
+exit $result
diff --git a/tools/testing/selftests/cgroup/touch_pages.c b/tools/testing/selftests/cgroup/touch_pages.c
new file mode 100644
index 000000000000..05bd5427ba1c
--- /dev/null
+++ b/tools/testing/selftests/cgroup/touch_pages.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2025 Virtuozzo International GmbH. All rights reserved.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <unistd.h>
+#include <zlib.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <linux/prctl.h>
+
+/*
+ * Map argv[1] read/write and shared, keep a CRC32 of the data in the last
+ * four bytes of the mapping, then loop forever: re-check the CRC and load
+ * or store one random byte. Returns -1 on setup failure or CRC mismatch.
+ */
+int main(int argc, char *argv[])
+{
+	int fd, ret;
+	uint8_t *base;
+	volatile uint8_t *ptr;
+	struct stat st;
+	uint32_t crc, *crc_ptr;
+	size_t file_size, data_size;
+
+	if (argc < 2) {
+		fprintf(stderr, "no input file provided\n");
+		return -1;
+	}
+	fd = open(argv[1], O_RDWR);
+	if (fd < 0) {
+		perror("failed to open input file");
+		return -1;
+	}
+
+	ret = fstat(fd, &st);
+	if (ret < 0) {
+		perror("failed to stat input file");
+		goto err_close;
+	}
+	/* Need one data byte plus the CRC slot; less would wrap data_size. */
+	if (st.st_size <= (off_t)sizeof(uint32_t)) {
+		fprintf(stderr, "input file is too small\n");
+		ret = -1;
+		goto err_close;
+	}
+	file_size = st.st_size;
+
+	base = mmap(NULL, file_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	if (base == MAP_FAILED) {
+		perror("failed to map input file");
+		ret = -1;
+		goto err_close;
+	}
+
+	/* Ask for SIGTERM when the parent exits; the loop below never ends. */
+	ret = prctl(PR_SET_PDEATHSIG, SIGTERM);
+	if (ret < 0) {
+		perror("failed to set the parent-death signal");
+		goto err_unmap;
+	}
+
+	srand(getpid());
+	data_size = file_size - sizeof(uint32_t);
+	/* NOTE(review): slot is 4-byte aligned only if file size is a multiple of 4. */
+	crc_ptr = (uint32_t *)(base + data_size);
+
+	*crc_ptr = crc32_z(0, base, data_size);
+	while (1) {
+		crc = crc32_z(0, base, data_size);
+		if (crc != *crc_ptr) {
+			fprintf(stderr, "crc32 mismatch: calc=%08x read=%08x\n",
+				crc, *crc_ptr);
+			ret = -1;
+			goto err_unmap;
+		}
+		ptr = base + rand() % data_size;
+		if (rand() % 2) {
+			*ptr = rand() % UINT8_MAX;
+			*crc_ptr = crc32_z(0, base, data_size);
+		} else {
+			*ptr; /* volatile load: touch the byte without modifying it */
+		}
+	}
+
+err_unmap:
+	munmap(base, file_size);
+err_close:
+	close(fd);
+	return ret;
+}
--
2.51.0
More information about the Devel
mailing list