[Devel] [PATCH VZ10 6/6] ve: Introduce per-VE failcount

Tue Jun 30 20:13:32 MSK 2026

It may be useful to have a history of resource limit hits for every VE:
it simplifies debugging and provides some information about resource
usage.

This information is provided by the ve.failcount file; any write to it
resets all failcounts.

To add a new failcounter, create a new atomic_t field <name>_failcount
in struct ve_struct and add a matching VE_FC_ENTRY(<name>) to the
ve_failcounts array.

One change, unrelated to failcounts: aio fields are now initialized
in ve0.

https://virtuozzo.atlassian.net/browse/VSTOR-135520

Feature: per-ve failcounters
Signed-off-by: Vladimir Riabchun <vladimir.riabchun at virtuozzo.com>
---
 fs/aio.c                 |  1 +
 fs/namespace.c           |  2 ++
 include/linux/ve.h       |  6 ++++
 kernel/bpf/syscall.c     |  1 +
 kernel/ve/ve.c           | 65 ++++++++++++++++++++++++++++++++++++++++
 net/core/dev.c           |  2 ++
 net/core/neighbour.c     |  1 +
 net/core/net_namespace.c |  4 ++-
 8 files changed, 81 insertions(+), 1 deletion(-)

diff --git a/fs/aio.c b/fs/aio.c
index cb63416af135..3fa07cc626f8 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -814,6 +814,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 	spin_lock(&ve->aio_nr_lock);
 	if (ve->aio_nr + ctx->max_reqs > ve->aio_max_nr ||
 	    ve->aio_nr + ctx->max_reqs < ve->aio_nr) {
+		atomic_inc(&ve->aio_failcount);
 		spin_unlock(&ve->aio_nr_lock);
 		err = -EAGAIN;
 		goto err_ctx;
diff --git a/fs/namespace.c b/fs/namespace.c
index 32452898aadb..7f569c03c1ca 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3274,6 +3274,8 @@ static inline int ve_try_reserve_mount(struct ve_struct *ve)
 		  atomic_dec_if_positive(&ve->mnt_avail_nr) >= 0;
 	if (ret)
 		get_ve(ve);
+	else
+		atomic_inc(&ve->mnt_failcount);
 	return ret;
 }
 
diff --git a/include/linux/ve.h b/include/linux/ve.h
index 5a5413107eef..1f8b6fd0c289 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -71,12 +71,15 @@ struct ve_struct {
 	struct kmapset_key	sysfs_perms_key;
 
 	atomic_t		netns_avail_nr;
+	atomic_t		netns_failcount;
 	int			netns_max_nr;
 
 	atomic_t		netif_avail_nr;
+	atomic_t		netif_failcount;
 	int			netif_max_nr;
 
 	atomic_t		bpf_prog_avail_nr;
+	atomic_t		bpf_prog_failcount;
 	int			bpf_prog_max_nr;
 
 	atomic64_t		_uevent_seqnum;
@@ -85,6 +88,7 @@ struct ve_struct {
 
 	atomic_t		arp_neigh_nr;
 	atomic_t		nd_neigh_nr;
+	atomic_t		neigh_tbl_failcount;
 	unsigned long		meminfo_val;
 
 	/*
@@ -93,6 +97,7 @@ struct ve_struct {
 	 * other containers.
 	 */
 	atomic_t		mnt_avail_nr; /* number of present VE mounts */
+	atomic_t		mnt_failcount;
 	int			mnt_max_nr;
 
 #ifdef CONFIG_COREDUMP
@@ -120,6 +125,7 @@ struct ve_struct {
 	spinlock_t		aio_nr_lock;
 	unsigned long		aio_nr;
 	unsigned long		aio_max_nr;
+	atomic_t		aio_failcount;
 #endif
 	struct vfsmount		*devtmpfs_mnt;
 };
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index ff2a51c59f04..95e806fa19f4 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2891,6 +2891,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
 	if (!bpf_cap && type == BPF_PROG_TYPE_CGROUP_DEVICE) {
 		load_ve = get_exec_env();
 		if (atomic_dec_if_positive(&load_ve->bpf_prog_avail_nr) < 0) {
+			atomic_inc(&load_ve->bpf_prog_failcount);
 			load_ve = NULL;
 			err = -ENOSPC;
 			goto put_token;
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index ba18141cce9d..ac953c553908 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -96,10 +96,13 @@ struct ve_struct ve0 = {
 	.features		= -1,
 	.sched_lat_ve.cur	= &ve0_lat_stats,
 	.netns_avail_nr		= ATOMIC_INIT(INT_MAX),
+	.netns_failcount	= ATOMIC_INIT(0),
 	.netns_max_nr		= INT_MAX,
 	.netif_avail_nr		= ATOMIC_INIT(INT_MAX),
+	.netif_failcount	= ATOMIC_INIT(0),
 	.netif_max_nr		= INT_MAX,
 	.bpf_prog_avail_nr	= ATOMIC_INIT(INT_MAX),
+	.bpf_prog_failcount	= ATOMIC_INIT(0),
 	.bpf_prog_max_nr	= INT_MAX,
 	.fsync_enable		= FSYNC_FILTERED,
 	._randomize_va_space	=
@@ -111,8 +114,16 @@ struct ve_struct ve0 = {
 
 	.arp_neigh_nr		= ATOMIC_INIT(0),
 	.nd_neigh_nr		= ATOMIC_INIT(0),
+	.neigh_tbl_failcount	= ATOMIC_INIT(0),
 	.mnt_avail_nr		= ATOMIC_INIT(INT_MAX),
 	.mnt_max_nr		= INT_MAX,
+	.mnt_failcount		= ATOMIC_INIT(0),
+#ifdef CONFIG_AIO
+	.aio_nr_lock		= __SPIN_LOCK_UNLOCKED(aio_nr_lock),
+	.aio_nr			= 0,
+	.aio_max_nr		= AIO_MAX_NR_DEFAULT,
+	.aio_failcount		= ATOMIC_INIT(0),
+#endif
 	.meminfo_val		= VE_MEMINFO_SYSTEM,
 	.umh_running_helpers	= ATOMIC_INIT(0),
 	.umh_helpers_waitq	= __WAIT_QUEUE_HEAD_INITIALIZER(ve0.umh_helpers_waitq),
@@ -777,12 +788,15 @@ static struct cgroup_subsys_state *ve_create(struct cgroup_subsys_state *parent_
 	ve->fsync_enable = FSYNC_FILTERED;
 
 	atomic_set(&ve->netns_avail_nr, NETNS_MAX_NR_DEFAULT);
+	atomic_set(&ve->netns_failcount, 0);
 	ve->netns_max_nr = NETNS_MAX_NR_DEFAULT;
 
 	atomic_set(&ve->netif_avail_nr, NETIF_MAX_NR_DEFAULT);
+	atomic_set(&ve->netif_failcount, 0);
 	ve->netif_max_nr = NETIF_MAX_NR_DEFAULT;
 
 	atomic_set(&ve->bpf_prog_avail_nr, BPF_PROG_MAX_NR_DEFAULT);
+	atomic_set(&ve->bpf_prog_failcount, 0);
 	ve->bpf_prog_max_nr = BPF_PROG_MAX_NR_DEFAULT;
 
 	err = ve_log_init(ve);
@@ -808,7 +822,9 @@ static struct cgroup_subsys_state *ve_create(struct cgroup_subsys_state *parent_
 
 	atomic_set(&ve->arp_neigh_nr, 0);
 	atomic_set(&ve->nd_neigh_nr, 0);
+	atomic_set(&ve->neigh_tbl_failcount, 0);
 	ve->mnt_max_nr = MNT_MAX_NR_DEFAULT;
+	atomic_set(&ve->mnt_failcount, 0);
 	atomic_set(&ve->mnt_avail_nr, MNT_MAX_NR_DEFAULT);
 
 #ifdef CONFIG_COREDUMP
@@ -821,6 +837,7 @@ static struct cgroup_subsys_state *ve_create(struct cgroup_subsys_state *parent_
 	spin_lock_init(&ve->aio_nr_lock);
 	ve->aio_nr = 0;
 	ve->aio_max_nr = AIO_MAX_NR_DEFAULT;
+	atomic_set(&ve->aio_failcount, 0);
 #endif
 
 	return &ve->css;
@@ -1059,6 +1076,63 @@ VE_RESOURCE(mnt);
 VE_RESOURCE(netif);
 VE_RESOURCE(bpf_prog);
 
+/*
+ * Per-VE failcounters exported through the ve.failcount file.
+ *
+ * Every entry maps the printed name to the offset of the matching
+ * atomic_t <name>_failcount field inside struct ve_struct. The table
+ * ends with an all-zero sentinel (name == NULL).
+ */
+static const struct ve_failcount_entry {
+	const char *name;
+	size_t offset;
+} ve_failcounts[] = {
+#define VE_FC_ENTRY(name) { #name, offsetof(struct ve_struct, name##_failcount) }
+	VE_FC_ENTRY(netns),
+	VE_FC_ENTRY(mnt),
+	VE_FC_ENTRY(netif),
+	VE_FC_ENTRY(bpf_prog),
+	VE_FC_ENTRY(neigh_tbl),
+#ifdef CONFIG_AIO
+	/* aio_failcount is inside the #ifdef'ed aio part of ve_struct */
+	VE_FC_ENTRY(aio),
+#endif
+	{}
+};
+
+/* seq_show handler of ve.failcount: one "name: value" line per counter. */
+static int ve_failcount_read(struct seq_file *sf, void *v)
+{
+	struct ve_struct *ve = css_to_ve(seq_css(sf));
+	const struct ve_failcount_entry *entry;
+	atomic_t *fc;
+
+	for (entry = ve_failcounts; entry->name; entry++) {
+		fc = (void *)ve + entry->offset;
+		seq_printf(sf, "%s: %d\n", entry->name, atomic_read(fc));
+	}
+	return 0;
+}
+
+/*
+ * write handler of ve.failcount: the written data is ignored, any write
+ * resets every failcounter of the VE to zero.
+ */
+static ssize_t ve_failcount_write(struct kernfs_open_file *of, char *buf,
+				  size_t nbytes, loff_t off)
+{
+	struct ve_struct *ve = css_to_ve(of_css(of));
+	const struct ve_failcount_entry *entry;
+	atomic_t *fc;
+
+	for (entry = ve_failcounts; entry->name; entry++) {
+		fc = (void *)ve + entry->offset;
+		atomic_set(fc, 0);
+	}
+
+	return nbytes;
+}
+
 static int ve_os_release_read(struct seq_file *sf, void *v)
 {
 	struct cgroup_subsys_state *css = seq_css(sf);
@@ -1596,6 +1655,12 @@ static struct cftype ve_cftypes[] = {
 		.flags			= CFTYPE_NOT_ON_ROOT,
 		.write_u64		= ve_rpc_kill_write,
 	},
+	{
+		.name			= "failcount",
+		.flags			= CFTYPE_NOT_ON_ROOT,
+		.seq_show		= ve_failcount_read,
+		.write			= ve_failcount_write,
+	},
 	{ }
 };
 
diff --git a/net/core/dev.c b/net/core/dev.c
index f198fc3a2632..bea49da4fd02 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -10992,6 +10992,7 @@ int register_netdevice(struct net_device *dev)
 
 	ret = -ENOMEM;
 	if (atomic_dec_if_positive(&net->owner_ve->netif_avail_nr) < 0) {
+		atomic_inc(&net->owner_ve->netif_failcount);
 		ve_pr_warn_ratelimited(VE_LOG_BOTH,
 			"CT%s: hits max number of network devices, "
 			"increase ve::netif_max_nr parameter\n",
@@ -12206,6 +12207,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
 
 	err = -ENOMEM;
 	if (atomic_dec_if_positive(&net->owner_ve->netif_avail_nr) < 0) {
+		atomic_inc(&net->owner_ve->netif_failcount);
 		ve_pr_warn_ratelimited(VE_LOG_BOTH,
 			"CT%s: hits max number of network devices, "
 			"increase ve::netif_max_nr parameter\n",
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index f90deb17fb25..57a49d9c98a7 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -520,6 +520,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl,
 	    (glob_entries >= READ_ONCE(tbl->gc_thresh2) &&
 	     time_after(now, READ_ONCE(tbl->last_flush) + 5 * HZ))) {
 		if (!neigh_forced_gc(tbl, ve) && entries >= gc_thresh3) {
+			atomic_inc(&ve->neigh_tbl_failcount);
 			net_info_ratelimited("%s: neighbor table overflow!\n",
 					     tbl->id);
 			NEIGH_CACHE_STAT_INC(tbl, table_fulls);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index b3d54cad984a..9a3376d2682f 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -486,8 +486,10 @@ void net_drop_ns(void *p)
 #ifdef CONFIG_VE
 static int dec_netns_avail(struct ve_struct *ve)
 {
-	if (atomic_dec_if_positive(&ve->netns_avail_nr) < 0)
+	if (atomic_dec_if_positive(&ve->netns_avail_nr) < 0) {
+		atomic_inc(&ve->netns_failcount);
 		return -ENOSPC;
+	}
 	return 0;
 }
 
-- 
2.47.1