[Devel] [PATCH RHEL7 COMMIT] bc: implement ubc "numproc" limit based on pids cgroup
Konstantin Khorenko
khorenko at virtuozzo.com
Mon Aug 13 19:13:27 MSK 2018
The commit is pushed to "branch-rh7-3.10.0-862.9.1.vz7.70.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-862.9.1.vz7.70.3
------>
commit 5d040bf27681ff132b0027a5406c8bd2a87bc63a
Author: Konstantin Khorenko <khorenko at virtuozzo.com>
Date: Mon Aug 13 19:13:27 2018 +0300
bc: implement ubc "numproc" limit based on pids cgroup
Wire pids cgroup into Container's ub.
All real work on accounting and limiting of threads (pids) is done by
the pids cgroup; the appropriate counters are just synced into the beancounter
when /proc/user_beancounters or /proc/bc/$CTID/resources is read.
pids cgroup lacks the fail counter, so add it.
Note: vzctl must configure
/sys/fs/cgroup/beancounter/$CTID/beancounter.pids with the path to
Container's pids cgroup like it does for beancounter.{memory,blkio}.
Usually the path looks like "/machine.slice/$CTID".
https://jira.sw.ru/browse/PSBM-86044
Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
Reviewed-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
======================================================
Patchset description:
bc: rework "numproc" ubc limit to be based on pids cgroup
It's time to rework our home-brewed ubc "numproc" limit to be based on the stock
pids cgroup.
Note: the semantics will change a bit: we now limit the number of possible threads
in a Container, while the old ubc "numproc" limited the number of tasks.
Note1: vzctl update is required.
https://jira.sw.ru/browse/PSBM-86044
Konstantin Khorenko (2):
bc: roll back beancounters-based numer of processes accounting
bc: implement ubc "numproc" limit based on pids cgroup
---
include/bc/beancounter.h | 10 ++++++++++
kernel/bc/beancounter.c | 41 +++++++++++++++++++++++++++++++++++++++++
kernel/bc/proc.c | 2 ++
kernel/cgroup_pids.c | 38 ++++++++++++++++++++++++++++++++++++++
4 files changed, 91 insertions(+)
diff --git a/include/bc/beancounter.h b/include/bc/beancounter.h
index 8a371f20736b..d4ac3259930a 100644
--- a/include/bc/beancounter.h
+++ b/include/bc/beancounter.h
@@ -82,6 +82,7 @@ struct ub_percpu_struct {
enum {
UB_MEM_CGROUP,
UB_BLKIO_CGROUP,
+ UB_PIDS_CGROUP,
NR_UB_BOUND_CGROUPS,
};
@@ -145,6 +146,12 @@ ub_get_blkio_css(struct user_beancounter *ub)
return __ub_get_css(ub, UB_BLKIO_CGROUP);
}
+static inline struct cgroup_subsys_state * /* css for the UB_PIDS_CGROUP slot of @ub */
+ub_get_pids_css(struct user_beancounter *ub)
+{
+ return __ub_get_css(ub, UB_PIDS_CGROUP); /* same helper as ub_get_blkio_css(), pids index; NOTE(review): ub_sync_pids() css_put()s the result, so presumably a reference is taken - confirm in __ub_get_css() */
+}
+
static inline int ub_barrier_hit(struct user_beancounter *ub, int resource)
{
return ub->ub_parms[resource].held > ub->ub_parms[resource].barrier;
@@ -288,6 +295,9 @@ extern int ub_update_memcg(struct user_beancounter *ub);
extern void ub_sync_memcg(struct user_beancounter *ub);
extern unsigned long ub_total_pages(struct user_beancounter *ub, bool swap);
+extern void ub_sync_pids(struct user_beancounter *ub);
+extern struct pids_cgroup *pids_cgroup_from_cont(struct cgroup *cont);
+
extern const char *ub_rnames[];
/*
* Put a beancounter reference
diff --git a/kernel/bc/beancounter.c b/kernel/bc/beancounter.c
index 31e5904df708..4c6d1b7f5f25 100644
--- a/kernel/bc/beancounter.c
+++ b/kernel/bc/beancounter.c
@@ -89,6 +89,7 @@ static struct vfsmount *ub_bound_cgroup_mnt[NR_UB_BOUND_CGROUPS];
#define mem_cgroup_mnt (ub_bound_cgroup_mnt[UB_MEM_CGROUP])
#define blkio_cgroup_mnt (ub_bound_cgroup_mnt[UB_BLKIO_CGROUP])
+#define pids_cgroup_mnt (ub_bound_cgroup_mnt[UB_PIDS_CGROUP])
static void __ub_set_css(struct user_beancounter *ub, int idx,
struct cgroup_subsys_state *css)
@@ -154,6 +155,12 @@ static void ub_set_blkio_css(struct user_beancounter *ub,
__ub_set_css(ub, UB_BLKIO_CGROUP, css);
}
+static void ub_set_pids_css(struct user_beancounter *ub, /* beancounter to update */
+ struct cgroup_subsys_state *css) /* pids-cgroup css to associate with @ub */
+{
+ __ub_set_css(ub, UB_PIDS_CGROUP, css); /* hand @css to the generic setter under the UB_PIDS_CGROUP index */
+}
+
/*
* Used to attach a task to a beancounter in the legacy API.
*/
@@ -199,6 +206,8 @@ extern void mem_cgroup_get_nr_pages(struct mem_cgroup *memcg, int nid,
unsigned long *pages);
extern unsigned long mem_cgroup_total_pages(struct mem_cgroup *memcg,
bool swap);
+extern void pids_cgroup_sync_beancounter(struct pids_cgroup *pids,
+ struct user_beancounter *ub);
/*
* Update memcg limits according to beancounter configuration.
@@ -215,6 +224,18 @@ int ub_update_memcg(struct user_beancounter *ub)
return ret;
}
+/*
+ * Synchronize pids cgroup stats with beancounter: refresh @ub from the pids cgroup behind its UB_PIDS_CGROUP css.
+ */
+void ub_sync_pids(struct user_beancounter *ub)
+{
+ struct cgroup_subsys_state *css;
+
+ css = ub_get_pids_css(ub); /* css of the pids cgroup associated with @ub */
+ pids_cgroup_sync_beancounter(pids_cgroup_from_cont(css->cgroup), ub); /* copy the pids counters into @ub's UB_NUMPROC parm */
+ css_put(css); /* release the css obtained above once the sync is done */
+}
+
/*
* Synchronize memcg stats with beancounter.
*/
@@ -644,6 +665,10 @@ static int ub_cgroup_write(struct cgroup *cg, struct cftype *cft,
ub_set_blkio_css(ub, cgroup_subsys_state(bound_cg,
blkio_subsys_id));
break;
+ case UB_PIDS_CGROUP:
+ ub_set_pids_css(ub, cgroup_subsys_state(bound_cg,
+ pids_subsys_id));
+ break;
}
cgroup_kernel_close(bound_cg);
@@ -665,6 +690,13 @@ static struct cftype ub_cgroup_files[] = {
.write_string = ub_cgroup_write,
.read = ub_cgroup_read,
},
+ {
+ .name = "pids",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .private = UB_PIDS_CGROUP,
+ .write_string = ub_cgroup_write,
+ .read = ub_cgroup_read,
+ },
{ }, /* terminate */
};
@@ -1169,6 +1201,7 @@ void __init ub_init_late(void)
{
ub_set_mem_css(&ub0, task_subsys_state_check(&init_task, mem_cgroup_subsys_id, true));
ub_set_blkio_css(&ub0, task_subsys_state_check(&init_task, blkio_subsys_id, true));
+ ub_set_pids_css(&ub0, task_subsys_state_check(&init_task, pids_subsys_id, true));
register_sysctl_table(ub_sysctl_root);
}
@@ -1181,6 +1214,9 @@ int __init ub_init_cgroup(void)
struct cgroup_sb_opts mem_opts = {
.subsys_mask = (1ul << mem_cgroup_subsys_id),
};
+ struct cgroup_sb_opts pids_opts = {
+ .subsys_mask = (1ul << pids_subsys_id),
+ };
struct cgroup_sb_opts ub_opts = {
.subsys_mask = (1ul << ub_subsys_id),
};
@@ -1195,6 +1231,11 @@ int __init ub_init_cgroup(void)
panic("Failed to mount memory cgroup: %ld\n",
PTR_ERR(mem_cgroup_mnt));
+ pids_cgroup_mnt = cgroup_kernel_mount(&pids_opts);
+ if (IS_ERR(pids_cgroup_mnt))
+ panic("Failed to mount pids cgroup: %ld\n",
+ PTR_ERR(pids_cgroup_mnt));
+
ub_cgroup_mnt = cgroup_kernel_mount(&ub_opts);
if (IS_ERR(ub_cgroup_mnt))
panic("Failed to mount beancounter cgroup: %ld\n",
diff --git a/kernel/bc/proc.c b/kernel/bc/proc.c
index 9f60d9991e0a..e5aef9c11001 100644
--- a/kernel/bc/proc.c
+++ b/kernel/bc/proc.c
@@ -71,6 +71,7 @@ static void __show_resources(struct seq_file *f, struct user_beancounter *ub,
int i, precharge[UB_RESOURCES];
ub_sync_memcg(ub);
+ ub_sync_pids(ub);
ub_precharge_snapshot(ub, precharge);
for (i = 0; i < UB_RESOURCES_COMPAT; i++)
@@ -180,6 +181,7 @@ static int ub_show(struct seq_file *f, void *v)
struct user_beancounter *ub = v;
ub_sync_memcg(ub);
+ ub_sync_pids(ub);
ub_precharge_snapshot(ub, precharge);
for (i = 0; i < UB_RESOURCES_COMPAT; i++)
diff --git a/kernel/cgroup_pids.c b/kernel/cgroup_pids.c
index 610f8df7a25c..465de7ebd817 100644
--- a/kernel/cgroup_pids.c
+++ b/kernel/cgroup_pids.c
@@ -50,8 +50,43 @@ struct pids_cgroup {
*/
atomic64_t counter;
int64_t limit;
+#ifdef CONFIG_BEANCOUNTERS
+ /* beancounter-related stats */
+ atomic_long_t pids_failcnt;
+#endif /* CONFIG_BEANCOUNTERS */
};
+#ifdef CONFIG_BEANCOUNTERS
+static inline /* css -> enclosing struct pids_cgroup; same container_of() conversion as css_pids() in this file */
+struct pids_cgroup *pids_cgroup_from_css(struct cgroup_subsys_state *s)
+{
+ return container_of(s, struct pids_cgroup, css); /* @s must be the css member embedded in a struct pids_cgroup */
+}
+
+struct pids_cgroup *pids_cgroup_from_cont(struct cgroup *cont) /* cgroup -> its pids_cgroup; non-static, declared extern in include/bc/beancounter.h */
+{
+ return pids_cgroup_from_css(
+ cgroup_subsys_state(cont, pids_subsys_id)); /* fetch @cont's css for the pids subsystem, then convert it */
+}
+
+#include <bc/beancounter.h>
+void pids_cgroup_sync_beancounter(struct pids_cgroup *pids, /* source: counter, limit and fail count are read from here */
+ struct user_beancounter *ub) /* destination: only ub->ub_parms[UB_NUMPROC] is written */
+{
+ unsigned long lim;
+ volatile struct ubparm *p;
+
+ p = &ub->ub_parms[UB_NUMPROC];
+ p->held = p->maxheld = (unsigned long)atomic64_read(&pids->counter); /* maxheld simply mirrors held; no separate peak value is tracked here */
+ p->failcnt = atomic_long_read(&pids->pids_failcnt); /* pids_failcnt is incremented in pids_try_charge() right before it returns -EAGAIN */
+
+ lim = pids->limit; /* int64_t limit narrowed into an unsigned long */
+ lim = lim >= PIDS_MAX ? UB_MAXVALUE : /* a limit at or above PIDS_MAX is reported as UB_MAXVALUE */
+ min_t(unsigned long, lim, UB_MAXVALUE); /* anything else is capped at UB_MAXVALUE */
+ p->barrier = p->limit = lim; /* barrier and limit always receive the same value */
+}
+#endif /* CONFIG_BEANCOUNTERS */
+
static struct pids_cgroup *css_pids(struct cgroup_subsys_state *css)
{
return container_of(css, struct pids_cgroup, css);
@@ -164,6 +199,9 @@ static int pids_try_charge(struct pids_cgroup *pids, int num)
for (q = pids; q != p; q = parent_pids(q))
pids_cancel(q, num);
pids_cancel(p, num);
+#ifdef CONFIG_BEANCOUNTERS
+ atomic_long_inc(&pids->pids_failcnt);
+#endif /* CONFIG_BEANCOUNTERS */
return -EAGAIN;
}
More information about the Devel
mailing list