[Devel] [PATCH RHEL7 COMMIT] Drop fairsched syscalls
Vladimir Davydov
vdavydov at virtuozzo.com
Mon Jun 20 10:06:44 PDT 2016
The commit is pushed to "branch-rh7-3.10.0-327.18.2.vz7.14.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-327.18.2.vz7.14.16
------>
commit 13985cb1990d71a321504c58daa16b50ac9a0ec7
Author: Vladimir Davydov <vdavydov at virtuozzo.com>
Date: Mon Jun 20 19:40:14 2016 +0300
Drop fairsched syscalls
Everything that can be configured via the fairsched syscalls is also
accessible via the cpu cgroup. Since it's getting difficult to maintain
the syscalls due to the upcoming move of containers to machine.slice,
drop them.
Also, drop all functions from sched and cpuset which were used only by
fairsched syscalls.
Note that I make the CFS_BANDWIDTH config option select CFS_CPULIMIT.
Otherwise CFS_CPULIMIT would never get selected, because its only user
was the VZ_FAIRSCHED config option, which is dropped by this patch. I
think we need to merge CFS_CPULIMIT into CFS_BANDWIDTH eventually, but
let's leave it as is for now.
Signed-off-by: Vladimir Davydov <vdavydov at virtuozzo.com>
---
arch/powerpc/include/asm/systbl.h | 16 +-
arch/powerpc/include/uapi/asm/unistd.h | 8 -
arch/x86/syscalls/syscall_32.tbl | 9 -
arch/x86/syscalls/syscall_64.tbl | 8 -
configs/kernel-3.10.0-x86_64-debug.config | 1 -
configs/kernel-3.10.0-x86_64.config | 1 -
include/linux/cpuset.h | 5 -
include/linux/fairsched.h | 58 ---
include/linux/sched.h | 20 -
include/uapi/linux/Kbuild | 1 -
include/uapi/linux/fairsched.h | 8 -
init/Kconfig | 20 +-
kernel/Makefile | 1 -
kernel/cpuset.c | 26 --
kernel/fairsched.c | 705 ------------------------------
kernel/sched/core.c | 69 +--
kernel/sched/cpuacct.h | 2 +
kernel/sys_ni.c | 10 -
18 files changed, 25 insertions(+), 943 deletions(-)
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index ce9d2d7977e5..8a44bbd2bee6 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -374,14 +374,14 @@ SYSCALL(ni_syscall)
SYSCALL(ni_syscall)
SYSCALL(ni_syscall)
SYSCALL(ni_syscall)
-SYSCALL(fairsched_mknod)
-SYSCALL(fairsched_rmnod)
-SYSCALL(fairsched_chwt)
-SYSCALL(fairsched_mvpr)
-SYSCALL(fairsched_rate)
-SYSCALL(fairsched_vcpus)
-SYSCALL(fairsched_cpumask)
-SYSCALL(fairsched_nodemask)
+SYSCALL(ni_syscall)
+SYSCALL(ni_syscall)
+SYSCALL(ni_syscall)
+SYSCALL(ni_syscall)
+SYSCALL(ni_syscall)
+SYSCALL(ni_syscall)
+SYSCALL(ni_syscall)
+SYSCALL(ni_syscall)
SYSCALL(getluid)
SYSCALL(setluid)
SYSCALL(setublimit)
diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h
index e90207158a12..41fc69c6822b 100644
--- a/arch/powerpc/include/uapi/asm/unistd.h
+++ b/arch/powerpc/include/uapi/asm/unistd.h
@@ -387,14 +387,6 @@
#define __NR_execveat 362
#define __NR_switch_endian 363
-#define __NR_fairsched_mknod 360
-#define __NR_fairsched_rmnod 361
-#define __NR_fairsched_chwt 362
-#define __NR_fairsched_mvpr 363
-#define __NR_fairsched_rate 364
-#define __NR_fairsched_vcpus 365
-#define __NR_fairsched_cpumask 366
-#define __NR_fairsched_nodemask 367
#define __NR_getluid 368
#define __NR_setluid 369
#define __NR_setublimit 370
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index e60fd32ebba3..f8ed67d66913 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -360,15 +360,6 @@
356 i386 memfd_create sys_memfd_create
374 i386 userfaultfd sys_userfaultfd
-500 i386 fairsched_mknod sys_fairsched_mknod
-501 i386 fairsched_rmnod sys_fairsched_rmnod
-502 i386 fairsched_chwt sys_fairsched_chwt
-503 i386 fairsched_mvpr sys_fairsched_mvpr
-504 i386 fairsched_rate sys_fairsched_rate
-505 i386 fairsched_vcpus sys_fairsched_vcpus
-506 i386 fairsched_cpumask sys_fairsched_cpumask
-507 i386 fairsched_nodemask sys_fairsched_nodemask
-
510 i386 getluid sys_getluid
511 i386 setluid sys_setluid
512 i386 setublimit sys_setublimit compat_sys_setublimit
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index 846183e5a9f0..7f009985158e 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -325,18 +325,10 @@
320 common kexec_file_load sys_kexec_file_load
323 common userfaultfd sys_userfaultfd
-497 64 fairsched_nodemask sys_fairsched_nodemask
-498 64 fairsched_cpumask sys_fairsched_cpumask
-499 64 fairsched_vcpus sys_fairsched_vcpus
500 64 getluid sys_getluid
501 64 setluid sys_setluid
502 64 setublimit sys_setublimit
503 64 ubstat sys_ubstat
-504 64 fairsched_mknod sys_fairsched_mknod
-505 64 fairsched_rmnod sys_fairsched_rmnod
-506 64 fairsched_chwt sys_fairsched_chwt
-507 64 fairsched_mvpr sys_fairsched_mvpr
-508 64 fairsched_rate sys_fairsched_rate
#
# x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/configs/kernel-3.10.0-x86_64-debug.config b/configs/kernel-3.10.0-x86_64-debug.config
index 8ca31ce70a7f..4142b41946ce 100644
--- a/configs/kernel-3.10.0-x86_64-debug.config
+++ b/configs/kernel-3.10.0-x86_64-debug.config
@@ -5489,7 +5489,6 @@ CONFIG_VE_IPTABLES=y
CONFIG_VZ_WDOG=m
CONFIG_VZ_EVENT=m
-CONFIG_VZ_FAIRSCHED=y
CONFIG_BLK_DEV_PLOOP=m
CONFIG_VZ_QUOTA=m
CONFIG_SIM_FS=m
diff --git a/configs/kernel-3.10.0-x86_64.config b/configs/kernel-3.10.0-x86_64.config
index 6f087a774d01..3be34a6bcea5 100644
--- a/configs/kernel-3.10.0-x86_64.config
+++ b/configs/kernel-3.10.0-x86_64.config
@@ -5457,7 +5457,6 @@ CONFIG_VE_IPTABLES=y
CONFIG_VZ_WDOG=m
CONFIG_VZ_EVENT=m
-CONFIG_VZ_FAIRSCHED=y
CONFIG_BLK_DEV_PLOOP=m
CONFIG_VZ_QUOTA=m
CONFIG_SIM_FS=m
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index ff708ea11083..f4550a9dbf36 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -117,11 +117,6 @@ static inline void set_mems_allowed(nodemask_t nodemask)
task_unlock(current);
}
-extern int cgroup_set_cpumask(struct cgroup *cgrp,
- const struct cpumask *cpus_allowed);
-extern int cgroup_set_nodemask(struct cgroup *cgrp,
- const nodemask_t *nodes_allowed);
-
#else /* !CONFIG_CPUSETS */
static inline int cpuset_init(void) { return 0; }
diff --git a/include/linux/fairsched.h b/include/linux/fairsched.h
deleted file mode 100644
index 5f2ab0c15fa5..000000000000
--- a/include/linux/fairsched.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * include/linux/fairsched.h
- *
- * Fair Scheduler
- *
- * Copyright (c) 2000-2008 SWsoft
- * Copyright (c) 2009-2015 Parallels IP Holdings GmbH
- *
- */
-
-#ifndef __LINUX_FAIRSCHED_H__
-#define __LINUX_FAIRSCHED_H__
-
-#include <linux/cgroup.h>
-#include <linux/seq_file.h>
-
-#include <uapi/linux/fairsched.h>
-
-#ifdef __KERNEL__
-
-struct kernel_cpustat;
-
-#ifdef CONFIG_VZ_FAIRSCHED
-
-#define FSCHWEIGHT_MAX ((1 << 16) - 1)
-#define FSCHRATE_SHIFT 10
-#define FSCH_TIMESLICE 16
-
-/******************************************************************************
- * cfs group shares = FSCHWEIGHT_BASE / fairsched weight
- *
- * vzctl cpuunits default 1000
- * cfs shares default value is 1024 (see init_task_group_load in sched.c)
- * cpuunits = 1000 --> weight = 500000 / cpuunits = 500 --> shares = 1024
- * ^--- from vzctl
- * weight in 1..65535 --> shares in 7..512000
- * shares should be >1 (see comment in sched_group_set_shares function)
- *****************************************************************************/
-
-#define FSCHWEIGHT_BASE 512000UL
-
-asmlinkage long sys_fairsched_mknod(unsigned int parent, unsigned int weight,
- unsigned int newid);
-asmlinkage long sys_fairsched_rmnod(unsigned int id);
-asmlinkage long sys_fairsched_mvpr(pid_t pid, unsigned int id);
-asmlinkage long sys_fairsched_vcpus(unsigned int id, unsigned int vcpus);
-asmlinkage long sys_fairsched_chwt(unsigned int id, unsigned int weight);
-asmlinkage long sys_fairsched_rate(unsigned int id, int op, unsigned int rate);
-asmlinkage long sys_fairsched_cpumask(unsigned int id, unsigned int len,
- unsigned long __user *user_mask_ptr);
-asmlinkage long sys_fairsched_nodemask(unsigned int id, unsigned int len,
- unsigned long __user *user_mask_ptr);
-
-#endif /* CONFIG_VZ_FAIRSCHED */
-
-#endif /* __KERNEL__ */
-
-#endif /* __LINUX_FAIRSCHED_H__ */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 84a9888b2483..ab5cfed780ad 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2149,14 +2149,6 @@ static inline void sched_autogroup_fork(struct signal_struct *sig) { }
static inline void sched_autogroup_exit(struct signal_struct *sig) { }
#endif
-#ifdef CONFIG_CFS_CPULIMIT
-struct cgroup;
-extern int sched_cgroup_set_rate(struct cgroup *cgrp, unsigned long rate);
-extern unsigned long sched_cgroup_get_rate(struct cgroup *cgrp);
-extern int sched_cgroup_set_nr_cpus(struct cgroup *cgrp, unsigned int nr_cpus);
-extern unsigned int sched_cgroup_get_nr_cpus(struct cgroup *cgrp);
-#endif
-
extern int yield_to(struct task_struct *p, bool preempt);
extern void set_user_nice(struct task_struct *p, long nice);
extern int task_prio(const struct task_struct *p);
@@ -2957,18 +2949,6 @@ extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
#ifdef CONFIG_CGROUP_SCHED
extern struct task_group root_task_group;
-#ifdef CONFIG_FAIR_GROUP_SCHED
-extern int sched_cgroup_set_shares(struct cgroup *cgrp, unsigned long shares);
-unsigned long sched_cgroup_get_shares(struct cgroup *cgrp);
-extern unsigned long sched_cgroup_get_nr_running(struct cgroup *cgrp);
-#endif
-#ifdef CONFIG_RT_GROUP_SCHED
-extern int sched_cgroup_set_rt_runtime(struct cgroup *cgrp,
- long rt_runtime_us);
-#else
-static inline int sched_cgroup_set_rt_runtime(struct cgroup *cgrp,
- long rt_runtime_us) { return 0; }
-#endif
#endif /* CONFIG_CGROUP_SCHED */
extern int task_can_switch_user(struct user_struct *up,
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 331a11730f21..9be4ca485dbd 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -111,7 +111,6 @@ header-y += errqueue.h
header-y += ethtool.h
header-y += eventpoll.h
header-y += fadvise.h
-header-y += fairsched.h
header-y += falloc.h
header-y += fanotify.h
header-y += fb.h
diff --git a/include/uapi/linux/fairsched.h b/include/uapi/linux/fairsched.h
index 812782bfb3c6..d5aa96928aee 100644
--- a/include/uapi/linux/fairsched.h
+++ b/include/uapi/linux/fairsched.h
@@ -5,11 +5,3 @@
*
*/
-#ifndef _UAPI_LINUX_FAIRSCHED_H
-#define _UAPI_LINUX_FAIRSCHED_H
-
-#define FAIRSCHED_SET_RATE 0
-#define FAIRSCHED_DROP_RATE 1
-#define FAIRSCHED_GET_RATE 2
-
-#endif /* _UAPI_LINUX_FAIRSCHED_H */
diff --git a/init/Kconfig b/init/Kconfig
index a24a88f6bffa..2be2b3d74835 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -815,18 +815,6 @@ config NUMA_BALANCING
This system will be inactive on UMA systems.
-config VZ_FAIRSCHED
- bool "OpenVZ fairsched compat"
- select CPUSETS
- select CGROUP_SCHED
- select FAIR_GROUP_SCHED
- select CFS_BANDWIDTH
- select CFS_CPULIMIT
- default y
- help
- This option add task cpu cgroup control with OpenVZ compatible
- syscall and procfs interface.
-
menuconfig CGROUPS
boolean "Control Group support"
depends on EVENTFD
@@ -996,9 +984,13 @@ config FAIR_GROUP_SCHED
depends on CGROUP_SCHED
default CGROUP_SCHED
+config CFS_CPULIMIT
+ bool
+
config CFS_BANDWIDTH
bool "CPU bandwidth provisioning for FAIR_GROUP_SCHED"
depends on FAIR_GROUP_SCHED
+ select CFS_CPULIMIT
default n
help
This option allows users to define CPU bandwidth rates (limits) for
@@ -1007,10 +999,6 @@ config CFS_BANDWIDTH
restriction.
See tip/Documentation/scheduler/sched-bwc.txt for more information.
-config CFS_CPULIMIT
- bool
- depends on CFS_BANDWIDTH
-
config RT_GROUP_SCHED
bool "Group scheduling for SCHED_RR/FIFO"
depends on CGROUP_SCHED
diff --git a/kernel/Makefile b/kernel/Makefile
index 73a73ed39313..a28269854d27 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -98,7 +98,6 @@ obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
obj-$(CONFIG_TRACEPOINTS) += tracepoint.o
obj-$(CONFIG_LATENCYTOP) += latencytop.o
-obj-$(CONFIG_VZ_FAIRSCHED) += fairsched.o
obj-$(CONFIG_BINFMT_ELF) += elfcore.o
obj-$(CONFIG_COMPAT_BINFMT_ELF) += elfcore.o
obj-$(CONFIG_BINFMT_ELF_FDPIC) += elfcore.o
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 689428d9a9dd..9faac5b17596 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1540,32 +1540,6 @@ static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
mutex_unlock(&cpuset_mutex);
}
-/*
- * cgroup_set_[cpumask|nodemask] - set a cgroup's cpu/mem affinity mask.
- *
- * Call holding cgroup mutex.
- */
-
-int cgroup_set_cpumask(struct cgroup *cgrp, const struct cpumask *cpus_allowed)
-{
- int retval;
-
- mutex_lock(&cpuset_mutex);
- retval = __update_cpumask(cgroup_cs(cgrp), cpus_allowed);
- mutex_unlock(&cpuset_mutex);
- return retval;
-}
-
-int cgroup_set_nodemask(struct cgroup *cgrp, const nodemask_t *nodes_allowed)
-{
- int retval;
-
- mutex_lock(&cpuset_mutex);
- retval = __update_nodemask(cgroup_cs(cgrp), nodes_allowed);
- mutex_unlock(&cpuset_mutex);
- return retval;
-}
-
/* The various types of files and directories in a cpuset file system */
typedef enum {
diff --git a/kernel/fairsched.c b/kernel/fairsched.c
deleted file mode 100644
index 4d96b4a9a3e0..000000000000
--- a/kernel/fairsched.c
+++ /dev/null
@@ -1,705 +0,0 @@
-/*
- * kernel/fairsched.c
- *
- * Fair Scheduler
- *
- * Copyright (c) 2000-2008 SWsoft
- * Copyright (c) 2009-2015 Parallels IP Holdings GmbH
- *
- */
-
-#include <linux/module.h>
-#include <linux/err.h>
-#include <linux/mount.h>
-#include <linux/cgroup.h>
-#include <linux/cpumask.h>
-#include <linux/cpuset.h>
-#include <linux/pid_namespace.h>
-#include <linux/syscalls.h>
-#include <linux/fairsched.h>
-#include <linux/ve.h>
-#include <linux/uaccess.h>
-
-struct fairsched_node {
- struct cgroup *cpu;
- struct cgroup *cpuset;
-};
-
-static struct fairsched_node root_node = {NULL, NULL};
-
-/* fairsched use node id = INT_MAX for ve0 tasks */
-#define FAIRSCHED_HOST_NODE 2147483647
-
-#define fairsched_id(id) (id == FAIRSCHED_HOST_NODE ? 0 : id)
-
-static int fairsched_id_parse(const char *s)
-{
- unsigned long id;
-
- if (kstrtoul(s, 10, &id))
- return -1;
-
- return id ?: FAIRSCHED_HOST_NODE;
-}
-
-static int fairsched_open(struct fairsched_node *node, int id)
-{
- envid_t veid = fairsched_id(id);
-
- node->cpu = ve_cgroup_open(root_node.cpu, 0, veid);
- if (IS_ERR(node->cpu))
- return PTR_ERR(node->cpu);
-
- node->cpuset = ve_cgroup_open(root_node.cpuset, 0, veid);
- if (IS_ERR(node->cpuset)) {
- cgroup_kernel_close(node->cpu);
- return PTR_ERR(node->cpuset);
- }
- return 0;
-}
-
-static int fairsched_create(struct fairsched_node *node, int id)
-{
- envid_t veid = fairsched_id(id);
- int err;
-
- node->cpu = ve_cgroup_open(root_node.cpu, CGRP_CREAT|CGRP_EXCL, veid);
- if (IS_ERR(node->cpu))
- return PTR_ERR(node->cpu);
-
- node->cpuset = ve_cgroup_open(root_node.cpuset, CGRP_CREAT, veid);
- if (IS_ERR(node->cpuset)) {
- cgroup_kernel_close(node->cpu);
- err = ve_cgroup_remove(root_node.cpu, veid);
- if (err)
- printk(KERN_ERR "Cleanup error, fairsched id=%d, err=%d\n", id, err);
- return PTR_ERR(node->cpuset);
- }
- return 0;
-}
-
-static int fairsched_remove(int id)
-{
- envid_t veid = fairsched_id(id);
- int ret;
-
- ret = ve_cgroup_remove(root_node.cpuset, veid);
- if (ret < 0)
- return ret;
-
- return ve_cgroup_remove(root_node.cpu, veid);
-}
-
-static int fairsched_move(struct fairsched_node *node, struct task_struct *tsk)
-{
- int ret, err;
-
- ret = cgroup_kernel_attach(node->cpu, tsk);
- if (ret)
- return ret;
-
- ret = cgroup_kernel_attach(node->cpuset, tsk);
- if (ret) {
- err = cgroup_kernel_attach(root_node.cpu, tsk);
- if (err)
- printk(KERN_ERR "Cleanup error, fairsched id=, err=%d\n", err);
- }
- return ret;
-}
-
-static void fairsched_close(struct fairsched_node *node)
-{
- cgroup_kernel_close(node->cpu);
- cgroup_kernel_close(node->cpuset);
-}
-
-SYSCALL_DEFINE3(fairsched_mknod, unsigned int, parent, unsigned int, weight,
- unsigned int, newid)
-{
- int retval;
- struct fairsched_node node = {NULL, NULL};
-
- if (!capable_setveid())
- return -EPERM;
-
- retval = -EINVAL;
- if (weight < 1 || weight > FSCHWEIGHT_MAX)
- goto out;
- if (newid < 0 || newid > INT_MAX)
- goto out;
-
- retval = fairsched_create(&node, newid);
- if (retval)
- return retval;
-
- fairsched_close(&node);
- retval = newid;
-out:
- return retval;
-}
-
-SYSCALL_DEFINE1(fairsched_rmnod, unsigned int, id)
-{
- if (!capable_setveid())
- return -EPERM;
-
- return fairsched_remove(id);
-}
-
-SYSCALL_DEFINE2(fairsched_chwt, unsigned int, id, unsigned, weight)
-{
- struct cgroup *cgrp;
- int retval;
-
- if (!capable_setveid())
- return -EPERM;
-
- if (id == 0)
- return -EINVAL;
- if (weight < 1 || weight > FSCHWEIGHT_MAX)
- return -EINVAL;
-
- cgrp = ve_cgroup_open(root_node.cpu, 0, fairsched_id(id));
- if (IS_ERR(cgrp))
- return PTR_ERR(cgrp);
-
- retval = sched_cgroup_set_shares(cgrp, FSCHWEIGHT_BASE / weight);
- cgroup_kernel_close(cgrp);
-
- return retval;
-}
-
-SYSCALL_DEFINE2(fairsched_vcpus, unsigned int, id, unsigned int, vcpus)
-{
- struct cgroup *cgrp;
- int retval = 0;
-
- if (!capable_setveid())
- return -EPERM;
-
- if (id == 0)
- return -EINVAL;
-
- cgrp = ve_cgroup_open(root_node.cpu, 0, fairsched_id(id));
- if (IS_ERR(cgrp))
- return PTR_ERR(cgrp);
-
- retval = sched_cgroup_set_nr_cpus(cgrp, vcpus);
- cgroup_kernel_close(cgrp);
-
- return retval;
-}
-
-SYSCALL_DEFINE3(fairsched_rate, unsigned int, id, int, op, unsigned, rate)
-{
- struct cgroup *cgrp;
- long ret;
-
- if (!capable_setveid())
- return -EPERM;
-
- if (id == 0)
- return -EINVAL;
- if (op == FAIRSCHED_SET_RATE && (rate < 1 || rate >= (1UL << 31)))
- return -EINVAL;
-
- cgrp = ve_cgroup_open(root_node.cpu, 0, fairsched_id(id));
- if (IS_ERR(cgrp))
- return PTR_ERR(cgrp);
-
- switch (op) {
- case FAIRSCHED_SET_RATE:
- ret = sched_cgroup_set_rate(cgrp, rate);
- if (!ret)
- ret = sched_cgroup_get_rate(cgrp);
- break;
- case FAIRSCHED_DROP_RATE:
- ret = sched_cgroup_set_rate(cgrp, 0);
- break;
- case FAIRSCHED_GET_RATE:
- ret = sched_cgroup_get_rate(cgrp);
- if (!ret)
- ret = -ENODATA;
- break;
- default:
- ret = -EINVAL;
- break;
- }
- cgroup_kernel_close(cgrp);
-
- return ret;
-}
-
-SYSCALL_DEFINE2(fairsched_mvpr, pid_t, pid, unsigned int, id)
-{
- struct task_struct *tsk;
- struct fairsched_node node = {NULL, NULL};
- int retval;
-
- if (!capable_setveid())
- return -EPERM;
-
- retval = fairsched_open(&node, id);
- if (retval)
- return retval;
-
- rcu_read_lock();
- tsk = current;
- if (pid != task_pid_vnr(tsk))
- tsk = find_task_by_vpid(pid);
- if (tsk == NULL) {
- rcu_read_unlock();
- retval = -ESRCH;
- goto out;
- }
- get_task_struct(tsk);
- rcu_read_unlock();
-
- retval = fairsched_move(&node, tsk);
-
- put_task_struct(tsk);
-out:
- fairsched_close(&node);
- return retval;
-}
-
-static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
- struct cpumask *new_mask)
-{
- if (len < cpumask_size())
- cpumask_clear(new_mask);
- else if (len > cpumask_size())
- len = cpumask_size();
-
- return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0;
-}
-
-SYSCALL_DEFINE3(fairsched_cpumask, unsigned int, id, unsigned int, len,
- unsigned long __user *, user_mask_ptr)
-{
- struct cgroup *cgrp;
- int retval;
- cpumask_var_t new_mask, in_mask;
-
- if (!capable_setveid())
- return -EPERM;
-
- if (id == 0)
- return -EINVAL;
-
- cgrp = ve_cgroup_open(root_node.cpuset, 0, fairsched_id(id));
- if (IS_ERR(cgrp))
- return PTR_ERR(cgrp);
-
- if (!alloc_cpumask_var(&in_mask, GFP_KERNEL)) {
- retval = -ENOMEM;
- goto out;
- }
- if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) {
- retval = -ENOMEM;
- goto out_free_in_mask;
- }
-
- retval = get_user_cpu_mask(user_mask_ptr, len, in_mask);
- if (retval == 0) {
- cpumask_and(new_mask, in_mask, cpu_active_mask);
- retval = cgroup_set_cpumask(cgrp, new_mask);
- }
-
- free_cpumask_var(new_mask);
-
-out_free_in_mask:
- free_cpumask_var(in_mask);
-out:
- cgroup_kernel_close(cgrp);
- return retval;
-}
-
-static int get_user_node_mask(unsigned long __user *user_mask_ptr, unsigned len,
- nodemask_t *new_mask)
-{
- if (len < sizeof(nodemask_t))
- nodes_clear(*new_mask);
- else if (len > sizeof(nodemask_t))
- len = sizeof(nodemask_t);
-
- return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0;
-}
-
-SYSCALL_DEFINE3(fairsched_nodemask, unsigned int, id, unsigned int, len,
- unsigned long __user *, user_mask_ptr)
-{
- struct cgroup *cgrp;
- int retval;
- nodemask_t new_mask, in_mask;
-
- if (!capable_setveid())
- return -EPERM;
-
- if (id == 0)
- return -EINVAL;
-
- cgrp = ve_cgroup_open(root_node.cpuset, 0, fairsched_id(id));
- if (IS_ERR(cgrp))
- return PTR_ERR(cgrp);
- if (cgrp == NULL)
- return -ENOENT;
-
- retval = get_user_node_mask(user_mask_ptr, len, &in_mask);
- if (retval == 0) {
- nodes_and(new_mask, in_mask, node_states[N_HIGH_MEMORY]);
- retval = cgroup_set_nodemask(cgrp, &new_mask);
- }
-
- cgroup_kernel_close(cgrp);
- return retval;
-}
-
-#ifdef CONFIG_PROC_FS
-
-/*********************************************************************/
-/*
- * proc interface
- */
-/*********************************************************************/
-
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/vmalloc.h>
-
-struct fairsched_node_dump {
- int id;
- unsigned weight;
- unsigned rate;
- int nr_pcpu;
- int nr_tasks, nr_runtasks;
-};
-
-struct fairsched_dump {
- int len;
- struct fairsched_node_dump nodes[0];
-};
-
-static struct fairsched_dump *fairsched_do_dump(int compat)
-{
- struct fairsched_dump *dump;
- struct fairsched_node_dump *p;
- int nr_nodes;
- struct dentry *root, *dentry;
- struct cgroup *cgrp;
- struct ve_struct *ve = get_exec_env();
- int id;
-
- root = root_node.cpu->dentry;
- mutex_lock(&root->d_inode->i_mutex);
-
- spin_lock(&root->d_lock);
- nr_nodes = 0;
- list_for_each_entry(dentry, &root->d_subdirs, d_u.d_child) {
- if (d_unhashed(dentry) || !dentry->d_inode ||
- !S_ISDIR(dentry->d_inode->i_mode))
- continue;
- nr_nodes++;
- }
- spin_unlock(&root->d_lock);
-
- nr_nodes = ve_is_super(get_exec_env()) ? nr_nodes + 16 : 1;
-
- dump = vmalloc_account(sizeof(*dump) + nr_nodes * sizeof(dump->nodes[0]));
- if (dump == NULL)
- goto out;
-
- spin_lock(&root->d_lock);
-
- p = dump->nodes;
- list_for_each_entry_reverse(dentry, &root->d_subdirs, d_u.d_child) {
- if (d_unhashed(dentry) || !dentry->d_inode ||
- !S_ISDIR(dentry->d_inode->i_mode))
- continue;
- id = fairsched_id_parse(dentry->d_name.name);
- if (id < 0)
- continue;
- if (!ve_is_super(ve) &&
- strcmp(ve_name(ve), dentry->d_name.name))
- continue;
- cgrp = dentry->d_fsdata; /* __d_cgrp */
- p->id = id;
- p->nr_tasks = cgroup_task_count(cgrp);
- p->nr_runtasks = sched_cgroup_get_nr_running(cgrp);
- p->weight = FSCHWEIGHT_BASE / sched_cgroup_get_shares(cgrp);
- p->nr_pcpu = num_online_cpus();
- p->rate = sched_cgroup_get_rate(cgrp);
- p++;
- if (!--nr_nodes)
- break;
- }
- dump->len = p - dump->nodes;
-
- spin_unlock(&root->d_lock);
-out:
- mutex_unlock(&root->d_inode->i_mutex);
- return dump;
-}
-
-#define FAIRSCHED_PROC_HEADLINES 2
-
-#define FAIRSHED_DEBUG " debug"
-
-#ifdef CONFIG_VE
-/*
- * File format is dictated by compatibility reasons.
- */
-static int fairsched_seq_show(struct seq_file *m, void *v)
-{
- struct fairsched_dump *dump;
- struct fairsched_node_dump *p;
- unsigned vid, nid, pid, r;
-
- dump = m->private;
- p = (struct fairsched_node_dump *)((unsigned long)v & ~3UL);
- if (p - dump->nodes < FAIRSCHED_PROC_HEADLINES) {
- if (p == dump->nodes)
- seq_printf(m, "Version: 2.6 debug\n");
- else if (p == dump->nodes + 1)
- seq_printf(m,
- " veid "
- " id "
- " parent "
- "weight "
- " rate "
- "tasks "
- " run "
- "cpus"
- " "
- "flg "
- "ready "
- " start_tag "
- " value "
- " delay"
- "\n");
- } else {
- p -= FAIRSCHED_PROC_HEADLINES;
- vid = nid = pid = 0;
- r = (unsigned long)v & 3;
- if (p == dump->nodes) {
- if (r == 2)
- nid = p->id;
- } else {
- if (!r)
- nid = p->id;
- else if (r == 1)
- vid = pid = p->id;
- else
- vid = p->id, nid = 1;
- }
- seq_printf(m,
- "%10u "
- "%10u %10u %6u %5u %5u %5u %4u"
- " "
- " %c%c %5u %20Lu %20Lu %20Lu"
- "\n",
- vid,
- nid,
- pid,
- p->weight,
- p->rate,
- p->nr_tasks,
- p->nr_runtasks,
- p->nr_pcpu,
- p->rate ? 'L' : '.',
- '.',
- p->nr_runtasks,
- 0ll, 0ll, 0ll);
- }
-
- return 0;
-}
-
-static void *fairsched_seq_start(struct seq_file *m, loff_t *pos)
-{
- struct fairsched_dump *dump;
- unsigned long l;
-
- dump = m->private;
- if (*pos >= dump->len * 3 - 1 + FAIRSCHED_PROC_HEADLINES)
- return NULL;
- if (*pos < FAIRSCHED_PROC_HEADLINES)
- return dump->nodes + *pos;
- /* guess why... */
- l = (unsigned long)(dump->nodes +
- ((unsigned long)*pos + FAIRSCHED_PROC_HEADLINES * 2 + 1) / 3);
- l |= ((unsigned long)*pos + FAIRSCHED_PROC_HEADLINES * 2 + 1) % 3;
- return (void *)l;
-}
-static void *fairsched_seq_next(struct seq_file *m, void *v, loff_t *pos)
-{
- ++*pos;
- return fairsched_seq_start(m, pos);
-}
-#endif /* CONFIG_VE */
-
-static int fairsched2_seq_show(struct seq_file *m, void *v)
-{
- struct fairsched_dump *dump;
- struct fairsched_node_dump *p;
-
- dump = m->private;
- p = v;
- if (p - dump->nodes < FAIRSCHED_PROC_HEADLINES) {
- if (p == dump->nodes)
- seq_printf(m, "Version: 2.7" FAIRSHED_DEBUG "\n");
- else if (p == dump->nodes + 1)
- seq_printf(m,
- " id "
- "weight "
- " rate "
- " run "
- "cpus"
-#ifdef FAIRSHED_DEBUG
- " "
- "flg "
- "ready "
- " start_tag "
- " value "
- " delay"
-#endif
- "\n");
- } else {
- p -= FAIRSCHED_PROC_HEADLINES;
- seq_printf(m,
- "%10u %6u %5u %5u %4u"
-#ifdef FAIRSHED_DEBUG
- " "
- " %c%c %5u %20Lu %20Lu %20Lu"
-#endif
- "\n",
- p->id,
- p->weight,
- p->rate,
- p->nr_runtasks,
- p->nr_pcpu
-#ifdef FAIRSHED_DEBUG
- ,
- p->rate ? 'L' : '.',
- '.',
- p->nr_runtasks,
- 0ll, 0ll, 0ll
-#endif
- );
- }
-
- return 0;
-}
-
-static void *fairsched2_seq_start(struct seq_file *m, loff_t *pos)
-{
- struct fairsched_dump *dump;
-
- dump = m->private;
- if (*pos >= dump->len + FAIRSCHED_PROC_HEADLINES)
- return NULL;
- return dump->nodes + *pos;
-}
-static void *fairsched2_seq_next(struct seq_file *m, void *v, loff_t *pos)
-{
- ++*pos;
- return fairsched2_seq_start(m, pos);
-}
-static void fairsched2_seq_stop(struct seq_file *m, void *v)
-{
-}
-
-#ifdef CONFIG_VE
-static struct seq_operations fairsched_seq_op = {
- .start = fairsched_seq_start,
- .next = fairsched_seq_next,
- .stop = fairsched2_seq_stop,
- .show = fairsched_seq_show
-};
-#endif
-static struct seq_operations fairsched2_seq_op = {
- .start = fairsched2_seq_start,
- .next = fairsched2_seq_next,
- .stop = fairsched2_seq_stop,
- .show = fairsched2_seq_show
-};
-static int fairsched_seq_open(struct inode *inode, struct file *file)
-{
- int ret;
- struct seq_file *m;
- int compat;
-
-#ifdef CONFIG_VE
- compat = (file->f_dentry->d_name.len == sizeof("fairsched") - 1);
- ret = seq_open(file, compat ? &fairsched_seq_op : &fairsched2_seq_op);
-#else
- compat = 0;
- ret = seq_open(file, &fairsched2_seq_op);
-#endif
- if (ret)
- return ret;
- m = file->private_data;
- m->private = fairsched_do_dump(compat);
- if (m->private == NULL) {
- seq_release(inode, file);
- ret = -ENOMEM;
- }
- return ret;
-}
-static int fairsched_seq_release(struct inode *inode, struct file *file)
-{
- struct seq_file *m;
- struct fairsched_dump *dump;
-
- m = file->private_data;
- dump = m->private;
- m->private = NULL;
- vfree(dump);
- seq_release(inode, file);
- return 0;
-}
-static struct file_operations proc_fairsched_operations = {
- .open = fairsched_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = fairsched_seq_release
-};
-
-#endif /* CONFIG_PROC_FS */
-
-extern int sysctl_sched_rt_runtime;
-
-int __init fairsched_init(void)
-{
- struct vfsmount *cpu_mnt, *cpuset_mnt;
- struct cgroup_sb_opts cpu_opts = {
- .subsys_mask =
- (1ul << cpu_cgroup_subsys_id) |
- (1ul << cpuacct_subsys_id),
- };
-
- struct cgroup_sb_opts cpuset_opts = {
- .subsys_mask =
- (1ul << cpuset_subsys_id),
- };
-
- cpu_mnt = cgroup_kernel_mount(&cpu_opts);
- if (IS_ERR(cpu_mnt))
- return PTR_ERR(cpu_mnt);
- root_node.cpu = cgroup_get_root(cpu_mnt);
-
- cpuset_mnt = cgroup_kernel_mount(&cpuset_opts);
- if (IS_ERR(cpuset_mnt)) {
- kern_unmount(cpu_mnt);
- return PTR_ERR(cpuset_mnt);
- }
- root_node.cpuset = cgroup_get_root(cpuset_mnt);
-
-#ifdef CONFIG_PROC_FS
- proc_create("fairsched", S_ISVTX, NULL, &proc_fairsched_operations);
- proc_create("fairsched2", S_ISVTX, NULL, &proc_fairsched_operations);
- proc_mkdir_mode("fairsched", 0, proc_vz_dir);
-#endif /* CONFIG_PROC_FS */
- return 0;
-}
-late_initcall(fairsched_init);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1a3ff8c8fe5c..e88554993e3d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8420,28 +8420,6 @@ static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft)
return (u64) scale_load_down(tg->shares);
}
-int sched_cgroup_set_shares(struct cgroup *cgrp, unsigned long shares)
-{
- return sched_group_set_shares(cgroup_tg(cgrp), shares);
-}
-
-unsigned long sched_cgroup_get_shares(struct cgroup *cgrp)
-{
- return cgroup_tg(cgrp)->shares;
-}
-
-unsigned long sched_cgroup_get_nr_running(struct cgroup *cgrp)
-{
- struct task_group *tg = cgroup_tg(cgrp);
- unsigned long i, sum = 0;
-
- /* FIXME make it recursive over sub-cgroups */
- for_each_online_cpu(i)
- sum += tg->cfs_rq[i]->nr_running;
-
- return sum;
-}
-
#ifdef CONFIG_CFS_BANDWIDTH
static DEFINE_MUTEX(cfs_constraints_mutex);
@@ -8728,7 +8706,13 @@ static int tg_set_cpu_limit(struct task_group *tg,
return ret;
}
-int sched_cgroup_set_rate(struct cgroup *cgrp, unsigned long rate)
+static u64 cpu_rate_read_u64(struct cgroup *cgrp, struct cftype *cft)
+{
+ return cgroup_tg(cgrp)->cpu_rate;
+}
+
+static int cpu_rate_write_u64(struct cgroup *cgrp, struct cftype *cftype,
+ u64 rate)
{
struct task_group *tg = cgroup_tg(cgrp);
@@ -8737,12 +8721,13 @@ int sched_cgroup_set_rate(struct cgroup *cgrp, unsigned long rate)
return tg_set_cpu_limit(tg, rate, tg->nr_cpus);
}
-unsigned long sched_cgroup_get_rate(struct cgroup *cgrp)
+static u64 nr_cpus_read_u64(struct cgroup *cgrp, struct cftype *cft)
{
- return cgroup_tg(cgrp)->cpu_rate;
+ return cgroup_tg(cgrp)->nr_cpus;
}
-int sched_cgroup_set_nr_cpus(struct cgroup *cgrp, unsigned int nr_cpus)
+static int nr_cpus_write_u64(struct cgroup *cgrp, struct cftype *cftype,
+ u64 nr_cpus)
{
struct task_group *tg = cgroup_tg(cgrp);
@@ -8750,33 +8735,6 @@ int sched_cgroup_set_nr_cpus(struct cgroup *cgrp, unsigned int nr_cpus)
nr_cpus = num_online_cpus();
return tg_set_cpu_limit(tg, tg->cpu_rate, nr_cpus);
}
-
-unsigned int sched_cgroup_get_nr_cpus(struct cgroup *cgrp)
-{
- return cgroup_tg(cgrp)->nr_cpus;
-}
-
-static u64 cpu_rate_read_u64(struct cgroup *cgrp, struct cftype *cft)
-{
- return sched_cgroup_get_rate(cgrp);
-}
-
-static int cpu_rate_write_u64(struct cgroup *cgrp, struct cftype *cftype,
- u64 rate)
-{
- return sched_cgroup_set_rate(cgrp, rate);
-}
-
-static u64 nr_cpus_read_u64(struct cgroup *cgrp, struct cftype *cft)
-{
- return sched_cgroup_get_nr_cpus(cgrp);
-}
-
-static int nr_cpus_write_u64(struct cgroup *cgrp, struct cftype *cftype,
- u64 nr_cpus)
-{
- return sched_cgroup_set_nr_cpus(cgrp, nr_cpus);
-}
#else
static void tg_update_cpu_limit(struct task_group *tg)
{
@@ -8786,11 +8744,6 @@ static void tg_update_cpu_limit(struct task_group *tg)
#endif /* CONFIG_FAIR_GROUP_SCHED */
#ifdef CONFIG_RT_GROUP_SCHED
-int sched_cgroup_set_rt_runtime(struct cgroup *cgrp, long rt_runtime_us)
-{
- return sched_group_set_rt_runtime(cgroup_tg(cgrp), rt_runtime_us);
-}
-
static int cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft,
s64 val)
{
diff --git a/kernel/sched/cpuacct.h b/kernel/sched/cpuacct.h
index cf129eae1003..aaaf048ec99d 100644
--- a/kernel/sched/cpuacct.h
+++ b/kernel/sched/cpuacct.h
@@ -2,6 +2,8 @@
extern void cpuacct_charge(struct task_struct *tsk, u64 cputime);
extern void cpuacct_account_field(struct task_struct *p, int index, u64 val);
+
+struct cgroup;
extern struct kernel_cpustat *cpuacct_cpustat(struct cgroup *cgrp, int cpu);
#else
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 0fb2921fc149..b445eda7e368 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -222,13 +222,3 @@ cond_syscall(compat_sys_open_by_handle_at);
/* compare kernel pointers */
cond_syscall(sys_kcmp);
-
-/* fairsched compat */
-cond_syscall(sys_fairsched_mknod);
-cond_syscall(sys_fairsched_rmnod);
-cond_syscall(sys_fairsched_mvpr);
-cond_syscall(sys_fairsched_vcpus);
-cond_syscall(sys_fairsched_chwt);
-cond_syscall(sys_fairsched_rate);
-cond_syscall(sys_fairsched_cpumask);
-cond_syscall(sys_fairsched_nodemask);
More information about the Devel
mailing list