[Devel] [PATCH COMMIT] diff-fairsched-ve-20060530

xemul at sw.ru xemul at sw.ru
Tue May 30 07:32:43 PDT 2006


Added to 026test013

Patch from OpenVZ team <devel at openvz.org>
Virtualization fixes in fairsched.

This includes capability tuning, some per-ve statistics
and /proc/fairsched file with old-format data that may
be needed by some utils (vzcpucheck at least).

http://bugzilla.openvz.org/show_bug.cgi?id=176

-------------- next part --------------
--- ./include/linux/fairsched.h.vemix	2006-04-21 16:25:26.000000000 +0400
+++ ./include/linux/fairsched.h	2006-04-21 16:25:27.000000000 +0400
@@ -57,6 +58,9 @@ struct fairsched_node {
 
 	struct list_head nodelist;
 	int id;
+#ifdef CONFIG_VE
+	struct ve_struct *owner_env;
+#endif
 	struct vcpu_scheduler *vsched;
 };
 
--- ./kernel/fairsched.c.vemix	2006-04-21 16:25:26.000000000 +0400
+++ ./kernel/fairsched.c	2006-04-21 16:25:27.000000000 +0400
@@ -174,6 +174,9 @@ static fschvalue_t max_value;
 
 struct fairsched_node fairsched_init_node = {
 	.id		= INT_MAX,
+#ifdef CONFIG_VE
+	.owner_env	= get_ve0(),
+#endif
 	.weight		= 1,
 };
 EXPORT_SYMBOL(fairsched_init_node);
@@ -192,6 +195,15 @@ static fschtag_t max_latency;
 
 static DECLARE_MUTEX(fairsched_mutex);
 
+/*********************************************************************/
+/*
+ * Small helper routines
+ */
+/*********************************************************************/
+
+/* this didn't proved to be very valuable statistics... */
+#define fairsched_inc_ve_strv(node, cycles)  do {} while(0)
+#define fairsched_dec_ve_strv(node, cycles)  do {} while(0)
 
 /*********************************************************************/
 /*
@@ -455,11 +482,13 @@ void fairsched_decrun(struct fairsched_n
 void fairsched_inccpu(struct fairsched_node *node)
 {
 	node->nr_pcpu++;
+	fairsched_dec_ve_strv(node, cycles);
 }
 
 static inline void __fairsched_deccpu(struct fairsched_node *node)
 {
 	node->nr_pcpu--;
+	fairsched_inc_ve_strv(node, cycles);
 }
 
 void fairsched_deccpu(struct fairsched_node *node)
@@ -476,6 +505,9 @@ static void fairsched_account(struct fai
 	fschdur_t duration;
 
 	duration = FSCHDURATION(time, __get_cpu_var(prev_schedule));
+#ifdef CONFIG_VE
+	CYCLES_DADD(&node->owner_env->cpu_used_ve, duration);
+#endif
 
 	/*
 	 * The duration is not greater than TICK_DUR since
@@ -576,6 +608,9 @@ static int do_fairsched_mknod(unsigned i
 	node->weight = weight;
 	INIT_LIST_HEAD(&node->runlist);
 	node->id = newid;
+#ifdef CONFIG_VE
+	node->owner_env = get_exec_env();
+#endif
 
 	spin_lock_irq(&fairsched_lock);
 	list_add(&node->nodelist, &fairsched_node_head);
@@ -593,7 +628,7 @@ asmlinkage int sys_fairsched_mknod(unsig
 {
 	int retval;
 
-	if (!capable(CAP_SYS_NICE))
+	if (!capable(CAP_SETVEID))
 		return -EPERM;
 
 	down(&fairsched_mutex);
@@ -637,7 +672,7 @@ asmlinkage int sys_fairsched_rmnod(unsig
 {
 	int retval;
 
-	if (!capable(CAP_SYS_NICE))
+	if (!capable(CAP_SETVEID))
 		return -EPERM;
 
 	down(&fairsched_mutex);
@@ -673,7 +708,7 @@ asmlinkage int sys_fairsched_chwt(unsign
 {
 	int retval;
 
-	if (!capable(CAP_SYS_NICE))
+	if (!capable(CAP_SETVEID))
 		return -EPERM;
 
 	down(&fairsched_mutex);
@@ -719,9 +754,12 @@ int do_fairsched_rate(unsigned int id, i
 			retval = node->rate;
 			break;
 		case 1:
-			node->rate = 0; /* XXX not needed, but was added by
-					   a special patch.  I'm too lazy
-					   to try to find out why  --SAW */
+			node->rate = 0; /* This assignment is not needed
+					   for the kernel code, and it should
+					   not rely on rate being 0 when it's
+					   unset.  This is a band-aid for some
+					   existing tools (don't know which one
+					   exactly).  --SAW */
 			node->rate_limited = 0;
 			node->value = max_value;
 			if (node->delayed) {
@@ -749,7 +787,7 @@ asmlinkage int sys_fairsched_rate(unsign
 {
 	int retval;
 
-	if (!capable(CAP_SYS_NICE))
+	if (!capable(CAP_SETVEID))
 		return -EPERM;
 
 	down(&fairsched_mutex);
@@ -810,7 +848,7 @@ asmlinkage int sys_fairsched_mvpr(pid_t 
 {
 	int retval;
 
-	if (!capable(CAP_SYS_NICE))
+	if (!capable(CAP_SETVEID))
 		return -EPERM;
 
 	down(&fairsched_mutex);
@@ -829,6 +867,9 @@ EXPORT_SYMBOL(sys_fairsched_mvpr);
 /*********************************************************************/
 
 struct fairsched_node_dump {
+#ifdef CONFIG_VE
+	envid_t veid;
+#endif
 	int id;
 	unsigned weight;
 	unsigned rate;
@@ -840,36 +881,53 @@ struct fairsched_node_dump {
 	int nr_ready;
 	int nr_runnable;
 	int nr_pcpu;
+	int nr_tasks, nr_runtasks;
 };
 
 struct fairsched_dump {
-	int len;
+	int len, compat;
 	struct fairsched_node_dump nodes[0];
 };
 
-static struct fairsched_dump *fairsched_do_dump(void)
+static struct fairsched_dump *fairsched_do_dump(int compat)
 {
 	int nr_nodes;
-	int len;
+	int len, i;
 	struct fairsched_dump *dump;
 	struct fairsched_node *node;
 	struct fairsched_node_dump *p;
 	unsigned long flags;
 
 start:
-	nr_nodes = fairsched_nr_nodes + 16;
+	nr_nodes = (ve_is_super(get_exec_env()) ? fairsched_nr_nodes + 16 : 1);
 	len = sizeof(*dump) + nr_nodes * sizeof(dump->nodes[0]);
 	dump = ub_vmalloc(len);
 	if (dump == NULL)
 		goto out;
 
 	spin_lock_irqsave(&fairsched_lock, flags);
-	if (nr_nodes < fairsched_nr_nodes)
+	if (ve_is_super(get_exec_env()) && nr_nodes < fairsched_nr_nodes)
 		goto repeat;
 	p = dump->nodes;
 	list_for_each_entry_reverse(node, &fairsched_node_head, nodelist) {
 		if ((char *)p - (char *)dump >= len)
 			break;
+		p->nr_tasks = 0;
+		p->nr_runtasks = 0;
+#ifdef CONFIG_VE
+		if (!ve_accessible(node->owner_env, get_exec_env()))
+			continue;
+		p->veid = node->owner_env->veid;
+		if (compat) {
+			p->nr_tasks = atomic_read(&node->owner_env->pcounter);
+			for (i = 0; i < NR_CPUS; i++)
+				p->nr_runtasks +=
+					VE_CPU_STATS(node->owner_env, i)
+								->nr_running;
+			if (p->nr_runtasks < 0)
+				p->nr_runtasks = 0;
+		}
+#endif
 		p->id = node->id;
 		p->weight = node->weight;
 		p->rate = node->rate;
@@ -884,6 +942,7 @@ start:
 		p++;
 	}
 	dump->len = p - dump->nodes;
+	dump->compat = compat;
 	spin_unlock_irqrestore(&fairsched_lock, flags);
 
 out:
@@ -897,6 +956,102 @@ repeat:
 
 #define FAIRSCHED_PROC_HEADLINES 2
 
+#if defined(CONFIG_VE)
+/*
+ * File format is dictated by compatibility reasons.
+ */
+static int fairsched_seq_show(struct seq_file *m, void *v)
+{
+	struct fairsched_dump *dump;
+	struct fairsched_node_dump *p;
+	unsigned vid, nid, pid, r;
+
+	dump = m->private;
+	p = (struct fairsched_node_dump *)((unsigned long)v & ~3UL);
+	if (p - dump->nodes < FAIRSCHED_PROC_HEADLINES) {
+		if (p == dump->nodes)
+			seq_printf(m, "Version: 2.6 debug\n");
+		else if (p == dump->nodes + 1)
+			seq_printf(m,
+				       "      veid "
+				       "        id "
+				       "    parent "
+				       "weight "
+				       " rate "
+  				       "tasks "
+				       "  run "
+				       "cpus"
+				       " "
+				       "flg "
+				       "ready "
+				       "           start_tag "
+				       "               value "
+				       "               delay"
+				       "\n");
+	} else {
+		p -= FAIRSCHED_PROC_HEADLINES;
+		vid = nid = pid = 0;
+		r = (unsigned long)v & 3;
+		if (p == dump->nodes) {
+			if (r == 2)
+				nid = p->id;
+		} else {
+			if (!r)
+				nid = p->id;
+			else if (r == 1)
+				vid = pid = p->id;
+			else
+				vid = p->id, nid = 1;
+		}
+		seq_printf(m,
+			       "%10u "
+			       "%10u %10u %6u %5u %5u %5u %4u"
+			       " "
+			       " %c%c %5u %20Lu %20Lu %20Lu"
+			       "\n",
+			       vid,
+			       nid,
+			       pid,
+			       p->weight,
+			       p->rate,
+			       p->nr_tasks,
+			       p->nr_runtasks,
+			       p->nr_pcpu,
+			       p->rate_limited ? 'L' : '.',
+			       p->delayed ? 'D' : '.',
+			       p->nr_ready,
+			       p->start_tag.t,
+			       p->value.v,
+			       p->delay
+			       );
+	}
+
+	return 0;
+}
+
+static void *fairsched_seq_start(struct seq_file *m, loff_t *pos)
+{
+	struct fairsched_dump *dump;
+	unsigned long l;
+
+	dump = m->private;
+	if (*pos >= dump->len * 3 - 1 + FAIRSCHED_PROC_HEADLINES)
+		return NULL;
+	if (*pos < FAIRSCHED_PROC_HEADLINES)
+		return dump->nodes + *pos;
+	/* guess why... */
+	l = (unsigned long)(dump->nodes +
+		((unsigned long)*pos + FAIRSCHED_PROC_HEADLINES * 2 + 1) / 3);
+	l |= ((unsigned long)*pos + FAIRSCHED_PROC_HEADLINES * 2 + 1) % 3;
+	return (void *)l;
+}
+static void *fairsched_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	++*pos;
+	return fairsched_seq_start(m, pos);
+}
+#endif
+
 static int fairsched2_seq_show(struct seq_file *m, void *v)
 {
 	struct fairsched_dump *dump;
@@ -970,6 +1125,14 @@ static void fairsched2_seq_stop(struct s
 {
 }
 
+#ifdef CONFIG_VE
+static struct seq_operations fairsched_seq_op = {
+	.start		= fairsched_seq_start,
+	.next		= fairsched_seq_next,
+	.stop		= fairsched2_seq_stop,
+	.show		= fairsched_seq_show
+};
+#endif
 static struct seq_operations fairsched2_seq_op = {
 	.start		= fairsched2_seq_start,
 	.next		= fairsched2_seq_next,
@@ -980,12 +1143,19 @@ static int fairsched_seq_open(struct ino
 {
 	int ret;
 	struct seq_file *m;
+	int compat;
 
-	ret = seq_open(file, &fairsched2_seq_op);
+#ifdef CONFIG_VE
+	compat = (file->f_dentry->d_name.len == sizeof("fairsched") - 1);
+	ret = seq_open(file, compat ? &fairsched_seq_op : &fairsched2_seq_op);
+#else
+	compat = 0;
+	ret = seq_open(file, fairsched2_seq_op);
+#endif
 	if (ret)
 		return ret;
 	m = file->private_data;
-	m->private = fairsched_do_dump();
+	m->private = fairsched_do_dump(compat);
 	if (m->private == NULL) {
 		seq_release(inode, file);
 		ret = -ENOMEM;
@@ -1065,7 +1235,10 @@ void __init fairsched_init_late(void)
 	fairsched_calibrate();
 	fairsched_recompute_max_latency();
 
-	entry = create_proc_entry("fairsched2", S_IRUGO, NULL);
+	entry = create_proc_glob_entry("fairsched", S_IRUGO, NULL);
+	if (entry)
+		entry->proc_fops = &proc_fairsched_operations;
+	entry = create_proc_glob_entry("fairsched2", S_IRUGO, NULL);
 	if (entry)
 		entry->proc_fops = &proc_fairsched_operations;
 }


More information about the Devel mailing list