As one of the topics presented at Xen Summit 2011 in SC, we proposed a method,
the scheduler rate controller (SRC), to limit excessively frequent scheduling
under some conditions. You can find the slides at
http://www.slideshare.net/xen_com_mgr/9-hui-lvtacklingthemanagementchallengesofserverconsolidationonmulticoresystems
Since then, we have tested it over many rounds on a 2-socket multi-core system
and obtained positive results: it greatly improves performance both with the
consolidation workload SPECvirt_sc2010 and with smaller workloads such as
sysbench and SPECjbb. So I am posting it here for review.
In the Xen scheduling mechanism, the hypervisor kicks the related VCPUs by
raising the schedule softirq while processing external interrupts. Therefore,
if the number of IRQs is very large, scheduling happens more frequently.
Frequent scheduling will
1) bring more overhead for hypervisor and
2) increase cache miss rate.
In our consolidation workload, SPECvirt_sc2010, SR-IOV and iSCSI solutions are
adopted to bypass software emulation, but they bring heavy network traffic.
Correspondingly, 15k scheduling events happened per second on each physical
core, which means the average running time is very short, only 60us. We
proposed SRC in Xen to mitigate this problem.
The performance benefit brought by this patch is very large at peak throughput,
with no impact when the system load is low.
SRC improved SPECvirt performance by 14%.
1)It reduced CPU utilization, which allows more load to be added.
2)Response time (QoS) became better at the same CPU %.
3)The better response time allowed us to push the CPU % at peak performance to
an even higher level (CPU was not saturated in SPECvirt).
SRC reduced the context switch rate significantly, resulting in
2)Smaller Path Length
3)Less cache misses thus lower CPI
4)Better performance for both Guest and Hypervisor sides.
With this patch, according to our SPECvirt_sc2010 results, the performance of
Xen catches up with the other open-source hypervisor.
Signed-off-by: Hui Lv hui.lv@xxxxxxxxx
diff -ruNp xen.org/common/schedule.c xen/common/schedule.c
--- xen.org/common/schedule.c 2011-10-20 03:29:44.000000000 -0400
+++ xen/common/schedule.c 2011-10-23 21:41:14.000000000 -0400
@@ -98,6 +98,31 @@ static inline void trace_runstate_change
__trace_var(event, 1/*tsc*/, sizeof(d), &d);
}
+/*
+ *opt_sched_rate_control: parameter to turn on/off scheduler rate controller
(SRC)
+ *opt_sched_rate_high: scheduling frequency threshold, default value is 50.
+
+ *Suggest to set the value of opt_sched_rate_high larger than 50.
+ *It means if the scheduling frequency number, calculated during
SCHED_SRC_INTERVAL (default 10 millisecond), is larger than
opt_sched_rate_high, SRC works.
+*/
+bool_t opt_sched_rate_control = 0;
+unsigned int opt_sched_rate_high = 50;
+boolean_param("sched_rate_control", opt_sched_rate_control);
+integer_param("sched_rate_high", opt_sched_rate_high);
+
+
+/* The following function is the scheduling rate controller (SRC). It is
triggered when
+ * the frequency of scheduling is excessively high (larger than
opt_sched_rate_high)
+ *
+ * Rules to control the scheduling frequency
+ * 1)if the frequency of scheduling (sd->s_csnum), calculated during the
period of SCHED_SRC_INTERVAL,
+ * is larger than the threshold opt_sched_rate_high, SRC is enabled to work by
setting sd->s_src_control = 1
+ * 2)if SRC works, it returns previous vcpu directly if previous vcpu is still
runnable and not the idle vcpu.
+ * This method can decrease the frequency of scheduling when the scheduling
frequency is excessive.
+*/
+
+void src_controller(struct schedule_data *sd, struct vcpu *prev, s_time_t now);
+
static inline void trace_continue_running(struct vcpu *v)
{
@@ -1033,6 +1058,29 @@ static void vcpu_periodic_timer_work(str
set_timer(&v->periodic_timer, periodic_next_event);
}
+void src_controller(struct schedule_data *sd, struct vcpu *prev, s_time_t now)
+{
+ sd->s_csnum++;
+ if ((now - sd->s_src_loop_begin) >= MILLISECS(SCHED_SRC_INTERVAL))
+ {
+ if (sd->s_csnum >= opt_sched_rate_high)
+ sd->s_src_control = 1;
+ else
+ sd->s_src_control = 0;
+ sd->s_src_loop_begin = now;
+ sd->s_csnum = 0;
+ }
+ if (sd->s_src_control)
+ {
+ if (!is_idle_vcpu(prev) && vcpu_runnable(prev))
+ {
+ perfc_incr(sched_src);
+ return continue_running(prev);
+ }
+ perfc_incr(sched_nosrc);
+ }
+}
+
/*
* The main function
* - deschedule the current domain (scheduler independent).
@@ -1054,6 +1102,8 @@ static void schedule(void)
sd = &this_cpu(schedule_data);
+ if (opt_sched_rate_control)
+ src_controller(sd,prev,now);
/* Update tasklet scheduling status. */
switch ( *tasklet_work )
{
@@ -1197,6 +1247,9 @@ static int cpu_schedule_up(unsigned int
sd->curr = idle_vcpu[cpu];
init_timer(&sd->s_timer, s_timer_fn, NULL, cpu);
atomic_set(&sd->urgent_count, 0);
+ sd->s_csnum=0;
+ sd->s_src_loop_begin=NOW();
+ sd->s_src_control=0;
/* Boot CPU is dealt with later in schedule_init(). */
if ( cpu == 0 )
diff -ruNp xen.org/include/xen/perfc_defn.h xen/include/xen/perfc_defn.h
--- xen.org/include/xen/perfc_defn.h 2011-10-20 03:29:44.000000000 -0400
+++ xen/include/xen/perfc_defn.h 2011-10-23 21:08:28.000000000 -0400
@@ -15,6 +15,8 @@ PERFCOUNTER(ipis, "#IP
PERFCOUNTER(sched_irq, "sched: timer")
PERFCOUNTER(sched_run, "sched: runs through scheduler")
PERFCOUNTER(sched_ctx, "sched: context switches")
+PERFCOUNTER(sched_src, "sched: src triggered")
+PERFCOUNTER(sched_nosrc, "sched: src not triggered")
PERFCOUNTER(vcpu_check, "csched: vcpu_check")
PERFCOUNTER(schedule, "csched: schedule")
diff -ruNp xen.org/include/xen/sched-if.h xen/include/xen/sched-if.h
--- xen.org/include/xen/sched-if.h 2011-10-20 03:29:44.000000000 -0400
+++ xen/include/xen/sched-if.h 2011-10-23 21:20:57.000000000 -0400
@@ -15,6 +15,11 @@ extern struct cpupool *cpupool0;
/* cpus currently in no cpupool */
extern cpumask_t cpupool_free_cpus;
+/*SRC judge whether to trigger scheduling controller based on the comparison
+ *between the scheduling frequency, counted during SCHED_SRC_INTERVAL, and the
threshold opt_sched_rate_high
+ *Suggest to set SCHED_SRC_INTERVAL to 10 (millisecond)
+*/
+#define SCHED_SRC_INTERVAL 10
/*
* In order to allow a scheduler to remap the lock->cpu mapping,
@@ -32,6 +37,9 @@ struct schedule_data {
struct vcpu *curr; /* current task */
void *sched_priv;
struct timer s_timer; /* scheduling timer */
+ int s_csnum; /* scheduling number based on
last period */
+ s_time_t s_src_loop_begin; /* SRC counting start point */
+ bool_t s_src_control; /*indicate whether src should
be triggered */
atomic_t urgent_count; /* how many urgent vcpus */
};
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|