How much of a win is this? The per-cpu timer_deadline is already calculated
based on slop, so doing it again in hpet.c is kind of like double dipping. I
think we can validly do some improvements in hpet.c however: e.g., by
exposing a per-cpu timer deadline range (deadline_start,deadline_end) and
setting up HPET to fire on the earliest deadline_end. Then broadcast to all
CPUs with deadline_start < NOW() when the HPET fires.
The deadline range would be computed as follows in timer.c, in the
!ts->overflow case in the softirq handler:
timer_deadline_start = start (i.e., same as timer_deadline today)
timer_deadline_end = end
In the (incredibly uncommon) ts->overflow case we can just set
timer_deadline_start=timer_deadline_end=timer_deadline.
Perhaps give that a go: I would find that more acceptable at least, but all
of this really depends on what benchmark improvements you are seeing.
-- Keir
On 09/12/2009 09:33, "Wei, Gang" <gang.wei@xxxxxxxxx> wrote:
> x86: add a range for hpet broadcast
>
> Apply a range timer like range to hpet broadcast, so that timer expires within
> timer_slop ns across idle CPUs are capable to be aligned to reduce hpet intrs,
> and save idle power.
>
> Signed-off-by: Wei Gang <gang.wei@xxxxxxxxx>
>
> diff -r 7f611de6b93c xen/arch/x86/hpet.c
> --- a/xen/arch/x86/hpet.c Tue Dec 08 14:14:27 2009 +0000
> +++ b/xen/arch/x86/hpet.c Wed Dec 09 16:13:29 2009 +0800
> @@ -176,7 +176,7 @@ static void handle_hpet_broadcast(struct
> static void handle_hpet_broadcast(struct hpet_event_channel *ch)
> {
> cpumask_t mask;
> - s_time_t now, next_event;
> + s_time_t now, next_event, delay_event;
> int cpu;
>
> spin_lock_irq(&ch->lock);
> @@ -195,6 +195,16 @@ again:
> else if ( per_cpu(timer_deadline, cpu) < next_event )
> next_event = per_cpu(timer_deadline, cpu);
> }
> +
> + /* find the largest event in (next_event,next_event + timer_slop] */
> + delay_event = next_event;
> + for_each_cpu_mask(cpu, ch->cpumask)
> + {
> + if ( per_cpu(timer_deadline, cpu) > delay_event &&
> + per_cpu(timer_deadline, cpu) - timer_slop <= next_event )
> + delay_event = per_cpu(timer_deadline, cpu);
> + }
> + next_event = delay_event;
>
> /* wakeup the cpus which have an expired event. */
> evt_do_broadcast(mask);
> @@ -649,7 +659,7 @@ void hpet_broadcast_enter(void)
> cpu_set(cpu, ch->cpumask);
>
> /* reprogram if current cpu expire time is nearer */
> - if ( this_cpu(timer_deadline) < ch->next_event )
> + if ( this_cpu(timer_deadline) + timer_slop < ch->next_event )
> reprogram_hpet_evt_channel(ch, this_cpu(timer_deadline), NOW(), 1);
>
> spin_unlock(&ch->lock);
> diff -r 7f611de6b93c xen/common/timer.c
> --- a/xen/common/timer.c Tue Dec 08 14:14:27 2009 +0000
> +++ b/xen/common/timer.c Wed Dec 09 16:13:29 2009 +0800
> @@ -25,7 +25,7 @@
> * We pull handlers off the timer list this far in future,
> * rather than reprogramming the time hardware.
> */
> -static unsigned int timer_slop __read_mostly = 50000; /* 50 us */
> +unsigned int timer_slop __read_mostly = 50000; /* 50 us */
> integer_param("timer_slop", timer_slop);
>
> struct timers {
> diff -r 7f611de6b93c xen/include/xen/timer.h
> --- a/xen/include/xen/timer.h Tue Dec 08 14:14:27 2009 +0000
> +++ b/xen/include/xen/timer.h Wed Dec 09 16:13:29 2009 +0800
> @@ -125,6 +125,9 @@ extern int reprogram_timer(s_time_t time
> /* calculate the aligned first tick time for a given periodic timer */
> extern s_time_t align_timer(s_time_t firsttick, uint64_t period);
>
> +/* time slop for timer expiring */
> +extern unsigned int timer_slop;
> +
> #endif /* _TIMER_H_ */
>
> /*
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|