# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1274192025 -3600
# Node ID 8d7eba5be1d83f80ee2f4633d5ad7d00c4ef55a7
# Parent c1b6647c682816f30f2b6c13da62c3b6cc617ff7
x86: Dynamically allocate percpu data area when a CPU comes online.
At the same time, each data area now starts life zeroed.
Signed-off-by: Keir Fraser <keir.fraser@xxxxxxxxxx>
---
xen/arch/x86/Makefile | 1
xen/arch/x86/irq.c | 9 +--
xen/arch/x86/nmi.c | 2
xen/arch/x86/percpu.c | 69 ++++++++++++++++++++++++
xen/arch/x86/setup.c | 75 +--------------------------
xen/arch/x86/smpboot.c | 2
xen/arch/x86/tboot.c | 10 +--
xen/arch/x86/traps.c | 10 +++
xen/arch/x86/x86_32/supervisor_mode_kernel.S | 2
xen/arch/x86/xen.lds.S | 31 +++--------
xen/common/rcupdate.c | 2
xen/common/sched_credit.c | 2
xen/drivers/cpufreq/utility.c | 4 +
xen/include/asm-x86/percpu.h | 11 ++-
xen/include/asm-x86/x86_32/asm_defns.h | 2
xen/include/asm-x86/x86_64/asm_defns.h | 2
xen/xsm/flask/avc.c | 2
17 files changed, 119 insertions(+), 117 deletions(-)
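Before the hunks, a note on the core addressing change: previously every CPU's copy of a per-cpu variable sat in a fixed 8KiB slot (PERCPU_SHIFT == 13) inside one statically sized array, so its address was base + (cpu << PERCPU_SHIFT); now each CPU that comes online gets its own heap-allocated block, found through the __per_cpu_offset[] table. A minimal standalone sketch of the two schemes (the base address and offset below are made-up illustrative values, not Xen's):

    #include <stdio.h>

    #define PERCPU_SHIFT 13                    /* old scheme: fixed 8KiB slots */
    static unsigned long __per_cpu_offset[4];  /* new scheme: filled at CPU-up */

    int main(void)
    {
        unsigned long base = 0x100000;         /* stand-in for &per_cpu__var   */
        unsigned int cpu = 2;

        /* Old: every CPU's copy lives at base + (cpu << PERCPU_SHIFT). */
        printf("old: %#lx\n", base + ((unsigned long)cpu << PERCPU_SHIFT));

        /* New: init_percpu_area() records p - __per_cpu_start for each CPU;
         * accesses become base + __per_cpu_offset[cpu]. */
        __per_cpu_offset[cpu] = 0x40000;
        printf("new: %#lx\n", base + __per_cpu_offset[cpu]);
        return 0;
    }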
diff -r c1b6647c6828 -r 8d7eba5be1d8 xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile Tue May 18 15:05:54 2010 +0100
+++ b/xen/arch/x86/Makefile Tue May 18 15:13:45 2010 +0100
@@ -36,6 +36,7 @@ obj-y += nmi.o
obj-y += nmi.o
obj-y += numa.o
obj-y += pci.o
+obj-y += percpu.o
obj-y += physdev.o
obj-y += setup.o
obj-y += shutdown.o
diff -r c1b6647c6828 -r 8d7eba5be1d8 xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c Tue May 18 15:05:54 2010 +0100
+++ b/xen/arch/x86/irq.c Tue May 18 15:13:45 2010 +0100
@@ -50,9 +50,7 @@ static struct timer *__read_mostly irq_g
static DEFINE_SPINLOCK(vector_lock);
-DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
- [0 ... NR_VECTORS - 1] = -1
-};
+DEFINE_PER_CPU(vector_irq_t, vector_irq);
DEFINE_PER_CPU(struct cpu_user_regs *, __irq_regs);
@@ -269,7 +267,10 @@ int init_irq_data(void)
{
struct irq_desc *desc;
struct irq_cfg *cfg;
- int irq;
+ int irq, vector;
+
+ for (vector = 0; vector < NR_VECTORS; ++vector)
+ this_cpu(vector_irq)[vector] = -1;
irq_desc = xmalloc_array(struct irq_desc, nr_irqs);
irq_cfg = xmalloc_array(struct irq_cfg, nr_irqs);
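Because the per-cpu template now lives in .bss (see the xen.lds.S hunk below), every per-cpu variable starts out zeroed and can no longer carry a static initializer; any non-zero initial value, such as vector_irq's -1 fill above, moves into runtime code. A compile-clean sketch of that pattern, with stand-in types rather than Xen's:

    #define NR_VECTORS 256
    typedef int vector_irq_t[NR_VECTORS];

    static vector_irq_t vector_irq;       /* in .bss: starts as all zeroes */

    static void init_vectors(void)        /* mirrors init_irq_data() above */
    {
        unsigned int v;
        for (v = 0; v < NR_VECTORS; v++)
            vector_irq[v] = -1;           /* non-zero value set at runtime */
    }

    int main(void)
    {
        init_vectors();
        return vector_irq[0] == -1 ? 0 : 1;
    }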
diff -r c1b6647c6828 -r 8d7eba5be1d8 xen/arch/x86/nmi.c
--- a/xen/arch/x86/nmi.c Tue May 18 15:05:54 2010 +0100
+++ b/xen/arch/x86/nmi.c Tue May 18 15:13:45 2010 +0100
@@ -230,7 +230,7 @@ static inline void write_watchdog_counte
do_div(count, nmi_hz);
if(descr)
- Dprintk("setting %s to -0x%08Lx\n", descr, count);
+ Dprintk("setting %s to -0x%"PRIx64"\n", descr, count);
wrmsrl(nmi_perfctr_msr, 0 - count);
}
diff -r c1b6647c6828 -r 8d7eba5be1d8 xen/arch/x86/percpu.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/percpu.c Tue May 18 15:13:45 2010 +0100
@@ -0,0 +1,69 @@
+#include <xen/config.h>
+#include <xen/percpu.h>
+#include <xen/cpu.h>
+#include <xen/init.h>
+#include <xen/mm.h>
+
+unsigned long __per_cpu_offset[NR_CPUS];
+#define INVALID_PERCPU_AREA (-(long)__per_cpu_start)
+#define PERCPU_ORDER (get_order_from_bytes(__per_cpu_data_end-__per_cpu_start))
+
+void __init percpu_init_areas(void)
+{
+ unsigned int cpu;
+ for ( cpu = 1; cpu < NR_CPUS; cpu++ )
+ __per_cpu_offset[cpu] = INVALID_PERCPU_AREA;
+}
+
+static int init_percpu_area(unsigned int cpu)
+{
+ char *p;
+ if ( __per_cpu_offset[cpu] != INVALID_PERCPU_AREA )
+ return 0;
+ if ( (p = alloc_xenheap_pages(PERCPU_ORDER, 0)) == NULL )
+ return -ENOMEM;
+ memset(p, 0, __per_cpu_data_end - __per_cpu_start);
+ __per_cpu_offset[cpu] = p - __per_cpu_start;
+ return 0;
+}
+
+static void free_percpu_area(unsigned int cpu)
+{
+ char *p = __per_cpu_start + __per_cpu_offset[cpu];
+ free_xenheap_pages(p, PERCPU_ORDER);
+ __per_cpu_offset[cpu] = INVALID_PERCPU_AREA;
+}
+
+static int cpu_percpu_callback(
+ struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (unsigned long)hcpu;
+ int rc = 0;
+
+ switch ( action )
+ {
+ case CPU_UP_PREPARE:
+ rc = init_percpu_area(cpu);
+ break;
+ case CPU_UP_CANCELED:
+ case CPU_DEAD:
+ free_percpu_area(cpu);
+ break;
+ default:
+ break;
+ }
+
+ return !rc ? NOTIFY_DONE : notifier_from_errno(rc);
+}
+
+static struct notifier_block cpu_percpu_nfb = {
+ .notifier_call = cpu_percpu_callback,
+ .priority = 100 /* highest priority */
+};
+
+static int __init percpu_presmp_init(void)
+{
+ register_cpu_notifier(&cpu_percpu_nfb);
+ return 0;
+}
+presmp_initcall(percpu_presmp_init);
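init_percpu_area() above sizes each allocation with get_order_from_bytes(), i.e. it rounds __per_cpu_data_end - __per_cpu_start up to a power-of-two number of pages, then memset()s the block so the area indeed starts life zeroed. A sketch of that rounding (the helper imitates Xen's get_order_from_bytes; the 9KiB figure is purely illustrative):

    #include <stdio.h>

    #define PAGE_SHIFT 12

    static unsigned int order_from_bytes(unsigned long size)
    {
        unsigned int order = 0;
        while ((1UL << (order + PAGE_SHIFT)) < size)
            order++;                      /* smallest order covering size */
        return order;
    }

    int main(void)
    {
        /* e.g. a 9KiB per-cpu template -> order 2, i.e. four 4KiB pages. */
        printf("order = %u\n", order_from_bytes(9 * 1024));
        return 0;
    }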
diff -r c1b6647c6828 -r 8d7eba5be1d8 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c Tue May 18 15:05:54 2010 +0100
+++ b/xen/arch/x86/setup.c Tue May 18 15:13:45 2010 +0100
@@ -107,12 +107,6 @@ unsigned long __read_mostly xenheap_phys
unsigned long __read_mostly xenheap_phys_end;
#endif
-DEFINE_PER_CPU_READ_MOSTLY(struct desc_struct *, gdt_table) = boot_cpu_gdt_table;
-#ifdef CONFIG_COMPAT
-DEFINE_PER_CPU_READ_MOSTLY(struct desc_struct *, compat_gdt_table)
- = boot_cpu_compat_gdt_table;
-#endif
-
DEFINE_PER_CPU(struct tss_struct, init_tss);
char __attribute__ ((__section__(".bss.stack_aligned")))
cpu0_stack[STACK_SIZE];
@@ -192,46 +186,6 @@ static void free_xen_data(char *s, char
}
extern char __init_begin[], __init_end[], __bss_start[];
-extern char __per_cpu_start[], __per_cpu_data_end[];
-
-static void __init percpu_init_areas(void)
-{
- unsigned int i, data_size = __per_cpu_data_end - __per_cpu_start;
-
- BUG_ON((unsigned long)__per_cpu_start & ~PAGE_MASK);
- BUG_ON((unsigned long)__per_cpu_data_end & ~PAGE_MASK);
- BUG_ON(data_size > PERCPU_SIZE);
-
- /* Initialise per-cpu data area for all possible secondary CPUs. */
- for ( i = 1; i < NR_CPUS; i++ )
- memcpy(__per_cpu_start + (i << PERCPU_SHIFT),
- __per_cpu_start,
- data_size);
-}
-
-static void __init percpu_free_unused_areas(void)
-{
- unsigned int i, data_size = __per_cpu_data_end - __per_cpu_start;
- unsigned int first_unused;
-
- /* Find first 'impossible' secondary CPU. */
- for ( i = 1; i < NR_CPUS; i++ )
- if ( !cpu_possible(i) )
- break;
- first_unused = i;
-
- /* Check that there are no holes in cpu_possible_map. */
- for ( ; i < NR_CPUS; i++ )
- BUG_ON(cpu_possible(i));
-
- /* Free all unused per-cpu data areas. */
- free_xen_data(&__per_cpu_start[first_unused << PERCPU_SHIFT], __bss_start);
-
- if ( data_size != PERCPU_SIZE )
- for ( i = 0; i < first_unused; i++ )
- free_xen_data(&__per_cpu_start[(i << PERCPU_SHIFT) + data_size],
- &__per_cpu_start[(i+1) << PERCPU_SHIFT]);
-}
static void __init init_idle_domain(void)
{
@@ -1013,8 +967,6 @@ void __init __start_xen(unsigned long mb
init_apic_mappings();
- percpu_free_unused_areas();
-
init_IRQ();
xsm_init(&initrdidx, mbi, initial_images_start);
@@ -1200,7 +1152,7 @@ int xen_in_range(unsigned long mfn)
paddr_t start, end;
int i;
- enum { region_s3, region_text, region_percpu, region_bss, nr_regions };
+ enum { region_s3, region_text, region_bss, nr_regions };
static struct {
paddr_t s, e;
} xen_regions[nr_regions];
@@ -1214,10 +1166,6 @@ int xen_in_range(unsigned long mfn)
/* hypervisor code + data */
xen_regions[region_text].s =__pa(&_stext);
xen_regions[region_text].e = __pa(&__init_begin);
- /* per-cpu data */
- xen_regions[region_percpu].s = __pa(__per_cpu_start);
- xen_regions[region_percpu].e = xen_regions[region_percpu].s +
- (((paddr_t)last_cpu(cpu_possible_map) + 1) << PERCPU_SHIFT);
/* bss */
xen_regions[region_bss].s = __pa(&__bss_start);
xen_regions[region_bss].e = __pa(&_end);
@@ -1226,25 +1174,8 @@ int xen_in_range(unsigned long mfn)
start = (paddr_t)mfn << PAGE_SHIFT;
end = start + PAGE_SIZE;
for ( i = 0; i < nr_regions; i++ )
- {
- if ( (start >= xen_regions[i].e) || (end <= xen_regions[i].s) )
- continue;
-
- if ( i == region_percpu )
- {
- /*
- * Check if the given page falls into an unused (and therefore
- * freed) section of the per-cpu data space. Each CPU's data
- * area is page-aligned, so the following arithmetic is safe.
- */
- unsigned int off = ((start - __pa(__per_cpu_start))
- & (PERCPU_SIZE - 1));
- unsigned int data_sz = __per_cpu_data_end - __per_cpu_start;
- return off < data_sz;
- }
-
- return 1;
- }
+ if ( (start < xen_regions[i].e) && (end > xen_regions[i].s) )
+ return 1;
return 0;
}
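With the freed-hole special case for the per-cpu region gone, xen_in_range() reduces to a plain interval-overlap test: half-open ranges [s1,e1) and [s2,e2) intersect iff s1 < e2 and e1 > s2. A standalone check of that predicate:

    #include <assert.h>

    static int overlaps(unsigned long s1, unsigned long e1,
                        unsigned long s2, unsigned long e2)
    {
        return (s1 < e2) && (e1 > s2);
    }

    int main(void)
    {
        assert( overlaps(0x1000, 0x2000, 0x1800, 0x3000)); /* partial overlap */
        assert(!overlaps(0x1000, 0x2000, 0x2000, 0x3000)); /* merely adjacent */
        assert( overlaps(0x1000, 0x2000, 0x0000, 0x5000)); /* fully contained */
        return 0;
    }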
diff -r c1b6647c6828 -r 8d7eba5be1d8 xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c Tue May 18 15:05:54 2010 +0100
+++ b/xen/arch/x86/smpboot.c Tue May 18 15:13:45 2010 +0100
@@ -487,7 +487,7 @@ static int wakeup_secondary_cpu(int phys
for ( i = 0; i < num_starts; i++ )
{
- Dprintk("Sending STARTUP #%d.\n",j);
+ Dprintk("Sending STARTUP #%d.\n", i+1);
apic_read_around(APIC_SPIV);
apic_write(APIC_ESR, 0);
apic_read(APIC_ESR);
diff -r c1b6647c6828 -r 8d7eba5be1d8 xen/arch/x86/tboot.c
--- a/xen/arch/x86/tboot.c Tue May 18 15:05:54 2010 +0100
+++ b/xen/arch/x86/tboot.c Tue May 18 15:13:45 2010 +0100
@@ -357,7 +357,7 @@ void tboot_shutdown(uint32_t shutdown_ty
/*
* Xen regions for tboot to MAC
*/
- g_tboot_shared->num_mac_regions = 4;
+ g_tboot_shared->num_mac_regions = 3;
/* S3 resume code (and other real mode trampoline code) */
g_tboot_shared->mac_regions[0].start = bootsym_phys(trampoline_start);
g_tboot_shared->mac_regions[0].size = bootsym_phys(trampoline_end) -
@@ -366,13 +366,9 @@ void tboot_shutdown(uint32_t shutdown_ty
g_tboot_shared->mac_regions[1].start = (uint64_t)__pa(&_stext);
g_tboot_shared->mac_regions[1].size = __pa(&__init_begin) -
__pa(&_stext);
- /* per-cpu data */
- g_tboot_shared->mac_regions[2].start = (uint64_t)__pa(&__per_cpu_start);
- g_tboot_shared->mac_regions[2].size =
- (((uint64_t)last_cpu(cpu_possible_map) + 1) << PERCPU_SHIFT);
/* bss */
- g_tboot_shared->mac_regions[3].start = (uint64_t)__pa(&__bss_start);
- g_tboot_shared->mac_regions[3].size = __pa(&_end) - __pa(&__bss_start);
+ g_tboot_shared->mac_regions[2].start = (uint64_t)__pa(&__bss_start);
+ g_tboot_shared->mac_regions[2].size = __pa(&_end) - __pa(&__bss_start);
/*
* MAC domains and other Xen memory
diff -r c1b6647c6828 -r 8d7eba5be1d8 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Tue May 18 15:05:54 2010 +0100
+++ b/xen/arch/x86/traps.c Tue May 18 15:13:45 2010 +0100
@@ -82,6 +82,11 @@ string_param("nmi", opt_nmi);
string_param("nmi", opt_nmi);
DEFINE_PER_CPU_READ_MOSTLY(u32, ler_msr);
+
+DEFINE_PER_CPU_READ_MOSTLY(struct desc_struct *, gdt_table);
+#ifdef CONFIG_COMPAT
+DEFINE_PER_CPU_READ_MOSTLY(struct desc_struct *, compat_gdt_table);
+#endif
/* Master table, used by CPU0. */
idt_entry_t idt_table[IDT_ENTRIES];
@@ -3290,6 +3295,11 @@ void __init trap_init(void)
/* CPU0 uses the master IDT. */
idt_tables[0] = idt_table;
+ this_cpu(gdt_table) = boot_cpu_gdt_table;
+#ifdef CONFIG_COMPAT
+ this_cpu(compat_gdt_table) = boot_cpu_compat_gdt_table;
+#endif
+
percpu_traps_init();
cpu_init();
diff -r c1b6647c6828 -r 8d7eba5be1d8 xen/arch/x86/x86_32/supervisor_mode_kernel.S
--- a/xen/arch/x86/x86_32/supervisor_mode_kernel.S Tue May 18 15:05:54 2010 +0100
+++ b/xen/arch/x86/x86_32/supervisor_mode_kernel.S Tue May 18 15:13:45 2010 +0100
@@ -102,7 +102,7 @@ ENTRY(fixup_ring0_guest_stack)
movl $PER_CPU_GDT_ENTRY*8,%ecx
lsll %ecx,%ecx
- shll $PERCPU_SHIFT,%ecx
+ movl __per_cpu_offset(,%ecx,4),%ecx
addl $per_cpu__init_tss,%ecx
# Load Xen stack from TSS.
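The ring-0 stack fixup reads the CPU id out of the PER_CPU_GDT_ENTRY segment limit (the lsll above) and used to turn it into a per-cpu address by shifting; it now indexes the 4-byte entries of __per_cpu_offset[] instead. Roughly, in C — a reading aid for the assembly, not real code, with the GDT-limit read replaced by a plain parameter:

    #include <stdio.h>

    struct tss_struct { unsigned long esp0; /* ... */ };

    static struct tss_struct per_cpu__init_tss;         /* template copy     */
    static unsigned long __per_cpu_offset[2];           /* stays 0 for CPU0  */

    static struct tss_struct *cpu_tss(unsigned int cpu) /* cpu: via lsll     */
    {
        /* movl __per_cpu_offset(,%ecx,4),%ecx ; addl $per_cpu__init_tss,%ecx */
        return (struct tss_struct *)
            ((char *)&per_cpu__init_tss + __per_cpu_offset[cpu]);
    }

    int main(void)
    {
        printf("CPU0 TSS at %p\n", (void *)cpu_tss(0));
        return 0;
    }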
diff -r c1b6647c6828 -r 8d7eba5be1d8 xen/arch/x86/xen.lds.S
--- a/xen/arch/x86/xen.lds.S Tue May 18 15:05:54 2010 +0100
+++ b/xen/arch/x86/xen.lds.S Tue May 18 15:13:45 2010 +0100
@@ -59,7 +59,7 @@ SECTIONS
CONSTRUCTORS
} :text
- . = ALIGN(128);
+ . = ALIGN(SMP_CACHE_BYTES);
.data.read_mostly : {
*(.data.read_mostly)
} :text
@@ -71,7 +71,7 @@ SECTIONS
__lock_profile_end = .;
#endif
- . = ALIGN(4096); /* Init code and data */
+ . = ALIGN(PAGE_SIZE); /* Init code and data */
__init_begin = .;
.init.text : {
_sinittext = .;
@@ -99,33 +99,22 @@ SECTIONS
*(.xsm_initcall.init)
__xsm_initcall_end = .;
} :text
- . = ALIGN(PAGE_SIZE);
+ . = ALIGN(STACK_SIZE);
__init_end = .;
- __per_cpu_shift = PERCPU_SHIFT; /* kdump assist */
- .data.percpu : {
- __per_cpu_start = .;
- *(.data.percpu)
- . = ALIGN(SMP_CACHE_BYTES);
- *(.data.percpu.read_mostly)
- . = ALIGN(PAGE_SIZE);
- __per_cpu_data_end = .;
- } :text
- . = __per_cpu_start + (NR_CPUS << PERCPU_SHIFT);
- . = ALIGN(PAGE_SIZE);
-
- /*
- * Do not insert anything here - the unused portion of .data.percpu
- * will be freed/unmapped up to __bss_start (defined below).
- */
-
.bss : { /* BSS */
- . = ALIGN(STACK_SIZE);
__bss_start = .;
*(.bss.stack_aligned)
. = ALIGN(PAGE_SIZE);
*(.bss.page_aligned)
*(.bss)
+ . = ALIGN(SMP_CACHE_BYTES);
+ __per_cpu_start = .;
+ *(.bss.percpu)
+ . = ALIGN(SMP_CACHE_BYTES);
+ *(.bss.percpu.read_mostly)
+ . = ALIGN(SMP_CACHE_BYTES);
+ __per_cpu_data_end = .;
} :text
_end = . ;
diff -r c1b6647c6828 -r 8d7eba5be1d8 xen/common/rcupdate.c
--- a/xen/common/rcupdate.c Tue May 18 15:05:54 2010 +0100
+++ b/xen/common/rcupdate.c Tue May 18 15:13:45 2010 +0100
@@ -53,7 +53,7 @@ struct rcu_ctrlblk rcu_ctrlblk = {
.cpumask = CPU_MASK_NONE,
};
-DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L };
+DEFINE_PER_CPU(struct rcu_data, rcu_data);
static int blimit = 10;
static int qhimark = 10000;
diff -r c1b6647c6828 -r 8d7eba5be1d8 xen/common/sched_credit.c
--- a/xen/common/sched_credit.c Tue May 18 15:05:54 2010 +0100
+++ b/xen/common/sched_credit.c Tue May 18 15:13:45 2010 +0100
@@ -231,7 +231,7 @@ static int opt_tickle_one_idle __read_mo
static int opt_tickle_one_idle __read_mostly = 1;
boolean_param("tickle_one_idle_cpu", opt_tickle_one_idle);
-DEFINE_PER_CPU(unsigned int, last_tickle_cpu) = 0;
+DEFINE_PER_CPU(unsigned int, last_tickle_cpu);
static inline void
__runq_tickle(unsigned int cpu, struct csched_vcpu *new)
diff -r c1b6647c6828 -r 8d7eba5be1d8 xen/drivers/cpufreq/utility.c
--- a/xen/drivers/cpufreq/utility.c Tue May 18 15:05:54 2010 +0100
+++ b/xen/drivers/cpufreq/utility.c Tue May 18 15:13:45 2010 +0100
@@ -36,7 +36,7 @@ struct processor_pminfo *__read_mostly p
struct processor_pminfo *__read_mostly processor_pminfo[NR_CPUS];
struct cpufreq_policy *__read_mostly cpufreq_cpu_policy[NR_CPUS];
-DEFINE_PER_CPU(spinlock_t, cpufreq_statistic_lock) = SPIN_LOCK_UNLOCKED;
+DEFINE_PER_CPU(spinlock_t, cpufreq_statistic_lock);
/*********************************************************************
* Px STATISTIC INFO *
@@ -94,6 +94,8 @@ int cpufreq_statistic_init(unsigned int
const struct processor_pminfo *pmpt = processor_pminfo[cpuid];
spinlock_t *cpufreq_statistic_lock =
&per_cpu(cpufreq_statistic_lock, cpuid);
+
+ spin_lock_init(cpufreq_statistic_lock);
if ( !pmpt )
return -EINVAL;
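The same .bss consequence as in irq.c shows up here: SPIN_LOCK_UNLOCKED can no longer serve as a static initializer, so the lock is now set up explicitly before first use on the statistic-init path. The shape of that pattern, with stand-in types rather than Xen's:

    typedef struct { volatile int held; } spinlock_t;

    static spinlock_t per_cpu_lock[4];        /* .bss: zero bytes, but not */
                                              /* formally initialized      */
    static void spin_lock_init(spinlock_t *l)
    {
        l->held = 0;                          /* explicit runtime init     */
    }

    int statistic_init(unsigned int cpu)
    {
        spin_lock_init(&per_cpu_lock[cpu]);   /* before first use, as the  */
        return 0;                             /* hunk above now does       */
    }

    int main(void)
    {
        return statistic_init(0);
    }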
diff -r c1b6647c6828 -r 8d7eba5be1d8 xen/include/asm-x86/percpu.h
--- a/xen/include/asm-x86/percpu.h Tue May 18 15:05:54 2010 +0100
+++ b/xen/include/asm-x86/percpu.h Tue May 18 15:13:45 2010 +0100
@@ -1,17 +1,20 @@
#ifndef __X86_PERCPU_H__
#define __X86_PERCPU_H__
-#define PERCPU_SHIFT 13
-#define PERCPU_SIZE (1UL << PERCPU_SHIFT)
+#ifndef __ASSEMBLY__
+extern char __per_cpu_start[], __per_cpu_data_end[];
+extern unsigned long __per_cpu_offset[NR_CPUS];
+void percpu_init_areas(void);
+#endif
/* Separate out the type, so (int[3], foo) works. */
#define __DEFINE_PER_CPU(type, name, suffix) \
- __attribute__((__section__(".data.percpu" #suffix))) \
+ __attribute__((__section__(".bss.percpu" #suffix))) \
__typeof__(type) per_cpu_##name
/* var is in discarded region: offset to particular copy we want */
#define per_cpu(var, cpu) \
- (*RELOC_HIDE(&per_cpu__##var, ((unsigned int)(cpu))<<PERCPU_SHIFT))
+ (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]))
#define __get_cpu_var(var) \
(per_cpu(var, smp_processor_id()))
diff -r c1b6647c6828 -r 8d7eba5be1d8 xen/include/asm-x86/x86_32/asm_defns.h
--- a/xen/include/asm-x86/x86_32/asm_defns.h Tue May 18 15:05:54 2010 +0100
+++ b/xen/include/asm-x86/x86_32/asm_defns.h Tue May 18 15:13:45 2010 +0100
@@ -82,7 +82,7 @@ 1: addl $4,%esp;
#define PERFC_INCR(_name,_idx,_cur) \
pushl _cur; \
movl VCPU_processor(_cur),_cur; \
- shll $PERCPU_SHIFT,_cur; \
+ movl __per_cpu_offset(,_cur,4),_cur; \
incl per_cpu__perfcounters+_name*4(_cur,_idx,4);\
popl _cur
#else
diff -r c1b6647c6828 -r 8d7eba5be1d8 xen/include/asm-x86/x86_64/asm_defns.h
--- a/xen/include/asm-x86/x86_64/asm_defns.h Tue May 18 15:05:54 2010 +0100
+++ b/xen/include/asm-x86/x86_64/asm_defns.h Tue May 18 15:13:45 2010 +0100
@@ -68,7 +68,7 @@ 1: addq $8,%rsp;
movslq VCPU_processor(_cur),_cur; \
pushq %rdx; \
leaq per_cpu__perfcounters(%rip),%rdx; \
- shlq $PERCPU_SHIFT,_cur; \
+ movq __per_cpu_offset(,_cur,8),_cur; \
addq %rdx,_cur; \
popq %rdx; \
incl _name*4(_cur,_idx,4); \
diff -r c1b6647c6828 -r 8d7eba5be1d8 xen/xsm/flask/avc.c
--- a/xen/xsm/flask/avc.c Tue May 18 15:05:54 2010 +0100
+++ b/xen/xsm/flask/avc.c Tue May 18 15:13:45 2010 +0100
@@ -118,7 +118,7 @@ unsigned int avc_cache_threshold = AVC_D
unsigned int avc_cache_threshold = AVC_DEF_CACHE_THRESHOLD;
#ifdef FLASK_AVC_STATS
-DEFINE_PER_CPU(struct avc_cache_stats, avc_cache_stats) = { 0 };
+DEFINE_PER_CPU(struct avc_cache_stats, avc_cache_stats);
#endif
static struct avc_cache avc_cache;