[Xen-devel] [patch 40/44] xen: Place vcpu_info structure into per-cpu memory

An experimental patch for Xen allows guests to place their vcpu_info
structs anywhere.  We try to use this to place the vcpu_info into the
PDA, which allows direct access.

If this works, then switch to using direct access operations for
irq_enable, irq_disable, save_fl, restore_fl and read_cr2.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xxxxxxxxxxxxx>
Cc: Chris Wright <chrisw@xxxxxxxxxxxx>
Cc: Keir Fraser <keir@xxxxxxxxxxxxx>
---
 arch/i386/xen/enlighten.c    |  156 ++++++++++++++++++++++++++++++++++++++++--
 arch/i386/xen/setup.c        |    8 --
 arch/i386/xen/smp.c          |    5 -
 arch/i386/xen/xen-ops.h      |    2 
 include/xen/interface/vcpu.h |   13 +++
 5 files changed, 166 insertions(+), 18 deletions(-)

===================================================================
--- a/arch/i386/xen/enlighten.c
+++ b/arch/i386/xen/enlighten.c
@@ -61,9 +61,63 @@ struct start_info *xen_start_info;
 struct start_info *xen_start_info;
 EXPORT_SYMBOL_GPL(xen_start_info);
 
-void xen_vcpu_setup(int cpu)
-{
+static /* __initdata */ struct shared_info dummy_shared_info;
+
+/*
+ * Point at some empty memory to start with. We map the real shared_info
+ * page as soon as fixmap is up and running.
+ */
+struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info;
+
+/*
+ * Flag to determine whether vcpu info placement is available on all
+ * VCPUs.  We assume it is to start with, and then set it to zero on
+ * the first failure.  This is because it can succeed on some VCPUs
+ * and not others, since it can involve hypervisor memory allocation,
+ * or because the guest failed to guarantee all the appropriate
+ * constraints on all VCPUs (i.e. the buffer can't cross a page boundary).
+ *
+ * Note that any particular CPU may be using a placed vcpu structure,
+ * but we can only optimise if they all are.
+ *
+ * 0: not available, 1: available
+ */
+static int have_vcpu_info_placement = 1;
+
+static void __init xen_vcpu_setup(int cpu)
+{
+       struct vcpu_register_vcpu_info info;
+       int err;
+       struct vcpu_info *vcpup;
+
        per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
+
+       if (!have_vcpu_info_placement)
+               return;         /* already tested, not available */
+
+       vcpup = &per_cpu(xen_vcpu_info, cpu);
+
+       info.mfn = virt_to_mfn(vcpup);
+       info.offset = offset_in_page(vcpup);
+
+       printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %x, offset %d\n",
+              cpu, vcpup, info.mfn, info.offset);
+
+       /* Check to see if the hypervisor will put the vcpu_info
+          structure where we want it, which allows direct access via
+          a percpu-variable. */
+       err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info);
+
+       if (err) {
+               printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err);
+               have_vcpu_info_placement = 0;
+       } else {
+               /* This cpu is using the registered vcpu info, even if
+                  later ones fail to. */
+               per_cpu(xen_vcpu, cpu) = vcpup;
+               printk(KERN_DEBUG "cpu %d using vcpu_info at %p\n",
+                      cpu, vcpup);
+       }
 }
 
 static void __init xen_banner(void)
@@ -123,6 +177,20 @@ static unsigned long xen_save_fl(void)
        return (-flags) & X86_EFLAGS_IF;
 }
 
+static unsigned long xen_save_fl_direct(void)
+{
+       unsigned long flags;
+
+       /* flag has opposite sense of mask */
+       flags = !x86_read_percpu(xen_vcpu_info.evtchn_upcall_mask);
+
+       /* convert to IF type flag
+          -0 -> 0x00000000
+          -1 -> 0xffffffff
+       */
+       return (-flags) & X86_EFLAGS_IF;
+}
+
 static void xen_restore_fl(unsigned long flags)
 {
        struct vcpu_info *vcpu;
@@ -149,6 +217,25 @@ static void xen_restore_fl(unsigned long
        }
 }
 
+static void xen_restore_fl_direct(unsigned long flags)
+{
+       /* convert from IF type flag */
+       flags = !(flags & X86_EFLAGS_IF);
+
+       /* This is an atomic update, so no need to worry about
+          preemption. */
+       x86_write_percpu(xen_vcpu_info.evtchn_upcall_mask, flags);
+
+       /* If we get preempted here, then any pending event will be
+          handled anyway. */
+
+       if (flags == 0) {
+               barrier(); /* unmask then check (avoid races) */
+               if (unlikely(x86_read_percpu(xen_vcpu_info.evtchn_upcall_pending)))
+                       force_evtchn_callback();
+       }
+}
+
 static void xen_irq_disable(void)
 {
        /* There's a one instruction preempt window here.  We need to
@@ -157,6 +244,12 @@ static void xen_irq_disable(void)
        preempt_disable();
        x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1;
        preempt_enable_no_resched();
+}
+
+static void xen_irq_disable_direct(void)
+{
+       /* Atomic update, so preemption not a concern. */
+       x86_write_percpu(xen_vcpu_info.evtchn_upcall_mask, 1);
 }
 
 static void xen_irq_enable(void)
@@ -176,6 +269,19 @@ static void xen_irq_enable(void)
 
        barrier(); /* unmask then check (avoid races) */
        if (unlikely(vcpu->evtchn_upcall_pending))
+               force_evtchn_callback();
+}
+
+static void xen_irq_enable_direct(void)
+{
+       /* Atomic update, so preemption not a concern. */
+       x86_write_percpu(xen_vcpu_info.evtchn_upcall_mask, 0);
+
+       /* Doesn't matter if we get preempted here, because any
+          pending event will get dealt with anyway. */
+
+       barrier(); /* unmask then check (avoid races) */
+       if (unlikely(x86_read_percpu(xen_vcpu_info.evtchn_upcall_pending)))
                force_evtchn_callback();
 }
 
@@ -551,9 +657,19 @@ static void xen_flush_tlb_others(const c
        xen_mc_issue(PARAVIRT_LAZY_MMU);
 }
 
+static void xen_write_cr2(unsigned long cr2)
+{
+       x86_read_percpu(xen_vcpu)->arch.cr2 = cr2;
+}
+
 static unsigned long xen_read_cr2(void)
 {
        return x86_read_percpu(xen_vcpu)->arch.cr2;
+}
+
+static unsigned long xen_read_cr2_direct(void)
+{
+       return x86_read_percpu(xen_vcpu_info.arch.cr2);
 }
 
 static void xen_write_cr4(unsigned long cr4)
@@ -753,8 +869,27 @@ static __init void xen_pagetable_setup_d
                if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
                        BUG();
        }
-
-       xen_vcpu_setup(smp_processor_id());
+}
+
+/* This is called once we have the cpu_possible_map */
+void __init xen_setup_vcpu_info_placement(void)
+{
+       int cpu;
+
+       for_each_possible_cpu(cpu)
+               xen_vcpu_setup(cpu);
+
+       /* If xen_vcpu_setup managed to place the vcpu_info within the
+          percpu area for all cpus, make use of it */
+       if (have_vcpu_info_placement) {
+               printk(KERN_INFO "Xen: using vcpu_info placement\n");
+
+               paravirt_ops.save_fl = xen_save_fl_direct;
+               paravirt_ops.restore_fl = xen_restore_fl_direct;
+               paravirt_ops.irq_disable = xen_irq_disable_direct;
+               paravirt_ops.irq_enable = xen_irq_enable_direct;
+               paravirt_ops.read_cr2 = xen_read_cr2_direct;
+       }
 }
 
 static const struct paravirt_ops xen_paravirt_ops __initdata = {
@@ -788,7 +923,7 @@ static const struct paravirt_ops xen_par
        .write_cr0 = native_write_cr0,
 
        .read_cr2 = xen_read_cr2,
-       .write_cr2 = native_write_cr2,
+       .write_cr2 = xen_write_cr2,
 
        .read_cr3 = xen_read_cr3,
        .write_cr3 = xen_write_cr3,
@@ -974,7 +1109,16 @@ asmlinkage void __init xen_start_kernel(
        /* keep using Xen gdt for now; no urgent need to change it */
 
        x86_write_percpu(xen_cr3, __pa(pgd));
-       xen_vcpu_setup(0);
+
+#ifdef CONFIG_SMP
+       /* Don't do the full vcpu_info placement stuff until we have a
+          possible map. */
+       per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
+#else
+       /* May as well do it now, since there's no good time to call
+          it later on UP. */
+       xen_setup_vcpu_info_placement();
+#endif
 
        paravirt_ops.kernel_rpl = 1;
        if (xen_feature(XENFEAT_supervisor_mode_kernel))
===================================================================
--- a/arch/i386/xen/setup.c
+++ b/arch/i386/xen/setup.c
@@ -23,14 +23,6 @@
 /* These are code, but not functions.  Defined in entry.S */
 extern const char xen_hypervisor_callback[];
 extern const char xen_failsafe_callback[];
-
-static __initdata struct shared_info init_shared;
-
-/*
- * Point at some empty memory to start with. We map the real shared_info
- * page as soon as fixmap is up and running.
- */
-struct shared_info *HYPERVISOR_shared_info = &init_shared;
 
 unsigned long *phys_to_machine_mapping;
 EXPORT_SYMBOL(phys_to_machine_mapping);
===================================================================
--- a/arch/i386/xen/smp.c
+++ b/arch/i386/xen/smp.c
@@ -142,8 +142,6 @@ void __init xen_smp_prepare_boot_cpu(voi
        BUG_ON(smp_processor_id() != 0);
        native_smp_prepare_boot_cpu();
 
-       xen_vcpu_setup(0);
-
        /* We've switched to the "real" per-cpu gdt, so make sure the
           old memory can be recycled */
        make_lowmem_page_readwrite(&per_cpu__gdt_page);
@@ -152,6 +150,8 @@ void __init xen_smp_prepare_boot_cpu(voi
                cpus_clear(cpu_sibling_map[cpu]);
                cpus_clear(cpu_core_map[cpu]);
        }
+
+       xen_setup_vcpu_info_placement();
 }
 
 void __init xen_smp_prepare_cpus(unsigned int max_cpus)
@@ -262,7 +262,6 @@ int __cpuinit xen_cpu_up(unsigned int cp
 
        init_gdt(cpu);
        per_cpu(current_task, cpu) = idle;
-       xen_vcpu_setup(cpu);
        irq_ctx_init(cpu);
        xen_setup_timer(cpu);
 
===================================================================
--- a/arch/i386/xen/xen-ops.h
+++ b/arch/i386/xen/xen-ops.h
@@ -38,7 +38,7 @@ static inline unsigned xen_get_lazy_mode
 
 void __init xen_fill_possible_map(void);
 
-void xen_vcpu_setup(int cpu);
+void __init xen_setup_vcpu_info_placement(void);
 void xen_smp_prepare_boot_cpu(void);
 void xen_smp_prepare_cpus(unsigned int max_cpus);
 int xen_cpu_up(unsigned int cpu);
===================================================================
--- a/include/xen/interface/vcpu.h
+++ b/include/xen/interface/vcpu.h
@@ -151,4 +151,17 @@ struct vcpu_set_singleshot_timer {
 #define _VCPU_SSHOTTMR_future (0)
 #define VCPU_SSHOTTMR_future  (1U << _VCPU_SSHOTTMR_future)
 
+/*
+ * Register a memory location in the guest address space for the
+ * vcpu_info structure.  This allows the guest to place the vcpu_info
+ * structure in a convenient place, such as in a per-cpu data area.
+ * The pointer need not be page aligned, but the structure must not
+ * cross a page boundary.
+ */
+#define VCPUOP_register_vcpu_info   10  /* arg == struct vcpu_register_vcpu_info */
+struct vcpu_register_vcpu_info {
+    uint32_t mfn;               /* mfn of page to place vcpu_info */
+    uint32_t offset;            /* offset within page */
+};
+
 #endif /* __XEN_PUBLIC_VCPU_H__ */

-- 


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
WARNING - OLD ARCHIVES

xen-devel

[Xen-devel] [patch 40/44] xen: Place vcpu_info structure into per-cpu memory