x86: Deliver assigned devices' interrupts to the right vcpu.
This patch aims to reduce IPIs when delivering VT-d devices' interrupts to
their target vcpus. In experiments with a 10G Oplin NIC card, CPU utilization
drops by 5%~6% while the NIC's bandwidth remains unchanged throughout testing.
The patch always benefits UP guests with MSI-capable devices assigned, and SMP
guests whose lapic destination mode is physical mode. It also benefits SMP
guests whose lapic dest_mode is logical mode but for which only one
destination is specified. So it should cover the major cases in real
environments. Currently, the patch intercepts the programming of the MSI
interrupt status and calculates the destination id for the pirq in advance, at
programming time. When vcpu migration occurs or the guest re-programs the MSI
status, it checks whether the affinity of the assigned device's corresponding
pirq needs to be set, and makes the vcpu's affinity and the pirq's affinity
consistent, which eventually reduces the IPIs.
Signed-off-by: Xiantao Zhang <xiantao.zhang@xxxxxxxxx>
Signed-off-by: Xiaohui Xin <xiaohui.xin@xxxxxxxxx>
diff -r 3a71e070e3c5 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c Fri Sep 18 14:45:40 2009 +0100
+++ b/xen/arch/x86/hvm/hvm.c Wed Sep 23 09:50:30 2009 +0800
@@ -211,6 +211,35 @@ void hvm_migrate_timers(struct vcpu *v)
pt_migrate(v);
}
+void hvm_migrate_pirqs(struct vcpu *v)
+{
+ int pirq, irq;
+ struct irq_desc *desc;
+ struct domain *d = v->domain;
+ struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
+
+ if ( !iommu_enabled || (hvm_irq_dpci == NULL) )
+ return;
+
+ spin_lock(&d->event_lock);
+ for ( pirq = find_first_bit(hvm_irq_dpci->mapping, d->nr_pirqs);
+ pirq < d->nr_pirqs;
+ pirq = find_next_bit(hvm_irq_dpci->mapping, d->nr_pirqs, pirq + 1) )
+ {
+ if ( !(hvm_irq_dpci->mirq[pirq].flags & HVM_IRQ_DPCI_MACH_MSI) ||
+ (hvm_irq_dpci->mirq[pirq].gmsi.dest_vcpu_id != v->vcpu_id) )
+ continue;
+ desc = domain_spin_lock_irq_desc(v->domain, pirq, NULL);
+ if (!desc)
+ continue;
+ irq = desc - irq_desc;
+ ASSERT(MSI_IRQ(irq));
+ desc->handler->set_affinity(irq, *cpumask_of(v->processor));
+ spin_unlock_irq(&desc->lock);
+ }
+ spin_unlock(&d->event_lock);
+}
+
void hvm_do_resume(struct vcpu *v)
{
ioreq_t *p;
diff -r 3a71e070e3c5 xen/arch/x86/hvm/vmsi.c
--- a/xen/arch/x86/hvm/vmsi.c Fri Sep 18 14:45:40 2009 +0100
+++ b/xen/arch/x86/hvm/vmsi.c Wed Sep 23 09:50:15 2009 +0800
@@ -124,6 +124,29 @@ int vmsi_deliver(struct domain *d, int p
return 1;
}
+/* Return -1 if none or several vcpus match, else the dest vcpu_id (0 on UP) */
+int hvm_girq_dest_2_vcpu_id(struct domain *d, uint8_t dest, uint8_t dest_mode)
+{
+ int dest_vcpu_id = -1, w = 0;
+ struct vcpu *v;
+
+ if ( d->max_vcpus == 1 )
+ return 0;
+
+ for_each_vcpu ( d, v )
+ {
+ if ( vlapic_match_dest(vcpu_vlapic(v), NULL, 0, dest, dest_mode) )
+ {
+ w++;
+ dest_vcpu_id = v->vcpu_id;
+ }
+ }
+ if ( w > 1 )
+ return -1;
+
+ return dest_vcpu_id;
+}
+
/* MSI-X mask bit hypervisor interception */
struct msixtbl_entry
{
diff -r 3a71e070e3c5 xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c Fri Sep 18 14:45:40 2009 +0100
+++ b/xen/arch/x86/hvm/vmx/vmcs.c Tue Sep 22 22:16:47 2009 +0800
@@ -940,6 +940,7 @@ void vmx_do_resume(struct vcpu *v)
vmx_clear_vmcs(v);
vmx_load_vmcs(v);
hvm_migrate_timers(v);
+ hvm_migrate_pirqs(v);
vmx_set_host_env(v);
vpid_sync_vcpu_all(v);
}
diff -r 3a71e070e3c5 xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c Fri Sep 18 14:45:40 2009 +0100
+++ b/xen/arch/x86/irq.c Tue Sep 22 21:45:08 2009 +0800
@@ -1517,7 +1517,7 @@ static void dump_irqs(unsigned char key)
/* Only show CPU0 - CPU31's affinity info.*/
printk(" IRQ:%4d, IRQ affinity:0x%08x, Vec:%3d type=%-15s"
" status=%08x mapped, unbound\n",
- irq, *(int*)cfg->domain.bits, cfg->vector,
+ irq, *(int*)desc->affinity.bits, cfg->vector,
desc->handler->typename, desc->status);
else
{
@@ -1525,7 +1525,7 @@ static void dump_irqs(unsigned char key)
printk(" IRQ:%4d, IRQ affinity:0x%08x, Vec:%3d type=%-15s "
"status=%08x in-flight=%d domain-list=",
- irq, *(int*)cfg->domain.bits, cfg->vector,
+ irq, *(int*)desc->affinity.bits, cfg->vector,
desc->handler->typename, desc->status, action->in_flight);
for ( i = 0; i < action->nr_guests; i++ )
diff -r 3a71e070e3c5 xen/drivers/passthrough/io.c
--- a/xen/drivers/passthrough/io.c Fri Sep 18 14:45:40 2009 +0100
+++ b/xen/drivers/passthrough/io.c Wed Sep 23 09:51:21 2009 +0800
@@ -139,8 +139,10 @@ int pt_irq_create_bind_vtd(
bitmap_zero(hvm_irq_dpci->mapping, d->nr_pirqs);
memset(hvm_irq_dpci->hvm_timer, 0,
nr_irqs * sizeof(*hvm_irq_dpci->hvm_timer));
- for ( int i = 0; i < d->nr_pirqs; i++ )
+ for ( int i = 0; i < d->nr_pirqs; i++ ) {
INIT_LIST_HEAD(&hvm_irq_dpci->mirq[i].digl_list);
+ hvm_irq_dpci->mirq[i].gmsi.dest_vcpu_id = -1;
+ }
for ( int i = 0; i < NR_HVM_IRQS; i++ )
INIT_LIST_HEAD(&hvm_irq_dpci->girq[i]);
@@ -154,6 +156,8 @@ int pt_irq_create_bind_vtd(
if ( pt_irq_bind->irq_type == PT_IRQ_TYPE_MSI )
{
+ uint8_t dest, dest_mode;
+ int dest_vcpu_id;
if ( !test_and_set_bit(pirq, hvm_irq_dpci->mapping))
{
@@ -195,6 +199,14 @@ int pt_irq_create_bind_vtd(
hvm_irq_dpci->mirq[pirq].gmsi.gvec = pt_irq_bind->u.msi.gvec;
hvm_irq_dpci->mirq[pirq].gmsi.gflags = pt_irq_bind->u.msi.gflags;
}
+ /* Calculate dest_vcpu_id for MSI-type pirq migration */
+ dest = hvm_irq_dpci->mirq[pirq].gmsi.gflags & VMSI_DEST_ID_MASK;
+ dest_mode = !!(hvm_irq_dpci->mirq[pirq].gmsi.gflags & VMSI_DM_MASK);
+ dest_vcpu_id = hvm_girq_dest_2_vcpu_id(d, dest, dest_mode);
+ hvm_irq_dpci->mirq[pirq].gmsi.dest_vcpu_id = dest_vcpu_id;
+ spin_unlock(&d->event_lock);
+ if ( dest_vcpu_id >= 0 )
+ hvm_migrate_pirqs(d->vcpu[dest_vcpu_id]);
}
else
{
@@ -278,8 +290,8 @@ int pt_irq_create_bind_vtd(
gdprintk(XENLOG_INFO VTDPREFIX,
"VT-d irq bind: m_irq = %x device = %x intx = %x\n",
machine_gsi, device, intx);
- }
- spin_unlock(&d->event_lock);
+ spin_unlock(&d->event_lock);
+ }
return 0;
}
diff -r 3a71e070e3c5 xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h Fri Sep 18 14:45:40 2009 +0100
+++ b/xen/include/asm-x86/hvm/hvm.h Tue Sep 22 22:17:18 2009 +0800
@@ -157,6 +157,8 @@ void hvm_set_guest_time(struct vcpu *v,
void hvm_set_guest_time(struct vcpu *v, u64 guest_time);
u64 hvm_get_guest_time(struct vcpu *v);
+int hvm_girq_dest_2_vcpu_id(struct domain *d, uint8_t dest, uint8_t dest_mode);
+
#define hvm_paging_enabled(v) \
(!!((v)->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG))
#define hvm_wp_enabled(v) \
@@ -230,6 +232,7 @@ void hvm_cpuid(unsigned int input, unsig
unsigned int *ecx, unsigned int *edx);
void hvm_migrate_timers(struct vcpu *v);
void hvm_do_resume(struct vcpu *v);
+void hvm_migrate_pirqs(struct vcpu *v);
static inline void
hvm_inject_exception(unsigned int trapnr, int errcode, unsigned long cr2)
diff -r 3a71e070e3c5 xen/include/asm-x86/irq.h
--- a/xen/include/asm-x86/irq.h Fri Sep 18 14:45:40 2009 +0100
+++ b/xen/include/asm-x86/irq.h Tue Sep 22 20:37:52 2009 +0800
@@ -112,6 +112,8 @@ struct irq_desc;
struct irq_desc;
extern void irq_complete_move(struct irq_desc **descp);
+extern struct irq_desc *irq_desc;
+
void lock_vector_lock(void);
void unlock_vector_lock(void);
diff -r 3a71e070e3c5 xen/include/xen/hvm/irq.h
--- a/xen/include/xen/hvm/irq.h Fri Sep 18 14:45:40 2009 +0100
+++ b/xen/include/xen/hvm/irq.h Wed Sep 23 09:49:04 2009 +0800
@@ -50,6 +50,7 @@ struct hvm_gmsi_info {
struct hvm_gmsi_info {
uint32_t gvec;
uint32_t gflags;
+ int dest_vcpu_id; /* -1 :multi-dest, non-negative: dest_vcpu_id */
};
struct hvm_mirq_dpci_mapping {
make_interrupts_delivery_to_right_vcpu.patch
Description: make_interrupts_delivery_to_right_vcpu.patch
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|