>>> On 19.08.11 at 11:39, "Tian, Kevin" <kevin.tian@xxxxxxxxx> wrote:
>> From: Jan Beulich [mailto:JBeulich@xxxxxxxxxx]
>> Sent: Friday, August 19, 2011 5:32 PM
>>
>> >>> On 19.08.11 at 10:34, "Tian, Kevin" <kevin.tian@xxxxxxxxx> wrote:
>> >> From: Jan Beulich [mailto:JBeulich@xxxxxxxxxx]
>> >> Sent: Friday, August 19, 2011 4:11 PM
>> >>
>> >> >>> On 19.08.11 at 03:31, "Tian, Kevin" <kevin.tian@xxxxxxxxx> wrote:
>> >> > OK, to avoid the regression, if it's really cared, then we may change
>> >> > Xen
>> >> > to support entering C-state by mwait with interrupt enabled.
>> >>
>> >> I don't think that's worth it.
>> >>
>> >> > But the next question is whether it's really worthy of enabling Xen PM
>> >> > such way:
>> >> > - I think native Linux only supports mwait with
>> >> > break-on-interrupt
>> >> > extension too. You may confirm on such machines which I think should
>> >> > have no C2/C3 available. It's less likely for a customer to try PM on a
>> >> > platform where native Linux fails to do that
>> >>
>> >> Looking at the code, I can't see why Linux wouldn't use the I/O method
>> >> in this case instead.
>> >
>> > acpi_processor_ffh_cstate_probe_cpu:
>> > /* mwait ecx extensions INTERRUPT_BREAK should be supported
>> for
>> > C2/C3 */
>> > if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
>> > !(ecx & CPUID5_ECX_INTERRUPT_BREAK)) {
>> > retval = -1;
>> > goto out;
>> > }
>> >
>> > arch_acpi_set_pdc_bits:
>> > /*
>> > * If mwait/monitor is unsupported, C2/C3_FFH will be disabled
>> > */
>> > if (!cpu_has(c, X86_FEATURE_MWAIT))
>> > buf[2] &= ~(ACPI_PDC_C_C2C3_FFH);
>> >
>>
>> Right, this precludes the use of MWAIT, but it doesn't preclude using
>> the I/O method.
>
> In your special machine which has mwait but no break-on-interrupt extension:
>
> arch_acpi_set_pdc_bits tells BIOS that mwait should be used.
>
> BIOS then report _CST table with each entry filled with mwait style info
>
> later when Linux verifies _CST entry by acpi_processor_ffh_cstate_probe_cpu,
> it simply fails. No fallback to i/o method as BIOS is only notified in ACPI
> init time.
Oh, right, I just stumbled across that too (and didn't pay close enough
attention before). The function really should be checking for the
extension bits.
>>
>> >>
>> >> > - using mwait with interrupt enabled lacks the trace capability,
>> >> > while w/o trace I don't think any customer would enable Xen PM w/o a
>> >> > verification process.
>> >> >
>> >> > Another approach, if we really want to keep original I/O style, is to
>> >> > reveal Xen's mwait related conditions in shared info page, say a simple
>> >> > flag to indicate whether mwait bit should be set by pvops cpuid hook.
>> >> > Xen will check mwait extension earlier before dom0 is launched, instead
>> >> > of current point where dom0 registers cx info. This way there's no Xen
>> >> > implementation detail encoded in dom0, while concerned regression
>> >> > could be removed.
>> >>
>> >> The concept sounds reasonable, just that the shared info page probably
>> >> isn't the right mechanism (after all this is Dom0-only information that
>> >> we want to expose). A new platform sub-hypercall would probably be
>> >> the better route.
>> >>
>> >
>> > yes, that's a better choice.
>>
>> Yet another idea - why don't we simply pass the buffer passed to
>> arch_acpi_set_pdc_bits() down to Xen, rather than fiddling with the
>> bits in Dom0? That would at once allow to not set ACPI_PDC_T_FFH
>> (which I don't think Xen really supports at present).
>>
>> Or really, depending on who controls what, the P, C, and T bits should
>> be set by either Dom0 or Xen (so e.g. let Dom0 do what it currently
>> does, and then let Xen override the bits it ought to control).
>
> _PDC is encoded in AML language, and requires an ACPI parser which
> is one thing we avoid in Xen. If Xen want to override those bits, then
> whole ACPI component needs move down to Xen too.
No, I'm not saying the evaluation should be happening there. Below is
a draft hypervisor patch (only compile tested so far).
Jan
--- a/xen/arch/ia64/linux-xen/acpi.c
+++ b/xen/arch/ia64/linux-xen/acpi.c
@@ -241,6 +241,12 @@ int get_cpu_id(u32 acpi_id)
return -1;
}
+
+int arch_acpi_set_pdc_bits(u32 *pdc, u32 mask)
+{
+ pdc[2] |= ACPI_PDC_EST_CAPABILITY_SMP & mask;
+}
+
#endif
static int __init
--- a/xen/arch/x86/acpi/cpu_idle.c
+++ b/xen/arch/x86/acpi/cpu_idle.c
@@ -643,12 +643,6 @@ static int cpuidle_init_cpu(int cpu)
return 0;
}
-#define CPUID_MWAIT_LEAF (5)
-#define CPUID5_ECX_EXTENSIONS_SUPPORTED (0x1)
-#define CPUID5_ECX_INTERRUPT_BREAK (0x2)
-
-#define MWAIT_ECX_INTERRUPT_BREAK (0x1)
-
#define MWAIT_SUBSTATE_MASK (0xf)
#define MWAIT_SUBSTATE_SIZE (4)
--- a/xen/arch/x86/acpi/lib.c
+++ b/xen/arch/x86/acpi/lib.c
@@ -81,3 +81,31 @@ unsigned int acpi_get_processor_id(unsig
return INVALID_ACPIID;
}
+
+int arch_acpi_set_pdc_bits(u32 *pdc, u32 mask)
+{
+ u32 ecx;
+
+ pdc[2] |= ACPI_PDC_C_CAPABILITY_SMP & mask;
+
+ if (boot_cpu_has(X86_FEATURE_EST))
+ pdc[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP & mask;
+
+ if (boot_cpu_has(X86_FEATURE_ACPI))
+ pdc[2] |= ACPI_PDC_T_FFH & mask;
+
+ /*
+ * If mwait/monitor or its break-on-interrupt extension are
+ * unsupported, Cx_FFH will be disabled.
+ */
+ if (boot_cpu_has(X86_FEATURE_MWAIT) &&
+ boot_cpu_data.cpuid_level >= CPUID_MWAIT_LEAF)
+ ecx = cpuid_ecx(CPUID_MWAIT_LEAF);
+ else
+ ecx = 0;
+ if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
+ !(ecx & CPUID5_ECX_INTERRUPT_BREAK))
+ pdc[2] &= ~(ACPI_PDC_C_C1_FFH | ACPI_PDC_C_C2C3_FFH);
+
+ return 0;
+}
--- a/xen/arch/x86/platform_hypercall.c
+++ b/xen/arch/x86/platform_hypercall.c
@@ -416,6 +416,18 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
ret = -EINVAL;
break;
+ case XEN_PM_PDC:
+ if ( op->u.set_pminfo.id + 1 )
+ ret = -EINVAL;
+ else
+ {
+ XEN_GUEST_HANDLE(uint32) pdc;
+
+ guest_from_compat_handle(pdc, op->u.set_pminfo.u.pdc);
+ ret = acpi_set_pdc_bits(pdc);
+ }
+ break;
+
default:
ret = -EINVAL;
break;
--- a/xen/drivers/acpi/pmstat.c
+++ b/xen/drivers/acpi/pmstat.c
@@ -517,3 +517,37 @@ int do_pm_op(struct xen_sysctl_pm_op *op
return ret;
}
+
+int acpi_set_pdc_bits(XEN_GUEST_HANDLE(uint32) pdc)
+{
+ u32 bits[3];
+ int ret;
+
+ if ( copy_from_guest(bits, pdc, 2) )
+ ret = -EFAULT;
+ else if ( bits[0] != ACPI_PDC_REVISION_ID || !bits[1] )
+ ret = -EINVAL;
+ else if ( copy_from_guest_offset(bits + 2, pdc, 2, 1) )
+ ret = -EFAULT;
+ else
+ {
+ u32 mask = 0;
+
+ if ( xen_processor_pmbits & XEN_PROCESSOR_PM_CX )
+ mask |= ACPI_PDC_C_BITS | ACPI_PDC_SMP_C1PT;
+ if ( xen_processor_pmbits & XEN_PROCESSOR_PM_PX )
+ mask |= ACPI_PDC_P_BITS | ACPI_PDC_SMP_C1PT;
+ if ( xen_processor_pmbits & XEN_PROCESSOR_PM_TX )
+ mask |= ACPI_PDC_T_BITS | ACPI_PDC_SMP_C1PT;
+printk("PDC: %04x (mask %04x)", bits[2], mask);//temp
+ bits[2] &= ACPI_PDC_C_BITS & ACPI_PDC_P_BITS & ACPI_PDC_T_BITS &
+ ACPI_PDC_SMP_C1PT & ~mask;
+printk(" -> %04x", bits[2]);//temp
+ ret = arch_acpi_set_pdc_bits(bits, mask);
+printk(" -> %04x (%d)\n", bits[2], ret);//temp
+ }
+ if ( !ret )
+ ret = copy_to_guest_offset(pdc, 2, bits + 2, 1);
+
+ return ret;
+}
--- a/xen/include/acpi/pdc_intel.h
+++ b/xen/include/acpi/pdc_intel.h
@@ -4,6 +4,8 @@
#ifndef __PDC_INTEL_H__
#define __PDC_INTEL_H__
+#define ACPI_PDC_REVISION_ID 1
+
#define ACPI_PDC_P_FFH (0x0001)
#define ACPI_PDC_C_C1_HALT (0x0002)
#define ACPI_PDC_T_FFH (0x0004)
@@ -14,6 +16,7 @@
#define ACPI_PDC_SMP_T_SWCOORD (0x0080)
#define ACPI_PDC_C_C1_FFH (0x0100)
#define ACPI_PDC_C_C2C3_FFH (0x0200)
+#define ACPI_PDC_SMP_P_HWCOORD (0x0800)
#define ACPI_PDC_EST_CAPABILITY_SMP (ACPI_PDC_SMP_C1PT | \
ACPI_PDC_C_C1_HALT | \
@@ -22,6 +25,7 @@
#define ACPI_PDC_EST_CAPABILITY_SWSMP (ACPI_PDC_SMP_C1PT | \
ACPI_PDC_C_C1_HALT | \
ACPI_PDC_SMP_P_SWCOORD | \
+ ACPI_PDC_SMP_P_HWCOORD | \
ACPI_PDC_P_FFH)
#define ACPI_PDC_C_CAPABILITY_SMP (ACPI_PDC_SMP_C2C3 | \
@@ -30,4 +34,17 @@
ACPI_PDC_C_C1_FFH | \
ACPI_PDC_C_C2C3_FFH)
+#define ACPI_PDC_C_BITS (ACPI_PDC_C_C1_HALT | \
+ ACPI_PDC_C_C1_FFH | \
+ ACPI_PDC_SMP_C2C3 | \
+ ACPI_PDC_SMP_C_SWCOORD | \
+ ACPI_PDC_C_C2C3_FFH)
+
+#define ACPI_PDC_P_BITS (ACPI_PDC_P_FFH | \
+ ACPI_PDC_SMP_P_SWCOORD | \
+ ACPI_PDC_SMP_P_HWCOORD)
+
+#define ACPI_PDC_T_BITS (ACPI_PDC_T_FFH | \
+ ACPI_PDC_SMP_T_SWCOORD)
+
#endif /* __PDC_INTEL_H__ */
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -152,6 +152,10 @@
#define boot_cpu_has(bit) test_bit(bit, boot_cpu_data.x86_capability)
#define cpufeat_mask(idx) (1u << ((idx) & 31))
+#define CPUID_MWAIT_LEAF 5
+#define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1
+#define CPUID5_ECX_INTERRUPT_BREAK 0x2
+
#ifdef __i386__
#define cpu_has_vme boot_cpu_has(X86_FEATURE_VME)
#define cpu_has_de boot_cpu_has(X86_FEATURE_DE)
--- a/xen/include/public/platform.h
+++ b/xen/include/public/platform.h
@@ -304,6 +304,7 @@ DEFINE_XEN_GUEST_HANDLE(xenpf_getidletim
#define XEN_PM_CX 0
#define XEN_PM_PX 1
#define XEN_PM_TX 2
+#define XEN_PM_PDC 3
/* Px sub info type */
#define XEN_PX_PCT 1
@@ -401,6 +402,7 @@ struct xenpf_set_processor_pminfo {
union {
struct xen_processor_power power;/* Cx: _CST/_CSD */
struct xen_processor_performance perf; /* Px: _PPC/_PCT/_PSS/_PSD */
+ XEN_GUEST_HANDLE(uint32) pdc; /* _PDC */
} u;
};
typedef struct xenpf_set_processor_pminfo xenpf_set_processor_pminfo_t;
--- a/xen/include/xen/acpi.h
+++ b/xen/include/xen/acpi.h
@@ -358,6 +358,9 @@ static inline unsigned int acpi_get_csta
static inline void acpi_set_cstate_limit(unsigned int new_limit) { return; }
#endif
+int acpi_set_pdc_bits(XEN_GUEST_HANDLE(uint32));
+int arch_acpi_set_pdc_bits(u32 *, u32 mask);
+
#ifdef CONFIG_ACPI_NUMA
int acpi_get_pxm(acpi_handle handle);
#else
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|