WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [xen-unstable] x86: Sync cpu/intel_cacheinfo.c with Linu

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] x86: Sync cpu/intel_cacheinfo.c with Linux kernel
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Mon, 21 Jan 2008 10:50:10 -0800
Delivery-date: Mon, 21 Jan 2008 10:50:23 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1200920963 0
# Node ID f1947dddb5a05d7b03ec9c8d8d09fe4fd500c485
# Parent  60ee6f97cb19d24214f69627740b3a6719d056e4
x86: Sync cpu/intel_cacheinfo.c with Linux kernel
Signed-off-by: Xiaowei Yang <xiaowei.yang@xxxxxxxxx>
---
 xen/arch/x86/cpu/intel_cacheinfo.c |  347 ++++++++++++++++++++++++++++++++++---
 1 files changed, 321 insertions(+), 26 deletions(-)

diff -r 60ee6f97cb19 -r f1947dddb5a0 xen/arch/x86/cpu/intel_cacheinfo.c
--- a/xen/arch/x86/cpu/intel_cacheinfo.c        Mon Jan 21 13:08:44 2008 +0000
+++ b/xen/arch/x86/cpu/intel_cacheinfo.c        Mon Jan 21 13:09:23 2008 +0000
@@ -1,6 +1,16 @@
+/*
+ *      Routines to indentify caches on Intel CPU.
+ *
+ *      Changes:
+ *      Venkatesh Pallipadi    : Adding cache identification through cpuid(4)
+ *             Ashok Raj <ashok.raj@xxxxxxxxx>: Work with CPU hotplug 
infrastructure.
+ *     Andi Kleen / Andreas Herrmann   : CPUID4 emulation on AMD.
+ */
+
 #include <xen/config.h>
 #include <xen/init.h>
 #include <xen/lib.h>
+#include <xen/errno.h>
 #include <asm/processor.h>
 
 #define LVL_1_INST     1
@@ -17,7 +27,7 @@ struct _cache_table
 };
 
 /* all the cache descriptor types we care about (no TLB or trace cache 
entries) */
-static struct _cache_table cache_table[] __devinitdata =
+static struct _cache_table cache_table[] __cpuinitdata =
 {
        { 0x06, LVL_1_INST, 8 },        /* 4-way set assoc, 32 byte line size */
        { 0x08, LVL_1_INST, 16 },       /* 4-way set assoc, 32 byte line size */
@@ -30,13 +40,23 @@ static struct _cache_table cache_table[]
        { 0x2c, LVL_1_DATA, 32 },       /* 8-way set assoc, 64 byte line size */
        { 0x30, LVL_1_INST, 32 },       /* 8-way set assoc, 64 byte line size */
        { 0x39, LVL_2,      128 },      /* 4-way set assoc, sectored cache, 64 
byte line size */
+       { 0x3a, LVL_2,      192 },      /* 6-way set assoc, sectored cache, 64 
byte line size */
        { 0x3b, LVL_2,      128 },      /* 2-way set assoc, sectored cache, 64 
byte line size */
        { 0x3c, LVL_2,      256 },      /* 4-way set assoc, sectored cache, 64 
byte line size */
+       { 0x3d, LVL_2,      384 },      /* 6-way set assoc, sectored cache, 64 
byte line size */
+       { 0x3e, LVL_2,      512 },      /* 4-way set assoc, sectored cache, 64 
byte line size */
        { 0x41, LVL_2,      128 },      /* 4-way set assoc, 32 byte line size */
        { 0x42, LVL_2,      256 },      /* 4-way set assoc, 32 byte line size */
        { 0x43, LVL_2,      512 },      /* 4-way set assoc, 32 byte line size */
        { 0x44, LVL_2,      1024 },     /* 4-way set assoc, 32 byte line size */
        { 0x45, LVL_2,      2048 },     /* 4-way set assoc, 32 byte line size */
+       { 0x46, LVL_3,      4096 },     /* 4-way set assoc, 64 byte line size */
+       { 0x47, LVL_3,      8192 },     /* 8-way set assoc, 64 byte line size */
+       { 0x49, LVL_3,      4096 },     /* 16-way set assoc, 64 byte line size 
*/
+       { 0x4a, LVL_3,      6144 },     /* 12-way set assoc, 64 byte line size 
*/
+       { 0x4b, LVL_3,      8192 },     /* 16-way set assoc, 64 byte line size 
*/
+       { 0x4c, LVL_3,     12288 },     /* 12-way set assoc, 64 byte line size 
*/
+       { 0x4d, LVL_3,     16384 },     /* 16-way set assoc, 64 byte line size 
*/
        { 0x60, LVL_1_DATA, 16 },       /* 8-way set assoc, sectored cache, 64 
byte line size */
        { 0x66, LVL_1_DATA, 8 },        /* 4-way set assoc, sectored cache, 64 
byte line size */
        { 0x67, LVL_1_DATA, 16 },       /* 4-way set assoc, sectored cache, 64 
byte line size */
@@ -44,6 +64,7 @@ static struct _cache_table cache_table[]
        { 0x70, LVL_TRACE,  12 },       /* 8-way set assoc */
        { 0x71, LVL_TRACE,  16 },       /* 8-way set assoc */
        { 0x72, LVL_TRACE,  32 },       /* 8-way set assoc */
+       { 0x73, LVL_TRACE,  64 },       /* 8-way set assoc */
        { 0x78, LVL_2,    1024 },       /* 4-way set assoc, 64 byte line size */
        { 0x79, LVL_2,     128 },       /* 8-way set assoc, sectored cache, 64 
byte line size */
        { 0x7a, LVL_2,     256 },       /* 8-way set assoc, sectored cache, 64 
byte line size */
@@ -60,15 +81,276 @@ static struct _cache_table cache_table[]
        { 0x00, 0, 0}
 };
 
-unsigned int __devinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
+
+enum _cache_type
+{
+       CACHE_TYPE_NULL = 0,
+       CACHE_TYPE_DATA = 1,
+       CACHE_TYPE_INST = 2,
+       CACHE_TYPE_UNIFIED = 3
+};
+
+union _cpuid4_leaf_eax {
+       struct {
+               enum _cache_type        type:5;
+               unsigned int            level:3;
+               unsigned int            is_self_initializing:1;
+               unsigned int            is_fully_associative:1;
+               unsigned int            reserved:4;
+               unsigned int            num_threads_sharing:12;
+               unsigned int            num_cores_on_die:6;
+       } split;
+       u32 full;
+};
+
+union _cpuid4_leaf_ebx {
+       struct {
+               unsigned int            coherency_line_size:12;
+               unsigned int            physical_line_partition:10;
+               unsigned int            ways_of_associativity:10;
+       } split;
+       u32 full;
+};
+
+union _cpuid4_leaf_ecx {
+       struct {
+               unsigned int            number_of_sets:32;
+       } split;
+       u32 full;
+};
+
+struct _cpuid4_info {
+       union _cpuid4_leaf_eax eax;
+       union _cpuid4_leaf_ebx ebx;
+       union _cpuid4_leaf_ecx ecx;
+       unsigned long size;
+       cpumask_t shared_cpu_map;
+};
+
+unsigned short                 num_cache_leaves;
+
+/* AMD doesn't have CPUID4. Emulate it here to report the same
+   information to the user.  This makes some assumptions about the machine:
+   L2 not shared, no SMT etc. that is currently true on AMD CPUs.
+
+   In theory the TLBs could be reported as fake type (they are in "dummy").
+   Maybe later */
+union l1_cache {
+       struct {
+               unsigned line_size : 8;
+               unsigned lines_per_tag : 8;
+               unsigned assoc : 8;
+               unsigned size_in_kb : 8;
+       };
+       unsigned val;
+};
+
+union l2_cache {
+       struct {
+               unsigned line_size : 8;
+               unsigned lines_per_tag : 4;
+               unsigned assoc : 4;
+               unsigned size_in_kb : 16;
+       };
+       unsigned val;
+};
+
+union l3_cache {
+       struct {
+               unsigned line_size : 8;
+               unsigned lines_per_tag : 4;
+               unsigned assoc : 4;
+               unsigned res : 2;
+               unsigned size_encoded : 14;
+       };
+       unsigned val;
+};
+
+static const unsigned short assocs[] = {
+       [1] = 1, [2] = 2, [4] = 4, [6] = 8,
+       [8] = 16, [0xa] = 32, [0xb] = 48,
+       [0xc] = 64,
+       [0xf] = 0xffff // ??
+};
+
+static const unsigned char levels[] = { 1, 1, 2, 3 };
+static const unsigned char types[] = { 1, 2, 3, 3 };
+
+static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
+                      union _cpuid4_leaf_ebx *ebx,
+                      union _cpuid4_leaf_ecx *ecx)
+{
+       unsigned dummy;
+       unsigned line_size, lines_per_tag, assoc, size_in_kb;
+       union l1_cache l1i, l1d;
+       union l2_cache l2;
+       union l3_cache l3;
+       union l1_cache *l1 = &l1d;
+
+       eax->full = 0;
+       ebx->full = 0;
+       ecx->full = 0;
+
+       cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
+       cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
+
+       switch (leaf) {
+       case 1:
+               l1 = &l1i;
+       case 0:
+               if (!l1->val)
+                       return;
+               assoc = l1->assoc;
+               line_size = l1->line_size;
+               lines_per_tag = l1->lines_per_tag;
+               size_in_kb = l1->size_in_kb;
+               break;
+       case 2:
+               if (!l2.val)
+                       return;
+               assoc = l2.assoc;
+               line_size = l2.line_size;
+               lines_per_tag = l2.lines_per_tag;
+               /* cpu_data has errata corrections for K7 applied */
+               size_in_kb = current_cpu_data.x86_cache_size;
+               break;
+       case 3:
+               if (!l3.val)
+                       return;
+               assoc = l3.assoc;
+               line_size = l3.line_size;
+               lines_per_tag = l3.lines_per_tag;
+               size_in_kb = l3.size_encoded * 512;
+               break;
+       default:
+               return;
+       }
+
+       eax->split.is_self_initializing = 1;
+       eax->split.type = types[leaf];
+       eax->split.level = levels[leaf];
+       if (leaf == 3)
+               eax->split.num_threads_sharing = current_cpu_data.x86_max_cores 
- 1;
+       else
+               eax->split.num_threads_sharing = 0;
+       eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;
+
+
+       if (assoc == 0xf)
+               eax->split.is_fully_associative = 1;
+       ebx->split.coherency_line_size = line_size - 1;
+       ebx->split.ways_of_associativity = assocs[assoc] - 1;
+       ebx->split.physical_line_partition = lines_per_tag - 1;
+       ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
+               (ebx->split.ways_of_associativity + 1) - 1;
+}
+
+static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info 
*this_leaf)
+{
+       union _cpuid4_leaf_eax  eax;
+       union _cpuid4_leaf_ebx  ebx;
+       union _cpuid4_leaf_ecx  ecx;
+       unsigned                edx;
+
+       if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+               amd_cpuid4(index, &eax, &ebx, &ecx);
+       else
+               cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full,  &edx);
+       if (eax.split.type == CACHE_TYPE_NULL)
+               return -EIO; /* better error ? */
+
+       this_leaf->eax = eax;
+       this_leaf->ebx = ebx;
+       this_leaf->ecx = ecx;
+       this_leaf->size = (ecx.split.number_of_sets + 1) *
+               (ebx.split.coherency_line_size + 1) *
+               (ebx.split.physical_line_partition + 1) *
+               (ebx.split.ways_of_associativity + 1);
+       return 0;
+}
+
+static int __cpuinit find_num_cache_leaves(void)
+{
+       unsigned int            eax, ebx, ecx, edx;
+       union _cpuid4_leaf_eax  cache_eax;
+       int                     i = -1;
+
+       do {
+               ++i;
+               /* Do cpuid(4) loop to find out num_cache_leaves */
+               cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
+               cache_eax.full = eax;
+       } while (cache_eax.split.type != CACHE_TYPE_NULL);
+       return i;
+}
+
+unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
 {
        unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache 
sizes */
-
-       if (c->cpuid_level > 1) {
+       unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
+       unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
+       unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
+
+       if (c->cpuid_level > 3) {
+               static int is_initialized;
+
+               if (is_initialized == 0) {
+                       /* Init num_cache_leaves from boot CPU */
+                       num_cache_leaves = find_num_cache_leaves();
+                       is_initialized++;
+               }
+
+               /*
+                * Whenever possible use cpuid(4), deterministic cache
+                * parameters cpuid leaf to find the cache details
+                */
+               for (i = 0; i < num_cache_leaves; i++) {
+                       struct _cpuid4_info this_leaf;
+
+                       int retval;
+
+                       retval = cpuid4_cache_lookup(i, &this_leaf);
+                       if (retval >= 0) {
+                               switch(this_leaf.eax.split.level) {
+                                   case 1:
+                                       if (this_leaf.eax.split.type ==
+                                                       CACHE_TYPE_DATA)
+                                               new_l1d = this_leaf.size/1024;
+                                       else if (this_leaf.eax.split.type ==
+                                                       CACHE_TYPE_INST)
+                                               new_l1i = this_leaf.size/1024;
+                                       break;
+                                   case 2:
+                                       new_l2 = this_leaf.size/1024;
+                                       num_threads_sharing = 1 + 
this_leaf.eax.split.num_threads_sharing;
+                                       index_msb = 
get_count_order(num_threads_sharing);
+                                       l2_id = c->apicid >> index_msb;
+                                       break;
+                                   case 3:
+                                       new_l3 = this_leaf.size/1024;
+                                       num_threads_sharing = 1 + 
this_leaf.eax.split.num_threads_sharing;
+                                       index_msb = 
get_count_order(num_threads_sharing);
+                                       l3_id = c->apicid >> index_msb;
+                                       break;
+                                   default:
+                                       break;
+                               }
+                       }
+               }
+       }
+       /*
+        * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
+        * trace cache
+        */
+       if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
                /* supports eax=2  call */
                int i, j, n;
                int regs[4];
                unsigned char *dp = (unsigned char *)regs;
+               int only_trace = 0;
+
+               if (num_cache_leaves != 0 && c->x86 == 15)
+                       only_trace = 1;
 
                /* Number of times to iterate */
                n = cpuid_eax(2) & 0xFF;
@@ -90,6 +372,8 @@ unsigned int __devinit init_intel_cachei
                                while (cache_table[k].descriptor != 0)
                                {
                                        if (cache_table[k].descriptor == des) {
+                                               if (only_trace && 
cache_table[k].cache_type != LVL_TRACE)
+                                                       break;
                                                switch 
(cache_table[k].cache_type) {
                                                case LVL_1_INST:
                                                        l1i += 
cache_table[k].size;
@@ -115,28 +399,39 @@ unsigned int __devinit init_intel_cachei
                                }
                        }
                }
-
-               if ( trace )
-                       printk (KERN_INFO "CPU: Trace cache: %dK uops", trace);
-               else if ( l1i )
-                       printk (KERN_INFO "CPU: L1 I cache: %dK", l1i);
-               if ( l1d )
-                       printk(", L1 D cache: %dK\n", l1d);
-               else
-                       printk("\n");
-               if ( l2 )
-                       printk(KERN_INFO "CPU: L2 cache: %dK\n", l2);
-               if ( l3 )
-                       printk(KERN_INFO "CPU: L3 cache: %dK\n", l3);
-
-               /*
-                * This assumes the L3 cache is shared; it typically lives in
-                * the northbridge.  The L1 caches are included by the L2
-                * cache, and so should not be included for the purpose of
-                * SMP switching weights.
-                */
-               c->x86_cache_size = l2 ? l2 : (l1i+l1d);
-       }
+       }
+
+       if (new_l1d)
+               l1d = new_l1d;
+
+       if (new_l1i)
+               l1i = new_l1i;
+
+       if (new_l2) {
+               l2 = new_l2;
+       }
+
+       if (new_l3) {
+               l3 = new_l3;
+       }
+
+       if (trace)
+               printk (KERN_INFO "CPU: Trace cache: %dK uops", trace);
+       else if ( l1i )
+               printk (KERN_INFO "CPU: L1 I cache: %dK", l1i);
+
+       if (l1d)
+               printk(", L1 D cache: %dK\n", l1d);
+       else
+               printk("\n");
+
+       if (l2)
+               printk(KERN_INFO "CPU: L2 cache: %dK\n", l2);
+
+       if (l3)
+               printk(KERN_INFO "CPU: L3 cache: %dK\n", l3);
+
+       c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
 
        return l2;
 }

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-changelog] [xen-unstable] x86: Sync cpu/intel_cacheinfo.c with Linux kernel, Xen patchbot-unstable <=