WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [xen-unstable] x86/mca: simplify mce action, and some up

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] x86/mca: simplify mce action, and some update of mem err handler for
From: Xen patchbot-unstable <patchbot@xxxxxxx>
Date: Wed, 18 May 2011 07:25:12 +0100
Delivery-date: Tue, 17 May 2011 23:25:22 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Liu, Jinsong <jinsong.liu@xxxxxxxxx>
# Date 1305547923 -3600
# Node ID 216c2efeb246041cc39e4dcb675368e7e3182b85
# Parent  f9bb0bbea7c27e256275cfe58c16c0077542984e
x86/mca: simplify mce action, and some update of mem err handler for
future SRAR extension

This patch simplifies mce_action(). The two sets of MCE status flags,
MCA_... and MCER_..., are functionally very similar. This patch
removes the redundant middle-level MCA_... flags, together with their
related data structure and function, so that the mce_action() logic is
cleaner and simpler.  This patch also updates the memory error
handler. intel_memerr_dhandler() will be shared by the SRAO and SRAR
handlers, so to prepare for the future SRAR extension, this patch moves
the default failure flag out of intel_memerr_dhandler() and up to the
SRAO/SRAR level, since only the SRAO/SRAR handler itself knows how to
handle the failure when the memory error handler fails.

Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx>
---


diff -r f9bb0bbea7c2 -r 216c2efeb246 xen/arch/x86/cpu/mcheck/mce_intel.c
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c       Thu May 12 16:42:54 2011 +0100
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c       Mon May 16 13:12:03 2011 +0100
@@ -172,23 +172,14 @@
 static unsigned int __read_mostly mce_dhandler_num;
 static unsigned int __read_mostly mce_uhandler_num;
 
-enum mce_result
-{
-    MCER_NOERROR,
-    MCER_RECOVERED,
-    /* Not recoverd, but can continue */
-    MCER_CONTINUE,
-    MCER_RESET,
-};
-
 /* Maybe called in MCE context, no lock, no printk */
 static enum mce_result mce_action(struct cpu_user_regs *regs,
                       mctelem_cookie_t mctc)
 {
     struct mc_info *local_mi;
-    enum mce_result ret = MCER_NOERROR;
+    enum mce_result bank_result = MCER_NOERROR;
+    enum mce_result worst_result = MCER_NOERROR;
     struct mcinfo_common *mic = NULL;
-    struct mca_handle_result mca_res;
     struct mca_binfo binfo;
     const struct mca_error_handler *handlers = mce_dhandlers;
     unsigned int i, handler_num = mce_dhandler_num;
@@ -217,7 +208,7 @@
     /* Processing bank information */
     x86_mcinfo_lookup(mic, local_mi, MC_TYPE_BANK);
 
-    for ( ; ret != MCER_RESET && mic && mic->size;
+    for ( ; bank_result != MCER_RESET && mic && mic->size;
           mic = x86_mcinfo_next(mic) )
     {
         if (mic->type != MC_TYPE_BANK) {
@@ -225,34 +216,20 @@
         }
         binfo.mib = (struct mcinfo_bank*)mic;
         binfo.bank = binfo.mib->mc_bank;
-        memset(&mca_res, 0x0f, sizeof(mca_res));
+        bank_result = MCER_NOERROR;
         for ( i = 0; i < handler_num; i++ ) {
             if (handlers[i].owned_error(binfo.mib->mc_status))
             {
-                handlers[i].recovery_handler(&binfo, &mca_res);
-
-                if (mca_res.result & MCA_OWNER)
-                    binfo.mib->mc_domid = mca_res.owner;
-
-                if (mca_res.result == MCA_NEED_RESET)
-                    ret = MCER_RESET;
-                else if (mca_res.result == MCA_RECOVERED)
-                {
-                    if (ret < MCER_RECOVERED)
-                        ret = MCER_RECOVERED;
-                }
-                else if (mca_res.result == MCA_NO_ACTION)
-                {
-                    if (ret < MCER_CONTINUE)
-                        ret = MCER_CONTINUE;
-                }
+                handlers[i].recovery_handler(&binfo, &bank_result);
+                if (worst_result < bank_result)
+                    worst_result = bank_result;
                 break;
             }
         }
         ASSERT(i != handler_num);
     }
 
-    return ret;
+    return worst_result;
 }
 
 /*
@@ -608,7 +585,7 @@
 
 static void intel_memerr_dhandler(
              struct mca_binfo *binfo,
-             struct mca_handle_result *result)
+             enum mce_result *result)
 {
     struct mcinfo_bank *bank = binfo->mib;
     struct mcinfo_global *global = binfo->mig;
@@ -618,7 +595,6 @@
     uint64_t mc_status, mc_misc;
 
     mce_printk(MCE_VERBOSE, "MCE: Enter UCR recovery action\n");
-    result->result = MCA_NEED_RESET;
 
     mc_status = bank->mc_status;
     mc_misc = bank->mc_misc;
@@ -626,7 +602,6 @@
         !(mc_status & MCi_STATUS_MISCV) ||
         ((mc_misc & MCi_MISC_ADDRMOD_MASK) != MCi_MISC_PHYSMOD) )
     {
-        result->result |= MCA_NO_ACTION;
         dprintk(XENLOG_WARNING,
             "No physical address provided for memory error\n");
         return;
@@ -644,23 +619,19 @@
 
     /* This is free page */
     if (status & PG_OFFLINE_OFFLINED)
-        result->result = MCA_RECOVERED;
+        *result = MCER_RECOVERED;
     else if (status & PG_OFFLINE_PENDING) {
         /* This page has owner */
         if (status & PG_OFFLINE_OWNED) {
-            result->result |= MCA_OWNER;
-            result->owner = status >> PG_OFFLINE_OWNER_SHIFT;
+            bank->mc_domid = status >> PG_OFFLINE_OWNER_SHIFT;
             mce_printk(MCE_QUIET, "MCE: This error page is ownded"
-              " by DOM %d\n", result->owner);
-            /* Fill vMCE# injection and vMCE# MSR virtualization "
-             * "related data */
-            bank->mc_domid = result->owner;
+              " by DOM %d\n", bank->mc_domid);
             /* XXX: Cannot handle shared pages yet 
              * (this should identify all domains and gfn mapping to
              *  the mfn in question) */
-            BUG_ON( result->owner == DOMID_COW );
-            if ( result->owner != DOMID_XEN ) {
-                d = get_domain_by_id(result->owner);
+            BUG_ON( bank->mc_domid == DOMID_COW );
+            if ( bank->mc_domid != DOMID_XEN ) {
+                d = get_domain_by_id(bank->mc_domid);
                 ASSERT(d);
                 gfn = get_gpfn_from_mfn((bank->mc_addr) >> PAGE_SHIFT);
 
@@ -683,7 +654,7 @@
                       global->mc_gstatus) == -1 )
                 {
                     mce_printk(MCE_QUIET, "Fill vMCE# data for DOM%d "
-                      "failed\n", result->owner);
+                      "failed\n", bank->mc_domid);
                     goto vmce_failed;
                 }
 
@@ -699,7 +670,7 @@
                  * For xen, it has contained the error and finished
                  * its own recovery job.
                  */
-                result->result = MCA_RECOVERED;
+                *result = MCER_RECOVERED;
                 put_domain(d);
 
                 return;
@@ -718,11 +689,13 @@
 
 static void intel_srao_dhandler(
              struct mca_binfo *binfo,
-             struct mca_handle_result *result)
+             enum mce_result *result)
 {
     uint64_t status = binfo->mib->mc_status;
 
     /* For unknown srao error code, no action required */
+    *result = MCER_CONTINUE;
+
     if ( status & MCi_STATUS_VAL )
     {
         switch ( status & INTEL_MCCOD_MASK )
@@ -744,7 +717,7 @@
 
 static void intel_default_mce_dhandler(
              struct mca_binfo *binfo,
-             struct mca_handle_result *result)
+             enum mce_result *result)
 {
     uint64_t status = binfo->mib->mc_status;
     enum intel_mce_type type;
@@ -752,9 +725,9 @@
     type = intel_check_mce_type(status);
 
     if (type == intel_mce_fatal || type == intel_mce_ucr_srar)
-        result->result = MCA_NEED_RESET;
-    else if (type == intel_mce_ucr_srao)
-        result->result = MCA_NO_ACTION;
+        *result = MCER_RESET;
+    else
+        *result = MCER_CONTINUE;
 }
 
 static const struct mca_error_handler intel_mce_dhandlers[] = {
@@ -764,7 +737,7 @@
 
 static void intel_default_mce_uhandler(
              struct mca_binfo *binfo,
-             struct mca_handle_result *result)
+             enum mce_result *result)
 {
     uint64_t status = binfo->mib->mc_status;
     enum intel_mce_type type;
@@ -776,10 +749,10 @@
     /* Panic if no handler for SRAR error */
     case intel_mce_ucr_srar:
     case intel_mce_fatal:
-        result->result = MCA_NEED_RESET;
+        *result = MCER_RESET;
         break;
     default:
-        result->result = MCA_NO_ACTION;
+        *result = MCER_CONTINUE;
         break;
     }
 }
diff -r f9bb0bbea7c2 -r 216c2efeb246 xen/arch/x86/cpu/mcheck/x86_mca.h
--- a/xen/arch/x86/cpu/mcheck/x86_mca.h Thu May 12 16:42:54 2011 +0100
+++ b/xen/arch/x86/cpu/mcheck/x86_mca.h Mon May 16 13:12:03 2011 +0100
@@ -124,36 +124,6 @@
 void mcabanks_free(struct mca_banks *banks);
 extern struct mca_banks *mca_allbanks;
 
-/* Below interfaces are defined for MCA internal processing:
- * a. pre_handler will be called early in MCA ISR context, mainly for early
- *    need_reset detection for avoiding log missing. Also, it is used to judge
- *    impacted DOMAIN if possible.
- * b. mca_error_handler is actually a (error_action_index,
- *    recovery_hanlder pointer) pair. The defined recovery_handler
- *    performs the actual recovery operations such as page_offline, cpu_offline
- *    in softIRQ context when the per_bank MCA error matching the corresponding
- *    mca_code index. If pre_handler can't judge the impacted domain,
- *    recovery_handler must figure it out.
-*/
-
-/* MCA error has been recovered successfully by the recovery action*/
-#define MCA_RECOVERED (0x1 << 0)
-/* MCA error impact the specified DOMAIN in owner field below */
-#define MCA_OWNER (0x1 << 1)
-/* MCA error can't be recovered and need reset */
-#define MCA_NEED_RESET (0x1 << 2)
-/* MCA error did not have any action yet */
-#define MCA_NO_ACTION (0x1 << 3)
-
-struct mca_handle_result
-{
-    uint32_t result;
-    /* Used one result & MCA_OWNER */
-    domid_t owner;
-    /* Used by mca_error_handler, result & MCA_RECOVRED */
-    struct recovery_action *action;
-};
-
 /*Keep bank so that we can get staus even if mib is NULL */
 struct mca_binfo {
     int bank;
@@ -163,8 +133,14 @@
     struct cpu_user_regs *regs;
 };
 
-extern void (*mca_prehandler)( struct cpu_user_regs *regs,
-                        struct mca_handle_result *result);
+enum mce_result
+{
+    MCER_NOERROR,
+    MCER_RECOVERED,
+    /* Not recoverd, but can continue */
+    MCER_CONTINUE,
+    MCER_RESET,
+};
 
 struct mca_error_handler
 {
@@ -175,7 +151,7 @@
     */
     int (*owned_error)(uint64_t status);
     void (*recovery_handler)(struct mca_binfo *binfo,
-                    struct mca_handle_result *result);
+                    enum mce_result *result);
 };
 
 /* Global variables */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-changelog] [xen-unstable] x86/mca: simplify mce action, and some update of mem err handler for, Xen patchbot-unstable <=