powerpc/pseries/eeh: Fix crash when error happens during device probe

[linux-2.6.git] / arch / powerpc / platforms / pseries / eeh.c
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c

index eac2a631c5a1f6f85545f4b69694a447afaf8a1c..c0b40af4ce4f130edbd094d48467f747a10d78cc 100644 (file)
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -1,6 +1,8 @@
  /*
   * eeh.c
- * Copyright (C) 2001 Dave Engebretsen & Todd Inglett IBM Corporation
+ * Copyright IBM Corporation 2001, 2005, 2006
+ * Copyright Dave Engebretsen & Todd Inglett 2001
+ * Copyright Linas Vepstas 2005, 2006
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License as published by
@@ -15,9 +17,12 @@
   * You should have received a copy of the GNU General Public License
   * along with this program; if not, write to the Free Software
   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com>
   */
  
  #include <linux/delay.h>
+#include <linux/sched.h>       /* for init_mm */
  #include <linux/init.h>
  #include <linux/list.h>
  #include <linux/pci.h>
@@ -25,7 +30,10 @@
  #include <linux/rbtree.h>
  #include <linux/seq_file.h>
  #include <linux/spinlock.h>
-#include <asm/atomic.h>
+#include <linux/export.h>
+#include <linux/of.h>
+
+#include <linux/atomic.h>
  #include <asm/eeh.h>
  #include <asm/eeh_event.h>
  #include <asm/io.h>
@@ -33,7 +41,6 @@
  #include <asm/ppc-pci.h>
  #include <asm/rtas.h>
  
-#undef DEBUG
  
  /** Overview:
   *  EEH, or "Extended Error Handling" is a PCI bridge technology for
@@ -60,7 +67,7 @@
   *  with EEH.
   *
   *  Ideally, a PCI device driver, when suspecting that an isolation
- *  event has occured (e.g. by reading 0xff's), will then ask EEH
+ *  event has occurred (e.g. by reading 0xff's), will then ask EEH
   *  whether this is the case, and then take appropriate steps to
   *  reset the PCI slot, the PCI device, and then resume operations.
   *  However, until that day,  the checking is done here, with the
@@ -70,13 +77,13 @@
   */
  
  /* If a device driver keeps reading an MMIO register in an interrupt
- * handler after a slot isolation event has occurred, we assume it
- * is broken and panic.  This sets the threshold for how many read
- * attempts we allow before panicking.
+ * handler after a slot isolation event, it might be broken.
+ * This sets the threshold for how many read attempts we allow
+ * before printing an error message.
   */
  #define EEH_MAX_FAILS  2100000
  
-/* Time to wait for a PCI slot to retport status, in milliseconds */
+/* Time to wait for a PCI slot to report status, in milliseconds */
  #define PCI_BUS_RESET_WAIT_MSEC (60*1000)
  
  /* RTAS tokens */
@@ -88,18 +95,29 @@ static int ibm_slot_error_detail;
  static int ibm_get_config_addr_info;
  static int ibm_get_config_addr_info2;
  static int ibm_configure_bridge;
+static int ibm_configure_pe;
  
  int eeh_subsystem_enabled;
  EXPORT_SYMBOL(eeh_subsystem_enabled);
  
  /* Lock to avoid races due to multiple reports of an error */
-static DEFINE_SPINLOCK(confirm_error_lock);
+static DEFINE_RAW_SPINLOCK(confirm_error_lock);
  
-/* Buffer for reporting slot-error-detail rtas calls */
+/* Buffer for reporting slot-error-detail rtas calls. It's here
+ * in BSS, and not dynamically allocated, so that it ends up in
+ * RMO where RTAS can access it.
+ */
  static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX];
  static DEFINE_SPINLOCK(slot_errbuf_lock);
  static int eeh_error_buf_size;
  
+/* Buffer for reporting pci register dumps. It's here in BSS, and
+ * not dynamically allocated, so that it ends up in RMO where RTAS
+ * can access it.
+ */
+#define EEH_PCI_REGS_LOG_LEN 4096
+static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];
+
  /* System monitoring statistics */
  static unsigned long no_device;
  static unsigned long no_dn;
@@ -107,7 +125,6 @@ static unsigned long no_cfg_addr;
  static unsigned long ignored_check;
  static unsigned long total_mmio_ffs;
  static unsigned long false_positives;
-static unsigned long ignored_failures;
  static unsigned long slot_resets;
  
  #define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE)
@@ -115,7 +132,8 @@ static unsigned long slot_resets;
  /* --------------------------------------------------------------- */
  /* Below lies the EEH event infrastructure */
  
-void eeh_slot_error_detail (struct pci_dn *pdn, int severity)
+static void rtas_slot_error_detail(struct pci_dn *pdn, int severity,
+                                   char *driver_log, size_t loglen)
  {
         int config_addr;
         unsigned long flags;
@@ -133,7 +151,8 @@ void eeh_slot_error_detail (struct pci_dn *pdn, int severity)
         rc = rtas_call(ibm_slot_error_detail,
                        8, 1, NULL, config_addr,
                        BUID_HI(pdn->phb->buid),
-                      BUID_LO(pdn->phb->buid), NULL, 0,
+                      BUID_LO(pdn->phb->buid),
+                      virt_to_phys(driver_log), loglen,
                        virt_to_phys(slot_errbuf),
                        eeh_error_buf_size,
                        severity);
@@ -143,6 +162,115 @@ void eeh_slot_error_detail (struct pci_dn *pdn, int severity)
         spin_unlock_irqrestore(&slot_errbuf_lock, flags);
  }
  
+/**
+ * gather_pci_data - copy assorted PCI config space registers to buf
+ * @pdn: device to report data for
+ * @buf: pointer to buffer in which to log
+ * @len: amount of room in buffer
+ *
+ * This routine reads assorted PCI configuration space registers with
+ * rtas_read_config(), appends them as text to @buf for RTAS error
+ * logging, and echoes every value to the kernel log as well.
+ * If the device is a bridge, the device nodes below it are dumped
+ * recursively into the remaining room of the same buffer.
+ *
+ * Returns the accumulated scnprintf() count, i.e. the length of the
+ * text that was placed in @buf.
+ */
+static size_t gather_pci_data(struct pci_dn *pdn, char * buf, size_t len)
+{
+       struct pci_dev *dev = pdn->pcidev;
+       u32 cfg;
+       int cap, i;
+       int n = 0;
+
+       n += scnprintf(buf+n, len-n, "%s\n", pdn->node->full_name);
+       printk(KERN_WARNING "EEH: of node=%s\n", pdn->node->full_name);
+
+       rtas_read_config(pdn, PCI_VENDOR_ID, 4, &cfg);
+       n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
+       printk(KERN_WARNING "EEH: PCI device/vendor: %08x\n", cfg);
+
+       rtas_read_config(pdn, PCI_COMMAND, 4, &cfg);
+       n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
+       printk(KERN_WARNING "EEH: PCI cmd/status register: %08x\n", cfg);
+
+       /* Everything below needs the struct pci_dev (class, capabilities) */
+       if (!dev) {
+               printk(KERN_WARNING "EEH: no PCI device for this of node\n");
+               return n;
+       }
+
+       /* Gather bridge-specific registers */
+       if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) {
+               rtas_read_config(pdn, PCI_SEC_STATUS, 2, &cfg);
+               n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
+               printk(KERN_WARNING "EEH: Bridge secondary status: %04x\n", cfg);
+
+               rtas_read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg);
+               n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
+               printk(KERN_WARNING "EEH: Bridge control: %04x\n", cfg);
+       }
+
+       /* Dump out the PCI-X command and status regs */
+       cap = pci_find_capability(dev, PCI_CAP_ID_PCIX);
+       if (cap) {
+               rtas_read_config(pdn, cap, 4, &cfg);
+               n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
+               printk(KERN_WARNING "EEH: PCI-X cmd: %08x\n", cfg);
+
+               rtas_read_config(pdn, cap+4, 4, &cfg);
+               n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
+               printk(KERN_WARNING "EEH: PCI-X status: %08x\n", cfg);
+       }
+
+       /* If PCI-E capable, dump PCI-E cap 10, and the AER */
+       cap = pci_find_capability(dev, PCI_CAP_ID_EXP);
+       if (cap) {
+               n += scnprintf(buf+n, len-n, "pci-e cap10:\n");
+               printk(KERN_WARNING
+                      "EEH: PCI-E capabilities and status follow:\n");
+
+               /* Nine consecutive dwords starting at the capability */
+               for (i=0; i<=8; i++) {
+                       rtas_read_config(pdn, cap+4*i, 4, &cfg);
+                       n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
+                       printk(KERN_WARNING "EEH: PCI-E %02x: %08x\n", i, cfg);
+               }
+
+               cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
+               if (cap) {
+                       n += scnprintf(buf+n, len-n, "pci-e AER:\n");
+                       printk(KERN_WARNING
+                              "EEH: PCI-E AER capability register set follows:\n");
+
+                       /* Fourteen consecutive dwords of the AER capability */
+                       for (i=0; i<14; i++) {
+                               rtas_read_config(pdn, cap+4*i, 4, &cfg);
+                               n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
+                               printk(KERN_WARNING "EEH: PCI-E AER %02x: %08x\n", i, cfg);
+                       }
+               }
+       }
+
+       /* Gather status on devices under the bridge */
+       if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) {
+               struct device_node *dn;
+
+               /*
+                * Keep pdn pointing at the bridge for the whole walk:
+                * for_each_child_of_node() evaluates its parent argument
+                * again on every iteration, so overwriting pdn with a
+                * child's pci_dn (or with NULL) would derail the iteration.
+                */
+               for_each_child_of_node(pdn->node, dn) {
+                       struct pci_dn *child_pdn = PCI_DN(dn);
+
+                       if (child_pdn)
+                               n += gather_pci_data(child_pdn, buf+n, len-n);
+               }
+       }
+
+       return n;
+}
+
+/**
+ * eeh_slot_error_detail - dump PCI registers and report slot error detail
+ * @pdn: device whose slot is being reported
+ * @severity: severity value passed through to rtas_slot_error_detail()
+ *
+ * Calls rtas_pci_enable() with EEH_THAW_MMIO, rtas_configure_bridge() and
+ * eeh_restore_bars() before reading any registers, then fills pci_regs_buf
+ * via gather_pci_data() and hands that text plus its length to
+ * rtas_slot_error_detail().
+ */
+void eeh_slot_error_detail(struct pci_dn *pdn, int severity)
+{
+       size_t loglen;
+
+       /* Start out with an empty log string */
+       pci_regs_buf[0] = 0;
+
+       rtas_pci_enable(pdn, EEH_THAW_MMIO);
+       rtas_configure_bridge(pdn);
+       eeh_restore_bars(pdn);
+
+       /* pci_regs_buf is a byte array of EEH_PCI_REGS_LOG_LEN elements */
+       loglen = gather_pci_data(pdn, pci_regs_buf, sizeof(pci_regs_buf));
+       rtas_slot_error_detail(pdn, severity, pci_regs_buf, loglen);
+}
+
  /**
   * read_slot_reset_state - Read the reset state of a device node's slot
   * @dn: device node to read
@@ -200,7 +328,7 @@ eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs)
  
                 if (rets[2] == 0) return -1; /* permanently unavailable */
  
-               if (max_wait_msecs <= 0) return -1;
+               if (max_wait_msecs <= 0) break;
  
                 mwait = rets[2];
                 if (mwait <= 0) {
@@ -249,7 +377,7 @@ struct device_node * find_device_pe(struct device_node *dn)
         return dn;
  }
  
-/** Mark all devices that are peers of this device as failed.
+/** Mark all devices that are children of this device as failed.
   *  Mark the device driver too, so that it can see the failure
   *  immediately; this is critical, since some drivers poll
   *  status registers in interrupts ... If a driver is polling,
@@ -257,9 +385,11 @@ struct device_node * find_device_pe(struct device_node *dn)
   *  an interrupt context, which is bad.
   */
  
-static void __eeh_mark_slot (struct device_node *dn, int mode_flag)
+static void __eeh_mark_slot(struct device_node *parent, int mode_flag)
  {
-       while (dn) {
+       struct device_node *dn;
+
+       for_each_child_of_node(parent, dn) {
                 if (PCI_DN(dn)) {
                         /* Mark the pci device driver too */
                         struct pci_dev *dev = PCI_DN(dn)->pcidev;
@@ -269,10 +399,8 @@ static void __eeh_mark_slot (struct device_node *dn, int mode_flag)
                         if (dev && dev->driver)
                                 dev->error_state = pci_channel_io_frozen;
  
-                       if (dn->child)
-                               __eeh_mark_slot (dn->child, mode_flag);
+                       __eeh_mark_slot(dn, mode_flag);
                 }
-               dn = dn->sibling;
         }
  }
  
@@ -282,7 +410,7 @@ void eeh_mark_slot (struct device_node *dn, int mode_flag)
         dn = find_device_pe (dn);
  
         /* Back up one, since config addrs might be shared */
-       if (PCI_DN(dn) && PCI_DN(dn)->eeh_pe_config_addr)
+       if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
                 dn = dn->parent;
  
         PCI_DN(dn)->eeh_mode |= mode_flag;
@@ -292,37 +420,70 @@ void eeh_mark_slot (struct device_node *dn, int mode_flag)
         if (dev)
                 dev->error_state = pci_channel_io_frozen;
  
-       __eeh_mark_slot (dn->child, mode_flag);
+       __eeh_mark_slot(dn, mode_flag);
  }
  
-static void __eeh_clear_slot (struct device_node *dn, int mode_flag)
+static void __eeh_clear_slot(struct device_node *parent, int mode_flag)
  {
-       while (dn) {
+       struct device_node *dn;
+
+       for_each_child_of_node(parent, dn) {
                 if (PCI_DN(dn)) {
                         PCI_DN(dn)->eeh_mode &= ~mode_flag;
                         PCI_DN(dn)->eeh_check_count = 0;
-                       if (dn->child)
-                               __eeh_clear_slot (dn->child, mode_flag);
+                       __eeh_clear_slot(dn, mode_flag);
                 }
-               dn = dn->sibling;
         }
  }
  
  void eeh_clear_slot (struct device_node *dn, int mode_flag)
  {
         unsigned long flags;
-       spin_lock_irqsave(&confirm_error_lock, flags);
+       raw_spin_lock_irqsave(&confirm_error_lock, flags);
         
         dn = find_device_pe (dn);
         
         /* Back up one, since config addrs might be shared */
-       if (PCI_DN(dn) && PCI_DN(dn)->eeh_pe_config_addr)
+       if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
                 dn = dn->parent;
  
         PCI_DN(dn)->eeh_mode &= ~mode_flag;
         PCI_DN(dn)->eeh_check_count = 0;
-       __eeh_clear_slot (dn->child, mode_flag);
-       spin_unlock_irqrestore(&confirm_error_lock, flags);
+       __eeh_clear_slot(dn, mode_flag);
+       raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
+}
+
+/*
+ * Recursively visit the children of @parent and OR the needs_freset flag
+ * of every PCI device that has a driver bound into *@freset.
+ * A child without a pci_dn is skipped, and so is everything below it.
+ */
+void __eeh_set_pe_freset(struct device_node *parent, unsigned int *freset)
+{
+       struct device_node *child;
+
+       for_each_child_of_node(parent, child) {
+               struct pci_dev *pdev;
+
+               if (!PCI_DN(child))
+                       continue;
+
+               pdev = PCI_DN(child)->pcidev;
+               if (pdev && pdev->driver)
+                       *freset |= pdev->needs_freset;
+
+               /* Descend into this child's own children */
+               __eeh_set_pe_freset(child, freset);
+       }
+}
+
+/*
+ * eeh_set_pe_freset - collect the needs_freset flags below a device's PE
+ * @dn: device node used to locate the PE via find_device_pe()
+ * @freset: accumulator; needs_freset values are OR-ed into it
+ *
+ * The flag of the top node's own PCI device is taken whenever a pci_dev is
+ * attached; the nodes below it are handled by __eeh_set_pe_freset().
+ */
+void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset)
+{
+       struct device_node *pe = find_device_pe(dn);
+       struct pci_dev *pe_dev;
+
+       /* Back up one, since config addrs might be shared */
+       if (!pcibios_find_pci_bus(pe) && PCI_DN(pe->parent))
+               pe = pe->parent;
+
+       pe_dev = PCI_DN(pe)->pcidev;
+       if (pe_dev)
+               *freset |= pe_dev->needs_freset;
+
+       __eeh_set_pe_freset(pe, freset);
+}
  
  /**
@@ -347,6 +508,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
         unsigned long flags;
         struct pci_dn *pdn;
         int rc = 0;
+       const char *location;
  
         total_mmio_ffs++;
  
@@ -357,16 +519,15 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
                 no_dn++;
                 return 0;
         }
+       dn = find_device_pe(dn);
         pdn = PCI_DN(dn);
  
         /* Access to IO BARs might get this far and still not want checking. */
         if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) ||
             pdn->eeh_mode & EEH_MODE_NOCHECK) {
                 ignored_check++;
-#ifdef DEBUG
-               printk ("EEH:ignored check (%x) for %s %s\n", 
-                       pdn->eeh_mode, pci_name (dev), dn->full_name);
-#endif
+               pr_debug("EEH: Ignored check (%x) for %s %s\n",
+                        pdn->eeh_mode, eeh_pci_name(dev), dn->full_name);
                 return 0;
         }
  
@@ -381,22 +542,19 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
          * in one slot might report errors simultaneously, and we
          * only want one error recovery routine running.
          */
-       spin_lock_irqsave(&confirm_error_lock, flags);
+       raw_spin_lock_irqsave(&confirm_error_lock, flags);
         rc = 1;
         if (pdn->eeh_mode & EEH_MODE_ISOLATED) {
                 pdn->eeh_check_count ++;
-               if (pdn->eeh_check_count >= EEH_MAX_FAILS) {
-                       printk (KERN_ERR "EEH: Device driver ignored %d bad reads, panicing\n",
-                               pdn->eeh_check_count);
+               if (pdn->eeh_check_count % EEH_MAX_FAILS == 0) {
+                       location = of_get_property(dn, "ibm,loc-code", NULL);
+                       printk (KERN_ERR "EEH: %d reads ignored for recovering device at "
+                               "location=%s driver=%s pci addr=%s\n",
+                               pdn->eeh_check_count, location,
+                               eeh_driver_name(dev), eeh_pci_name(dev));
+                       printk (KERN_ERR "EEH: Might be infinite loop in %s driver\n",
+                               eeh_driver_name(dev));
                         dump_stack();
-                       msleep(5000);
-                       
-                       /* re-read the slot reset state */
-                       if (read_slot_reset_state(pdn, rets) != 0)
-                               rets[0] = -1;   /* reset state unknown */
-
-                       /* If we are here, then we hit an infinite loop. Stop. */
-                       panic("EEH: MMIO halt (%d) on device:%s\n", rets[0], pci_name(dev));
                 }
                 goto dn_unlock;
         }
@@ -415,14 +573,16 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
                 printk(KERN_WARNING "EEH: read_slot_reset_state() failed; rc=%d dn=%s\n",
                        ret, dn->full_name);
                 false_positives++;
+               pdn->eeh_false_positives ++;
                 rc = 0;
                 goto dn_unlock;
         }
  
         /* Note that config-io to empty slots may fail;
          * they are empty when they don't have children. */
-       if ((rets[0] == 5) && (dn->child == NULL)) {
+       if ((rets[0] == 5) && (rets[2] == 0) && (dn->child == NULL)) {
                 false_positives++;
+               pdn->eeh_false_positives ++;
                 rc = 0;
                 goto dn_unlock;
         }
@@ -432,6 +592,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
                 printk(KERN_WARNING "EEH: event on unsupported device, rc=%d dn=%s\n",
                        ret, dn->full_name);
                 false_positives++;
+               pdn->eeh_false_positives ++;
                 rc = 0;
                 goto dn_unlock;
         }
@@ -439,6 +600,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
         /* If not the kind of error we know about, punt. */
         if (rets[0] != 1 && rets[0] != 2 && rets[0] != 4 && rets[0] != 5) {
                 false_positives++;
+               pdn->eeh_false_positives ++;
                 rc = 0;
                 goto dn_unlock;
         }
@@ -449,7 +611,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
          * with other functions on this device, and functions under
          * bridges. */
         eeh_mark_slot (dn, EEH_MODE_ISOLATED);
-       spin_unlock_irqrestore(&confirm_error_lock, flags);
+       raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
  
         eeh_send_failure_event (dn, dev);
  
@@ -460,7 +622,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
         return 1;
  
  dn_unlock:
-       spin_unlock_irqrestore(&confirm_error_lock, flags);
+       raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
         return rc;
  }
  
@@ -568,27 +730,77 @@ rtas_pci_slot_reset(struct pci_dn *pdn, int state)
         if (pdn->eeh_pe_config_addr)
                 config_addr = pdn->eeh_pe_config_addr;
  
-       rc = rtas_call(ibm_set_slot_reset,4,1, NULL,
+       rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
                        config_addr,
                        BUID_HI(pdn->phb->buid),
                        BUID_LO(pdn->phb->buid),
                        state);
-       if (rc)
-               printk (KERN_WARNING "EEH: Unable to reset the failed slot,"
-                       " (%d) #RST=%d dn=%s\n",
-                       rc, state, pdn->node->full_name);
+
+       /* Fundamental-reset not supported on this PE, try hot-reset */
+       if (rc == -8 && state == 3) {
+               rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
+                              config_addr,
+                              BUID_HI(pdn->phb->buid),
+                              BUID_LO(pdn->phb->buid), 1);
+               if (rc)
+                       printk(KERN_WARNING
+                               "EEH: Unable to reset the failed slot,"
+                               " #RST=%d dn=%s\n",
+                               rc, pdn->node->full_name);
+       }
+}
+
+/**
+ * pcibios_set_pcie_reset_state - Set PCI-E reset state
+ * @dev:       pci device struct
+ * @state:     reset state to enter
+ *
+ * Translates @state into the numeric argument of rtas_pci_slot_reset():
+ * pcie_deassert_reset -> 0, pcie_hot_reset -> 1, pcie_warm_reset -> 3.
+ *
+ * Return value:
+ *     0 for any of the states above (the outcome of the reset request
+ *     itself is not reported back), -EINVAL for every other state
+ **/
+int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
+{
+       struct device_node *dn = pci_device_to_OF_node(dev);
+       struct pci_dn *pdn = PCI_DN(dn);
+
+       switch (state) {
+       case pcie_deassert_reset:
+               rtas_pci_slot_reset(pdn, 0);
+               break;
+       case pcie_hot_reset:
+               rtas_pci_slot_reset(pdn, 1);
+               break;
+       case pcie_warm_reset:
+               rtas_pci_slot_reset(pdn, 3);
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
+}
  
  /**
   * rtas_set_slot_reset -- assert the pci #RST line for 1/4 second
   * @pdn: pci device node to be reset.
- *
- *  Return 0 if success, else a non-zero value.
   */
  
  static void __rtas_set_slot_reset(struct pci_dn *pdn)
  {
-       rtas_pci_slot_reset (pdn, 1);
+       unsigned int freset = 0;
+
+       /* Determine type of EEH reset required for
+        * Partitionable Endpoint, a hot-reset (1)
+        * or a fundamental reset (3).
+        * A fundamental reset required by any device under
+        * Partitionable Endpoint trumps hot-reset.
+        */
+       eeh_set_pe_freset(pdn->node, &freset);
+
+       if (freset)
+               rtas_pci_slot_reset(pdn, 3);
+       else
+               rtas_pci_slot_reset(pdn, 1);
  
         /* The PCI bus requires that the reset be held high for at least
          * a 100 milliseconds. We wait a bit longer 'just in case'.  */
@@ -623,12 +835,12 @@ int rtas_set_slot_reset(struct pci_dn *pdn)
                         return 0;
  
                 if (rc < 0) {
-                       printk (KERN_ERR "EEH: unrecoverable slot failure %s\n",
-                               pdn->node->full_name);
+                       printk(KERN_ERR "EEH: unrecoverable slot failure %s\n",
+                              pdn->node->full_name);
                         return -1;
                 }
-               printk (KERN_ERR "EEH: bus reset %d failed on slot %s\n",
-                       i+1, pdn->node->full_name);
+               printk(KERN_ERR "EEH: bus reset %d failed on slot %s, rc=%d\n",
+                      i+1, pdn->node->full_name, rc);
         }
  
         return -1;
@@ -654,6 +866,7 @@ int rtas_set_slot_reset(struct pci_dn *pdn)
  static inline void __restore_bars (struct pci_dn *pdn)
  {
         int i;
+       u32 cmd;
  
         if (NULL==pdn->phb) return;
         for (i=4; i<10; i++) {
@@ -674,6 +887,19 @@ static inline void __restore_bars (struct pci_dn *pdn)
  
         /* max latency, min grant, interrupt pin and line */
         rtas_write_config(pdn, 15*4, 4, pdn->config_space[15]);
+
+       /* Restore PERR & SERR bits, some devices require it,
+          don't touch the other command bits */
+       rtas_read_config(pdn, PCI_COMMAND, 4, &cmd);
+       if (pdn->config_space[1] & PCI_COMMAND_PARITY)
+               cmd |= PCI_COMMAND_PARITY;
+       else
+               cmd &= ~PCI_COMMAND_PARITY;
+       if (pdn->config_space[1] & PCI_COMMAND_SERR)
+               cmd |= PCI_COMMAND_SERR;
+       else
+               cmd &= ~PCI_COMMAND_SERR;
+       rtas_write_config(pdn, PCI_COMMAND, 4, cmd);
  }
  
  /**
@@ -691,11 +917,8 @@ void eeh_restore_bars(struct pci_dn *pdn)
         if ((pdn->eeh_mode & EEH_MODE_SUPPORTED) && !IS_BRIDGE(pdn->class_code))
                 __restore_bars (pdn);
  
-       dn = pdn->node->child;
-       while (dn) {
+       for_each_child_of_node(pdn->node, dn)
                 eeh_restore_bars (PCI_DN(dn));
-               dn = dn->sibling;
-       }
  }
  
  /**
@@ -703,7 +926,7 @@ void eeh_restore_bars(struct pci_dn *pdn)
   *
   * Save the values of the device bars. Unlike the restore
   * routine, this routine is *not* recursive. This is because
- * PCI devices are added individuallly; but, for the restore,
+ * PCI devices are added individually; but, for the restore,
   * an entire slot is reset at a time.
   */
  static void eeh_save_bars(struct pci_dn *pdn)
@@ -722,13 +945,20 @@ rtas_configure_bridge(struct pci_dn *pdn)
  {
         int config_addr;
         int rc;
+       int token;
  
         /* Use PE configuration address, if present */
         config_addr = pdn->eeh_config_addr;
         if (pdn->eeh_pe_config_addr)
                 config_addr = pdn->eeh_pe_config_addr;
  
-       rc = rtas_call(ibm_configure_bridge,3,1, NULL,
+       /* Use new configure-pe function, if supported */
+       if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE)
+               token = ibm_configure_pe;
+       else
+               token = ibm_configure_bridge;
+
+       rc = rtas_call(token, 3, 1, NULL,
                        config_addr,
                        BUID_HI(pdn->phb->buid),
                        BUID_LO(pdn->phb->buid));
@@ -789,10 +1019,9 @@ static void *early_enable_eeh(struct device_node *dn, void *data)
         unsigned int rets[3];
         struct eeh_early_enable_info *info = data;
         int ret;
-       const char *status = get_property(dn, "status", NULL);
-       const u32 *class_code = get_property(dn, "class-code", NULL);
-       const u32 *vendor_id = get_property(dn, "vendor-id", NULL);
-       const u32 *device_id = get_property(dn, "device-id", NULL);
+       const u32 *class_code = of_get_property(dn, "class-code", NULL);
+       const u32 *vendor_id = of_get_property(dn, "vendor-id", NULL);
+       const u32 *device_id = of_get_property(dn, "device-id", NULL);
         const u32 *regs;
         int enable;
         struct pci_dn *pdn = PCI_DN(dn);
@@ -801,9 +1030,10 @@ static void *early_enable_eeh(struct device_node *dn, void *data)
         pdn->eeh_mode = 0;
         pdn->eeh_check_count = 0;
         pdn->eeh_freeze_count = 0;
+       pdn->eeh_false_positives = 0;
  
-       if (status && strcmp(status, "ok") != 0)
-               return NULL;    /* ignore devices with bad status */
+       if (!of_device_is_available(dn))
+               return NULL;
  
         /* Ignore bad nodes. */
         if (!class_code || !vendor_id || !device_id)
@@ -816,26 +1046,9 @@ static void *early_enable_eeh(struct device_node *dn, void *data)
         }
         pdn->class_code = *class_code;
  
-       /*
-        * Now decide if we are going to "Disable" EEH checking
-        * for this device.  We still run with the EEH hardware active,
-        * but we won't be checking for ff's.  This means a driver
-        * could return bad data (very bad!), an interrupt handler could
-        * hang waiting on status bits that won't change, etc.
-        * But there are a few cases like display devices that make sense.
-        */
-       enable = 1;     /* i.e. we will do checking */
-#if 0
-       if ((*class_code >> 16) == PCI_BASE_CLASS_DISPLAY)
-               enable = 0;
-#endif
-
-       if (!enable)
-               pdn->eeh_mode |= EEH_MODE_NOCHECK;
-
         /* Ok... see if this device supports EEH.  Some do, some don't,
          * and the only way to find out is to check each and every one. */
-       regs = get_property(dn, "reg", NULL);
+       regs = of_get_property(dn, "reg", NULL);
         if (regs) {
                 /* First register entry is addr (00BBSS00)  */
                 /* Try to enable eeh */
@@ -864,10 +1077,9 @@ static void *early_enable_eeh(struct device_node *dn, void *data)
                         eeh_subsystem_enabled = 1;
                         pdn->eeh_mode |= EEH_MODE_SUPPORTED;
  
-#ifdef DEBUG
-                       printk(KERN_DEBUG "EEH: %s: eeh enabled, config=%x pe_config=%x\n",
-                              dn->full_name, pdn->eeh_config_addr, pdn->eeh_pe_config_addr);
-#endif
+                       pr_debug("EEH: %s: eeh enabled, config=%x pe_config=%x\n",
+                                dn->full_name, pdn->eeh_config_addr,
+                                pdn->eeh_pe_config_addr);
                 } else {
  
                         /* This device doesn't support EEH, but it may have an
@@ -907,7 +1119,7 @@ void __init eeh_init(void)
         struct device_node *phb, *np;
         struct eeh_early_enable_info info;
  
-       spin_lock_init(&confirm_error_lock);
+       raw_spin_lock_init(&confirm_error_lock);
         spin_lock_init(&slot_errbuf_lock);
  
         np = of_find_node_by_path("/rtas");
@@ -922,6 +1134,7 @@ void __init eeh_init(void)
         ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info");
         ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2");
         ibm_configure_bridge = rtas_token ("ibm,configure-bridge");
+       ibm_configure_pe = rtas_token("ibm,configure-pe");
  
         if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE)
                 return;
@@ -989,7 +1202,8 @@ static void eeh_add_device_early(struct device_node *dn)
  void eeh_add_device_tree_early(struct device_node *dn)
  {
         struct device_node *sib;
-       for (sib = dn->child; sib; sib = sib->sibling)
+
+       for_each_child_of_node(dn, sib)
                 eeh_add_device_tree_early(sib);
         eeh_add_device_early(dn);
  }
@@ -1010,16 +1224,21 @@ static void eeh_add_device_late(struct pci_dev *dev)
         if (!dev || !eeh_subsystem_enabled)
                 return;
  
-#ifdef DEBUG
-       printk(KERN_DEBUG "EEH: adding device %s\n", pci_name(dev));
-#endif
+       pr_debug("EEH: Adding device %s\n", pci_name(dev));
  
-       pci_dev_get (dev);
         dn = pci_device_to_OF_node(dev);
         pdn = PCI_DN(dn);
+       if (pdn->pcidev == dev) {
+               pr_debug("EEH: Already referenced !\n");
+               return;
+       }
+       WARN_ON(pdn->pcidev);
+
+       pci_dev_get (dev);
         pdn->pcidev = dev;
  
-       pci_addr_cache_insert_device (dev);
+       pci_addr_cache_insert_device(dev);
+       eeh_sysfs_add_device(dev);
  }
  
  void eeh_add_device_tree_late(struct pci_bus *bus)
@@ -1054,16 +1273,18 @@ static void eeh_remove_device(struct pci_dev *dev)
                 return;
  
         /* Unregister the device with the EEH/PCI address search system */
-#ifdef DEBUG
-       printk(KERN_DEBUG "EEH: remove device %s\n", pci_name(dev));
-#endif
-       pci_addr_cache_remove_device(dev);
+       pr_debug("EEH: Removing device %s\n", pci_name(dev));
  
         dn = pci_device_to_OF_node(dev);
-       if (PCI_DN(dn)->pcidev) {
-               PCI_DN(dn)->pcidev = NULL;
-               pci_dev_put (dev);
+       if (PCI_DN(dn)->pcidev == NULL) {
+               pr_debug("EEH: Not referenced !\n");
+               return;
         }
+       PCI_DN(dn)->pcidev = NULL;
+       pci_dev_put (dev);
+
+       pci_addr_cache_remove_device(dev);
+       eeh_sysfs_remove_device(dev);
  }
  
  void eeh_remove_bus_device(struct pci_dev *dev)
@@ -1094,11 +1315,10 @@ static int proc_eeh_show(struct seq_file *m, void *v)
                                 "check not wanted=%ld\n"
                                 "eeh_total_mmio_ffs=%ld\n"
                                 "eeh_false_positives=%ld\n"
-                               "eeh_ignored_failures=%ld\n"
                                 "eeh_slot_resets=%ld\n",
                                 no_device, no_dn, no_cfg_addr, 
                                 ignored_check, total_mmio_ffs, 
-                               false_positives, ignored_failures, 
+                               false_positives,
                                 slot_resets);
         }
  
@@ -1119,14 +1339,8 @@ static const struct file_operations proc_eeh_operations = {
  
  static int __init eeh_init_proc(void)
  {
-       struct proc_dir_entry *e;
-
-       if (machine_is(pseries)) {
-               e = create_proc_entry("ppc64/eeh", 0, NULL);
-               if (e)
-                       e->proc_fops = &proc_eeh_operations;
-       }
-
+       if (machine_is(pseries))
+               proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations);
         return 0;
  }
  __initcall(eeh_init_proc);