powerpc/powernv: Disable interrupts while taking phb->lock
[linux-2.6.git] / arch / powerpc / platforms / powernv / pci.c
index 85bb66d..f92b9ef 100644 (file)
@@ -52,32 +52,38 @@ static int pnv_msi_check_device(struct pci_dev* pdev, int nvec, int type)
 
+/*
+ * Allocate one MSI from the PHB's allocation bitmap.
+ *
+ * Looks for a clear bit in phb->msi_map starting at phb->msi_next and,
+ * when that finds nothing and msi_next is non-zero, searches again from
+ * bit 0. The bit found is set and its index plus phb->msi_base is
+ * returned. Returns 0 when no bit below phb->msi_count is clear.
+ *
+ * phb->lock is taken with spin_lock_irqsave() for the whole search and
+ * update; both outcomes leave through "out" so the lock is dropped and
+ * the saved interrupt state restored in exactly one place.
+ */
 static unsigned int pnv_get_one_msi(struct pnv_phb *phb)
 {
-       unsigned int id;
+       unsigned long flags;
+       unsigned int id, rc;
+
+       spin_lock_irqsave(&phb->lock, flags);
 
-       spin_lock(&phb->lock);
        id = find_next_zero_bit(phb->msi_map, phb->msi_count, phb->msi_next);
+       /* Nothing clear from msi_next onwards: retry from the start of the map */
        if (id >= phb->msi_count && phb->msi_next)
                id = find_next_zero_bit(phb->msi_map, phb->msi_count, 0);
        if (id >= phb->msi_count) {
-               spin_unlock(&phb->lock);
-               return 0;
+               /* Still nothing: report failure as 0 */
+               rc = 0;
+               goto out;
        }
        __set_bit(id, phb->msi_map);
-       spin_unlock(&phb->lock);
-       return id + phb->msi_base;
+       rc = id + phb->msi_base;
+out:
+       spin_unlock_irqrestore(&phb->lock, flags);
+       return rc;
 }
 
+/*
+ * Release an MSI previously handed out from the PHB's bitmap.
+ *
+ * hwirq must lie in [phb->msi_base, phb->msi_base + phb->msi_count);
+ * anything else triggers a WARN_ON and is ignored. Otherwise the
+ * matching bit in phb->msi_map is cleared while holding phb->lock with
+ * the interrupt state saved and restored around it.
+ */
 static void pnv_put_msi(struct pnv_phb *phb, unsigned int hwirq)
 {
+       unsigned long flags;
        unsigned int id;
 
        if (WARN_ON(hwirq < phb->msi_base ||
                    hwirq >= (phb->msi_base + phb->msi_count)))
                return;
+       /* Convert the hardware IRQ number back into a bitmap index */
        id = hwirq - phb->msi_base;
-       spin_lock(&phb->lock);
+
+       spin_lock_irqsave(&phb->lock, flags);
        __clear_bit(id, phb->msi_map);
-       spin_unlock(&phb->lock);
+       spin_unlock_irqrestore(&phb->lock, flags);
 }
 
 static int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
@@ -144,6 +150,112 @@ static void pnv_teardown_msi_irqs(struct pci_dev *pdev)
 }
 #endif /* CONFIG_PCI_MSI */
 
+/*
+ * Print the P7IOC diagnostic data held in phb->diag.p7ioc.
+ *
+ * Every field is emitted with pr_info(), one per line, after a heading
+ * naming the PHB by its hose global_number. PEST entries are printed
+ * only when bit 63 of pestA or of pestB is set for that PE.
+ */
+static void pnv_pci_dump_p7ioc_diag_data(struct pnv_phb *phb)
+{
+       struct OpalIoP7IOCPhbErrorData *diag = &phb->diag.p7ioc;
+       int pe;
+
+       pr_info("PHB %d diagnostic data:\n", phb->hose->global_number);
+
+       /* Fields printed as 32-bit values */
+       pr_info("  brdgCtl              = 0x%08x\n", diag->brdgCtl);
+
+       pr_info("  portStatusReg        = 0x%08x\n", diag->portStatusReg);
+       pr_info("  rootCmplxStatus      = 0x%08x\n", diag->rootCmplxStatus);
+       pr_info("  busAgentStatus       = 0x%08x\n", diag->busAgentStatus);
+
+       pr_info("  deviceStatus         = 0x%08x\n", diag->deviceStatus);
+       pr_info("  slotStatus           = 0x%08x\n", diag->slotStatus);
+       pr_info("  linkStatus           = 0x%08x\n", diag->linkStatus);
+       pr_info("  devCmdStatus         = 0x%08x\n", diag->devCmdStatus);
+       pr_info("  devSecStatus         = 0x%08x\n", diag->devSecStatus);
+
+       pr_info("  rootErrorStatus      = 0x%08x\n", diag->rootErrorStatus);
+       pr_info("  uncorrErrorStatus    = 0x%08x\n", diag->uncorrErrorStatus);
+       pr_info("  corrErrorStatus      = 0x%08x\n", diag->corrErrorStatus);
+       pr_info("  tlpHdr1              = 0x%08x\n", diag->tlpHdr1);
+       pr_info("  tlpHdr2              = 0x%08x\n", diag->tlpHdr2);
+       pr_info("  tlpHdr3              = 0x%08x\n", diag->tlpHdr3);
+       pr_info("  tlpHdr4              = 0x%08x\n", diag->tlpHdr4);
+       pr_info("  sourceId             = 0x%08x\n", diag->sourceId);
+
+       /* Fields printed as 64-bit values */
+       pr_info("  errorClass           = 0x%016llx\n", diag->errorClass);
+       pr_info("  correlator           = 0x%016llx\n", diag->correlator);
+
+       pr_info("  p7iocPlssr           = 0x%016llx\n", diag->p7iocPlssr);
+       pr_info("  p7iocCsr             = 0x%016llx\n", diag->p7iocCsr);
+       pr_info("  lemFir               = 0x%016llx\n", diag->lemFir);
+       pr_info("  lemErrorMask         = 0x%016llx\n", diag->lemErrorMask);
+       pr_info("  lemWOF               = 0x%016llx\n", diag->lemWOF);
+       pr_info("  phbErrorStatus       = 0x%016llx\n", diag->phbErrorStatus);
+       pr_info("  phbFirstErrorStatus  = 0x%016llx\n", diag->phbFirstErrorStatus);
+       pr_info("  phbErrorLog0         = 0x%016llx\n", diag->phbErrorLog0);
+       pr_info("  phbErrorLog1         = 0x%016llx\n", diag->phbErrorLog1);
+       pr_info("  mmioErrorStatus      = 0x%016llx\n", diag->mmioErrorStatus);
+       pr_info("  mmioFirstErrorStatus = 0x%016llx\n", diag->mmioFirstErrorStatus);
+       pr_info("  mmioErrorLog0        = 0x%016llx\n", diag->mmioErrorLog0);
+       pr_info("  mmioErrorLog1        = 0x%016llx\n", diag->mmioErrorLog1);
+       pr_info("  dma0ErrorStatus      = 0x%016llx\n", diag->dma0ErrorStatus);
+       pr_info("  dma0FirstErrorStatus = 0x%016llx\n", diag->dma0FirstErrorStatus);
+       pr_info("  dma0ErrorLog0        = 0x%016llx\n", diag->dma0ErrorLog0);
+       pr_info("  dma0ErrorLog1        = 0x%016llx\n", diag->dma0ErrorLog1);
+       pr_info("  dma1ErrorStatus      = 0x%016llx\n", diag->dma1ErrorStatus);
+       pr_info("  dma1FirstErrorStatus = 0x%016llx\n", diag->dma1FirstErrorStatus);
+       pr_info("  dma1ErrorLog0        = 0x%016llx\n", diag->dma1ErrorLog0);
+       pr_info("  dma1ErrorLog1        = 0x%016llx\n", diag->dma1ErrorLog1);
+
+       /* Only report PEs whose PESTA or PESTB entry has its top bit set */
+       for (pe = 0; pe < OPAL_P7IOC_NUM_PEST_REGS; pe++) {
+               if ((diag->pestA[pe] >> 63) != 0 ||
+                   (diag->pestB[pe] >> 63) != 0) {
+                       pr_info("  PE[%3d] PESTA        = 0x%016llx\n", pe, diag->pestA[pe]);
+                       pr_info("          PESTB        = 0x%016llx\n", diag->pestB[pe]);
+               }
+       }
+}
+
+/*
+ * Print the diagnostic data of a PHB in the layout matching its model.
+ *
+ * Only PNV_PHB_MODEL_P7IOC has a decoder; for any other phb->model a
+ * warning is logged and nothing is dumped.
+ */
+static void pnv_pci_dump_phb_diag_data(struct pnv_phb *phb)
+{
+       if (phb->model == PNV_PHB_MODEL_P7IOC) {
+               pnv_pci_dump_p7ioc_diag_data(phb);
+               return;
+       }
+
+       pr_warning("PCI %d: Can't decode this PHB diag data\n",
+                  phb->hose->global_number);
+}
+
+/*
+ * Deal with a frozen PE found while checking a config space access.
+ *
+ * With phb->lock held and the interrupt state saved, this first asks
+ * OPAL to fill phb->diag.blob with the PHB diagnostic data, then asks
+ * it to clear the freeze state of the given PE. The diagnostic data is
+ * only printed when clearing the freeze fails.
+ *
+ * @phb:   PHB the frozen PE belongs to
+ * @pe_no: number of the PE whose freeze state is cleared
+ */
+static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no)
+{
+       unsigned long flags;
+       int has_diag;
+       /*
+        * Kept signed so a negative firmware status is stored and printed
+        * as such. NOTE(review): assumes both OPAL calls return a signed
+        * 64-bit status - confirm against their prototypes.
+        */
+       s64 rc;
+
+       spin_lock_irqsave(&phb->lock, flags);
+
+       /* Fetch the diag data before the freeze state gets cleared */
+       rc = opal_pci_get_phb_diag_data(phb->opal_id, phb->diag.blob,
+                                       PNV_PCI_DIAG_BUF_SIZE);
+       has_diag = (rc == OPAL_SUCCESS);
+
+       rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
+                                      OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+       if (rc) {
+               /* Message kept on one line so it can be grepped for */
+               pr_warning("PCI %d: Failed to clear EEH freeze state for PE#%d, err %lld\n",
+                          phb->hose->global_number, pe_no, rc);
+
+               /* For now, let's only display the diag buffer when we fail to clear
+                * the EEH status. We'll do more sensible things later when we have
+                * proper EEH support. We need to make sure we don't pollute ourselves
+                * with the normal errors generated when probing empty slots
+                */
+               if (has_diag)
+                       pnv_pci_dump_phb_diag_data(phb);
+               else
+                       pr_warning("PCI %d: No diag data available\n",
+                                  phb->hose->global_number);
+       }
+
+       spin_unlock_irqrestore(&phb->lock, flags);
+}
+
 static void pnv_pci_config_check_eeh(struct pnv_phb *phb, struct pci_bus *bus,
                                     u32 bdfn)
 {
@@ -165,15 +277,8 @@ static void pnv_pci_config_check_eeh(struct pnv_phb *phb, struct pci_bus *bus,
        }
        cfg_dbg(" -> EEH check, bdfn=%04x PE%d fstate=%x\n",
                bdfn, pe_no, fstate);
-       if (fstate != 0) {
-               rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
-                                             OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
-               if (rc) {
-                       pr_warning("PCI %d: Failed to clear EEH freeze state"
-                                  " for PE#%d, err %lld\n",
-                                  phb->hose->global_number, pe_no, rc);
-               }
-       }
+       if (fstate != 0)
+               pnv_pci_handle_eeh_config(phb, pe_no);
 }
 
 static int pnv_pci_read_config(struct pci_bus *bus,
@@ -257,12 +362,54 @@ struct pci_ops pnv_pci_ops = {
        .write = pnv_pci_write_config,
 };
 
+
+/*
+ * Issue TCE invalidations for the table entries from startp to endp.
+ *
+ * The physical addresses of the two entries are turned into the value
+ * format selected by tbl->it_busno / tbl->it_type, and one 64-bit write
+ * per step is made to the register whose mapped address is stored in
+ * tbl->it_index.
+ *
+ * @tbl:    iommu table the entries belong to
+ * @startp: first TCE of the range
+ * @endp:   last TCE of the range
+ */
+static void pnv_tce_invalidate(struct iommu_table *tbl,
+                              u64 *startp, u64 *endp)
+{
+       u64 __iomem *reg = (u64 __iomem *)tbl->it_index;
+       unsigned long first = __pa(startp);
+       unsigned long last = __pa(endp);
+       unsigned long step;
+
+       if (tbl->it_busno) {
+               /* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */
+               first = (first << 12) | tbl->it_busno;
+               last = (last << 12) | tbl->it_busno;
+               step = 128 << 12;
+       } else if (tbl->it_type & TCE_PCI_SWINV_PAIR) {
+               /* p7ioc-style invalidation, 2 TCEs per write */
+               first |= (1ull << 63);
+               last |= (1ull << 63);
+               step = 16;
+       } else {
+               /* Default (older HW) */
+               step = 128;
+       }
+
+       /* Fill the low bits of last so it differs from first */
+       last |= step - 1;
+
+       /* Make earlier stores visible before the invalidation writes */
+       mb();
+       for (; first <= last; first += step)
+               __raw_writeq(first, reg);
+
+       /* The iommu layer will do another mb() for us on build() and
+        * we don't care on free()
+        */
+}
+
+
 static int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
                         unsigned long uaddr, enum dma_data_direction direction,
                         struct dma_attrs *attrs)
 {
        u64 proto_tce;
-       u64 *tcep;
+       u64 *tcep, *tces;
        u64 rpn;
 
        proto_tce = TCE_PCI_READ; // Read allowed
@@ -270,25 +417,33 @@ static int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
        if (direction != DMA_TO_DEVICE)
                proto_tce |= TCE_PCI_WRITE;
 
-       tcep = ((u64 *)tbl->it_base) + index;
+       tces = tcep = ((u64 *)tbl->it_base) + index - tbl->it_offset;
+       rpn = __pa(uaddr) >> TCE_SHIFT;
 
-       while (npages--) {
-               /* can't move this out since we might cross LMB boundary */
-               rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT;
-               *tcep = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT;
+       while (npages--)
+               *(tcep++) = proto_tce | (rpn++ << TCE_RPN_SHIFT);
+
+       /* Some implementations won't cache invalid TCEs and thus may not
+        * need that flush. We'll probably turn it_type into a bit mask
+        * of flags if that becomes the case
+        */
+       if (tbl->it_type & TCE_PCI_SWINV_CREATE)
+               pnv_tce_invalidate(tbl, tces, tcep - 1);
 
-               uaddr += TCE_PAGE_SIZE;
-               tcep++;
-       }
        return 0;
 }
 
+/*
+ * Clear npages TCEs of an iommu table, starting at entry index.
+ *
+ * The first entry is located at tbl->it_base + index - tbl->it_offset.
+ * Each entry is set to 0; when tbl->it_type has TCE_PCI_SWINV_FREE set,
+ * the cleared range (first to last entry) is then passed to
+ * pnv_tce_invalidate().
+ *
+ * NOTE(review): with npages == 0 the "last entry" handed to the
+ * invalidate call would precede the first one - presumably callers
+ * never pass 0; confirm.
+ */
 static void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
 {
-       u64 *tcep = ((u64 *)tbl->it_base) + index;
+       u64 *tcep, *tces;
+
+       /* tces remembers the start of the range, tcep walks over it */
+       tces = tcep = ((u64 *)tbl->it_base) + index - tbl->it_offset;
 
        while (npages--)
                *(tcep++) = 0;
+
+       /* tcep is one past the last cleared entry here, hence the - 1 */
+       if (tbl->it_type & TCE_PCI_SWINV_FREE)
+               pnv_tce_invalidate(tbl, tces, tcep - 1);
 }
 
 void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
@@ -308,13 +463,14 @@ static struct iommu_table * __devinit
 pnv_pci_setup_bml_iommu(struct pci_controller *hose)
 {
        struct iommu_table *tbl;
-       const __be64 *basep;
+       const __be64 *basep, *swinvp;
        const __be32 *sizep;
 
        basep = of_get_property(hose->dn, "linux,tce-base", NULL);
        sizep = of_get_property(hose->dn, "linux,tce-size", NULL);
        if (basep == NULL || sizep == NULL) {
-               pr_err("PCI: %s has missing tce entries !\n", hose->dn->full_name);
+               pr_err("PCI: %s has missing tce entries !\n",
+                      hose->dn->full_name);
                return NULL;
        }
        tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, hose->node);
@@ -323,6 +479,15 @@ pnv_pci_setup_bml_iommu(struct pci_controller *hose)
        pnv_pci_setup_iommu_table(tbl, __va(be64_to_cpup(basep)),
                                  be32_to_cpup(sizep), 0);
        iommu_init_table(tbl, hose->node);
+
+       /* Deal with SW invalidated TCEs when needed (BML way) */
+       swinvp = of_get_property(hose->dn, "linux,tce-sw-invalidate-info",
+                                NULL);
+       if (swinvp) {
+               tbl->it_busno = swinvp[1];
+               tbl->it_index = (unsigned long)ioremap(swinvp[0], 8);
+               tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE;
+       }
        return tbl;
 }
 
@@ -356,6 +521,13 @@ static void __devinit pnv_pci_dma_dev_setup(struct pci_dev *pdev)
                pnv_pci_dma_fallback_setup(hose, pdev);
 }
 
+/*
+ * Fixup wrong class code in p7ioc root complex.
+ *
+ * Overwrites dev->class with PCI_CLASS_BRIDGE_PCI shifted left by 8,
+ * which leaves the low byte of dev->class zero. Registered below as an
+ * early PCI fixup for vendor PCI_VENDOR_ID_IBM, device 0x3b9.
+ */
+static void __devinit pnv_p7ioc_rc_quirk(struct pci_dev *dev)
+{
+       dev->class = PCI_CLASS_BRIDGE_PCI << 8;
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IBM, 0x3b9, pnv_p7ioc_rc_quirk);
+
 static int pnv_pci_probe_mode(struct pci_bus *bus)
 {
        struct pci_controller *hose = pci_bus_to_host(bus);
@@ -400,12 +572,24 @@ void __init pnv_pci_init(void)
                init_pci_config_tokens();
                find_and_init_phbs();
 #endif /* CONFIG_PPC_POWERNV_RTAS */
-       } else {
-               /* OPAL is here, do our normal stuff */
+       }
+       /* OPAL is here, do our normal stuff */
+       else {
+               int found_ioda = 0;
+
+               /* Look for IODA IO-Hubs. We don't support mixing IODA
+                * and p5ioc2 due to the need to change some global
+                * probing flags
+                */
+               for_each_compatible_node(np, NULL, "ibm,ioda-hub") {
+                       pnv_pci_init_ioda_hub(np);
+                       found_ioda = 1;
+               }
 
                /* Look for p5ioc2 IO-Hubs */
-               for_each_compatible_node(np, NULL, "ibm,p5ioc2")
-                       pnv_pci_init_p5ioc2_hub(np);
+               if (!found_ioda)
+                       for_each_compatible_node(np, NULL, "ibm,p5ioc2")
+                               pnv_pci_init_p5ioc2_hub(np);
        }
 
        /* Setup the linkage between OF nodes and PHBs */