ehea: error handling improvement
Thomas Klein [Tue, 20 Apr 2010 23:10:55 +0000 (23:10 +0000)]
Reset a port's resources only if they're actually in an error state

Signed-off-by: Thomas Klein <tklein@de.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

drivers/net/ehea/ehea_main.c
drivers/net/ehea/ehea_qmr.c
drivers/net/ehea/ehea_qmr.h

index 3f445ef..c35d1e3 100644 (file)
@@ -791,11 +791,17 @@ static struct ehea_cqe *ehea_proc_cqes(struct ehea_port_res *pr, int my_quota)
                cqe_counter++;
                rmb();
                if (cqe->status & EHEA_CQE_STAT_ERR_MASK) {
-                       ehea_error("Send Completion Error: Resetting port");
+                       ehea_error("Bad send completion status=0x%04X",
+                                  cqe->status);
+
                        if (netif_msg_tx_err(pr->port))
                                ehea_dump(cqe, sizeof(*cqe), "Send CQE");
-                       ehea_schedule_port_reset(pr->port);
-                       break;
+
+                       if (cqe->status & EHEA_CQE_STAT_RESET_MASK) {
+                               ehea_error("Resetting port");
+                               ehea_schedule_port_reset(pr->port);
+                               break;
+                       }
                }
 
                if (netif_msg_tx_done(pr->port))
@@ -901,6 +907,8 @@ static irqreturn_t ehea_qp_aff_irq_handler(int irq, void *param)
        struct ehea_eqe *eqe;
        struct ehea_qp *qp;
        u32 qp_token;
+       u64 resource_type, aer, aerr;
+       int reset_port = 0;
 
        eqe = ehea_poll_eq(port->qp_eq);
 
@@ -910,11 +918,24 @@ static irqreturn_t ehea_qp_aff_irq_handler(int irq, void *param)
                           eqe->entry, qp_token);
 
                qp = port->port_res[qp_token].qp;
-               ehea_error_data(port->adapter, qp->fw_handle);
+
+               resource_type = ehea_error_data(port->adapter, qp->fw_handle,
+                                               &aer, &aerr);
+
+               if (resource_type == EHEA_AER_RESTYPE_QP) {
+                       if ((aer & EHEA_AER_RESET_MASK) ||
+                           (aerr & EHEA_AERR_RESET_MASK))
+                                reset_port = 1;
+               } else
+                       reset_port = 1;   /* CQ/EQ error or no error data */
+
                eqe = ehea_poll_eq(port->qp_eq);
        }
 
-       ehea_schedule_port_reset(port);
+       if (reset_port) {
+               ehea_error("Resetting port");
+               ehea_schedule_port_reset(port);
+       }
 
        return IRQ_HANDLED;
 }
index a1b4c7e..89128b6 100644 (file)
@@ -229,14 +229,14 @@ u64 ehea_destroy_cq_res(struct ehea_cq *cq, u64 force)
 
 int ehea_destroy_cq(struct ehea_cq *cq)
 {
-       u64 hret;
+       u64 hret, aer, aerr;
        if (!cq)
                return 0;
 
        hcp_epas_dtor(&cq->epas);
        hret = ehea_destroy_cq_res(cq, NORMAL_FREE);
        if (hret == H_R_STATE) {
-               ehea_error_data(cq->adapter, cq->fw_handle);
+               ehea_error_data(cq->adapter, cq->fw_handle, &aer, &aerr);
                hret = ehea_destroy_cq_res(cq, FORCE_FREE);
        }
 
@@ -357,7 +357,7 @@ u64 ehea_destroy_eq_res(struct ehea_eq *eq, u64 force)
 
 int ehea_destroy_eq(struct ehea_eq *eq)
 {
-       u64 hret;
+       u64 hret, aer, aerr;
        if (!eq)
                return 0;
 
@@ -365,7 +365,7 @@ int ehea_destroy_eq(struct ehea_eq *eq)
 
        hret = ehea_destroy_eq_res(eq, NORMAL_FREE);
        if (hret == H_R_STATE) {
-               ehea_error_data(eq->adapter, eq->fw_handle);
+               ehea_error_data(eq->adapter, eq->fw_handle, &aer, &aerr);
                hret = ehea_destroy_eq_res(eq, FORCE_FREE);
        }
 
@@ -540,7 +540,7 @@ u64 ehea_destroy_qp_res(struct ehea_qp *qp, u64 force)
 
 int ehea_destroy_qp(struct ehea_qp *qp)
 {
-       u64 hret;
+       u64 hret, aer, aerr;
        if (!qp)
                return 0;
 
@@ -548,7 +548,7 @@ int ehea_destroy_qp(struct ehea_qp *qp)
 
        hret = ehea_destroy_qp_res(qp, NORMAL_FREE);
        if (hret == H_R_STATE) {
-               ehea_error_data(qp->adapter, qp->fw_handle);
+               ehea_error_data(qp->adapter, qp->fw_handle, &aer, &aerr);
                hret = ehea_destroy_qp_res(qp, FORCE_FREE);
        }
 
@@ -986,42 +986,45 @@ void print_error_data(u64 *data)
        if (length > EHEA_PAGESIZE)
                length = EHEA_PAGESIZE;
 
-       if (type == 0x8) /* Queue Pair */
+       if (type == EHEA_AER_RESTYPE_QP)
                ehea_error("QP (resource=%llX) state: AER=0x%llX, AERR=0x%llX, "
                           "port=%llX", resource, data[6], data[12], data[22]);
-
-       if (type == 0x4) /* Completion Queue */
+       else if (type == EHEA_AER_RESTYPE_CQ)
                ehea_error("CQ (resource=%llX) state: AER=0x%llX", resource,
                           data[6]);
-
-       if (type == 0x3) /* Event Queue */
+       else if (type == EHEA_AER_RESTYPE_EQ)
                ehea_error("EQ (resource=%llX) state: AER=0x%llX", resource,
                           data[6]);
 
        ehea_dump(data, length, "error data");
 }
 
-void ehea_error_data(struct ehea_adapter *adapter, u64 res_handle)
+u64 ehea_error_data(struct ehea_adapter *adapter, u64 res_handle,
+                   u64 *aer, u64 *aerr)
 {
        unsigned long ret;
        u64 *rblock;
+       u64 type = 0;
 
        rblock = (void *)get_zeroed_page(GFP_KERNEL);
        if (!rblock) {
                ehea_error("Cannot allocate rblock memory.");
-               return;
+               goto out;
        }
 
-       ret = ehea_h_error_data(adapter->handle,
-                               res_handle,
-                               rblock);
+       ret = ehea_h_error_data(adapter->handle, res_handle, rblock);
 
-       if (ret == H_R_STATE)
-               ehea_error("No error data is available: %llX.", res_handle);
-       else if (ret == H_SUCCESS)
+       if (ret == H_SUCCESS) {
+               type = EHEA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]);
+               *aer = rblock[6];
+               *aerr = rblock[12];
                print_error_data(rblock);
-       else
+       } else if (ret == H_R_STATE) {
+               ehea_error("No error data available: %llX.", res_handle);
+       } else
                ehea_error("Error data could not be fetched: %llX", res_handle);
 
        free_page((unsigned long)rblock);
+out:
+       return type;
 }
index 0817c1e..882c50c 100644 (file)
@@ -154,6 +154,9 @@ struct ehea_rwqe {
 #define EHEA_CQE_STAT_ERR_IP       0x2000
 #define EHEA_CQE_STAT_ERR_CRC      0x1000
 
+/* Defines which bad send CQE statuses lead to a port reset */
+#define EHEA_CQE_STAT_RESET_MASK   0x0002
+
 struct ehea_cqe {
        u64 wr_id;              /* work request ID from WQE */
        u8 type;
@@ -187,6 +190,14 @@ struct ehea_cqe {
 #define EHEA_EQE_SM_MECH_NUMBER  EHEA_BMASK_IBM(48, 55)
 #define EHEA_EQE_SM_PORT_NUMBER  EHEA_BMASK_IBM(56, 63)
 
+#define EHEA_AER_RESTYPE_QP  0x8
+#define EHEA_AER_RESTYPE_CQ  0x4
+#define EHEA_AER_RESTYPE_EQ  0x3
+
+/* Defines which affiliated errors lead to a port reset */
+#define EHEA_AER_RESET_MASK   0xFFFFFFFFFEFFFFFFULL
+#define EHEA_AERR_RESET_MASK  0xFFFFFFFFFFFFFFFFULL
+
 struct ehea_eqe {
        u64 entry;
 };
@@ -379,7 +390,8 @@ int ehea_gen_smr(struct ehea_adapter *adapter, struct ehea_mr *old_mr,
 
 int ehea_rem_mr(struct ehea_mr *mr);
 
-void ehea_error_data(struct ehea_adapter *adapter, u64 res_handle);
+u64 ehea_error_data(struct ehea_adapter *adapter, u64 res_handle,
+                   u64 *aer, u64 *aerr);
 
 int ehea_add_sect_bmap(unsigned long pfn, unsigned long nr_pages);
 int ehea_rem_sect_bmap(unsigned long pfn, unsigned long nr_pages);