Merge git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi-misc-2.6
[linux-2.6.git] / drivers / scsi / bnx2i / bnx2i_hwi.c
index 372d30c..030a96c 100644 (file)
@@ -1,6 +1,6 @@
 /* bnx2i_hwi.c: Broadcom NetXtreme II iSCSI driver.
  *
- * Copyright (c) 2006 - 2010 Broadcom Corporation
+ * Copyright (c) 2006 - 2011 Broadcom Corporation
  * Copyright (c) 2007, 2008 Red Hat, Inc.  All rights reserved.
  * Copyright (c) 2007, 2008 Mike Christie
  *
@@ -17,6 +17,8 @@
 #include <scsi/libiscsi.h>
 #include "bnx2i.h"
 
+DECLARE_PER_CPU(struct bnx2i_percpu_s, bnx2i_percpu);
+
 /**
  * bnx2i_get_cid_num - get cid from ep
  * @ep:        endpoint pointer
@@ -131,16 +133,16 @@ static void bnx2i_iscsi_license_error(struct bnx2i_hba *hba, u32 error_code)
  *     the driver. EQ event is generated CQ index is hit or at least 1 CQ is
  *     outstanding and on chip timer expires
  */
-void bnx2i_arm_cq_event_coalescing(struct bnx2i_endpoint *ep, u8 action)
+int bnx2i_arm_cq_event_coalescing(struct bnx2i_endpoint *ep, u8 action)
 {
        struct bnx2i_5771x_cq_db *cq_db;
        u16 cq_index;
-       u16 next_index;
+       u16 next_index = 0;
        u32 num_active_cmds;
 
        /* Coalesce CQ entries only on 10G devices */
        if (!test_bit(BNX2I_NX2_DEV_57710, &ep->hba->cnic_dev_type))
-               return;
+               return 0;
 
        /* Do not update CQ DB multiple times before firmware writes
         * '0xFFFF' to CQDB->SQN field. Deviation may cause spurious
@@ -150,16 +152,17 @@ void bnx2i_arm_cq_event_coalescing(struct bnx2i_endpoint *ep, u8 action)
 
        if (action != CNIC_ARM_CQE_FP)
                if (cq_db->sqn[0] && cq_db->sqn[0] != 0xFFFF)
-                       return;
+                       return 0;
 
        if (action == CNIC_ARM_CQE || action == CNIC_ARM_CQE_FP) {
-               num_active_cmds = ep->num_active_cmds;
+               num_active_cmds = atomic_read(&ep->num_active_cmds);
                if (num_active_cmds <= event_coal_min)
                        next_index = 1;
-               else
-                       next_index = event_coal_min +
-                                    ((num_active_cmds - event_coal_min) >>
-                                    ep->ec_shift);
+               else {
+                       next_index = num_active_cmds >> ep->ec_shift;
+                       if (next_index > num_active_cmds - event_coal_min)
+                               next_index = num_active_cmds - event_coal_min;
+               }
                if (!next_index)
                        next_index = 1;
                cq_index = ep->qp.cqe_exp_seq_sn + next_index - 1;
@@ -170,6 +173,7 @@ void bnx2i_arm_cq_event_coalescing(struct bnx2i_endpoint *ep, u8 action)
 
                cq_db->sqn[0] = cq_index;
        }
+       return next_index;
 }
 
 
@@ -265,7 +269,7 @@ static void bnx2i_ring_sq_dbell(struct bnx2i_conn *bnx2i_conn, int count)
        struct bnx2i_5771x_sq_rq_db *sq_db;
        struct bnx2i_endpoint *ep = bnx2i_conn->ep;
 
-       ep->num_active_cmds++;
+       atomic_inc(&ep->num_active_cmds);
        wmb();  /* flush SQ WQE memory before the doorbell is rung */
        if (test_bit(BNX2I_NX2_DEV_57710, &ep->hba->cnic_dev_type)) {
                sq_db = (struct bnx2i_5771x_sq_rq_db *) ep->qp.sq_pgtbl_virt;
@@ -430,7 +434,7 @@ int bnx2i_send_iscsi_tmf(struct bnx2i_conn *bnx2i_conn,
        default:
                tmfabort_wqe->ref_itt = RESERVED_ITT;
        }
-       memcpy(scsi_lun, tmfabort_hdr->lun, sizeof(struct scsi_lun));
+       memcpy(scsi_lun, &tmfabort_hdr->lun, sizeof(struct scsi_lun));
        tmfabort_wqe->lun[0] = be32_to_cpu(scsi_lun[0]);
        tmfabort_wqe->lun[1] = be32_to_cpu(scsi_lun[1]);
 
@@ -547,7 +551,7 @@ int bnx2i_send_iscsi_nopout(struct bnx2i_conn *bnx2i_conn,
 
        nopout_wqe->op_code = nopout_hdr->opcode;
        nopout_wqe->op_attr = ISCSI_FLAG_CMD_FINAL;
-       memcpy(nopout_wqe->lun, nopout_hdr->lun, 8);
+       memcpy(nopout_wqe->lun, &nopout_hdr->lun, 8);
 
        if (test_bit(BNX2I_NX2_DEV_57710, &ep->hba->cnic_dev_type)) {
                u32 tmp = nopout_wqe->lun[0];
@@ -1331,14 +1335,15 @@ int bnx2i_send_fw_iscsi_init_msg(struct bnx2i_hba *hba)
 
 /**
  * bnx2i_process_scsi_cmd_resp - this function handles scsi cmd completion.
- * @conn:      iscsi connection
+ * @session:   iscsi session
+ * @bnx2i_conn:        bnx2i connection
  * @cqe:       pointer to newly DMA'ed CQE entry for processing
  *
  * process SCSI CMD Response CQE & complete the request to SCSI-ML
  */
-static int bnx2i_process_scsi_cmd_resp(struct iscsi_session *session,
-                                      struct bnx2i_conn *bnx2i_conn,
-                                      struct cqe *cqe)
+int bnx2i_process_scsi_cmd_resp(struct iscsi_session *session,
+                               struct bnx2i_conn *bnx2i_conn,
+                               struct cqe *cqe)
 {
        struct iscsi_conn *conn = bnx2i_conn->cls_conn->dd_data;
        struct bnx2i_cmd_response *resp_cqe;
@@ -1348,7 +1353,7 @@ static int bnx2i_process_scsi_cmd_resp(struct iscsi_session *session,
        u32 datalen = 0;
 
        resp_cqe = (struct bnx2i_cmd_response *)cqe;
-       spin_lock(&session->lock);
+       spin_lock_bh(&session->lock);
        task = iscsi_itt_to_task(conn,
                                 resp_cqe->itt & ISCSI_CMD_RESPONSE_INDEX);
        if (!task)
@@ -1409,7 +1414,7 @@ done:
        __iscsi_complete_pdu(conn, (struct iscsi_hdr *)hdr,
                             conn->data, datalen);
 fail:
-       spin_unlock(&session->lock);
+       spin_unlock_bh(&session->lock);
        return 0;
 }
 
@@ -1711,7 +1716,7 @@ static int bnx2i_process_nopin_mesg(struct iscsi_session *session,
                hdr->flags = ISCSI_FLAG_CMD_FINAL;
                hdr->itt = task->hdr->itt;
                hdr->ttt = cpu_to_be32(nop_in->ttt);
-               memcpy(hdr->lun, nop_in->lun, 8);
+               memcpy(&hdr->lun, nop_in->lun, 8);
        }
 done:
        __iscsi_complete_pdu(conn, (struct iscsi_hdr *)hdr, NULL, 0);
@@ -1754,7 +1759,7 @@ static void bnx2i_process_async_mesg(struct iscsi_session *session,
        resp_hdr->opcode = async_cqe->op_code;
        resp_hdr->flags = 0x80;
 
-       memcpy(resp_hdr->lun, async_cqe->lun, 8);
+       memcpy(&resp_hdr->lun, async_cqe->lun, 8);
        resp_hdr->exp_cmdsn = cpu_to_be32(async_cqe->exp_cmd_sn);
        resp_hdr->max_cmdsn = cpu_to_be32(async_cqe->max_cmd_sn);
 
@@ -1836,21 +1841,136 @@ static void bnx2i_process_cmd_cleanup_resp(struct iscsi_session *session,
 }
 
 
+/**
+ * bnx2i_percpu_io_thread - thread per cpu for ios
+ *
+ * @arg:       ptr to bnx2i_percpu_info structure
+ */
+int bnx2i_percpu_io_thread(void *arg)
+{
+       struct bnx2i_percpu_s *p = arg;
+       struct bnx2i_work *work, *tmp;
+       LIST_HEAD(work_list);
+
+       set_user_nice(current, -20);
+
+       while (!kthread_should_stop()) {
+               spin_lock_bh(&p->p_work_lock);
+               while (!list_empty(&p->work_list)) {
+                       list_splice_init(&p->work_list, &work_list);
+                       spin_unlock_bh(&p->p_work_lock);
+
+                       list_for_each_entry_safe(work, tmp, &work_list, list) {
+                               list_del_init(&work->list);
+                               /* work allocated in the bh, freed here */
+                               bnx2i_process_scsi_cmd_resp(work->session,
+                                                           work->bnx2i_conn,
+                                                           &work->cqe);
+                               atomic_dec(&work->bnx2i_conn->work_cnt);
+                               kfree(work);
+                       }
+                       spin_lock_bh(&p->p_work_lock);
+               }
+               set_current_state(TASK_INTERRUPTIBLE);
+               spin_unlock_bh(&p->p_work_lock);
+               schedule();
+       }
+       __set_current_state(TASK_RUNNING);
+
+       return 0;
+}
+
+
+/**
+ * bnx2i_queue_scsi_cmd_resp - queue cmd completion to the percpu thread
+ * @bnx2i_conn:                bnx2i connection
+ *
+ * this function is called by generic KCQ handler to queue all pending cmd
+ * completion CQEs
+ *
+ * The implementation is to queue the cmd response based on the
+ * last recorded command for the given connection.  The
+ * cpu_id gets recorded upon task_xmit.  No out-of-order completion!
+ */
+static int bnx2i_queue_scsi_cmd_resp(struct iscsi_session *session,
+                                    struct bnx2i_conn *bnx2i_conn,
+                                    struct bnx2i_nop_in_msg *cqe)
+{
+       struct bnx2i_work *bnx2i_work = NULL;
+       struct bnx2i_percpu_s *p = NULL;
+       struct iscsi_task *task;
+       struct scsi_cmnd *sc;
+       int rc = 0;
+       int cpu;
+
+       spin_lock(&session->lock);
+       task = iscsi_itt_to_task(bnx2i_conn->cls_conn->dd_data,
+                                cqe->itt & ISCSI_CMD_RESPONSE_INDEX);
+       if (!task) {
+               spin_unlock(&session->lock);
+               return -EINVAL;
+       }
+       sc = task->sc;
+       spin_unlock(&session->lock);
+
+       if (!blk_rq_cpu_valid(sc->request))
+               cpu = smp_processor_id();
+       else
+               cpu = sc->request->cpu;
+
+       p = &per_cpu(bnx2i_percpu, cpu);
+       spin_lock(&p->p_work_lock);
+       if (unlikely(!p->iothread)) {
+               rc = -EINVAL;
+               goto err;
+       }
+       /* Alloc and copy to the cqe */
+       bnx2i_work = kzalloc(sizeof(struct bnx2i_work), GFP_ATOMIC);
+       if (bnx2i_work) {
+               INIT_LIST_HEAD(&bnx2i_work->list);
+               bnx2i_work->session = session;
+               bnx2i_work->bnx2i_conn = bnx2i_conn;
+               memcpy(&bnx2i_work->cqe, cqe, sizeof(struct cqe));
+               list_add_tail(&bnx2i_work->list, &p->work_list);
+               atomic_inc(&bnx2i_conn->work_cnt);
+               wake_up_process(p->iothread);
+               spin_unlock(&p->p_work_lock);
+               goto done;
+       } else
+               rc = -ENOMEM;
+err:
+       spin_unlock(&p->p_work_lock);
+       bnx2i_process_scsi_cmd_resp(session, bnx2i_conn, (struct cqe *)cqe);
+done:
+       return rc;
+}
+
 
 /**
  * bnx2i_process_new_cqes - process newly DMA'ed CQE's
- * @bnx2i_conn:                iscsi connection
+ * @bnx2i_conn:                bnx2i connection
  *
  * this function is called by generic KCQ handler to process all pending CQE's
  */
-static void bnx2i_process_new_cqes(struct bnx2i_conn *bnx2i_conn)
+static int bnx2i_process_new_cqes(struct bnx2i_conn *bnx2i_conn)
 {
        struct iscsi_conn *conn = bnx2i_conn->cls_conn->dd_data;
        struct iscsi_session *session = conn->session;
-       struct qp_info *qp = &bnx2i_conn->ep->qp;
+       struct qp_info *qp;
        struct bnx2i_nop_in_msg *nopin;
        int tgt_async_msg;
+       int cqe_cnt = 0;
 
+       if (bnx2i_conn->ep == NULL)
+               return 0;
+
+       qp = &bnx2i_conn->ep->qp;
+
+       if (!qp->cq_virt) {
+               printk(KERN_ALERT "bnx2i (%s): cq resr freed in bh execution!",
+                       bnx2i_conn->hba->netdev->name);
+               goto out;
+       }
        while (1) {
                nopin = (struct bnx2i_nop_in_msg *) qp->cq_cons_qe;
                if (nopin->cq_req_sn != qp->cqe_exp_seq_sn)
@@ -1873,8 +1993,9 @@ static void bnx2i_process_new_cqes(struct bnx2i_conn *bnx2i_conn)
                switch (nopin->op_code) {
                case ISCSI_OP_SCSI_CMD_RSP:
                case ISCSI_OP_SCSI_DATA_IN:
-                       bnx2i_process_scsi_cmd_resp(session, bnx2i_conn,
-                                                   qp->cq_cons_qe);
+                       /* Run the kthread engine only for data cmds
+                          All other cmds will be completed in this bh! */
+                       bnx2i_queue_scsi_cmd_resp(session, bnx2i_conn, nopin);
                        break;
                case ISCSI_OP_LOGIN_RSP:
                        bnx2i_process_login_resp(session, bnx2i_conn,
@@ -1918,13 +2039,21 @@ static void bnx2i_process_new_cqes(struct bnx2i_conn *bnx2i_conn)
                        printk(KERN_ALERT "bnx2i: unknown opcode 0x%x\n",
                                          nopin->op_code);
                }
-               if (!tgt_async_msg)
-                       bnx2i_conn->ep->num_active_cmds--;
+               if (!tgt_async_msg) {
+                       if (!atomic_read(&bnx2i_conn->ep->num_active_cmds))
+                               printk(KERN_ALERT "bnx2i (%s): no active cmd! "
+                                      "op 0x%x\n",
+                                      bnx2i_conn->hba->netdev->name,
+                                      nopin->op_code);
+                       else
+                               atomic_dec(&bnx2i_conn->ep->num_active_cmds);
+               }
 cqe_out:
                /* clear out in production version only, till beta keep opcode
                 * field intact, will be helpful in debugging (context dump)
                 * nopin->op_code = 0;
                 */
+               cqe_cnt++;
                qp->cqe_exp_seq_sn++;
                if (qp->cqe_exp_seq_sn == (qp->cqe_size * 2 + 1))
                        qp->cqe_exp_seq_sn = ISCSI_INITIAL_SN;
@@ -1937,6 +2066,8 @@ cqe_out:
                        qp->cq_cons_idx++;
                }
        }
+out:
+       return cqe_cnt;
 }
 
 /**
@@ -1952,6 +2083,7 @@ static void bnx2i_fastpath_notification(struct bnx2i_hba *hba,
 {
        struct bnx2i_conn *bnx2i_conn;
        u32 iscsi_cid;
+       int nxt_idx;
 
        iscsi_cid = new_cqe_kcqe->iscsi_conn_id;
        bnx2i_conn = bnx2i_get_conn_from_id(hba, iscsi_cid);
@@ -1964,9 +2096,12 @@ static void bnx2i_fastpath_notification(struct bnx2i_hba *hba,
                printk(KERN_ALERT "cid #%x - ep not bound\n", iscsi_cid);
                return;
        }
+
        bnx2i_process_new_cqes(bnx2i_conn);
-       bnx2i_arm_cq_event_coalescing(bnx2i_conn->ep, CNIC_ARM_CQE_FP);
-       bnx2i_process_new_cqes(bnx2i_conn);
+       nxt_idx = bnx2i_arm_cq_event_coalescing(bnx2i_conn->ep,
+                                               CNIC_ARM_CQE_FP);
+       if (nxt_idx && nxt_idx == bnx2i_process_new_cqes(bnx2i_conn))
+               bnx2i_arm_cq_event_coalescing(bnx2i_conn->ep, CNIC_ARM_CQE_FP);
 }
 
 
@@ -2312,7 +2447,7 @@ static void bnx2i_process_ofld_cmpl(struct bnx2i_hba *hba,
                        printk(KERN_ALERT "bnx2i (%s): ofld1 cmpl - invalid "
                                "opcode\n", hba->netdev->name);
                else if (ofld_kcqe->completion_status ==
-                       ISCSI_KCQE_COMPLETION_STATUS_CID_BUSY)
+                        ISCSI_KCQE_COMPLETION_STATUS_CID_BUSY)
                        /* error status code valid only for 5771x chipset */
                        ep->state = EP_STATE_OFLD_FAILED_CID_BUSY;
                else
@@ -2517,7 +2652,7 @@ static void bnx2i_cm_remote_abort(struct cnic_sock *cm_sk)
 
 
 static int bnx2i_send_nl_mesg(void *context, u32 msg_type,
-                              char *buf, u16 buflen)
+                             char *buf, u16 buflen)
 {
        struct bnx2i_hba *hba = context;
        int rc;