NVMe: Rename cancel_cmdid_data to cancel_cmdid
diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index 2409227..9ca9db9 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -142,10 +142,10 @@ static struct nvme_cmd_info *nvme_cmd_info(struct nvme_queue *nvmeq)
 }
 
 /**
- * alloc_cmdid - Allocate a Command ID
- * @param nvmeq The queue that will be used for this command
- * @param ctx A pointer that will be passed to the handler
- * @param handler The ID of the handler to call
+ * alloc_cmdid() - Allocate a Command ID
+ * @nvmeq: The queue that will be used for this command
+ * @ctx: A pointer that will be passed to the handler
+ * @handler: The ID of the handler to call
  *
  * Allocate a Command ID for a queue.  The data passed in will
  * be passed to the completion handler.  This is implemented by using
@@ -156,7 +156,7 @@ static struct nvme_cmd_info *nvme_cmd_info(struct nvme_queue *nvmeq)
 static int alloc_cmdid(struct nvme_queue *nvmeq, void *ctx, int handler,
                                                        unsigned timeout)
 {
-       int depth = nvmeq->q_depth;
+       int depth = nvmeq->q_depth - 1;
        struct nvme_cmd_info *info = nvme_cmd_info(nvmeq);
        int cmdid;
 
@@ -182,7 +182,8 @@ static int alloc_cmdid_killable(struct nvme_queue *nvmeq, void *ctx,
        return (cmdid < 0) ? -EINTR : cmdid;
 }
 
-/* If you need more than four handlers, you'll need to change how
+/*
+ * If you need more than four handlers, you'll need to change how
  * alloc_cmdid and nvme_process_cq work.  Consider using a special
  * CMD_CTX value instead, if that works for your situation.
  */
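
The four-handler limit mentioned above comes from how the ctx/handler pair is stored: the handler ID is packed into the spare low bits of the ctx pointer (any pointer with at least 4-byte alignment has two zero bits to spare), so only IDs 0-3 fit. A minimal sketch of that kind of pointer tagging, with illustrative names rather than the driver's own:

	#define HANDLER_MASK	3UL	/* two low bits -> at most four handler IDs */

	/* Pack a small handler ID into a suitably aligned pointer. */
	static unsigned long pack_ctx(void *ctx, int handler)
	{
		return (unsigned long)ctx | (handler & HANDLER_MASK);
	}

	/* Recover the original pointer and the handler ID. */
	static void *unpack_ctx(unsigned long data, int *handler)
	{
		*handler = data & HANDLER_MASK;
		return (void *)(data & ~HANDLER_MASK);
	}

The CMD_CTX_* constants referred to here (such as CMD_CTX_CANCELLED below) play the role of reserved sentinel contexts within the same scheme.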
@@ -212,20 +213,18 @@ static unsigned long free_cmdid(struct nvme_queue *nvmeq, int cmdid)
        return data;
 }
 
-static void cancel_cmdid_data(struct nvme_queue *nvmeq, int cmdid)
+static unsigned long cancel_cmdid(struct nvme_queue *nvmeq, int cmdid)
 {
+       unsigned long data;
        struct nvme_cmd_info *info = nvme_cmd_info(nvmeq);
+       data = info[cmdid].ctx;
        info[cmdid].ctx = CMD_CTX_CANCELLED;
+       return data;
 }
 
 static struct nvme_queue *get_nvmeq(struct nvme_ns *ns)
 {
-       int qid, cpu = get_cpu();
-       if (cpu < ns->dev->queue_count)
-               qid = cpu + 1;
-       else
-               qid = (cpu % rounddown_pow_of_two(ns->dev->queue_count)) + 1;
-       return ns->dev->queues[qid];
+       return ns->dev->queues[get_cpu() + 1];
 }
 
 static void put_nvmeq(struct nvme_queue *nvmeq)
@@ -234,7 +233,7 @@ static void put_nvmeq(struct nvme_queue *nvmeq)
 }
 
 /**
- * nvme_submit_cmd: Copy a command into a queue and ring the doorbell
+ * nvme_submit_cmd() - Copy a command into a queue and ring the doorbell
  * @nvmeq: The queue to use
  * @cmd: The command to send
  *
@@ -244,7 +243,6 @@ static int nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
 {
        unsigned long flags;
        u16 tail;
-       /* XXX: Need to check tail isn't going to overrun head */
        spin_lock_irqsave(&nvmeq->q_lock, flags);
        tail = nvmeq->sq_tail;
        memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd));
@@ -315,9 +313,9 @@ static void bio_completion(struct nvme_queue *nvmeq, void *ctx,
        dma_unmap_sg(nvmeq->q_dmadev, nbio->sg, nbio->nents,
                        bio_data_dir(bio) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
        free_nbio(nvmeq, nbio);
-       if (status)
+       if (status) {
                bio_endio(bio, -EIO);
-       if (bio->bi_vcnt > bio->bi_idx) {
+       } else if (bio->bi_vcnt > bio->bi_idx) {
                bio_list_add(&nvmeq->sq_cong, bio);
                wake_up_process(nvme_thread);
        } else {
@@ -376,12 +374,13 @@ static struct nvme_prps *nvme_setup_prps(struct nvme_dev *dev,
        cmd->prp2 = cpu_to_le64(prp_dma);
        i = 0;
        for (;;) {
-               if (i == PAGE_SIZE / 8 - 1) {
+               if (i == PAGE_SIZE / 8) {
                        __le64 *old_prp_list = prp_list;
                        prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
                        prps->list[prp_page++] = prp_list;
-                       old_prp_list[i] = cpu_to_le64(prp_dma);
-                       i = 0;
+                       prp_list[0] = old_prp_list[i - 1];
+                       old_prp_list[i - 1] = cpu_to_le64(prp_dma);
+                       i = 1;
                }
                prp_list[i++] = cpu_to_le64(dma_addr);
                dma_len -= PAGE_SIZE;
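
When a transfer needs more PRP entries than one page can hold, the list is chained: the last slot of each full PRP list page holds the bus address of the next list page instead of a data address. With 4 KiB pages (PAGE_SIZE / 8 = 512 entries per page), the loop above therefore produces a layout roughly like:

	list page 0: [ prp 0 ][ prp 1 ] ... [ prp 510 ][ pointer to list page 1 ]
	list page 1: [ prp 511 ][ prp 512 ] ...

That is, once slot 511 has been filled with a data entry, that entry is moved down to slot 0 of a freshly allocated page, slot 511 of the old page is rewritten to point at the new page, and filling resumes at slot 1.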
@@ -529,6 +528,8 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
        cmnd->rw.control = cpu_to_le16(control);
        cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
 
+       bio->bi_sector += length >> 9;
+
        if (++nvmeq->sq_tail == nvmeq->q_depth)
                nvmeq->sq_tail = 0;
        writel(nvmeq->sq_tail, nvmeq->q_db);
@@ -669,7 +670,7 @@ static irqreturn_t nvme_irq_check(int irq, void *data)
 static void nvme_abort_command(struct nvme_queue *nvmeq, int cmdid)
 {
        spin_lock_irq(&nvmeq->q_lock);
-       cancel_cmdid_data(nvmeq, cmdid);
+       cancel_cmdid(nvmeq, cmdid);
        spin_unlock_irq(&nvmeq->q_lock);
 }
 
@@ -783,8 +784,10 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid)
 static void nvme_free_queue(struct nvme_dev *dev, int qid)
 {
        struct nvme_queue *nvmeq = dev->queues[qid];
+       int vector = dev->entry[nvmeq->cq_vector].vector;
 
-       free_irq(dev->entry[nvmeq->cq_vector].vector, nvmeq);
+       irq_set_affinity_hint(vector, NULL);
+       free_irq(vector, nvmeq);
 
        /* Don't tell the adapter to delete the admin queue */
        if (qid) {
@@ -893,6 +896,8 @@ static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev)
 {
        int result;
        u32 aqa;
+       u64 cap;
+       unsigned long timeout;
        struct nvme_queue *nvmeq;
 
        dev->dbs = ((void __iomem *)dev->bar) + 4096;
@@ -907,6 +912,7 @@ static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev)
        dev->ctrl_config = NVME_CC_ENABLE | NVME_CC_CSS_NVM;
        dev->ctrl_config |= (PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;
        dev->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
+       dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
 
        writel(0, &dev->bar->cc);
        writel(aqa, &dev->bar->aqa);
@@ -914,10 +920,18 @@ static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev)
        writeq(nvmeq->cq_dma_addr, &dev->bar->acq);
        writel(dev->ctrl_config, &dev->bar->cc);
 
+       cap = readq(&dev->bar->cap);
+       timeout = ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
+
        while (!(readl(&dev->bar->csts) & NVME_CSTS_RDY)) {
                msleep(100);
                if (fatal_signal_pending(current))
                        return -EINTR;
+               if (time_after(jiffies, timeout)) {
+                       dev_err(&dev->pci_dev->dev,
+                               "Device not ready; aborting initialisation\n");
+                       return -ENODEV;
+               }
        }
 
        result = queue_request_irq(dev, nvmeq, "nvme admin");
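
The deadline added here comes from CAP.TO, read from the controller a few lines up: it reports the worst-case time to wait for CSTS.RDY in 500 ms units, so the driver converts it to jiffies at HZ/2 per unit and allows one extra unit. Worked through:

	CAP.TO = 10  ->  (10 + 1) * HZ / 2  =  5.5 seconds' worth of jiffies before giving up with -ENODEV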
@@ -1037,44 +1051,52 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
        struct nvme_user_io io;
        struct nvme_command c;
        unsigned length;
-       u32 result;
        int nents, status;
        struct scatterlist *sg;
        struct nvme_prps *prps;
 
        if (copy_from_user(&io, uio, sizeof(io)))
                return -EFAULT;
-       length = io.nblocks << io.block_shift;
-       nents = nvme_map_user_pages(dev, io.opcode & 1, io.addr, length, &sg);
+       length = (io.nblocks + 1) << ns->lba_shift;
+
+       switch (io.opcode) {
+       case nvme_cmd_write:
+       case nvme_cmd_read:
+               nents = nvme_map_user_pages(dev, io.opcode & 1, io.addr,
+                                                               length, &sg);
+               break;
+       default:
+               return -EFAULT;
+       }
+
        if (nents < 0)
                return nents;
 
        memset(&c, 0, sizeof(c));
        c.rw.opcode = io.opcode;
        c.rw.flags = io.flags;
-       c.rw.nsid = cpu_to_le32(io.nsid);
+       c.rw.nsid = cpu_to_le32(ns->ns_id);
        c.rw.slba = cpu_to_le64(io.slba);
-       c.rw.length = cpu_to_le16(io.nblocks - 1);
+       c.rw.length = cpu_to_le16(io.nblocks);
        c.rw.control = cpu_to_le16(io.control);
        c.rw.dsmgmt = cpu_to_le16(io.dsmgmt);
-       c.rw.reftag = cpu_to_le32(io.reftag);   /* XXX: endian? */
-       c.rw.apptag = cpu_to_le16(io.apptag);
-       c.rw.appmask = cpu_to_le16(io.appmask);
+       c.rw.reftag = io.reftag;
+       c.rw.apptag = io.apptag;
+       c.rw.appmask = io.appmask;
        /* XXX: metadata */
        prps = nvme_setup_prps(dev, &c.common, sg, length);
 
        nvmeq = get_nvmeq(ns);
-       /* Since nvme_submit_sync_cmd sleeps, we can't keep preemption
+       /*
+        * Since nvme_submit_sync_cmd sleeps, we can't keep preemption
         * disabled.  We may be preempted at any point, and be rescheduled
         * to a different CPU.  That will cause cacheline bouncing, but no
         * additional races since q_lock already protects against other CPUs.
         */
        put_nvmeq(nvmeq);
-       status = nvme_submit_sync_cmd(nvmeq, &c, &result, IO_TIMEOUT);
+       status = nvme_submit_sync_cmd(nvmeq, &c, NULL, IO_TIMEOUT);
 
        nvme_unmap_user_pages(dev, io.opcode & 1, io.addr, length, sg, nents);
        nvme_free_prps(dev, prps);
-       put_user(result, &uio->result);
        return status;
 }
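
A note on the comment in this hunk: get_nvmeq() pins the submitter to a CPU with get_cpu(), which also disables preemption, and put_nvmeq() supplies the matching put_cpu(). Because nvme_submit_sync_cmd() can sleep, it must not run with preemption disabled, hence put_nvmeq() is called before the submission rather than after; as the comment says, q_lock keeps this safe. Schematically:

	nvmeq = get_nvmeq(ns);     /* get_cpu(): preemption disabled */
	put_nvmeq(nvmeq);          /* put_cpu(): preemption enabled again */
	status = nvme_submit_sync_cmd(nvmeq, &c, NULL, IO_TIMEOUT);  /* may sleep */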
 
@@ -1147,6 +1169,7 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
 static const struct block_device_operations nvme_fops = {
        .owner          = THIS_MODULE,
        .ioctl          = nvme_ioctl,
+       .compat_ioctl   = nvme_ioctl,
 };
 
 static void nvme_resubmit_bios(struct nvme_queue *nvmeq)
@@ -1158,6 +1181,9 @@ static void nvme_resubmit_bios(struct nvme_queue *nvmeq)
                        bio_list_add_head(&nvmeq->sq_cong, bio);
                        break;
                }
+               if (bio_list_empty(&nvmeq->sq_cong))
+                       remove_wait_queue(&nvmeq->sq_full,
+                                                       &nvmeq->sq_cong_wait);
        }
 }
 
@@ -1309,6 +1335,11 @@ static int __devinit nvme_setup_io_queues(struct nvme_dev *dev)
                dev->queue_count++;
        }
 
+       for (; i < num_possible_cpus(); i++) {
+               int target = i % rounddown_pow_of_two(dev->queue_count - 1);
+               dev->queues[i + 1] = dev->queues[target + 1];
+       }
+
        return 0;
 }
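
With dev->queues[] now filled in for every possible CPU, get_nvmeq() earlier in this diff reduces to a single array lookup; the modulo mapping it used to compute on every I/O is done once here instead. Worked through with small numbers, assuming the loop above created one queue per CPU for the first queue_count - 1 CPUs: with 3 I/O queues and 8 possible CPUs, rounddown_pow_of_two(3) == 2, so the extra CPUs share queues 1 and 2:

	CPU:    0  1  2  3  4  5  6  7
	queue:  1  2  3  2  1  2  1  2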
 
@@ -1361,7 +1392,7 @@ static int __devinit nvme_dev_add(struct nvme_dev *dev)
        crt.features.prp1 = cpu_to_le64(dma_addr + 4096);
        crt.features.fid = cpu_to_le32(NVME_FEAT_LBA_RANGE);
 
-       for (i = 0; i < nn; i++) {
+       for (i = 0; i <= nn; i++) {
                cid.identify.nsid = cpu_to_le32(i);
                res = nvme_submit_admin_cmd(dev, &cid, NULL);
                if (res)
@@ -1615,6 +1646,6 @@ static void __exit nvme_exit(void)
 
 MODULE_AUTHOR("Matthew Wilcox <willy@linux.intel.com>");
 MODULE_LICENSE("GPL");
-MODULE_VERSION("0.3");
+MODULE_VERSION("0.5");
 module_init(nvme_init);
 module_exit(nvme_exit);