Merge branch 'i2c-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jdelvar...
Linus Torvalds [Sun, 4 Oct 2009 21:59:14 +0000 (14:59 -0700)]
* 'i2c-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jdelvare/staging:
  macintosh: Don't assume i2c device probing always succeeds
  i2c: Hide probe errors caused by ACPI resource conflicts
  i2c: Minor documentation update
  mfd: AB3100 drop unused module parameters
  Staging: IIO: tsl2561: Drop unused module parameters
  leds: leds-pca9532 - Drop unused module parameters
  ltc4215/ltc4245: Discard obsolete detect methods
  ds2482: Discard obsolete detect method
  max6875: Discard obsolete detect method
  i2c: Move misc devices documentation

26 files changed:
Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
block/blk-barrier.c
block/blk-core.c
block/blk-merge.c
block/blk-settings.c
block/blk-sysfs.c
block/cfq-iosched.c
block/compat_ioctl.c
block/genhd.c
block/ioctl.c
drivers/block/DAC960.c
drivers/block/cciss.c
drivers/block/cciss.h
drivers/block/cpqarray.c
drivers/md/dm.c
drivers/mtd/mtd_blkdevs.c
drivers/staging/dst/dcore.c
fs/bio.c
fs/partitions/check.c
include/linux/blkdev.h
include/linux/blktrace_api.h
include/linux/fs.h
include/linux/genhd.h
include/trace/events/block.h
kernel/trace/blktrace.c
mm/swapfile.c

index 0a92a7c..4f29e5f 100644 (file)
@@ -31,3 +31,31 @@ Date:                March 2009
 Kernel Version: 2.6.30
 Contact:       iss_storagedev@hp.com
 Description:   A symbolic link to /sys/block/cciss!cXdY
+
+Where:         /sys/bus/pci/devices/<dev>/ccissX/rescan
+Date:          August 2009
+Kernel Version:        2.6.31
+Contact:       iss_storagedev@hp.com
+Description:   Kicks off a rescan of the controller to discover logical
+               drive topology changes.
+
+Where:         /sys/bus/pci/devices/<dev>/ccissX/cXdY/lunid
+Date:          August 2009
+Kernel Version: 2.6.31
+Contact:       iss_storagedev@hp.com
+Description:   Displays the 8-byte LUN ID used to address logical
+               drive Y of controller X.
+
+Where:         /sys/bus/pci/devices/<dev>/ccissX/cXdY/raid_level
+Date:          August 2009
+Kernel Version: 2.6.31
+Contact:       iss_storagedev@hp.com
+Description:   Displays the RAID level of logical drive Y of
+               controller X.
+
+Where:         /sys/bus/pci/devices/<dev>/ccissX/cXdY/usage_count
+Date:          August 2009
+Kernel Version: 2.6.31
+Contact:       iss_storagedev@hp.com
+Description:   Displays the usage count (number of opens) of logical drive Y
+               of controller X.
index 6593ab3..8873b9b 100644 (file)
@@ -350,6 +350,7 @@ static void blkdev_discard_end_io(struct bio *bio, int err)
 
        if (bio->bi_private)
                complete(bio->bi_private);
+       __free_page(bio_page(bio));
 
        bio_put(bio);
 }
@@ -372,30 +373,50 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
        struct request_queue *q = bdev_get_queue(bdev);
        int type = flags & DISCARD_FL_BARRIER ?
                DISCARD_BARRIER : DISCARD_NOBARRIER;
+       struct bio *bio;
+       struct page *page;
        int ret = 0;
 
        if (!q)
                return -ENXIO;
 
-       if (!q->prepare_discard_fn)
+       if (!blk_queue_discard(q))
                return -EOPNOTSUPP;
 
        while (nr_sects && !ret) {
-               struct bio *bio = bio_alloc(gfp_mask, 0);
-               if (!bio)
-                       return -ENOMEM;
+               unsigned int sector_size = q->limits.logical_block_size;
+               unsigned int max_discard_sectors =
+                       min(q->limits.max_discard_sectors, UINT_MAX >> 9);
 
+               bio = bio_alloc(gfp_mask, 1);
+               if (!bio)
+                       goto out;
+               bio->bi_sector = sector;
                bio->bi_end_io = blkdev_discard_end_io;
                bio->bi_bdev = bdev;
                if (flags & DISCARD_FL_WAIT)
                        bio->bi_private = &wait;
 
-               bio->bi_sector = sector;
+               /*
+                * Add a zeroed one-sector payload as that's what
+                * our current implementations need.  If we ever need more,
+                * the interface will need revisiting.
+                */
+               page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+               if (!page)
+                       goto out_free_bio;
+               if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size)
+                       goto out_free_page;
 
-               if (nr_sects > queue_max_hw_sectors(q)) {
-                       bio->bi_size = queue_max_hw_sectors(q) << 9;
-                       nr_sects -= queue_max_hw_sectors(q);
-                       sector += queue_max_hw_sectors(q);
+               /*
+                * And override the bio size - the way discard works we
+                * touch many more blocks on disk than the actual payload
+                * length.
+                */
+               if (nr_sects > max_discard_sectors) {
+                       bio->bi_size = max_discard_sectors << 9;
+                       nr_sects -= max_discard_sectors;
+                       sector += max_discard_sectors;
                } else {
                        bio->bi_size = nr_sects << 9;
                        nr_sects = 0;
@@ -414,5 +435,11 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
                bio_put(bio);
        }
        return ret;
+out_free_page:
+       __free_page(page);
+out_free_bio:
+       bio_put(bio);
+out:
+       return -ENOMEM;
 }
 EXPORT_SYMBOL(blkdev_issue_discard);
index 8135228..81f3431 100644 (file)
@@ -34,6 +34,7 @@
 #include "blk.h"
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap);
+EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
 
 static int __make_request(struct request_queue *q, struct bio *bio);
@@ -69,7 +70,7 @@ static void drive_stat_acct(struct request *rq, int new_io)
                part_stat_inc(cpu, part, merges[rw]);
        else {
                part_round_stats(cpu, part);
-               part_inc_in_flight(part, rw);
+               part_inc_in_flight(part);
        }
 
        part_stat_unlock();
@@ -1031,7 +1032,7 @@ static void part_round_stats_single(int cpu, struct hd_struct *part,
 
        if (part->in_flight) {
                __part_stat_add(cpu, part, time_in_queue,
-                               part_in_flight(part) * (now - part->stamp));
+                               part->in_flight * (now - part->stamp));
                __part_stat_add(cpu, part, io_ticks, (now - part->stamp));
        }
        part->stamp = now;
@@ -1124,7 +1125,6 @@ void init_request_from_bio(struct request *req, struct bio *bio)
                req->cmd_flags |= REQ_DISCARD;
                if (bio_rw_flagged(bio, BIO_RW_BARRIER))
                        req->cmd_flags |= REQ_SOFTBARRIER;
-               req->q->prepare_discard_fn(req->q, req);
        } else if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER)))
                req->cmd_flags |= REQ_HARDBARRIER;
 
@@ -1437,7 +1437,8 @@ static inline void __generic_make_request(struct bio *bio)
                        goto end_io;
                }
 
-               if (unlikely(nr_sectors > queue_max_hw_sectors(q))) {
+               if (unlikely(!bio_rw_flagged(bio, BIO_RW_DISCARD) &&
+                            nr_sectors > queue_max_hw_sectors(q))) {
                        printk(KERN_ERR "bio too big device %s (%u > %u)\n",
                               bdevname(bio->bi_bdev, b),
                               bio_sectors(bio),
@@ -1470,7 +1471,7 @@ static inline void __generic_make_request(struct bio *bio)
                        goto end_io;
 
                if (bio_rw_flagged(bio, BIO_RW_DISCARD) &&
-                   !q->prepare_discard_fn) {
+                   !blk_queue_discard(q)) {
                        err = -EOPNOTSUPP;
                        goto end_io;
                }
@@ -1738,7 +1739,7 @@ static void blk_account_io_done(struct request *req)
                part_stat_inc(cpu, part, ios[rw]);
                part_stat_add(cpu, part, ticks[rw], duration);
                part_round_stats(cpu, part);
-               part_dec_in_flight(part, rw);
+               part_dec_in_flight(part);
 
                part_stat_unlock();
        }
@@ -2491,6 +2492,14 @@ int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
 }
 EXPORT_SYMBOL(kblockd_schedule_work);
 
+int kblockd_schedule_delayed_work(struct request_queue *q,
+                                 struct delayed_work *work,
+                                 unsigned long delay)
+{
+       return queue_delayed_work(kblockd_workqueue, work, delay);
+}
+EXPORT_SYMBOL(kblockd_schedule_delayed_work);
+
 int __init blk_dev_init(void)
 {
        BUILD_BUG_ON(__REQ_NR_BITS > 8 *
index 99cb5cf..b0de857 100644 (file)
@@ -351,7 +351,7 @@ static void blk_account_io_merge(struct request *req)
                part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
 
                part_round_stats(cpu, part);
-               part_dec_in_flight(part, rq_data_dir(req));
+               part_dec_in_flight(part);
 
                part_stat_unlock();
        }
index 83413ff..e0695bc 100644 (file)
@@ -34,23 +34,6 @@ void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn)
 EXPORT_SYMBOL(blk_queue_prep_rq);
 
 /**
- * blk_queue_set_discard - set a discard_sectors function for queue
- * @q:         queue
- * @dfn:       prepare_discard function
- *
- * It's possible for a queue to register a discard callback which is used
- * to transform a discard request into the appropriate type for the
- * hardware. If none is registered, then discard requests are failed
- * with %EOPNOTSUPP.
- *
- */
-void blk_queue_set_discard(struct request_queue *q, prepare_discard_fn *dfn)
-{
-       q->prepare_discard_fn = dfn;
-}
-EXPORT_SYMBOL(blk_queue_set_discard);
-
-/**
  * blk_queue_merge_bvec - set a merge_bvec function for queue
  * @q:         queue
  * @mbfn:      merge_bvec_fn
@@ -111,7 +94,9 @@ void blk_set_default_limits(struct queue_limits *lim)
        lim->max_hw_segments = MAX_HW_SEGMENTS;
        lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
        lim->max_segment_size = MAX_SEGMENT_SIZE;
-       lim->max_sectors = lim->max_hw_sectors = SAFE_MAX_SECTORS;
+       lim->max_sectors = BLK_DEF_MAX_SECTORS;
+       lim->max_hw_sectors = INT_MAX;
+       lim->max_discard_sectors = SAFE_MAX_SECTORS;
        lim->logical_block_size = lim->physical_block_size = lim->io_min = 512;
        lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT);
        lim->alignment_offset = 0;
@@ -164,6 +149,7 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
        q->unplug_timer.data = (unsigned long)q;
 
        blk_set_default_limits(&q->limits);
+       blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
 
        /*
         * If the caller didn't supply a lock, fall back to our embedded
@@ -254,6 +240,18 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_sectors)
 EXPORT_SYMBOL(blk_queue_max_hw_sectors);
 
 /**
+ * blk_queue_max_discard_sectors - set max sectors for a single discard
+ * @q:  the request queue for the device
+ * @max_discard_sectors: maximum number of sectors to discard
+ **/
+void blk_queue_max_discard_sectors(struct request_queue *q,
+               unsigned int max_discard_sectors)
+{
+       q->limits.max_discard_sectors = max_discard_sectors;
+}
+EXPORT_SYMBOL(blk_queue_max_discard_sectors);
+
+/**
  * blk_queue_max_phys_segments - set max phys segments for a request for this queue
  * @q:  the request queue for the device
  * @max_segments:  max number of segments
index b78c9c3..8a6d81a 100644 (file)
@@ -452,6 +452,7 @@ int blk_register_queue(struct gendisk *disk)
        if (ret) {
                kobject_uevent(&q->kobj, KOBJ_REMOVE);
                kobject_del(&q->kobj);
+               blk_trace_remove_sysfs(disk_to_dev(disk));
                return ret;
        }
 
@@ -465,11 +466,11 @@ void blk_unregister_queue(struct gendisk *disk)
        if (WARN_ON(!q))
                return;
 
-       if (q->request_fn) {
+       if (q->request_fn)
                elv_unregister_queue(q);
 
-               kobject_uevent(&q->kobj, KOBJ_REMOVE);
-               kobject_del(&q->kobj);
-               kobject_put(&disk_to_dev(disk)->kobj);
-       }
+       kobject_uevent(&q->kobj, KOBJ_REMOVE);
+       kobject_del(&q->kobj);
+       blk_trace_remove_sysfs(disk_to_dev(disk));
+       kobject_put(&disk_to_dev(disk)->kobj);
 }
index 1ca813b..9c4b679 100644 (file)
@@ -150,7 +150,7 @@ struct cfq_data {
         * idle window management
         */
        struct timer_list idle_slice_timer;
-       struct work_struct unplug_work;
+       struct delayed_work unplug_work;
 
        struct cfq_queue *active_queue;
        struct cfq_io_context *active_cic;
@@ -173,6 +173,7 @@ struct cfq_data {
        unsigned int cfq_slice[2];
        unsigned int cfq_slice_async_rq;
        unsigned int cfq_slice_idle;
+       unsigned int cfq_latency;
 
        struct list_head cic_list;
 
@@ -180,6 +181,8 @@ struct cfq_data {
         * Fallback dummy cfqq for extreme OOM conditions
         */
        struct cfq_queue oom_cfqq;
+
+       unsigned long last_end_sync_rq;
 };
 
 enum cfqq_state_flags {
@@ -265,11 +268,13 @@ static inline int cfq_bio_sync(struct bio *bio)
  * scheduler run of queue, if there are requests pending and no one in the
  * driver that will restart queueing
  */
-static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
+static inline void cfq_schedule_dispatch(struct cfq_data *cfqd,
+                                        unsigned long delay)
 {
        if (cfqd->busy_queues) {
                cfq_log(cfqd, "schedule dispatch");
-               kblockd_schedule_work(cfqd->queue, &cfqd->unplug_work);
+               kblockd_schedule_delayed_work(cfqd->queue, &cfqd->unplug_work,
+                                               delay);
        }
 }
 
@@ -1326,12 +1331,30 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
                        return 0;
 
                /*
-                * we are the only queue, allow up to 4 times of 'quantum'
+                * Sole queue user, allow bigger slice
                 */
-               if (cfqq->dispatched >= 4 * max_dispatch)
-                       return 0;
+               max_dispatch *= 4;
+       }
+
+       /*
+        * Async queues must wait a bit before being allowed dispatch.
+        * We also ramp up the dispatch depth gradually for async IO,
+        * based on the last sync IO we serviced
+        */
+       if (!cfq_cfqq_sync(cfqq) && cfqd->cfq_latency) {
+               unsigned long last_sync = jiffies - cfqd->last_end_sync_rq;
+               unsigned int depth;
+
+               depth = last_sync / cfqd->cfq_slice[1];
+               if (!depth && !cfqq->dispatched)
+                       depth = 1;
+               if (depth < max_dispatch)
+                       max_dispatch = depth;
        }
 
+       if (cfqq->dispatched >= max_dispatch)
+               return 0;
+
        /*
         * Dispatch a request from this cfqq
         */
@@ -1376,7 +1399,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
 
        if (unlikely(cfqd->active_queue == cfqq)) {
                __cfq_slice_expired(cfqd, cfqq, 0);
-               cfq_schedule_dispatch(cfqd);
+               cfq_schedule_dispatch(cfqd, 0);
        }
 
        kmem_cache_free(cfq_pool, cfqq);
@@ -1471,7 +1494,7 @@ static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
        if (unlikely(cfqq == cfqd->active_queue)) {
                __cfq_slice_expired(cfqd, cfqq, 0);
-               cfq_schedule_dispatch(cfqd);
+               cfq_schedule_dispatch(cfqd, 0);
        }
 
        cfq_put_queue(cfqq);
@@ -1951,7 +1974,7 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
        enable_idle = old_idle = cfq_cfqq_idle_window(cfqq);
 
        if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
-           (cfqd->hw_tag && CIC_SEEKY(cic)))
+           (!cfqd->cfq_latency && cfqd->hw_tag && CIC_SEEKY(cic)))
                enable_idle = 0;
        else if (sample_valid(cic->ttime_samples)) {
                if (cic->ttime_mean > cfqd->cfq_slice_idle)
@@ -2157,8 +2180,10 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
        if (cfq_cfqq_sync(cfqq))
                cfqd->sync_flight--;
 
-       if (sync)
+       if (sync) {
                RQ_CIC(rq)->last_end_request = now;
+               cfqd->last_end_sync_rq = now;
+       }
 
        /*
         * If this is the active queue, check if it needs to be expired,
@@ -2186,7 +2211,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
        }
 
        if (!rq_in_driver(cfqd))
-               cfq_schedule_dispatch(cfqd);
+               cfq_schedule_dispatch(cfqd, 0);
 }
 
 /*
@@ -2316,7 +2341,7 @@ queue_fail:
        if (cic)
                put_io_context(cic->ioc);
 
-       cfq_schedule_dispatch(cfqd);
+       cfq_schedule_dispatch(cfqd, 0);
        spin_unlock_irqrestore(q->queue_lock, flags);
        cfq_log(cfqd, "set_request fail");
        return 1;
@@ -2325,7 +2350,7 @@ queue_fail:
 static void cfq_kick_queue(struct work_struct *work)
 {
        struct cfq_data *cfqd =
-               container_of(work, struct cfq_data, unplug_work);
+               container_of(work, struct cfq_data, unplug_work.work);
        struct request_queue *q = cfqd->queue;
 
        spin_lock_irq(q->queue_lock);
@@ -2379,7 +2404,7 @@ static void cfq_idle_slice_timer(unsigned long data)
 expire:
        cfq_slice_expired(cfqd, timed_out);
 out_kick:
-       cfq_schedule_dispatch(cfqd);
+       cfq_schedule_dispatch(cfqd, 0);
 out_cont:
        spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
 }
@@ -2387,7 +2412,7 @@ out_cont:
 static void cfq_shutdown_timer_wq(struct cfq_data *cfqd)
 {
        del_timer_sync(&cfqd->idle_slice_timer);
-       cancel_work_sync(&cfqd->unplug_work);
+       cancel_delayed_work_sync(&cfqd->unplug_work);
 }
 
 static void cfq_put_async_queues(struct cfq_data *cfqd)
@@ -2469,7 +2494,7 @@ static void *cfq_init_queue(struct request_queue *q)
        cfqd->idle_slice_timer.function = cfq_idle_slice_timer;
        cfqd->idle_slice_timer.data = (unsigned long) cfqd;
 
-       INIT_WORK(&cfqd->unplug_work, cfq_kick_queue);
+       INIT_DELAYED_WORK(&cfqd->unplug_work, cfq_kick_queue);
 
        cfqd->cfq_quantum = cfq_quantum;
        cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0];
@@ -2480,8 +2505,9 @@ static void *cfq_init_queue(struct request_queue *q)
        cfqd->cfq_slice[1] = cfq_slice_sync;
        cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
        cfqd->cfq_slice_idle = cfq_slice_idle;
+       cfqd->cfq_latency = 1;
        cfqd->hw_tag = 1;
-
+       cfqd->last_end_sync_rq = jiffies;
        return cfqd;
 }
 
@@ -2549,6 +2575,7 @@ SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1);
 SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1);
 SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1);
 SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
+SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0);
 #undef SHOW_FUNCTION
 
 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)                        \
@@ -2580,6 +2607,7 @@ STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1);
 STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1);
 STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1,
                UINT_MAX, 0);
+STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0);
 #undef STORE_FUNCTION
 
 #define CFQ_ATTR(name) \
@@ -2595,6 +2623,7 @@ static struct elv_fs_entry cfq_attrs[] = {
        CFQ_ATTR(slice_async),
        CFQ_ATTR(slice_async_rq),
        CFQ_ATTR(slice_idle),
+       CFQ_ATTR(low_latency),
        __ATTR_NULL
 };
 
index 7865a34..9bd086c 100644 (file)
@@ -21,6 +21,11 @@ static int compat_put_int(unsigned long arg, int val)
        return put_user(val, (compat_int_t __user *)compat_ptr(arg));
 }
 
+static int compat_put_uint(unsigned long arg, unsigned int val)
+{
+       return put_user(val, (compat_uint_t __user *)compat_ptr(arg));
+}
+
 static int compat_put_long(unsigned long arg, long val)
 {
        return put_user(val, (compat_long_t __user *)compat_ptr(arg));
@@ -734,6 +739,14 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
        switch (cmd) {
        case HDIO_GETGEO:
                return compat_hdio_getgeo(disk, bdev, compat_ptr(arg));
+       case BLKPBSZGET:
+               return compat_put_uint(arg, bdev_physical_block_size(bdev));
+       case BLKIOMIN:
+               return compat_put_uint(arg, bdev_io_min(bdev));
+       case BLKIOOPT:
+               return compat_put_uint(arg, bdev_io_opt(bdev));
+       case BLKALIGNOFF:
+               return compat_put_int(arg, bdev_alignment_offset(bdev));
        case BLKFLSBUF:
        case BLKROSET:
        case BLKDISCARD:
index 517e433..5a0861d 100644 (file)
@@ -869,7 +869,6 @@ static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
 static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL);
 static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
 static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
-static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 static struct device_attribute dev_attr_fail =
        __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
@@ -889,7 +888,6 @@ static struct attribute *disk_attrs[] = {
        &dev_attr_alignment_offset.attr,
        &dev_attr_capability.attr,
        &dev_attr_stat.attr,
-       &dev_attr_inflight.attr,
 #ifdef CONFIG_FAIL_MAKE_REQUEST
        &dev_attr_fail.attr,
 #endif
@@ -1055,7 +1053,7 @@ static int diskstats_show(struct seq_file *seqf, void *v)
                           part_stat_read(hd, merges[1]),
                           (unsigned long long)part_stat_read(hd, sectors[1]),
                           jiffies_to_msecs(part_stat_read(hd, ticks[1])),
-                          part_in_flight(hd),
+                          hd->in_flight,
                           jiffies_to_msecs(part_stat_read(hd, io_ticks)),
                           jiffies_to_msecs(part_stat_read(hd, time_in_queue))
                        );
index d3e6b58..1f4d1de 100644 (file)
@@ -138,6 +138,11 @@ static int put_int(unsigned long arg, int val)
        return put_user(val, (int __user *)arg);
 }
 
+static int put_uint(unsigned long arg, unsigned int val)
+{
+       return put_user(val, (unsigned int __user *)arg);
+}
+
 static int put_long(unsigned long arg, long val)
 {
        return put_user(val, (long __user *)arg);
@@ -263,10 +268,18 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
                return put_long(arg, (bdi->ra_pages * PAGE_CACHE_SIZE) / 512);
        case BLKROGET:
                return put_int(arg, bdev_read_only(bdev) != 0);
-       case BLKBSZGET: /* get the logical block size (cf. BLKSSZGET) */
+       case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */
                return put_int(arg, block_size(bdev));
-       case BLKSSZGET: /* get block device hardware sector size */
+       case BLKSSZGET: /* get block device logical block size */
                return put_int(arg, bdev_logical_block_size(bdev));
+       case BLKPBSZGET: /* get block device physical block size */
+               return put_uint(arg, bdev_physical_block_size(bdev));
+       case BLKIOMIN:
+               return put_uint(arg, bdev_io_min(bdev));
+       case BLKIOOPT:
+               return put_uint(arg, bdev_io_opt(bdev));
+       case BLKALIGNOFF:
+               return put_int(arg, bdev_alignment_offset(bdev));
        case BLKSECTGET:
                return put_ushort(arg, queue_max_sectors(bdev_get_queue(bdev)));
        case BLKRASET:
index 6fa7b0f..eb4fa19 100644 (file)
@@ -38,6 +38,7 @@
 #include <linux/slab.h>
 #include <linux/smp_lock.h>
 #include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 #include <linux/reboot.h>
 #include <linux/spinlock.h>
 #include <linux/timer.h>
@@ -6422,16 +6423,10 @@ static bool DAC960_V2_ExecuteUserCommand(DAC960_Controller_T *Controller,
   return true;
 }
 
-
-/*
-  DAC960_ProcReadStatus implements reading /proc/rd/status.
-*/
-
-static int DAC960_ProcReadStatus(char *Page, char **Start, off_t Offset,
-                                int Count, int *EOF, void *Data)
+static int dac960_proc_show(struct seq_file *m, void *v)
 {
   unsigned char *StatusMessage = "OK\n";
-  int ControllerNumber, BytesAvailable;
+  int ControllerNumber;
   for (ControllerNumber = 0;
        ControllerNumber < DAC960_ControllerCount;
        ControllerNumber++)
@@ -6444,52 +6439,49 @@ static int DAC960_ProcReadStatus(char *Page, char **Start, off_t Offset,
          break;
        }
     }
-  BytesAvailable = strlen(StatusMessage) - Offset;
-  if (Count >= BytesAvailable)
-    {
-      Count = BytesAvailable;
-      *EOF = true;
-    }
-  if (Count <= 0) return 0;
-  *Start = Page;
-  memcpy(Page, &StatusMessage[Offset], Count);
-  return Count;
+  seq_puts(m, StatusMessage);
+  return 0;
 }
 
+static int dac960_proc_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, dac960_proc_show, NULL);
+}
 
-/*
-  DAC960_ProcReadInitialStatus implements reading /proc/rd/cN/initial_status.
-*/
+static const struct file_operations dac960_proc_fops = {
+       .owner          = THIS_MODULE,
+       .open           = dac960_proc_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
 
-static int DAC960_ProcReadInitialStatus(char *Page, char **Start, off_t Offset,
-                                       int Count, int *EOF, void *Data)
+static int dac960_initial_status_proc_show(struct seq_file *m, void *v)
 {
-  DAC960_Controller_T *Controller = (DAC960_Controller_T *) Data;
-  int BytesAvailable = Controller->InitialStatusLength - Offset;
-  if (Count >= BytesAvailable)
-    {
-      Count = BytesAvailable;
-      *EOF = true;
-    }
-  if (Count <= 0) return 0;
-  *Start = Page;
-  memcpy(Page, &Controller->CombinedStatusBuffer[Offset], Count);
-  return Count;
+       DAC960_Controller_T *Controller = (DAC960_Controller_T *)m->private;
+       seq_printf(m, "%.*s", Controller->InitialStatusLength, Controller->CombinedStatusBuffer);
+       return 0;
 }
 
+static int dac960_initial_status_proc_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, dac960_initial_status_proc_show, PDE(inode)->data);
+}
 
-/*
-  DAC960_ProcReadCurrentStatus implements reading /proc/rd/cN/current_status.
-*/
+static const struct file_operations dac960_initial_status_proc_fops = {
+       .owner          = THIS_MODULE,
+       .open           = dac960_initial_status_proc_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
 
-static int DAC960_ProcReadCurrentStatus(char *Page, char **Start, off_t Offset,
-                                       int Count, int *EOF, void *Data)
+static int dac960_current_status_proc_show(struct seq_file *m, void *v)
 {
-  DAC960_Controller_T *Controller = (DAC960_Controller_T *) Data;
+  DAC960_Controller_T *Controller = (DAC960_Controller_T *) m->private;
   unsigned char *StatusMessage =
     "No Rebuild or Consistency Check in Progress\n";
   int ProgressMessageLength = strlen(StatusMessage);
-  int BytesAvailable;
   if (jiffies != Controller->LastCurrentStatusTime)
     {
       Controller->CurrentStatusLength = 0;
@@ -6513,49 +6505,41 @@ static int DAC960_ProcReadCurrentStatus(char *Page, char **Start, off_t Offset,
        }
       Controller->LastCurrentStatusTime = jiffies;
     }
-  BytesAvailable = Controller->CurrentStatusLength - Offset;
-  if (Count >= BytesAvailable)
-    {
-      Count = BytesAvailable;
-      *EOF = true;
-    }
-  if (Count <= 0) return 0;
-  *Start = Page;
-  memcpy(Page, &Controller->CurrentStatusBuffer[Offset], Count);
-  return Count;
+       seq_printf(m, "%.*s", Controller->CurrentStatusLength, Controller->CurrentStatusBuffer);
+       return 0;
 }
 
+static int dac960_current_status_proc_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, dac960_current_status_proc_show, PDE(inode)->data);
+}
 
-/*
-  DAC960_ProcReadUserCommand implements reading /proc/rd/cN/user_command.
-*/
+static const struct file_operations dac960_current_status_proc_fops = {
+       .owner          = THIS_MODULE,
+       .open           = dac960_current_status_proc_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
 
-static int DAC960_ProcReadUserCommand(char *Page, char **Start, off_t Offset,
-                                     int Count, int *EOF, void *Data)
+static int dac960_user_command_proc_show(struct seq_file *m, void *v)
 {
-  DAC960_Controller_T *Controller = (DAC960_Controller_T *) Data;
-  int BytesAvailable = Controller->UserStatusLength - Offset;
-  if (Count >= BytesAvailable)
-    {
-      Count = BytesAvailable;
-      *EOF = true;
-    }
-  if (Count <= 0) return 0;
-  *Start = Page;
-  memcpy(Page, &Controller->UserStatusBuffer[Offset], Count);
-  return Count;
-}
+       DAC960_Controller_T *Controller = (DAC960_Controller_T *)m->private;
 
+       seq_printf(m, "%.*s", Controller->UserStatusLength, Controller->UserStatusBuffer);
+       return 0;
+}
 
-/*
-  DAC960_ProcWriteUserCommand implements writing /proc/rd/cN/user_command.
-*/
+static int dac960_user_command_proc_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, dac960_user_command_proc_show, PDE(inode)->data);
+}
 
-static int DAC960_ProcWriteUserCommand(struct file *file,
+static ssize_t dac960_user_command_proc_write(struct file *file,
                                       const char __user *Buffer,
-                                      unsigned long Count, void *Data)
+                                      size_t Count, loff_t *pos)
 {
-  DAC960_Controller_T *Controller = (DAC960_Controller_T *) Data;
+  DAC960_Controller_T *Controller = (DAC960_Controller_T *) PDE(file->f_path.dentry->d_inode)->data;
   unsigned char CommandBuffer[80];
   int Length;
   if (Count > sizeof(CommandBuffer)-1) return -EINVAL;
@@ -6572,6 +6556,14 @@ static int DAC960_ProcWriteUserCommand(struct file *file,
            ? Count : -EBUSY);
 }
 
+static const struct file_operations dac960_user_command_proc_fops = {
+       .owner          = THIS_MODULE,
+       .open           = dac960_user_command_proc_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+       .write          = dac960_user_command_proc_write,
+};
 
 /*
   DAC960_CreateProcEntries creates the /proc/rd/... entries for the
@@ -6586,23 +6578,17 @@ static void DAC960_CreateProcEntries(DAC960_Controller_T *Controller)
 
        if (DAC960_ProcDirectoryEntry == NULL) {
                DAC960_ProcDirectoryEntry = proc_mkdir("rd", NULL);
-               StatusProcEntry = create_proc_read_entry("status", 0,
+               StatusProcEntry = proc_create("status", 0,
                                           DAC960_ProcDirectoryEntry,
-                                          DAC960_ProcReadStatus, NULL);
+                                          &dac960_proc_fops);
        }
 
       sprintf(Controller->ControllerName, "c%d", Controller->ControllerNumber);
       ControllerProcEntry = proc_mkdir(Controller->ControllerName,
                                       DAC960_ProcDirectoryEntry);
-      create_proc_read_entry("initial_status", 0, ControllerProcEntry,
-                            DAC960_ProcReadInitialStatus, Controller);
-      create_proc_read_entry("current_status", 0, ControllerProcEntry,
-                            DAC960_ProcReadCurrentStatus, Controller);
-      UserCommandProcEntry =
-       create_proc_read_entry("user_command", S_IWUSR | S_IRUSR,
-                              ControllerProcEntry, DAC960_ProcReadUserCommand,
-                              Controller);
-      UserCommandProcEntry->write_proc = DAC960_ProcWriteUserCommand;
+      proc_create_data("initial_status", 0, ControllerProcEntry, &dac960_initial_status_proc_fops, Controller);
+      proc_create_data("current_status", 0, ControllerProcEntry, &dac960_current_status_proc_fops, Controller);
+      UserCommandProcEntry = proc_create_data("user_command", S_IWUSR | S_IRUSR, ControllerProcEntry, &dac960_user_command_proc_fops, Controller);
       Controller->ControllerProcEntry = ControllerProcEntry;
 }
 
index 1ece0b4..fb5be2d 100644 (file)
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/init.h>
+#include <linux/jiffies.h>
 #include <linux/hdreg.h>
 #include <linux/spinlock.h>
 #include <linux/compat.h>
+#include <linux/mutex.h>
 #include <asm/uaccess.h>
 #include <asm/io.h>
 
@@ -155,6 +157,10 @@ static struct board_type products[] = {
 
 static ctlr_info_t *hba[MAX_CTLR];
 
+static struct task_struct *cciss_scan_thread;
+static DEFINE_MUTEX(scan_mutex);
+static LIST_HEAD(scan_q);
+
 static void do_cciss_request(struct request_queue *q);
 static irqreturn_t do_cciss_intr(int irq, void *dev_id);
 static int cciss_open(struct block_device *bdev, fmode_t mode);
@@ -164,9 +170,9 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
 static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo);
 
 static int cciss_revalidate(struct gendisk *disk);
-static int rebuild_lun_table(ctlr_info_t *h, int first_time);
+static int rebuild_lun_table(ctlr_info_t *h, int first_time, int via_ioctl);
 static int deregister_disk(ctlr_info_t *h, int drv_index,
-                          int clear_all);
+                          int clear_all, int via_ioctl);
 
 static void cciss_read_capacity(int ctlr, int logvol, int withirq,
                        sector_t *total_size, unsigned int *block_size);
@@ -189,8 +195,13 @@ static int sendcmd_withirq_core(ctlr_info_t *h, CommandList_struct *c,
 static int process_sendcmd_error(ctlr_info_t *h, CommandList_struct *c);
 
 static void fail_all_cmds(unsigned long ctlr);
+static int add_to_scan_list(struct ctlr_info *h);
 static int scan_thread(void *data);
 static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c);
+static void cciss_hba_release(struct device *dev);
+static void cciss_device_release(struct device *dev);
+static void cciss_free_gendisk(ctlr_info_t *h, int drv_index);
+static void cciss_free_drive_info(ctlr_info_t *h, int drv_index);
 
 #ifdef CONFIG_PROC_FS
 static void cciss_procinit(int i);
@@ -245,7 +256,10 @@ static inline void removeQ(CommandList_struct *c)
 
 #include "cciss_scsi.c"                /* For SCSI tape support */
 
-#define RAID_UNKNOWN 6
+static const char *raid_label[] = { "0", "4", "1(1+0)", "5", "5+1", "ADG",
+       "UNKNOWN"
+};
+#define RAID_UNKNOWN (sizeof(raid_label) / sizeof(raid_label[0])-1)
 
 #ifdef CONFIG_PROC_FS
 
@@ -255,9 +269,6 @@ static inline void removeQ(CommandList_struct *c)
 #define ENG_GIG 1000000000
 #define ENG_GIG_FACTOR (ENG_GIG/512)
 #define ENGAGE_SCSI    "engage scsi"
-static const char *raid_label[] = { "0", "4", "1(1+0)", "5", "5+1", "ADG",
-       "UNKNOWN"
-};
 
 static struct proc_dir_entry *proc_cciss;
 
@@ -318,7 +329,7 @@ static int cciss_seq_show(struct seq_file *seq, void *v)
        ctlr_info_t *h = seq->private;
        unsigned ctlr = h->ctlr;
        loff_t *pos = v;
-       drive_info_struct *drv = &h->drv[*pos];
+       drive_info_struct *drv = h->drv[*pos];
 
        if (*pos > h->highest_lun)
                return 0;
@@ -331,7 +342,7 @@ static int cciss_seq_show(struct seq_file *seq, void *v)
        vol_sz_frac *= 100;
        sector_div(vol_sz_frac, ENG_GIG_FACTOR);
 
-       if (drv->raid_level > 5)
+       if (drv->raid_level < 0 || drv->raid_level > RAID_UNKNOWN)
                drv->raid_level = RAID_UNKNOWN;
        seq_printf(seq, "cciss/c%dd%d:"
                        "\t%4u.%02uGB\tRAID %s\n",
@@ -454,9 +465,19 @@ static void __devinit cciss_procinit(int i)
 #define to_hba(n) container_of(n, struct ctlr_info, dev)
 #define to_drv(n) container_of(n, drive_info_struct, dev)
 
-static struct device_type cciss_host_type = {
-       .name           = "cciss_host",
-};
+static ssize_t host_store_rescan(struct device *dev,
+                                struct device_attribute *attr,
+                                const char *buf, size_t count)
+{
+       struct ctlr_info *h = to_hba(dev);
+
+       add_to_scan_list(h);
+       wake_up_process(cciss_scan_thread);
+       wait_for_completion_interruptible(&h->scan_wait);
+
+       return count;
+}
+DEVICE_ATTR(rescan, S_IWUSR, NULL, host_store_rescan);
 
 static ssize_t dev_show_unique_id(struct device *dev,
                                 struct device_attribute *attr,
@@ -560,11 +581,101 @@ static ssize_t dev_show_rev(struct device *dev,
 }
 DEVICE_ATTR(rev, S_IRUGO, dev_show_rev, NULL);
 
+static ssize_t cciss_show_lunid(struct device *dev,
+                               struct device_attribute *attr, char *buf)
+{
+       drive_info_struct *drv = to_drv(dev);
+       struct ctlr_info *h = to_hba(drv->dev.parent);
+       unsigned long flags;
+       unsigned char lunid[8];
+
+       spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+       if (h->busy_configuring) {
+               spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+               return -EBUSY;
+       }
+       if (!drv->heads) {
+               spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+               return -ENOTTY;
+       }
+       memcpy(lunid, drv->LunID, sizeof(lunid));
+       spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+       return snprintf(buf, 20, "0x%02x%02x%02x%02x%02x%02x%02x%02x\n",
+               lunid[0], lunid[1], lunid[2], lunid[3],
+               lunid[4], lunid[5], lunid[6], lunid[7]);
+}
+DEVICE_ATTR(lunid, S_IRUGO, cciss_show_lunid, NULL);
+
+static ssize_t cciss_show_raid_level(struct device *dev,
+                                    struct device_attribute *attr, char *buf)
+{
+       drive_info_struct *drv = to_drv(dev);
+       struct ctlr_info *h = to_hba(drv->dev.parent);
+       int raid;
+       unsigned long flags;
+
+       spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+       if (h->busy_configuring) {
+               spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+               return -EBUSY;
+       }
+       raid = drv->raid_level;
+       spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+       if (raid < 0 || raid > RAID_UNKNOWN)
+               raid = RAID_UNKNOWN;
+
+       return snprintf(buf, strlen(raid_label[raid]) + 7, "RAID %s\n",
+                       raid_label[raid]);
+}
+DEVICE_ATTR(raid_level, S_IRUGO, cciss_show_raid_level, NULL);
+
+static ssize_t cciss_show_usage_count(struct device *dev,
+                                     struct device_attribute *attr, char *buf)
+{
+       drive_info_struct *drv = to_drv(dev);
+       struct ctlr_info *h = to_hba(drv->dev.parent);
+       unsigned long flags;
+       int count;
+
+       spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+       if (h->busy_configuring) {
+               spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+               return -EBUSY;
+       }
+       count = drv->usage_count;
+       spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+       return snprintf(buf, 20, "%d\n", count);
+}
+DEVICE_ATTR(usage_count, S_IRUGO, cciss_show_usage_count, NULL);
+
+static struct attribute *cciss_host_attrs[] = {
+       &dev_attr_rescan.attr,
+       NULL
+};
+
+static struct attribute_group cciss_host_attr_group = {
+       .attrs = cciss_host_attrs,
+};
+
+static const struct attribute_group *cciss_host_attr_groups[] = {
+       &cciss_host_attr_group,
+       NULL
+};
+
+static struct device_type cciss_host_type = {
+       .name           = "cciss_host",
+       .groups         = cciss_host_attr_groups,
+       .release        = cciss_hba_release,
+};
+
 static struct attribute *cciss_dev_attrs[] = {
        &dev_attr_unique_id.attr,
        &dev_attr_model.attr,
        &dev_attr_vendor.attr,
        &dev_attr_rev.attr,
+       &dev_attr_lunid.attr,
+       &dev_attr_raid_level.attr,
+       &dev_attr_usage_count.attr,
        NULL
 };
 
@@ -580,12 +691,24 @@ static const struct attribute_group *cciss_dev_attr_groups[] = {
 static struct device_type cciss_dev_type = {
        .name           = "cciss_device",
        .groups         = cciss_dev_attr_groups,
+       .release        = cciss_device_release,
 };
 
 static struct bus_type cciss_bus_type = {
        .name           = "cciss",
 };
 
+/*
+ * cciss_hba_release is called when the reference count
+ * of h->dev goes to zero.
+ */
+static void cciss_hba_release(struct device *dev)
+{
+       /*
+        * nothing to do, but need this to avoid a warning
+        * about not having a release handler from lib/kref.c.
+        */
+}
 
 /*
  * Initialize sysfs entry for each controller.  This sets up and registers
@@ -609,6 +732,16 @@ static int cciss_create_hba_sysfs_entry(struct ctlr_info *h)
 static void cciss_destroy_hba_sysfs_entry(struct ctlr_info *h)
 {
        device_del(&h->dev);
+       put_device(&h->dev); /* final put. */
+}
+
+/* cciss_device_release is called when the reference count
+ * of h->drv[x]->dev goes to zero.
+ */
+static void cciss_device_release(struct device *dev)
+{
+       drive_info_struct *drv = to_drv(dev);
+       kfree(drv);
 }
 
 /*
@@ -617,24 +750,39 @@ static void cciss_destroy_hba_sysfs_entry(struct ctlr_info *h)
  * /sys/bus/pci/devices/<dev/ccis#/. We also create a link from
  * /sys/block/cciss!c#d# to this entry.
  */
-static int cciss_create_ld_sysfs_entry(struct ctlr_info *h,
-                                      drive_info_struct *drv,
+static long cciss_create_ld_sysfs_entry(struct ctlr_info *h,
                                       int drv_index)
 {
-       device_initialize(&drv->dev);
-       drv->dev.type = &cciss_dev_type;
-       drv->dev.bus = &cciss_bus_type;
-       dev_set_name(&drv->dev, "c%dd%d", h->ctlr, drv_index);
-       drv->dev.parent = &h->dev;
-       return device_add(&drv->dev);
+       struct device *dev;
+
+       if (h->drv[drv_index]->device_initialized)
+               return 0;
+
+       dev = &h->drv[drv_index]->dev;
+       device_initialize(dev);
+       dev->type = &cciss_dev_type;
+       dev->bus = &cciss_bus_type;
+       dev_set_name(dev, "c%dd%d", h->ctlr, drv_index);
+       dev->parent = &h->dev;
+       h->drv[drv_index]->device_initialized = 1;
+       return device_add(dev);
 }
 
 /*
  * Remove sysfs entries for a logical drive.
  */
-static void cciss_destroy_ld_sysfs_entry(drive_info_struct *drv)
+static void cciss_destroy_ld_sysfs_entry(struct ctlr_info *h, int drv_index,
+       int ctlr_exiting)
 {
-       device_del(&drv->dev);
+       struct device *dev = &h->drv[drv_index]->dev;
+
+       /* special case for c*d0, we only destroy it on controller exit */
+       if (drv_index == 0 && !ctlr_exiting)
+               return;
+
+       device_del(dev);
+       put_device(dev); /* the "final" put. */
+       h->drv[drv_index] = NULL;
 }
 
 /*
@@ -751,7 +899,7 @@ static int cciss_open(struct block_device *bdev, fmode_t mode)
        printk(KERN_DEBUG "cciss_open %s\n", bdev->bd_disk->disk_name);
 #endif                         /* CCISS_DEBUG */
 
-       if (host->busy_initializing || drv->busy_configuring)
+       if (drv->busy_configuring)
                return -EBUSY;
        /*
         * Root is allowed to open raw volume zero even if it's not configured
@@ -767,7 +915,8 @@ static int cciss_open(struct block_device *bdev, fmode_t mode)
                        if (MINOR(bdev->bd_dev) & 0x0f) {
                                return -ENXIO;
                                /* if it is, make sure we have a LUN ID */
-                       } else if (drv->LunID == 0) {
+                       } else if (memcmp(drv->LunID, CTLR_LUNID,
+                               sizeof(drv->LunID))) {
                                return -ENXIO;
                        }
                }
@@ -1132,12 +1281,13 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
        case CCISS_DEREGDISK:
        case CCISS_REGNEWD:
        case CCISS_REVALIDVOLS:
-               return rebuild_lun_table(host, 0);
+               return rebuild_lun_table(host, 0, 1);
 
        case CCISS_GETLUNINFO:{
                        LogvolInfo_struct luninfo;
 
-                       luninfo.LunID = drv->LunID;
+                       memcpy(&luninfo.LunID, drv->LunID,
+                               sizeof(luninfo.LunID));
                        luninfo.num_opens = drv->usage_count;
                        luninfo.num_parts = 0;
                        if (copy_to_user(argp, &luninfo,
@@ -1475,7 +1625,10 @@ static void cciss_check_queues(ctlr_info_t *h)
                /* make sure the disk has been added and the drive is real
                 * because this can be called from the middle of init_one.
                 */
-               if (!(h->drv[curr_queue].queue) || !(h->drv[curr_queue].heads))
+               if (!h->drv[curr_queue])
+                       continue;
+               if (!(h->drv[curr_queue]->queue) ||
+                       !(h->drv[curr_queue]->heads))
                        continue;
                blk_start_queue(h->gendisk[curr_queue]->queue);
 
@@ -1532,13 +1685,11 @@ static void cciss_softirq_done(struct request *rq)
        spin_unlock_irqrestore(&h->lock, flags);
 }
 
-static void log_unit_to_scsi3addr(ctlr_info_t *h, unsigned char scsi3addr[],
-       uint32_t log_unit)
+static inline void log_unit_to_scsi3addr(ctlr_info_t *h,
+       unsigned char scsi3addr[], uint32_t log_unit)
 {
-       log_unit = h->drv[log_unit].LunID & 0x03fff;
-       memset(&scsi3addr[4], 0, 4);
-       memcpy(&scsi3addr[0], &log_unit, 4);
-       scsi3addr[3] |= 0x40;
+       memcpy(scsi3addr, h->drv[log_unit]->LunID,
+               sizeof(h->drv[log_unit]->LunID));
 }
 
 /* This function gets the SCSI vendor, model, and revision of a logical drive
@@ -1615,16 +1766,23 @@ static void cciss_get_serial_no(int ctlr, int logvol, int withirq,
        return;
 }
 
-static void cciss_add_disk(ctlr_info_t *h, struct gendisk *disk,
+/*
+ * cciss_add_disk sets up the block device queue for a logical drive
+ */
+static int cciss_add_disk(ctlr_info_t *h, struct gendisk *disk,
                                int drv_index)
 {
        disk->queue = blk_init_queue(do_cciss_request, &h->lock);
+       if (!disk->queue)
+               goto init_queue_failure;
        sprintf(disk->disk_name, "cciss/c%dd%d", h->ctlr, drv_index);
        disk->major = h->major;
        disk->first_minor = drv_index << NWD_SHIFT;
        disk->fops = &cciss_fops;
-       disk->private_data = &h->drv[drv_index];
-       disk->driverfs_dev = &h->drv[drv_index].dev;
+       if (cciss_create_ld_sysfs_entry(h, drv_index))
+               goto cleanup_queue;
+       disk->private_data = h->drv[drv_index];
+       disk->driverfs_dev = &h->drv[drv_index]->dev;
 
        /* Set up queue information */
        blk_queue_bounce_limit(disk->queue, h->pdev->dma_mask);
@@ -1642,14 +1800,21 @@ static void cciss_add_disk(ctlr_info_t *h, struct gendisk *disk,
        disk->queue->queuedata = h;
 
        blk_queue_logical_block_size(disk->queue,
-                                    h->drv[drv_index].block_size);
+                                    h->drv[drv_index]->block_size);
 
        /* Make sure all queue data is written out before */
-       /* setting h->drv[drv_index].queue, as setting this */
+       /* setting h->drv[drv_index]->queue, as setting this */
        /* allows the interrupt handler to start the queue */
        wmb();
-       h->drv[drv_index].queue = disk->queue;
+       h->drv[drv_index]->queue = disk->queue;
        add_disk(disk);
+       return 0;
+
+cleanup_queue:
+       blk_cleanup_queue(disk->queue);
+       disk->queue = NULL;
+init_queue_failure:
+       return -1;
 }
 
 /* This function will check the usage_count of the drive to be updated/added.
@@ -1662,7 +1827,8 @@ static void cciss_add_disk(ctlr_info_t *h, struct gendisk *disk,
  * is also the controller node.  Any changes to disk 0 will show up on
  * the next reboot.
  */
-static void cciss_update_drive_info(int ctlr, int drv_index, int first_time)
+static void cciss_update_drive_info(int ctlr, int drv_index, int first_time,
+       int via_ioctl)
 {
        ctlr_info_t *h = hba[ctlr];
        struct gendisk *disk;
@@ -1672,21 +1838,13 @@ static void cciss_update_drive_info(int ctlr, int drv_index, int first_time)
        unsigned long flags = 0;
        int ret = 0;
        drive_info_struct *drvinfo;
-       int was_only_controller_node;
 
        /* Get information about the disk and modify the driver structure */
        inq_buff = kmalloc(sizeof(InquiryData_struct), GFP_KERNEL);
-       drvinfo = kmalloc(sizeof(*drvinfo), GFP_KERNEL);
+       drvinfo = kzalloc(sizeof(*drvinfo), GFP_KERNEL);
        if (inq_buff == NULL || drvinfo == NULL)
                goto mem_msg;
 
-       /* See if we're trying to update the "controller node"
-        * this will happen the when the first logical drive gets
-        * created by ACU.
-        */
-       was_only_controller_node = (drv_index == 0 &&
-                               h->drv[0].raid_level == -1);
-
        /* testing to see if 16-byte CDBs are already being used */
        if (h->cciss_read == CCISS_READ_16) {
                cciss_read_capacity_16(h->ctlr, drv_index, 1,
@@ -1719,16 +1877,19 @@ static void cciss_update_drive_info(int ctlr, int drv_index, int first_time)
                                drvinfo->model, drvinfo->rev);
        cciss_get_serial_no(ctlr, drv_index, 1, drvinfo->serial_no,
                        sizeof(drvinfo->serial_no));
+       /* Save the lunid in case we deregister the disk, below. */
+       memcpy(drvinfo->LunID, h->drv[drv_index]->LunID,
+               sizeof(drvinfo->LunID));
 
        /* Is it the same disk we already know, and nothing's changed? */
-       if (h->drv[drv_index].raid_level != -1 &&
+       if (h->drv[drv_index]->raid_level != -1 &&
                ((memcmp(drvinfo->serial_no,
-                               h->drv[drv_index].serial_no, 16) == 0) &&
-               drvinfo->block_size == h->drv[drv_index].block_size &&
-               drvinfo->nr_blocks == h->drv[drv_index].nr_blocks &&
-               drvinfo->heads == h->drv[drv_index].heads &&
-               drvinfo->sectors == h->drv[drv_index].sectors &&
-               drvinfo->cylinders == h->drv[drv_index].cylinders))
+                               h->drv[drv_index]->serial_no, 16) == 0) &&
+               drvinfo->block_size == h->drv[drv_index]->block_size &&
+               drvinfo->nr_blocks == h->drv[drv_index]->nr_blocks &&
+               drvinfo->heads == h->drv[drv_index]->heads &&
+               drvinfo->sectors == h->drv[drv_index]->sectors &&
+               drvinfo->cylinders == h->drv[drv_index]->cylinders))
                        /* The disk is unchanged, nothing to update */
                        goto freeret;
 
@@ -1738,18 +1899,17 @@ static void cciss_update_drive_info(int ctlr, int drv_index, int first_time)
         * If the disk already exists then deregister it before proceeding
         * (unless it's the first disk (for the controller node).
         */
-       if (h->drv[drv_index].raid_level != -1 && drv_index != 0) {
+       if (h->drv[drv_index]->raid_level != -1 && drv_index != 0) {
                printk(KERN_WARNING "disk %d has changed.\n", drv_index);
                spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
-               h->drv[drv_index].busy_configuring = 1;
+               h->drv[drv_index]->busy_configuring = 1;
                spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
 
-               /* deregister_disk sets h->drv[drv_index].queue = NULL
+               /* deregister_disk sets h->drv[drv_index]->queue = NULL
                 * which keeps the interrupt handler from starting
                 * the queue.
                 */
-               ret = deregister_disk(h, drv_index, 0);
-               h->drv[drv_index].busy_configuring = 0;
+               ret = deregister_disk(h, drv_index, 0, via_ioctl);
        }
 
        /* If the disk is in use return */
@@ -1757,22 +1917,31 @@ static void cciss_update_drive_info(int ctlr, int drv_index, int first_time)
                goto freeret;
 
        /* Save the new information from cciss_geometry_inquiry
-        * and serial number inquiry.
+        * and serial number inquiry.  If the disk was deregistered
+        * above, then h->drv[drv_index] will be NULL.
         */
-       h->drv[drv_index].block_size = drvinfo->block_size;
-       h->drv[drv_index].nr_blocks = drvinfo->nr_blocks;
-       h->drv[drv_index].heads = drvinfo->heads;
-       h->drv[drv_index].sectors = drvinfo->sectors;
-       h->drv[drv_index].cylinders = drvinfo->cylinders;
-       h->drv[drv_index].raid_level = drvinfo->raid_level;
-       memcpy(h->drv[drv_index].serial_no, drvinfo->serial_no, 16);
-       memcpy(h->drv[drv_index].vendor, drvinfo->vendor, VENDOR_LEN + 1);
-       memcpy(h->drv[drv_index].model, drvinfo->model, MODEL_LEN + 1);
-       memcpy(h->drv[drv_index].rev, drvinfo->rev, REV_LEN + 1);
+       if (h->drv[drv_index] == NULL) {
+               drvinfo->device_initialized = 0;
+               h->drv[drv_index] = drvinfo;
+               drvinfo = NULL; /* so it won't be freed below. */
+       } else {
+               /* special case for cxd0 */
+               h->drv[drv_index]->block_size = drvinfo->block_size;
+               h->drv[drv_index]->nr_blocks = drvinfo->nr_blocks;
+               h->drv[drv_index]->heads = drvinfo->heads;
+               h->drv[drv_index]->sectors = drvinfo->sectors;
+               h->drv[drv_index]->cylinders = drvinfo->cylinders;
+               h->drv[drv_index]->raid_level = drvinfo->raid_level;
+               memcpy(h->drv[drv_index]->serial_no, drvinfo->serial_no, 16);
+               memcpy(h->drv[drv_index]->vendor, drvinfo->vendor,
+                       VENDOR_LEN + 1);
+               memcpy(h->drv[drv_index]->model, drvinfo->model, MODEL_LEN + 1);
+               memcpy(h->drv[drv_index]->rev, drvinfo->rev, REV_LEN + 1);
+       }
 
        ++h->num_luns;
        disk = h->gendisk[drv_index];
-       set_capacity(disk, h->drv[drv_index].nr_blocks);
+       set_capacity(disk, h->drv[drv_index]->nr_blocks);
 
        /* If it's not disk 0 (drv_index != 0)
         * or if it was disk 0, but there was previously
@@ -1780,8 +1949,15 @@ static void cciss_update_drive_info(int ctlr, int drv_index, int first_time)
         * (raid_leve == -1) then we want to update the
         * logical drive's information.
         */
-       if (drv_index || first_time)
-               cciss_add_disk(h, disk, drv_index);
+       if (drv_index || first_time) {
+               if (cciss_add_disk(h, disk, drv_index) != 0) {
+                       cciss_free_gendisk(h, drv_index);
+                       cciss_free_drive_info(h, drv_index);
+                       printk(KERN_WARNING "cciss:%d could not update "
+                               "disk %d\n", h->ctlr, drv_index);
+                       --h->num_luns;
+               }
+       }
 
 freeret:
        kfree(inq_buff);
@@ -1793,28 +1969,70 @@ mem_msg:
 }
 
 /* This function will find the first index of the controllers drive array
- * that has a -1 for the raid_level and will return that index.  This is
- * where new drives will be added.  If the index to be returned is greater
- * than the highest_lun index for the controller then highest_lun is set
- * to this new index.  If there are no available indexes then -1 is returned.
- * "controller_node" is used to know if this is a real logical drive, or just
- * the controller node, which determines if this counts towards highest_lun.
+ * that has a null drv pointer, allocate the drive info struct, and
+ * return that index.  This is where new drives will be added.
+ * If the index to be returned is greater than the highest_lun index for
+ * the controller then highest_lun is set to this new index.
+ * If there are no available indexes or if the allocation fails, then -1
+ * is returned.  "controller_node" is used to know if this is a real
+ * logical drive, or just the controller node, which determines if this
+ * counts towards highest_lun.
  */
-static int cciss_find_free_drive_index(int ctlr, int controller_node)
+static int cciss_alloc_drive_info(ctlr_info_t *h, int controller_node)
 {
        int i;
+       drive_info_struct *drv;
 
+       /* Search for an empty slot for our drive info */
        for (i = 0; i < CISS_MAX_LUN; i++) {
-               if (hba[ctlr]->drv[i].raid_level == -1) {
-                       if (i > hba[ctlr]->highest_lun)
-                               if (!controller_node)
-                                       hba[ctlr]->highest_lun = i;
+
+               /* if not cxd0 case, and it's occupied, skip it. */
+               if (h->drv[i] && i != 0)
+                       continue;
+               /*
+                * If it's cxd0 case, and drv is alloc'ed already, and a
+                * disk is configured there, skip it.
+                */
+               if (i == 0 && h->drv[i] && h->drv[i]->raid_level != -1)
+                       continue;
+
+               /*
+                * We've found an empty slot.  Update highest_lun
+                * provided this isn't just the fake cxd0 controller node.
+                */
+               if (i > h->highest_lun && !controller_node)
+                       h->highest_lun = i;
+
+               /* If adding a real disk at cxd0, and it's already alloc'ed */
+               if (i == 0 && h->drv[i] != NULL)
                        return i;
-               }
+
+               /*
+                * Found an empty slot, not already alloc'ed.  Allocate it.
+                * Mark it with raid_level == -1, so we know it's new later on.
+                */
+               drv = kzalloc(sizeof(*drv), GFP_KERNEL);
+               if (!drv)
+                       return -1;
+               drv->raid_level = -1; /* so we know it's new */
+               h->drv[i] = drv;
+               return i;
        }
        return -1;
 }
 
+static void cciss_free_drive_info(ctlr_info_t *h, int drv_index)
+{
+       kfree(h->drv[drv_index]);
+       h->drv[drv_index] = NULL;
+}
+
+static void cciss_free_gendisk(ctlr_info_t *h, int drv_index)
+{
+       put_disk(h->gendisk[drv_index]);
+       h->gendisk[drv_index] = NULL;
+}
+
 /* cciss_add_gendisk finds a free hba[]->drv structure
  * and allocates a gendisk if needed, and sets the lunid
  * in the drvinfo structure.   It returns the index into
@@ -1824,13 +2042,15 @@ static int cciss_find_free_drive_index(int ctlr, int controller_node)
  * a means to talk to the controller in case no logical
  * drives have yet been configured.
  */
-static int cciss_add_gendisk(ctlr_info_t *h, __u32 lunid, int controller_node)
+static int cciss_add_gendisk(ctlr_info_t *h, unsigned char lunid[],
+       int controller_node)
 {
        int drv_index;
 
-       drv_index = cciss_find_free_drive_index(h->ctlr, controller_node);
+       drv_index = cciss_alloc_drive_info(h, controller_node);
        if (drv_index == -1)
                return -1;
+
        /*Check if the gendisk needs to be allocated */
        if (!h->gendisk[drv_index]) {
                h->gendisk[drv_index] =
@@ -1839,23 +2059,24 @@ static int cciss_add_gendisk(ctlr_info_t *h, __u32 lunid, int controller_node)
                        printk(KERN_ERR "cciss%d: could not "
                                "allocate a new disk %d\n",
                                h->ctlr, drv_index);
-                       return -1;
+                       goto err_free_drive_info;
                }
        }
-       h->drv[drv_index].LunID = lunid;
-       if (cciss_create_ld_sysfs_entry(h, &h->drv[drv_index], drv_index))
+       memcpy(h->drv[drv_index]->LunID, lunid,
+               sizeof(h->drv[drv_index]->LunID));
+       if (cciss_create_ld_sysfs_entry(h, drv_index))
                goto err_free_disk;
-
        /* Don't need to mark this busy because nobody */
        /* else knows about this disk yet to contend */
        /* for access to it. */
-       h->drv[drv_index].busy_configuring = 0;
+       h->drv[drv_index]->busy_configuring = 0;
        wmb();
        return drv_index;
 
 err_free_disk:
-       put_disk(h->gendisk[drv_index]);
-       h->gendisk[drv_index] = NULL;
+       cciss_free_gendisk(h, drv_index);
+err_free_drive_info:
+       cciss_free_drive_info(h, drv_index);
        return -1;
 }
 
@@ -1872,21 +2093,25 @@ static void cciss_add_controller_node(ctlr_info_t *h)
        if (h->gendisk[0] != NULL) /* already did this? Then bail. */
                return;
 
-       drv_index = cciss_add_gendisk(h, 0, 1);
-       if (drv_index == -1) {
-               printk(KERN_WARNING "cciss%d: could not "
-                       "add disk 0.\n", h->ctlr);
-               return;
-       }
-       h->drv[drv_index].block_size = 512;
-       h->drv[drv_index].nr_blocks = 0;
-       h->drv[drv_index].heads = 0;
-       h->drv[drv_index].sectors = 0;
-       h->drv[drv_index].cylinders = 0;
-       h->drv[drv_index].raid_level = -1;
-       memset(h->drv[drv_index].serial_no, 0, 16);
+       drv_index = cciss_add_gendisk(h, CTLR_LUNID, 1);
+       if (drv_index == -1)
+               goto error;
+       h->drv[drv_index]->block_size = 512;
+       h->drv[drv_index]->nr_blocks = 0;
+       h->drv[drv_index]->heads = 0;
+       h->drv[drv_index]->sectors = 0;
+       h->drv[drv_index]->cylinders = 0;
+       h->drv[drv_index]->raid_level = -1;
+       memset(h->drv[drv_index]->serial_no, 0, 16);
        disk = h->gendisk[drv_index];
-       cciss_add_disk(h, disk, drv_index);
+       if (cciss_add_disk(h, disk, drv_index) == 0)
+               return;
+       cciss_free_gendisk(h, drv_index);
+       cciss_free_drive_info(h, drv_index);
+error:
+       printk(KERN_WARNING "cciss%d: could not "
+               "add disk 0.\n", h->ctlr);
+       return;
 }
 
 /* This function will add and remove logical drives from the Logical
@@ -1897,7 +2122,8 @@ static void cciss_add_controller_node(ctlr_info_t *h)
  * INPUT
  * h           = The controller to perform the operations on
  */
-static int rebuild_lun_table(ctlr_info_t *h, int first_time)
+static int rebuild_lun_table(ctlr_info_t *h, int first_time,
+       int via_ioctl)
 {
        int ctlr = h->ctlr;
        int num_luns;
@@ -1907,7 +2133,7 @@ static int rebuild_lun_table(ctlr_info_t *h, int first_time)
        int i;
        int drv_found;
        int drv_index = 0;
-       __u32 lunid = 0;
+       unsigned char lunid[8] = CTLR_LUNID;
        unsigned long flags;
 
        if (!capable(CAP_SYS_RAWIO))
@@ -1960,13 +2186,13 @@ static int rebuild_lun_table(ctlr_info_t *h, int first_time)
                drv_found = 0;
 
                /* skip holes in the array from already deleted drives */
-               if (h->drv[i].raid_level == -1)
+               if (h->drv[i] == NULL)
                        continue;
 
                for (j = 0; j < num_luns; j++) {
-                       memcpy(&lunid, &ld_buff->LUN[j][0], 4);
-                       lunid = le32_to_cpu(lunid);
-                       if (h->drv[i].LunID == lunid) {
+                       memcpy(lunid, &ld_buff->LUN[j][0], sizeof(lunid));
+                       if (memcmp(h->drv[i]->LunID, lunid,
+                               sizeof(lunid)) == 0) {
                                drv_found = 1;
                                break;
                        }
@@ -1974,11 +2200,11 @@ static int rebuild_lun_table(ctlr_info_t *h, int first_time)
                if (!drv_found) {
                        /* Deregister it from the OS, it's gone. */
                        spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
-                       h->drv[i].busy_configuring = 1;
+                       h->drv[i]->busy_configuring = 1;
                        spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
-                       return_code = deregister_disk(h, i, 1);
-                       cciss_destroy_ld_sysfs_entry(&h->drv[i]);
-                       h->drv[i].busy_configuring = 0;
+                       return_code = deregister_disk(h, i, 1, via_ioctl);
+                       if (h->drv[i] != NULL)
+                               h->drv[i]->busy_configuring = 0;
                }
        }
 
@@ -1992,17 +2218,16 @@ static int rebuild_lun_table(ctlr_info_t *h, int first_time)
 
                drv_found = 0;
 
-               memcpy(&lunid, &ld_buff->LUN[i][0], 4);
-               lunid = le32_to_cpu(lunid);
-
+               memcpy(lunid, &ld_buff->LUN[i][0], sizeof(lunid));
                /* Find if the LUN is already in the drive array
                 * of the driver.  If so then update its info
                 * if not in use.  If it does not exist then find
                 * the first free index and add it.
                 */
                for (j = 0; j <= h->highest_lun; j++) {
-                       if (h->drv[j].raid_level != -1 &&
-                               h->drv[j].LunID == lunid) {
+                       if (h->drv[j] != NULL &&
+                               memcmp(h->drv[j]->LunID, lunid,
+                                       sizeof(h->drv[j]->LunID)) == 0) {
                                drv_index = j;
                                drv_found = 1;
                                break;
@@ -2015,7 +2240,8 @@ static int rebuild_lun_table(ctlr_info_t *h, int first_time)
                        if (drv_index == -1)
                                goto freeret;
                }
-               cciss_update_drive_info(ctlr, drv_index, first_time);
+               cciss_update_drive_info(ctlr, drv_index, first_time,
+                       via_ioctl);
        }               /* end for */
 
 freeret:
@@ -2032,6 +2258,25 @@ mem_msg:
        goto freeret;
 }
 
+static void cciss_clear_drive_info(drive_info_struct *drive_info)
+{
+       /* zero size/geometry, set raid_level to -1, blank the id strings */
+       drive_info->nr_blocks = 0;
+       drive_info->block_size = 0;
+       drive_info->heads = 0;
+       drive_info->sectors = 0;
+       drive_info->cylinders = 0;
+       drive_info->raid_level = -1;
+       memset(drive_info->serial_no, 0, sizeof(drive_info->serial_no));
+       memset(drive_info->model, 0, sizeof(drive_info->model));
+       memset(drive_info->rev, 0, sizeof(drive_info->rev));
+       memset(drive_info->vendor, 0, sizeof(drive_info->vendor));
+       /*
+        * LunID is deliberately left intact: it still identifies which
+        * logical drive this entry describes.
+        */
+}
+
 /* This function will deregister the disk and it's queue from the
  * kernel.  It must be called with the controller lock held and the
  * drv structures busy_configuring flag set.  It's parameters are:
@@ -2046,43 +2291,48 @@ mem_msg:
  *             the disk in preparation for re-adding it.  In this case
  *             the highest_lun should be left unchanged and the LunID
  *             should not be cleared.
+ * via_ioctl
+ *    This indicates whether we've reached this path via ioctl.
+ *    This affects the maximum usage count allowed for c0d0 to be messed with.
+ *    If this path is reached via ioctl(), then the max_usage_count will
+ *    be 1, as the process calling ioctl() has got to have the device open.
+ *    If we get here via sysfs, then the max usage count will be zero.
 */
 static int deregister_disk(ctlr_info_t *h, int drv_index,
-                          int clear_all)
+                          int clear_all, int via_ioctl)
 {
        int i;
        struct gendisk *disk;
        drive_info_struct *drv;
+       int recalculate_highest_lun;
 
        if (!capable(CAP_SYS_RAWIO))
                return -EPERM;
 
-       drv = &h->drv[drv_index];
+       drv = h->drv[drv_index];
        disk = h->gendisk[drv_index];
 
        /* make sure logical volume is NOT is use */
        if (clear_all || (h->gendisk[0] == disk)) {
-               if (drv->usage_count > 1)
+               if (drv->usage_count > via_ioctl)
                        return -EBUSY;
        } else if (drv->usage_count > 0)
                return -EBUSY;
 
+       recalculate_highest_lun = (drv == h->drv[h->highest_lun]);
+
        /* invalidate the devices and deregister the disk.  If it is disk
         * zero do not deregister it but just zero out it's values.  This
         * allows us to delete disk zero but keep the controller registered.
         */
        if (h->gendisk[0] != disk) {
                struct request_queue *q = disk->queue;
-               if (disk->flags & GENHD_FL_UP)
+               if (disk->flags & GENHD_FL_UP) {
+                       cciss_destroy_ld_sysfs_entry(h, drv_index, 0);
                        del_gendisk(disk);
-               if (q) {
-                       blk_cleanup_queue(q);
-                       /* Set drv->queue to NULL so that we do not try
-                        * to call blk_start_queue on this queue in the
-                        * interrupt handler
-                        */
-                       drv->queue = NULL;
                }
+               if (q)
+                       blk_cleanup_queue(q);
                /* If clear_all is set then we are deleting the logical
                 * drive, not just refreshing its info.  For drives
                 * other than disk 0 we will call put_disk.  We do not
@@ -2105,34 +2355,20 @@ static int deregister_disk(ctlr_info_t *h, int drv_index,
                }
        } else {
                set_capacity(disk, 0);
+               cciss_clear_drive_info(drv);
        }
 
        --h->num_luns;
-       /* zero out the disk size info */
-       drv->nr_blocks = 0;
-       drv->block_size = 0;
-       drv->heads = 0;
-       drv->sectors = 0;
-       drv->cylinders = 0;
-       drv->raid_level = -1;   /* This can be used as a flag variable to
-                                * indicate that this element of the drive
-                                * array is free.
-                                */
-
-       if (clear_all) {
-               /* check to see if it was the last disk */
-               if (drv == h->drv + h->highest_lun) {
-                       /* if so, find the new hightest lun */
-                       int i, newhighest = -1;
-                       for (i = 0; i <= h->highest_lun; i++) {
-                               /* if the disk has size > 0, it is available */
-                               if (h->drv[i].heads)
-                                       newhighest = i;
-                       }
-                       h->highest_lun = newhighest;
-               }
 
-               drv->LunID = 0;
+       /* if it was the last disk, find the new highest lun */
+       if (clear_all && recalculate_highest_lun) {
+               int i, newhighest = -1;
+               for (i = 0; i <= h->highest_lun; i++) {
+                       /* if the disk has size > 0, it is available */
+                       if (h->drv[i] && h->drv[i]->heads)
+                               newhighest = i;
+               }
+               h->highest_lun = newhighest;
        }
        return 0;
 }
@@ -2479,8 +2715,6 @@ static void cciss_geometry_inquiry(int ctlr, int logvol,
        } else {                /* Get geometry failed */
                printk(KERN_WARNING "cciss: reading geometry failed\n");
        }
-       printk(KERN_INFO "      heads=%d, sectors=%d, cylinders=%d\n\n",
-              drv->heads, drv->sectors, drv->cylinders);
 }
 
 static void
@@ -2514,9 +2748,6 @@ cciss_read_capacity(int ctlr, int logvol, int withirq, sector_t *total_size,
                *total_size = 0;
                *block_size = BLOCK_SIZE;
        }
-       if (*total_size != 0)
-               printk(KERN_INFO "      blocks= %llu block_size= %d\n",
-               (unsigned long long)*total_size+1, *block_size);
        kfree(buf);
 }
 
@@ -2568,7 +2799,8 @@ static int cciss_revalidate(struct gendisk *disk)
        InquiryData_struct *inq_buff = NULL;
 
        for (logvol = 0; logvol < CISS_MAX_LUN; logvol++) {
-               if (h->drv[logvol].LunID == drv->LunID) {
+               if (memcmp(h->drv[logvol]->LunID, drv->LunID,
+                       sizeof(drv->LunID)) == 0) {
                        FOUND = 1;
                        break;
                }
@@ -3053,8 +3285,7 @@ static void do_cciss_request(struct request_queue *q)
        /* The first 2 bits are reserved for controller error reporting. */
        c->Header.Tag.lower = (c->cmdindex << 3);
        c->Header.Tag.lower |= 0x04;    /* flag for direct lookup. */
-       c->Header.LUN.LogDev.VolId = drv->LunID;
-       c->Header.LUN.LogDev.Mode = 1;
+       memcpy(&c->Header.LUN, drv->LunID, sizeof(drv->LunID));
        c->Request.CDBLen = 10; // 12 byte commands not in FW yet;
        c->Request.Type.Type = TYPE_CMD;        // It is a command.
        c->Request.Type.Attribute = ATTR_SIMPLE;
@@ -3232,20 +3463,121 @@ static irqreturn_t do_cciss_intr(int irq, void *dev_id)
        return IRQ_HANDLED;
 }
 
+/**
+ * add_to_scan_list() - add controller to rescan queue
+ * @h:               Pointer to the controller.
+ *
+ * Queues @h on scan_q (under scan_mutex) and re-arms @h->scan_wait.
+ * Never blocks on busy_shutting_down: it only try-locks that mutex.
+ * Returns 1 if queued; 0 if skipped because @h is initializing,
+ * busy_shutting_down is held, @h is already queued, or @h is
+ * currently being scanned (busy_scanning set).
+ **/
+static int add_to_scan_list(struct ctlr_info *h)
+{
+       struct ctlr_info *test_h;
+       int found = 0;
+       int ret = 0;
+
+       if (h->busy_initializing)
+               return 0;
+
+       if (!mutex_trylock(&h->busy_shutting_down))
+               return 0;
+
+       mutex_lock(&scan_mutex);
+       list_for_each_entry(test_h, &scan_q, scan_list) {
+               if (test_h == h) {
+                       found = 1;
+                       break;
+               }
+       }
+       if (!found && !h->busy_scanning) {
+               INIT_COMPLETION(h->scan_wait);
+               list_add_tail(&h->scan_list, &scan_q);
+               ret = 1;
+       }
+       mutex_unlock(&scan_mutex);
+       mutex_unlock(&h->busy_shutting_down);
+
+       return ret;
+}
+
+/**
+ * remove_from_scan_list() - remove controller from rescan queue
+ * @h:                    Pointer to the controller.
+ *
+ * Dequeues the controller (completing scan_wait) if it is still queued.
+ * Otherwise, if busy_scanning is set, blocks until scan_wait completes.
+ **/
+static void remove_from_scan_list(struct ctlr_info *h)
+{
+       struct ctlr_info *test_h, *tmp_h;
+       int scanning = 0;
+
+       mutex_lock(&scan_mutex);
+       list_for_each_entry_safe(test_h, tmp_h, &scan_q, scan_list) {
+               if (test_h == h) {
+                       list_del(&h->scan_list);
+                       complete_all(&h->scan_wait);
+                       mutex_unlock(&scan_mutex);
+                       return;
+               }
+       }
+       if (h->busy_scanning)   /* test the flag's value, not its address */
+               scanning = 1;
+       mutex_unlock(&scan_mutex);
+
+       if (scanning)           /* scan_mutex is already dropped here */
+               wait_for_completion(&h->scan_wait);
+}
+
+/**
+ * scan_thread() - kernel thread used to rescan controllers
+ * @data:       Ignored.
+ *
+ * A kernel thread used to scan for drive topology changes on
+ * controllers. The thread processes only one controller at a time
+ * using a queue.  Controllers are added to the queue using
+ * add_to_scan_list() and removed from the queue either after done
+ * processing or using remove_from_scan_list().
+ *
+ * returns 0.
+ **/
 static int scan_thread(void *data)
 {
-       ctlr_info_t *h = data;
-       int rc;
-       DECLARE_COMPLETION_ONSTACK(wait);
-       h->rescan_wait = &wait;
+       struct ctlr_info *h;
 
-       for (;;) {
-               rc = wait_for_completion_interruptible(&wait);
+       while (1) {
+               set_current_state(TASK_INTERRUPTIBLE);
+               schedule();
                if (kthread_should_stop())
                        break;
-               if (!rc)
-                       rebuild_lun_table(h, 0);
+
+               while (1) {
+                       mutex_lock(&scan_mutex);
+                       if (list_empty(&scan_q)) {
+                               mutex_unlock(&scan_mutex);
+                               break;
+                       }
+
+                       h = list_entry(scan_q.next,
+                                      struct ctlr_info,
+                                      scan_list);
+                       list_del(&h->scan_list);
+                       h->busy_scanning = 1;
+                       mutex_unlock(&scan_mutex);
+
+                       if (h) {
+                               rebuild_lun_table(h, 0, 0);
+                               complete_all(&h->scan_wait);
+                               mutex_lock(&scan_mutex);
+                               h->busy_scanning = 0;
+                               mutex_unlock(&scan_mutex);
+                       }
+               }
        }
+
        return 0;
 }
 
@@ -3268,8 +3600,8 @@ static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c)
        case REPORT_LUNS_CHANGED:
                printk(KERN_WARNING "cciss%d: report LUN data "
                        "changed\n", h->ctlr);
-               if (h->rescan_wait)
-                       complete(h->rescan_wait);
+               add_to_scan_list(h);
+               wake_up_process(cciss_scan_thread);
                return 1;
        break;
        case POWER_OR_RESET:
@@ -3489,7 +3821,7 @@ static int __devinit cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev)
                if (scratchpad == CCISS_FIRMWARE_READY)
                        break;
                set_current_state(TASK_INTERRUPTIBLE);
-               schedule_timeout(HZ / 10);      /* wait 100ms */
+               schedule_timeout(msecs_to_jiffies(100));        /* wait 100ms */
        }
        if (scratchpad != CCISS_FIRMWARE_READY) {
                printk(KERN_WARNING "cciss: Board not ready.  Timed out.\n");
@@ -3615,7 +3947,7 @@ static int __devinit cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev)
                        break;
                /* delay and try again */
                set_current_state(TASK_INTERRUPTIBLE);
-               schedule_timeout(10);
+               schedule_timeout(msecs_to_jiffies(1));
        }
 
 #ifdef CCISS_DEBUG
@@ -3669,15 +4001,16 @@ Enomem:
        return -1;
 }
 
-static void free_hba(int i)
+static void free_hba(int n)
 {
-       ctlr_info_t *p = hba[i];
-       int n;
+       ctlr_info_t *h = hba[n];
+       int i;
 
-       hba[i] = NULL;
-       for (n = 0; n < CISS_MAX_LUN; n++)
-               put_disk(p->gendisk[n]);
-       kfree(p);
+       hba[n] = NULL;
+       for (i = 0; i < h->highest_lun + 1; i++)
+               if (h->gendisk[i] != NULL)
+                       put_disk(h->gendisk[i]);
+       kfree(h);
 }
 
 /* Send a message CDB to the firmware. */
@@ -3918,6 +4251,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
        hba[i]->busy_initializing = 1;
        INIT_HLIST_HEAD(&hba[i]->cmpQ);
        INIT_HLIST_HEAD(&hba[i]->reqQ);
+       mutex_init(&hba[i]->busy_shutting_down);
 
        if (cciss_pci_init(hba[i], pdev) != 0)
                goto clean0;
@@ -3926,6 +4260,8 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
        hba[i]->ctlr = i;
        hba[i]->pdev = pdev;
 
+       init_completion(&hba[i]->scan_wait);
+
        if (cciss_create_hba_sysfs_entry(hba[i]))
                goto clean0;
 
@@ -4001,8 +4337,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
        hba[i]->num_luns = 0;
        hba[i]->highest_lun = -1;
        for (j = 0; j < CISS_MAX_LUN; j++) {
-               hba[i]->drv[j].raid_level = -1;
-               hba[i]->drv[j].queue = NULL;
+               hba[i]->drv[j] = NULL;
                hba[i]->gendisk[j] = NULL;
        }
 
@@ -4035,14 +4370,8 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
 
        hba[i]->cciss_max_sectors = 2048;
 
+       rebuild_lun_table(hba[i], 1, 0);
        hba[i]->busy_initializing = 0;
-
-       rebuild_lun_table(hba[i], 1);
-       hba[i]->cciss_scan_thread = kthread_run(scan_thread, hba[i],
-                               "cciss_scan%02d", i);
-       if (IS_ERR(hba[i]->cciss_scan_thread))
-               return PTR_ERR(hba[i]->cciss_scan_thread);
-
        return 1;
 
 clean4:
@@ -4063,12 +4392,7 @@ clean1:
        cciss_destroy_hba_sysfs_entry(hba[i]);
 clean0:
        hba[i]->busy_initializing = 0;
-       /* cleanup any queues that may have been initialized */
-       for (j=0; j <= hba[i]->highest_lun; j++){
-               drive_info_struct *drv = &(hba[i]->drv[j]);
-               if (drv->queue)
-                       blk_cleanup_queue(drv->queue);
-       }
+
        /*
         * Deliberately omit pci_disable_device(): it does something nasty to
         * Smart Array controllers that pci_enable_device does not undo
@@ -4125,8 +4449,9 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev)
                return;
        }
 
-       kthread_stop(hba[i]->cciss_scan_thread);
+       mutex_lock(&hba[i]->busy_shutting_down);
 
+       remove_from_scan_list(hba[i]);
        remove_proc_entry(hba[i]->devname, proc_cciss);
        unregister_blkdev(hba[i]->major, hba[i]->devname);
 
@@ -4136,8 +4461,10 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev)
                if (disk) {
                        struct request_queue *q = disk->queue;
 
-                       if (disk->flags & GENHD_FL_UP)
+                       if (disk->flags & GENHD_FL_UP) {
+                               cciss_destroy_ld_sysfs_entry(hba[i], j, 1);
                                del_gendisk(disk);
+                       }
                        if (q)
                                blk_cleanup_queue(q);
                }
@@ -4170,6 +4497,7 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev)
        pci_release_regions(pdev);
        pci_set_drvdata(pdev, NULL);
        cciss_destroy_hba_sysfs_entry(hba[i]);
+       mutex_unlock(&hba[i]->busy_shutting_down);
        free_hba(i);
 }
 
@@ -4202,15 +4530,25 @@ static int __init cciss_init(void)
        if (err)
                return err;
 
+       /* Start the scan thread */
+       cciss_scan_thread = kthread_run(scan_thread, NULL, "cciss_scan");
+       if (IS_ERR(cciss_scan_thread)) {
+               err = PTR_ERR(cciss_scan_thread);
+               goto err_bus_unregister;
+       }
+
        /* Register for our PCI devices */
        err = pci_register_driver(&cciss_pci_driver);
        if (err)
-               goto err_bus_register;
+               goto err_thread_stop;
 
-       return 0;
+       return err;
 
-err_bus_register:
+err_thread_stop:
+       kthread_stop(cciss_scan_thread);
+err_bus_unregister:
        bus_unregister(&cciss_bus_type);
+
        return err;
 }
 
@@ -4227,6 +4565,7 @@ static void __exit cciss_cleanup(void)
                        cciss_remove_one(hba[i]->pdev);
                }
        }
+       kthread_stop(cciss_scan_thread);
        remove_proc_entry("driver/cciss", NULL);
        bus_unregister(&cciss_bus_type);
 }
index 06a5db2..31524cf 100644 (file)
@@ -2,6 +2,7 @@
 #define CCISS_H
 
 #include <linux/genhd.h>
+#include <linux/mutex.h>
 
 #include "cciss_cmd.h"
 
@@ -29,7 +30,7 @@ struct access_method {
 };
 typedef struct _drive_info_struct
 {
-       __u32   LunID;  
+       unsigned char LunID[8];
        int     usage_count;
        struct request_queue *queue;
        sector_t nr_blocks;
@@ -51,6 +52,7 @@ typedef struct _drive_info_struct
        char vendor[VENDOR_LEN + 1]; /* SCSI vendor string */
        char model[MODEL_LEN + 1];   /* SCSI model string */
        char rev[REV_LEN + 1];       /* SCSI revision string */
+       char device_initialized;     /* indicates whether dev is initialized */
 } drive_info_struct;
 
 struct ctlr_info 
@@ -86,7 +88,7 @@ struct ctlr_info
        BYTE    cciss_read_capacity;
 
        // information about each logical volume
-       drive_info_struct drv[CISS_MAX_LUN];
+       drive_info_struct *drv[CISS_MAX_LUN];
 
        struct access_method access;
 
@@ -108,6 +110,8 @@ struct ctlr_info
        int                     nr_frees; 
        int                     busy_configuring;
        int                     busy_initializing;
+       int                     busy_scanning;
+       struct mutex            busy_shutting_down;
 
        /* This element holds the zero based queue number of the last
         * queue to be started.  It is used for fairness.
@@ -122,8 +126,8 @@ struct ctlr_info
        /* and saved for later processing */
 #endif
        unsigned char alive;
-       struct completion *rescan_wait;
-       struct task_struct *cciss_scan_thread;
+       struct list_head scan_list;
+       struct completion scan_wait;
        struct device dev;
 };
 
index b82d438..6422651 100644 (file)
@@ -32,6 +32,7 @@
 #include <linux/blkpg.h>
 #include <linux/timer.h>
 #include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 #include <linux/init.h>
 #include <linux/hdreg.h>
 #include <linux/spinlock.h>
@@ -177,7 +178,6 @@ static int cpqarray_register_ctlr(int ctlr, struct pci_dev *pdev);
 
 #ifdef CONFIG_PROC_FS
 static void ida_procinit(int i);
-static int ida_proc_get_info(char *buffer, char **start, off_t offset, int length, int *eof, void *data);
 #else
 static void ida_procinit(int i) {}
 #endif
@@ -206,6 +206,7 @@ static const struct block_device_operations ida_fops  = {
 #ifdef CONFIG_PROC_FS
 
 static struct proc_dir_entry *proc_array;
+static const struct file_operations ida_proc_fops;
 
 /*
  * Get us a file in /proc/array that says something about each controller.
@@ -218,19 +219,16 @@ static void __init ida_procinit(int i)
                if (!proc_array) return;
        }
 
-       create_proc_read_entry(hba[i]->devname, 0, proc_array,
-                              ida_proc_get_info, hba[i]);
+       proc_create_data(hba[i]->devname, 0, proc_array, &ida_proc_fops, hba[i]);
 }
 
 /*
  * Report information about this controller.
  */
-static int ida_proc_get_info(char *buffer, char **start, off_t offset, int length, int *eof, void *data)
+static int ida_proc_show(struct seq_file *m, void *v)
 {
-       off_t pos = 0;
-       off_t len = 0;
-       int size, i, ctlr;
-       ctlr_info_t *h = (ctlr_info_t*)data;
+       int i, ctlr;
+       ctlr_info_t *h = (ctlr_info_t*)m->private;
        drv_info_t *drv;
 #ifdef CPQ_PROC_PRINT_QUEUES
        cmdlist_t *c;
@@ -238,7 +236,7 @@ static int ida_proc_get_info(char *buffer, char **start, off_t offset, int lengt
 #endif
 
        ctlr = h->ctlr;
-       size = sprintf(buffer, "%s:  Compaq %s Controller\n"
+       seq_printf(m, "%s:  Compaq %s Controller\n"
                "       Board ID: 0x%08lx\n"
                "       Firmware Revision: %c%c%c%c\n"
                "       Controller Sig: 0x%08lx\n"
@@ -258,55 +256,54 @@ static int ida_proc_get_info(char *buffer, char **start, off_t offset, int lengt
                h->log_drives, h->phys_drives,
                h->Qdepth, h->maxQsinceinit);
 
-       pos += size; len += size;
-       
-       size = sprintf(buffer+len, "Logical Drive Info:\n");
-       pos += size; len += size;
+       seq_puts(m, "Logical Drive Info:\n");
 
        for(i=0; i<h->log_drives; i++) {
                drv = &h->drv[i];
-               size = sprintf(buffer+len, "ida/c%dd%d: blksz=%d nr_blks=%d\n",
+               seq_printf(m, "ida/c%dd%d: blksz=%d nr_blks=%d\n",
                                ctlr, i, drv->blk_size, drv->nr_blks);
-               pos += size; len += size;
        }
 
 #ifdef CPQ_PROC_PRINT_QUEUES
        spin_lock_irqsave(IDA_LOCK(h->ctlr), flags); 
-       size = sprintf(buffer+len, "\nCurrent Queues:\n");
-       pos += size; len += size;
+       seq_puts(m, "\nCurrent Queues:\n");
 
        c = h->reqQ;
-       size = sprintf(buffer+len, "reqQ = %p", c); pos += size; len += size;
+       seq_printf(m, "reqQ = %p", c);
        if (c) c=c->next;
        while(c && c != h->reqQ) {
-               size = sprintf(buffer+len, "->%p", c);
-               pos += size; len += size;
+               seq_printf(m, "->%p", c);
                c=c->next;
        }
 
        c = h->cmpQ;
-       size = sprintf(buffer+len, "\ncmpQ = %p", c); pos += size; len += size;
+       seq_printf(m, "\ncmpQ = %p", c);
        if (c) c=c->next;
        while(c && c != h->cmpQ) {
-               size = sprintf(buffer+len, "->%p", c);
-               pos += size; len += size;
+               seq_printf(m, "->%p", c);
                c=c->next;
        }
 
-       size = sprintf(buffer+len, "\n"); pos += size; len += size;
+       seq_putc(m, '\n');
        spin_unlock_irqrestore(IDA_LOCK(h->ctlr), flags); 
 #endif
-       size = sprintf(buffer+len, "nr_allocs = %d\nnr_frees = %d\n",
+       seq_printf(m, "nr_allocs = %d\nnr_frees = %d\n",
                        h->nr_allocs, h->nr_frees);
-       pos += size; len += size;
-
-       *eof = 1;
-       *start = buffer+offset;
-       len -= offset;
-       if (len>length)
-               len = length;
-       return len;
+       return 0;
+}
+
+static int ida_proc_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, ida_proc_show, PDE(inode)->data);
 }
+
+static const struct file_operations ida_proc_fops = {
+       .owner          = THIS_MODULE,
+       .open           = ida_proc_open,        /* single_open() on ida_proc_show */
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
 #endif /* CONFIG_PROC_FS */
 
 module_param_array(eisa, int, NULL, 0);
index 376f1ab..23e76fe 100644 (file)
@@ -130,7 +130,7 @@ struct mapped_device {
        /*
         * A list of ios that arrived while we were suspended.
         */
-       atomic_t pending[2];
+       atomic_t pending;
        wait_queue_head_t wait;
        struct work_struct work;
        struct bio_list deferred;
@@ -453,14 +453,13 @@ static void start_io_acct(struct dm_io *io)
 {
        struct mapped_device *md = io->md;
        int cpu;
-       int rw = bio_data_dir(io->bio);
 
        io->start_time = jiffies;
 
        cpu = part_stat_lock();
        part_round_stats(cpu, &dm_disk(md)->part0);
        part_stat_unlock();
-       dm_disk(md)->part0.in_flight[rw] = atomic_inc_return(&md->pending[rw]);
+       dm_disk(md)->part0.in_flight = atomic_inc_return(&md->pending);
 }
 
 static void end_io_acct(struct dm_io *io)
@@ -480,9 +479,8 @@ static void end_io_acct(struct dm_io *io)
         * After this is decremented the bio must not be touched if it is
         * a barrier.
         */
-       dm_disk(md)->part0.in_flight[rw] = pending =
-               atomic_dec_return(&md->pending[rw]);
-       pending += atomic_read(&md->pending[rw^0x1]);
+       dm_disk(md)->part0.in_flight = pending =
+               atomic_dec_return(&md->pending);
 
        /* nudge anyone waiting on suspend queue */
        if (!pending)
@@ -1787,8 +1785,7 @@ static struct mapped_device *alloc_dev(int minor)
        if (!md->disk)
                goto bad_disk;
 
-       atomic_set(&md->pending[0], 0);
-       atomic_set(&md->pending[1], 0);
+       atomic_set(&md->pending, 0);
        init_waitqueue_head(&md->wait);
        INIT_WORK(&md->work, dm_wq_work);
        init_waitqueue_head(&md->eventq);
@@ -2091,8 +2088,7 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
                                break;
                        }
                        spin_unlock_irqrestore(q->queue_lock, flags);
-               } else if (!atomic_read(&md->pending[0]) &&
-                                       !atomic_read(&md->pending[1]))
+               } else if (!atomic_read(&md->pending))
                        break;
 
                if (interruptible == TASK_INTERRUPTIBLE &&
index 0acbf4f..8ca17a3 100644 (file)
@@ -32,14 +32,6 @@ struct mtd_blkcore_priv {
        spinlock_t queue_lock;
 };
 
-static int blktrans_discard_request(struct request_queue *q,
-                                   struct request *req)
-{
-       req->cmd_type = REQ_TYPE_LINUX_BLOCK;
-       req->cmd[0] = REQ_LB_OP_DISCARD;
-       return 0;
-}
-
 static int do_blktrans_request(struct mtd_blktrans_ops *tr,
                               struct mtd_blktrans_dev *dev,
                               struct request *req)
@@ -52,10 +44,6 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 
        buf = req->buffer;
 
-       if (req->cmd_type == REQ_TYPE_LINUX_BLOCK &&
-           req->cmd[0] == REQ_LB_OP_DISCARD)
-               return tr->discard(dev, block, nsect);
-
        if (!blk_fs_request(req))
                return -EIO;
 
@@ -63,6 +51,9 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
            get_capacity(req->rq_disk))
                return -EIO;
 
+       if (blk_discard_rq(req))
+               return tr->discard(dev, block, nsect);
+
        switch(rq_data_dir(req)) {
        case READ:
                for (; nsect > 0; nsect--, block++, buf += tr->blksize)
@@ -380,8 +371,8 @@ int register_mtd_blktrans(struct mtd_blktrans_ops *tr)
        tr->blkcore_priv->rq->queuedata = tr;
        blk_queue_logical_block_size(tr->blkcore_priv->rq, tr->blksize);
        if (tr->discard)
-               blk_queue_set_discard(tr->blkcore_priv->rq,
-                                     blktrans_discard_request);
+               queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
+                                       tr->blkcore_priv->rq);
 
        tr->blkshift = ffs(tr->blksize) - 1;
 
index ee16010..c24e4e0 100644 (file)
@@ -102,7 +102,7 @@ static int dst_request(struct request_queue *q, struct bio *bio)
        struct dst_node *n = q->queuedata;
        int err = -EIO;
 
-       if (bio_empty_barrier(bio) && !q->prepare_discard_fn) {
+       if (bio_empty_barrier(bio) && !blk_queue_discard(q)) {
                /*
                 * This is a dirty^Wnice hack, but if we complete this
                 * operation with -EOPNOTSUPP like intended, XFS
index 7673800..402cb84 100644 (file)
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -249,6 +249,7 @@ void bio_free(struct bio *bio, struct bio_set *bs)
 
        mempool_free(p, bs->bio_pool);
 }
+EXPORT_SYMBOL(bio_free);
 
 void bio_init(struct bio *bio)
 {
@@ -257,6 +258,7 @@ void bio_init(struct bio *bio)
        bio->bi_comp_cpu = -1;
        atomic_set(&bio->bi_cnt, 1);
 }
+EXPORT_SYMBOL(bio_init);
 
 /**
  * bio_alloc_bioset - allocate a bio for I/O
@@ -311,6 +313,7 @@ err_free:
        mempool_free(p, bs->bio_pool);
        return NULL;
 }
+EXPORT_SYMBOL(bio_alloc_bioset);
 
 static void bio_fs_destructor(struct bio *bio)
 {
@@ -337,6 +340,7 @@ struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs)
 
        return bio;
 }
+EXPORT_SYMBOL(bio_alloc);
 
 static void bio_kmalloc_destructor(struct bio *bio)
 {
@@ -380,6 +384,7 @@ struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs)
 
        return bio;
 }
+EXPORT_SYMBOL(bio_kmalloc);
 
 void zero_fill_bio(struct bio *bio)
 {
@@ -416,6 +421,7 @@ void bio_put(struct bio *bio)
                bio->bi_destructor(bio);
        }
 }
+EXPORT_SYMBOL(bio_put);
 
 inline int bio_phys_segments(struct request_queue *q, struct bio *bio)
 {
@@ -424,6 +430,7 @@ inline int bio_phys_segments(struct request_queue *q, struct bio *bio)
 
        return bio->bi_phys_segments;
 }
+EXPORT_SYMBOL(bio_phys_segments);
 
 /**
  *     __bio_clone     -       clone a bio
@@ -451,6 +458,7 @@ void __bio_clone(struct bio *bio, struct bio *bio_src)
        bio->bi_size = bio_src->bi_size;
        bio->bi_idx = bio_src->bi_idx;
 }
+EXPORT_SYMBOL(__bio_clone);
 
 /**
  *     bio_clone       -       clone a bio
@@ -482,6 +490,7 @@ struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
 
        return b;
 }
+EXPORT_SYMBOL(bio_clone);
 
 /**
  *     bio_get_nr_vecs         - return approx number of vecs
@@ -505,6 +514,7 @@ int bio_get_nr_vecs(struct block_device *bdev)
 
        return nr_pages;
 }
+EXPORT_SYMBOL(bio_get_nr_vecs);
 
 static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
                          *page, unsigned int len, unsigned int offset,
@@ -635,6 +645,7 @@ int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page *page,
        return __bio_add_page(q, bio, page, len, offset,
                              queue_max_hw_sectors(q));
 }
+EXPORT_SYMBOL(bio_add_pc_page);
 
 /**
  *     bio_add_page    -       attempt to add page to bio
@@ -655,6 +666,7 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
        struct request_queue *q = bdev_get_queue(bio->bi_bdev);
        return __bio_add_page(q, bio, page, len, offset, queue_max_sectors(q));
 }
+EXPORT_SYMBOL(bio_add_page);
 
 struct bio_map_data {
        struct bio_vec *iovecs;
@@ -776,6 +788,7 @@ int bio_uncopy_user(struct bio *bio)
        bio_put(bio);
        return ret;
 }
+EXPORT_SYMBOL(bio_uncopy_user);
 
 /**
  *     bio_copy_user_iov       -       copy user data to bio
@@ -920,6 +933,7 @@ struct bio *bio_copy_user(struct request_queue *q, struct rq_map_data *map_data,
 
        return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask);
 }
+EXPORT_SYMBOL(bio_copy_user);
 
 static struct bio *__bio_map_user_iov(struct request_queue *q,
                                      struct block_device *bdev,
@@ -1050,6 +1064,7 @@ struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev,
 
        return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask);
 }
+EXPORT_SYMBOL(bio_map_user);
 
 /**
  *     bio_map_user_iov - map user sg_iovec table into bio
@@ -1117,13 +1132,13 @@ void bio_unmap_user(struct bio *bio)
        __bio_unmap_user(bio);
        bio_put(bio);
 }
+EXPORT_SYMBOL(bio_unmap_user);
 
 static void bio_map_kern_endio(struct bio *bio, int err)
 {
        bio_put(bio);
 }
 
-
 static struct bio *__bio_map_kern(struct request_queue *q, void *data,
                                  unsigned int len, gfp_t gfp_mask)
 {
@@ -1189,6 +1204,7 @@ struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
        bio_put(bio);
        return ERR_PTR(-EINVAL);
 }
+EXPORT_SYMBOL(bio_map_kern);
 
 static void bio_copy_kern_endio(struct bio *bio, int err)
 {
@@ -1250,6 +1266,7 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
 
        return bio;
 }
+EXPORT_SYMBOL(bio_copy_kern);
 
 /*
  * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
@@ -1400,6 +1417,7 @@ void bio_endio(struct bio *bio, int error)
        if (bio->bi_end_io)
                bio->bi_end_io(bio, error);
 }
+EXPORT_SYMBOL(bio_endio);
 
 void bio_pair_release(struct bio_pair *bp)
 {
@@ -1410,6 +1428,7 @@ void bio_pair_release(struct bio_pair *bp)
                mempool_free(bp, bp->bio2.bi_private);
        }
 }
+EXPORT_SYMBOL(bio_pair_release);
 
 static void bio_pair_end_1(struct bio *bi, int err)
 {
@@ -1477,6 +1496,7 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors)
 
        return bp;
 }
+EXPORT_SYMBOL(bio_split);
 
 /**
  *      bio_sector_offset - Find hardware sector offset in bio
@@ -1547,6 +1567,7 @@ void bioset_free(struct bio_set *bs)
 
        kfree(bs);
 }
+EXPORT_SYMBOL(bioset_free);
 
 /**
  * bioset_create  - Create a bio_set
@@ -1592,6 +1613,7 @@ bad:
        bioset_free(bs);
        return NULL;
 }
+EXPORT_SYMBOL(bioset_create);
 
 static void __init biovec_init_slabs(void)
 {
@@ -1636,29 +1658,4 @@ static int __init init_bio(void)
 
        return 0;
 }
-
 subsys_initcall(init_bio);
-
-EXPORT_SYMBOL(bio_alloc);
-EXPORT_SYMBOL(bio_kmalloc);
-EXPORT_SYMBOL(bio_put);
-EXPORT_SYMBOL(bio_free);
-EXPORT_SYMBOL(bio_endio);
-EXPORT_SYMBOL(bio_init);
-EXPORT_SYMBOL(__bio_clone);
-EXPORT_SYMBOL(bio_clone);
-EXPORT_SYMBOL(bio_phys_segments);
-EXPORT_SYMBOL(bio_add_page);
-EXPORT_SYMBOL(bio_add_pc_page);
-EXPORT_SYMBOL(bio_get_nr_vecs);
-EXPORT_SYMBOL(bio_map_user);
-EXPORT_SYMBOL(bio_unmap_user);
-EXPORT_SYMBOL(bio_map_kern);
-EXPORT_SYMBOL(bio_copy_kern);
-EXPORT_SYMBOL(bio_pair_release);
-EXPORT_SYMBOL(bio_split);
-EXPORT_SYMBOL(bio_copy_user);
-EXPORT_SYMBOL(bio_uncopy_user);
-EXPORT_SYMBOL(bioset_create);
-EXPORT_SYMBOL(bioset_free);
-EXPORT_SYMBOL(bio_alloc_bioset);
index 7b685e1..f38fee0 100644 (file)
@@ -248,19 +248,11 @@ ssize_t part_stat_show(struct device *dev,
                part_stat_read(p, merges[WRITE]),
                (unsigned long long)part_stat_read(p, sectors[WRITE]),
                jiffies_to_msecs(part_stat_read(p, ticks[WRITE])),
-               part_in_flight(p),
+               p->in_flight,
                jiffies_to_msecs(part_stat_read(p, io_ticks)),
                jiffies_to_msecs(part_stat_read(p, time_in_queue)));
 }
 
-ssize_t part_inflight_show(struct device *dev,
-                       struct device_attribute *attr, char *buf)
-{
-       struct hd_struct *p = dev_to_part(dev);
-
-       return sprintf(buf, "%8u %8u\n", p->in_flight[0], p->in_flight[1]);
-}
-
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 ssize_t part_fail_show(struct device *dev,
                       struct device_attribute *attr, char *buf)
@@ -289,7 +281,6 @@ static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL);
 static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
 static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL);
 static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
-static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 static struct device_attribute dev_attr_fail =
        __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
@@ -301,7 +292,6 @@ static struct attribute *part_attrs[] = {
        &dev_attr_size.attr,
        &dev_attr_alignment_offset.attr,
        &dev_attr_stat.attr,
-       &dev_attr_inflight.attr,
 #ifdef CONFIG_FAIL_MAKE_REQUEST
        &dev_attr_fail.attr,
 #endif
index e23a86c..2511904 100644 (file)
@@ -82,7 +82,6 @@ enum rq_cmd_type_bits {
 enum {
        REQ_LB_OP_EJECT = 0x40,         /* eject request */
        REQ_LB_OP_FLUSH = 0x41,         /* flush request */
-       REQ_LB_OP_DISCARD = 0x42,       /* discard sectors */
 };
 
 /*
@@ -261,7 +260,6 @@ typedef void (request_fn_proc) (struct request_queue *q);
 typedef int (make_request_fn) (struct request_queue *q, struct bio *bio);
 typedef int (prep_rq_fn) (struct request_queue *, struct request *);
 typedef void (unplug_fn) (struct request_queue *);
-typedef int (prepare_discard_fn) (struct request_queue *, struct request *);
 
 struct bio_vec;
 struct bvec_merge_data {
@@ -313,6 +311,7 @@ struct queue_limits {
        unsigned int            alignment_offset;
        unsigned int            io_min;
        unsigned int            io_opt;
+       unsigned int            max_discard_sectors;
 
        unsigned short          logical_block_size;
        unsigned short          max_hw_segments;
@@ -340,7 +339,6 @@ struct request_queue
        make_request_fn         *make_request_fn;
        prep_rq_fn              *prep_rq_fn;
        unplug_fn               *unplug_fn;
-       prepare_discard_fn      *prepare_discard_fn;
        merge_bvec_fn           *merge_bvec_fn;
        prepare_flush_fn        *prepare_flush_fn;
        softirq_done_fn         *softirq_done_fn;
@@ -460,6 +458,7 @@ struct request_queue
 #define QUEUE_FLAG_VIRT        QUEUE_FLAG_NONROT /* paravirt device */
 #define QUEUE_FLAG_IO_STAT     15      /* do IO stats */
 #define QUEUE_FLAG_CQ         16       /* hardware does queuing */
+#define QUEUE_FLAG_DISCARD     17      /* supports DISCARD */
 
 #define QUEUE_FLAG_DEFAULT     ((1 << QUEUE_FLAG_IO_STAT) |            \
                                 (1 << QUEUE_FLAG_CLUSTER) |            \
@@ -591,6 +590,7 @@ enum {
 #define blk_queue_flushing(q)  ((q)->ordseq)
 #define blk_queue_stackable(q) \
        test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags)
+#define blk_queue_discard(q)   test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags)
 
 #define blk_fs_request(rq)     ((rq)->cmd_type == REQ_TYPE_FS)
 #define blk_pc_request(rq)     ((rq)->cmd_type == REQ_TYPE_BLOCK_PC)
@@ -929,6 +929,8 @@ extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
 extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short);
 extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short);
 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
+extern void blk_queue_max_discard_sectors(struct request_queue *q,
+               unsigned int max_discard_sectors);
 extern void blk_queue_logical_block_size(struct request_queue *, unsigned short);
 extern void blk_queue_physical_block_size(struct request_queue *, unsigned short);
 extern void blk_queue_alignment_offset(struct request_queue *q,
@@ -955,7 +957,6 @@ extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *);
 extern void blk_queue_dma_alignment(struct request_queue *, int);
 extern void blk_queue_update_dma_alignment(struct request_queue *, int);
 extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
-extern void blk_queue_set_discard(struct request_queue *, prepare_discard_fn *);
 extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
@@ -1080,25 +1081,37 @@ static inline unsigned int queue_physical_block_size(struct request_queue *q)
        return q->limits.physical_block_size;
 }
 
+static inline int bdev_physical_block_size(struct block_device *bdev)
+{
+       return queue_physical_block_size(bdev_get_queue(bdev));
+}
+
 static inline unsigned int queue_io_min(struct request_queue *q)
 {
        return q->limits.io_min;
 }
 
+static inline int bdev_io_min(struct block_device *bdev)
+{
+       return queue_io_min(bdev_get_queue(bdev));
+}
+
 static inline unsigned int queue_io_opt(struct request_queue *q)
 {
        return q->limits.io_opt;
 }
 
+static inline int bdev_io_opt(struct block_device *bdev)
+{
+       return queue_io_opt(bdev_get_queue(bdev));
+}
+
 static inline int queue_alignment_offset(struct request_queue *q)
 {
-       if (q && q->limits.misaligned)
+       if (q->limits.misaligned)
                return -1;
 
-       if (q && q->limits.alignment_offset)
-               return q->limits.alignment_offset;
-
-       return 0;
+       return q->limits.alignment_offset;
 }
 
 static inline int queue_sector_alignment_offset(struct request_queue *q,
@@ -1108,6 +1121,19 @@ static inline int queue_sector_alignment_offset(struct request_queue *q,
                & (q->limits.io_min - 1);
 }
 
+static inline int bdev_alignment_offset(struct block_device *bdev)
+{
+       struct request_queue *q = bdev_get_queue(bdev);
+
+       if (q->limits.misaligned)
+               return -1;
+
+       if (bdev != bdev->bd_contains)
+               return bdev->bd_part->alignment_offset;
+
+       return q->limits.alignment_offset;
+}
+
 static inline int queue_dma_alignment(struct request_queue *q)
 {
        return q ? q->dma_alignment : 511;
@@ -1146,7 +1172,11 @@ static inline void put_dev_sector(Sector p)
 }
 
 struct work_struct;
+struct delayed_work;
 int kblockd_schedule_work(struct request_queue *q, struct work_struct *work);
+int kblockd_schedule_delayed_work(struct request_queue *q,
+                                       struct delayed_work *work,
+                                       unsigned long delay);
 
 #define MODULE_ALIAS_BLOCKDEV(major,minor) \
        MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor))
index 7e4350e..3b73b99 100644 (file)
@@ -198,6 +198,7 @@ extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
                           char __user *arg);
 extern int blk_trace_startstop(struct request_queue *q, int start);
 extern int blk_trace_remove(struct request_queue *q);
+extern void blk_trace_remove_sysfs(struct device *dev);
 extern int blk_trace_init_sysfs(struct device *dev);
 
 extern struct attribute_group blk_trace_attr_group;
@@ -211,6 +212,7 @@ extern struct attribute_group blk_trace_attr_group;
 # define blk_trace_startstop(q, start)                 (-ENOTTY)
 # define blk_trace_remove(q)                           (-ENOTTY)
 # define blk_add_trace_msg(q, fmt, ...)                        do { } while (0)
+# define blk_trace_remove_sysfs(dev)                   do { } while (0)
 static inline int blk_trace_init_sysfs(struct device *dev)
 {
        return 0;
index a1e6899..2620a8c 100644 (file)
@@ -300,6 +300,10 @@ struct inodes_stat_t {
 #define BLKTRACESTOP _IO(0x12,117)
 #define BLKTRACETEARDOWN _IO(0x12,118)
 #define BLKDISCARD _IO(0x12,119)
+#define BLKIOMIN _IO(0x12,120)
+#define BLKIOOPT _IO(0x12,121)
+#define BLKALIGNOFF _IO(0x12,122)
+#define BLKPBSZGET _IO(0x12,123)
 
 #define BMAP_IOCTL 1           /* obsolete - kept for compatibility */
 #define FIBMAP    _IO(0x00,1)  /* bmap access */
index 297df45..7beaa21 100644 (file)
@@ -98,7 +98,7 @@ struct hd_struct {
        int make_it_fail;
 #endif
        unsigned long stamp;
-       int in_flight[2];
+       int in_flight;
 #ifdef CONFIG_SMP
        struct disk_stats *dkstats;
 #else
@@ -322,23 +322,18 @@ static inline void free_part_stats(struct hd_struct *part)
 #define part_stat_sub(cpu, gendiskp, field, subnd)                     \
        part_stat_add(cpu, gendiskp, field, -subnd)
 
-static inline void part_inc_in_flight(struct hd_struct *part, int rw)
+static inline void part_inc_in_flight(struct hd_struct *part)
 {
-       part->in_flight[rw]++;
+       part->in_flight++;
        if (part->partno)
-               part_to_disk(part)->part0.in_flight[rw]++;
+               part_to_disk(part)->part0.in_flight++;
 }
 
-static inline void part_dec_in_flight(struct hd_struct *part, int rw)
+static inline void part_dec_in_flight(struct hd_struct *part)
 {
-       part->in_flight[rw]--;
+       part->in_flight--;
        if (part->partno)
-               part_to_disk(part)->part0.in_flight[rw]--;
-}
-
-static inline int part_in_flight(struct hd_struct *part)
-{
-       return part->in_flight[0] + part->in_flight[1];
+               part_to_disk(part)->part0.in_flight--;
 }
 
 /* block/blk-core.c */
@@ -551,8 +546,6 @@ extern ssize_t part_size_show(struct device *dev,
                              struct device_attribute *attr, char *buf);
 extern ssize_t part_stat_show(struct device *dev,
                              struct device_attribute *attr, char *buf);
-extern ssize_t part_inflight_show(struct device *dev,
-                             struct device_attribute *attr, char *buf);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 extern ssize_t part_fail_show(struct device *dev,
                              struct device_attribute *attr, char *buf);
index d86af94..00405b5 100644 (file)
@@ -488,6 +488,39 @@ TRACE_EVENT(block_remap,
                  (unsigned long long)__entry->old_sector)
 );
 
+TRACE_EVENT(block_rq_remap,
+
+       TP_PROTO(struct request_queue *q, struct request *rq, dev_t dev,
+                sector_t from),
+
+       TP_ARGS(q, rq, dev, from),
+
+       TP_STRUCT__entry(
+               __field( dev_t,         dev             )
+               __field( sector_t,      sector          )
+               __field( unsigned int,  nr_sector       )
+               __field( dev_t,         old_dev         )
+               __field( sector_t,      old_sector      )
+               __array( char,          rwbs,   6       )
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = disk_devt(rq->rq_disk);
+               __entry->sector         = blk_rq_pos(rq);
+               __entry->nr_sector      = blk_rq_sectors(rq);
+               __entry->old_dev        = dev;
+               __entry->old_sector     = from;
+               blk_fill_rwbs_rq(__entry->rwbs, rq);
+       ),
+
+       TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector,
+                 MAJOR(__entry->old_dev), MINOR(__entry->old_dev),
+                 (unsigned long long)__entry->old_sector)
+);
+
 #endif /* _TRACE_BLOCK_H */
 
 /* This part must be outside protection */
index 3eb159c..d9d6206 100644 (file)
@@ -856,6 +856,37 @@ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
 }
 
 /**
+ * blk_add_trace_rq_remap - Add a trace for a request-remap operation
+ * @q:         queue the io is for
+ * @rq:                the source request
+ * @dev:       source device (the device the request was remapped from)
+ * @from:      source sector
+ *
+ * Description:
+ *     Device mapper remaps request to other devices.
+ *     Add a trace for that action.
+ *
+ **/
+static void blk_add_trace_rq_remap(struct request_queue *q,
+                                  struct request *rq, dev_t dev,
+                                  sector_t from)
+{
+       struct blk_trace *bt = q->blk_trace;
+       struct blk_io_trace_remap r;
+
+       if (likely(!bt))
+               return;
+
+       r.device_from = cpu_to_be32(dev);
+       r.device_to   = cpu_to_be32(disk_devt(rq->rq_disk));
+       r.sector_from = cpu_to_be64(from);
+
+       __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
+                       rq_data_dir(rq), BLK_TA_REMAP, !!rq->errors,
+                       sizeof(r), &r);
+}
+
+/**
  * blk_add_driver_data - Add binary message with driver-specific data
  * @q:         queue the io is for
  * @rq:                io request
@@ -922,10 +953,13 @@ static void blk_register_tracepoints(void)
        WARN_ON(ret);
        ret = register_trace_block_remap(blk_add_trace_remap);
        WARN_ON(ret);
+       ret = register_trace_block_rq_remap(blk_add_trace_rq_remap);
+       WARN_ON(ret);
 }
 
 static void blk_unregister_tracepoints(void)
 {
+       unregister_trace_block_rq_remap(blk_add_trace_rq_remap);
        unregister_trace_block_remap(blk_add_trace_remap);
        unregister_trace_block_split(blk_add_trace_split);
        unregister_trace_block_unplug_io(blk_add_trace_unplug_io);
@@ -1657,6 +1691,11 @@ int blk_trace_init_sysfs(struct device *dev)
        return sysfs_create_group(&dev->kobj, &blk_trace_attr_group);
 }
 
+void blk_trace_remove_sysfs(struct device *dev)
+{
+       sysfs_remove_group(&dev->kobj, &blk_trace_attr_group);
+}
+
 #endif /* CONFIG_BLK_DEV_IO_TRACE */
 
 #ifdef CONFIG_EVENT_TRACING
index 4de7f02..a1bc6b9 100644 (file)
@@ -1974,12 +1974,14 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
                goto bad_swap;
        }
 
-       if (blk_queue_nonrot(bdev_get_queue(p->bdev))) {
-               p->flags |= SWP_SOLIDSTATE;
-               p->cluster_next = 1 + (random32() % p->highest_bit);
+       if (p->bdev) {
+               if (blk_queue_nonrot(bdev_get_queue(p->bdev))) {
+                       p->flags |= SWP_SOLIDSTATE;
+                       p->cluster_next = 1 + (random32() % p->highest_bit);
+               }
+               if (discard_swap(p) == 0)
+                       p->flags |= SWP_DISCARDABLE;
        }
-       if (discard_swap(p) == 0)
-               p->flags |= SWP_DISCARDABLE;
 
        mutex_lock(&swapon_mutex);
        spin_lock(&swap_lock);