#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/kobj_map.h>
-#include <linux/buffer_head.h>
#include <linux/mutex.h>
#include <linux/idr.h>
#include <linux/log2.h>
static struct device_type disk_type;
+static void disk_alloc_events(struct gendisk *disk);
static void disk_add_events(struct gendisk *disk);
static void disk_del_events(struct gendisk *disk);
static void disk_release_events(struct gendisk *disk);
part = rcu_dereference(ptbl->part[piter->idx]);
if (!part)
continue;
- if (!part->nr_sects &&
+ if (!part_nr_sects_read(part) &&
!(piter->flags & DISK_PITER_INCL_EMPTY) &&
!(piter->flags & DISK_PITER_INCL_EMPTY_PART0 &&
piter->idx == 0))
static inline int sector_in_part(struct hd_struct *part, sector_t sector)
{
return part->start_sect <= sector &&
- sector < part->start_sect + part->nr_sects;
+ sector < part->start_sect + part_nr_sects_read(part);
}
/**
return 0;
}
-void register_disk(struct gendisk *disk)
+static void register_disk(struct gendisk *disk)
{
struct device *ddev = disk_to_dev(disk);
struct block_device *bdev;
disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
/* No minors to use for partitions */
- if (!disk_partitionable(disk))
+ if (!disk_part_scan_enabled(disk))
goto exit;
/* No such device (e.g., media were just removed) */
disk->major = MAJOR(devt);
disk->first_minor = MINOR(devt);
- /* Register BDI before referencing it from bdev */
+ disk_alloc_events(disk);
+
+ /* Register BDI before referencing it from bdev */
bdi = &disk->queue->backing_dev_info;
bdi_register_dev(bdi, disk_devt(disk));
register_disk(disk);
blk_register_queue(disk);
+ /*
+ * Take an extra ref on queue which will be put on disk_release()
+ * so that it sticks around as long as @disk is there.
+ */
+ WARN_ON_ONCE(!blk_get_queue(disk->queue));
+
retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj,
"bdi");
WARN_ON(retval);
struct hd_struct *part;
char name_buf[BDEVNAME_SIZE];
char devt_buf[BDEVT_SIZE];
- u8 uuid[PARTITION_META_INFO_UUIDLTH * 2 + 1];
+ char uuid_buf[PARTITION_META_INFO_UUIDLTH * 2 + 5];
/*
* Don't show empty devices or things that have been
- * surpressed
+ * suppressed
*/
if (get_capacity(disk) == 0 ||
(disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO))
while ((part = disk_part_iter_next(&piter))) {
bool is_part0 = part == &disk->part0;
- uuid[0] = 0;
+ uuid_buf[0] = '\0';
if (part->info)
- part_unpack_uuid(part->info->uuid, uuid);
+ snprintf(uuid_buf, sizeof(uuid_buf), "%pU",
+ part->info->uuid);
printk("%s%s %10llu %s %s", is_part0 ? "" : " ",
bdevt_str(part_devt(part), devt_buf),
- (unsigned long long)part->nr_sects >> 1,
- disk_name(disk, part->partno, name_buf), uuid);
+ (unsigned long long)part_nr_sects_read(part) >> 1
+ , disk_name(disk, part->partno, name_buf),
+ uuid_buf);
if (is_part0) {
if (disk->driverfs_dev != NULL &&
disk->driverfs_dev->driver != NULL)
static void *show_partition_start(struct seq_file *seqf, loff_t *pos)
{
- static void *p;
+ void *p;
p = disk_seqf_start(seqf, pos);
if (!IS_ERR_OR_NULL(p) && !*pos)
char buf[BDEVNAME_SIZE];
/* Don't show non-partitionable removeable devices or empty devices */
- if (!get_capacity(sgp) || (!disk_partitionable(sgp) &&
+ if (!get_capacity(sgp) || (!disk_max_parts(sgp) &&
(sgp->flags & GENHD_FL_REMOVABLE)))
return 0;
if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
while ((part = disk_part_iter_next(&piter)))
seq_printf(seqf, "%4d %7d %10llu %s\n",
MAJOR(part_devt(part)), MINOR(part_devt(part)),
- (unsigned long long)part->nr_sects >> 1,
+ (unsigned long long)part_nr_sects_read(part) >> 1,
disk_name(sgp, part->partno, buf));
disk_part_iter_exit(&piter);
NULL
};
-static void disk_free_ptbl_rcu_cb(struct rcu_head *head)
-{
- struct disk_part_tbl *ptbl =
- container_of(head, struct disk_part_tbl, rcu_head);
-
- kfree(ptbl);
-}
-
/**
* disk_replace_part_tbl - replace disk->part_tbl in RCU-safe way
* @disk: disk to replace part_tbl for
if (old_ptbl) {
rcu_assign_pointer(old_ptbl->last_lookup, NULL);
- call_rcu(&old_ptbl->rcu_head, disk_free_ptbl_rcu_cb);
+ kfree_rcu(old_ptbl, rcu_head);
}
}
disk_replace_part_tbl(disk, NULL);
free_part_stats(&disk->part0);
free_part_info(&disk->part0);
+ if (disk->queue)
+ blk_put_queue(disk->queue);
kfree(disk);
}
struct class block_class = {
.name = "block",
};
-static char *block_devnode(struct device *dev, mode_t *mode)
+static char *block_devnode(struct device *dev, umode_t *mode)
{
struct gendisk *disk = dev_to_disk(dev);
"wsect wuse running use aveq"
"\n\n");
*/
-
+
disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0);
while ((hd = disk_part_iter_next(&piter))) {
cpu = part_stat_lock();
part_round_stats(cpu, hd);
part_stat_unlock();
- seq_printf(seqf, "%4d %7d %s %lu %lu %llu "
- "%u %lu %lu %llu %u %u %u %u\n",
+ seq_printf(seqf, "%4d %7d %s %lu %lu %lu "
+ "%u %lu %lu %lu %u %u %u %u\n",
MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
disk_name(gp, hd->partno, buf),
- part_stat_read(hd, ios[0]),
- part_stat_read(hd, merges[0]),
- (unsigned long long)part_stat_read(hd, sectors[0]),
- jiffies_to_msecs(part_stat_read(hd, ticks[0])),
- part_stat_read(hd, ios[1]),
- part_stat_read(hd, merges[1]),
- (unsigned long long)part_stat_read(hd, sectors[1]),
- jiffies_to_msecs(part_stat_read(hd, ticks[1])),
+ part_stat_read(hd, ios[READ]),
+ part_stat_read(hd, merges[READ]),
+ part_stat_read(hd, sectors[READ]),
+ jiffies_to_msecs(part_stat_read(hd, ticks[READ])),
+ part_stat_read(hd, ios[WRITE]),
+ part_stat_read(hd, merges[WRITE]),
+ part_stat_read(hd, sectors[WRITE]),
+ jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])),
part_in_flight(hd),
jiffies_to_msecs(part_stat_read(hd, io_ticks)),
jiffies_to_msecs(part_stat_read(hd, time_in_queue))
);
}
disk_part_iter_exit(&piter);
-
+
return 0;
}
}
disk->part_tbl->part[0] = &disk->part0;
+ /*
+ * set_capacity() and get_capacity() currently don't use
+ * seqcounter to read/update the part0->nr_sects. Still init
+ * the counter as we can read the sectors in IO submission
+ * patch using seqence counters.
+ *
+ * TODO: Ideally set_capacity() and get_capacity() should be
+ * converted to make use of bd_mutex and sequence counters.
+ */
+ seqcount_init(&disk->part0.nr_sects_seq);
+ hd_ref_init(&disk->part0);
+
disk->minors = minors;
rand_initialize_disk(disk);
disk_to_dev(disk)->class = &block_class;
struct block_device *bdev = bdget_disk(disk, partno);
if (bdev) {
fsync_bdev(bdev);
- res = __invalidate_device(bdev);
+ res = __invalidate_device(bdev, true);
bdput(bdev);
}
return res;
struct gendisk *disk; /* the associated disk */
spinlock_t lock;
+ struct mutex block_mutex; /* protects blocking */
int block; /* event blocking depth */
unsigned int pending; /* events already sent out */
unsigned int clearing; /* events being cleared */
return msecs_to_jiffies(intv_msecs);
}
-static void __disk_block_events(struct gendisk *disk, bool sync)
+/**
+ * disk_block_events - block and flush disk event checking
+ * @disk: disk to block events for
+ *
+ * On return from this function, it is guaranteed that event checking
+ * isn't in progress and won't happen until unblocked by
+ * disk_unblock_events(). Events blocking is counted and the actual
+ * unblocking happens after the matching number of unblocks are done.
+ *
+ * Note that this intentionally does not block event checking from
+ * disk_clear_events().
+ *
+ * CONTEXT:
+ * Might sleep.
+ */
+void disk_block_events(struct gendisk *disk)
{
struct disk_events *ev = disk->ev;
unsigned long flags;
bool cancel;
+ if (!ev)
+ return;
+
+ /*
+ * Outer mutex ensures that the first blocker completes canceling
+ * the event work before further blockers are allowed to finish.
+ */
+ mutex_lock(&ev->block_mutex);
+
spin_lock_irqsave(&ev->lock, flags);
cancel = !ev->block++;
spin_unlock_irqrestore(&ev->lock, flags);
- if (cancel) {
- if (sync)
- cancel_delayed_work_sync(&disk->ev->dwork);
- else
- cancel_delayed_work(&disk->ev->dwork);
- }
+ if (cancel)
+ cancel_delayed_work_sync(&disk->ev->dwork);
+
+ mutex_unlock(&ev->block_mutex);
}
static void __disk_unblock_events(struct gendisk *disk, bool check_now)
intv = disk_events_poll_jiffies(disk);
set_timer_slack(&ev->dwork.timer, intv / 4);
if (check_now)
- queue_delayed_work(system_nrt_wq, &ev->dwork, 0);
+ queue_delayed_work(system_freezable_wq, &ev->dwork, 0);
else if (intv)
- queue_delayed_work(system_nrt_wq, &ev->dwork, intv);
+ queue_delayed_work(system_freezable_wq, &ev->dwork, intv);
out_unlock:
spin_unlock_irqrestore(&ev->lock, flags);
}
-/**
- * disk_block_events - block and flush disk event checking
- * @disk: disk to block events for
- *
- * On return from this function, it is guaranteed that event checking
- * isn't in progress and won't happen until unblocked by
- * disk_unblock_events(). Events blocking is counted and the actual
- * unblocking happens after the matching number of unblocks are done.
- *
- * Note that this intentionally does not block event checking from
- * disk_clear_events().
- *
- * CONTEXT:
- * Might sleep.
- */
-void disk_block_events(struct gendisk *disk)
-{
- if (disk->ev)
- __disk_block_events(disk, true);
-}
-
/**
* disk_unblock_events - unblock disk event checking
* @disk: disk to unblock events for
void disk_unblock_events(struct gendisk *disk)
{
if (disk->ev)
- __disk_unblock_events(disk, true);
+ __disk_unblock_events(disk, false);
}
/**
- * disk_check_events - schedule immediate event checking
- * @disk: disk to check events for
+ * disk_flush_events - schedule immediate event checking and flushing
+ * @disk: disk to check and flush events for
+ * @mask: events to flush
*
- * Schedule immediate event checking on @disk if not blocked.
+ * Schedule immediate event checking on @disk if not blocked. Events in
+ * @mask are scheduled to be cleared from the driver. Note that this
+ * doesn't clear the events from @disk->ev.
*
* CONTEXT:
- * Don't care. Safe to call from irq context.
+ * If @mask is non-zero must be called with bdev->bd_mutex held.
*/
-void disk_check_events(struct gendisk *disk)
+void disk_flush_events(struct gendisk *disk, unsigned int mask)
{
- if (disk->ev) {
- __disk_block_events(disk, false);
- __disk_unblock_events(disk, true);
- }
+ struct disk_events *ev = disk->ev;
+
+ if (!ev)
+ return;
+
+ spin_lock_irq(&ev->lock);
+ ev->clearing |= mask;
+ if (!ev->block)
+ mod_delayed_work(system_freezable_wq, &ev->dwork, 0);
+ spin_unlock_irq(&ev->lock);
}
-EXPORT_SYMBOL_GPL(disk_check_events);
/**
* disk_clear_events - synchronously check, clear and return pending events
spin_unlock_irq(&ev->lock);
/* uncondtionally schedule event check and wait for it to finish */
- __disk_block_events(disk, true);
- queue_delayed_work(system_nrt_wq, &ev->dwork, 0);
+ disk_block_events(disk);
+ queue_delayed_work(system_freezable_wq, &ev->dwork, 0);
flush_delayed_work(&ev->dwork);
__disk_unblock_events(disk, false);
intv = disk_events_poll_jiffies(disk);
if (!ev->block && intv)
- queue_delayed_work(system_nrt_wq, &ev->dwork, intv);
+ queue_delayed_work(system_freezable_wq, &ev->dwork, intv);
spin_unlock_irq(&ev->lock);
- /* tell userland about new events */
+ /*
+ * Tell userland about new events. Only the events listed in
+ * @disk->events are reported. Unlisted events are processed the
+ * same internally but never get reported to userland.
+ */
for (i = 0; i < ARRAY_SIZE(disk_uevents); i++)
- if (events & (1 << i))
+ if (events & disk->events & (1 << i))
envp[nr_events++] = disk_uevents[i];
if (nr_events)
if (intv < 0 && intv != -1)
return -EINVAL;
- __disk_block_events(disk, true);
+ disk_block_events(disk);
disk->ev->poll_msecs = intv;
__disk_unblock_events(disk, true);
mutex_lock(&disk_events_mutex);
list_for_each_entry(ev, &disk_events, node)
- disk_check_events(ev->disk);
+ disk_flush_events(ev->disk, 0);
mutex_unlock(&disk_events_mutex);
&disk_events_dfl_poll_msecs, 0644);
/*
- * disk_{add|del|release}_events - initialize and destroy disk_events.
+ * disk_{alloc|add|del|release}_events - initialize and destroy disk_events.
*/
-static void disk_add_events(struct gendisk *disk)
+static void disk_alloc_events(struct gendisk *disk)
{
struct disk_events *ev;
- if (!disk->fops->check_events || !(disk->events | disk->async_events))
+ if (!disk->fops->check_events)
return;
ev = kzalloc(sizeof(*ev), GFP_KERNEL);
return;
}
- if (sysfs_create_files(&disk_to_dev(disk)->kobj,
- disk_events_attrs) < 0) {
- pr_warn("%s: failed to create sysfs files for events\n",
- disk->disk_name);
- kfree(ev);
- return;
- }
-
- disk->ev = ev;
-
INIT_LIST_HEAD(&ev->node);
ev->disk = disk;
spin_lock_init(&ev->lock);
+ mutex_init(&ev->block_mutex);
ev->block = 1;
ev->poll_msecs = -1;
INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn);
+ disk->ev = ev;
+}
+
+static void disk_add_events(struct gendisk *disk)
+{
+ if (!disk->ev)
+ return;
+
+ /* FIXME: error handling */
+ if (sysfs_create_files(&disk_to_dev(disk)->kobj, disk_events_attrs) < 0)
+ pr_warn("%s: failed to create sysfs files for events\n",
+ disk->disk_name);
+
mutex_lock(&disk_events_mutex);
- list_add_tail(&ev->node, &disk_events);
+ list_add_tail(&disk->ev->node, &disk_events);
mutex_unlock(&disk_events_mutex);
/*
if (!disk->ev)
return;
- __disk_block_events(disk, true);
+ disk_block_events(disk);
mutex_lock(&disk_events_mutex);
list_del_init(&disk->ev->node);