#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/kobj_map.h>
-#include <linux/buffer_head.h>
#include <linux/mutex.h>
#include <linux/idr.h>
#include <linux/log2.h>
struct kobject *block_depr;
/* for extended dynamic devt allocation, currently only one major is used */
-#define MAX_EXT_DEVT (1 << MINORBITS)
+#define NR_EXT_DEVT (1 << MINORBITS)
/* For extended devt allocation. ext_devt_mutex prevents look up
* results from going away underneath its user.
static struct device_type disk_type;
+static void disk_check_events(struct disk_events *ev,
+ unsigned int *clearing_ptr);
+static void disk_alloc_events(struct gendisk *disk);
static void disk_add_events(struct gendisk *disk);
static void disk_del_events(struct gendisk *disk);
static void disk_release_events(struct gendisk *disk);
do {
if (!idr_pre_get(&ext_devt_idr, GFP_KERNEL))
return -ENOMEM;
+ mutex_lock(&ext_devt_mutex);
rc = idr_get_new(&ext_devt_idr, part, &idx);
+ if (!rc && idx >= NR_EXT_DEVT) {
+ idr_remove(&ext_devt_idr, idx);
+ rc = -EBUSY;
+ }
+ mutex_unlock(&ext_devt_mutex);
} while (rc == -EAGAIN);
if (rc)
return rc;
- if (idx > MAX_EXT_DEVT) {
- idr_remove(&ext_devt_idr, idx);
- return -EBUSY;
- }
-
*devt = MKDEV(BLOCK_EXT_MAJOR, blk_mangle_minor(idx));
return 0;
}
return 0;
}
-void register_disk(struct gendisk *disk)
+static void register_disk(struct gendisk *disk)
{
struct device *ddev = disk_to_dev(disk);
struct block_device *bdev;
ddev->parent = disk->driverfs_dev;
- dev_set_name(ddev, disk->disk_name);
+ dev_set_name(ddev, "%s", disk->disk_name);
/* delay uevents, until we scanned partition table */
dev_set_uevent_suppress(ddev, 1);
disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
/* No minors to use for partitions */
- if (!disk_partitionable(disk))
+ if (!disk_part_scan_enabled(disk))
goto exit;
/* No such device (e.g., media were just removed) */
disk->major = MAJOR(devt);
disk->first_minor = MINOR(devt);
- /* Register BDI before referencing it from bdev */
+ disk_alloc_events(disk);
+
+ /* Register BDI before referencing it from bdev */
bdi = &disk->queue->backing_dev_info;
bdi_register_dev(bdi, disk_devt(disk));
register_disk(disk);
blk_register_queue(disk);
+ /*
+ * Take an extra ref on queue which will be put on disk_release()
+ * so that it sticks around as long as @disk is there.
+ */
+ WARN_ON_ONCE(!blk_get_queue(disk->queue));
+
retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj,
"bdi");
WARN_ON(retval);
disk_part_iter_exit(&piter);
invalidate_partition(disk, 0);
- blk_free_devt(disk_to_dev(disk)->devt);
set_capacity(disk, 0);
disk->flags &= ~GENHD_FL_UP;
if (!sysfs_deprecated)
sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
device_del(disk_to_dev(disk));
+ blk_free_devt(disk_to_dev(disk)->devt);
}
EXPORT_SYMBOL(del_gendisk);
struct hd_struct *part;
char name_buf[BDEVNAME_SIZE];
char devt_buf[BDEVT_SIZE];
- u8 uuid[PARTITION_META_INFO_UUIDLTH * 2 + 1];
+ char uuid_buf[PARTITION_META_INFO_UUIDLTH * 2 + 5];
/*
* Don't show empty devices or things that have been
- * surpressed
+ * suppressed
*/
if (get_capacity(disk) == 0 ||
(disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO))
while ((part = disk_part_iter_next(&piter))) {
bool is_part0 = part == &disk->part0;
- uuid[0] = 0;
+ uuid_buf[0] = '\0';
if (part->info)
- part_unpack_uuid(part->info->uuid, uuid);
+ snprintf(uuid_buf, sizeof(uuid_buf), "%pU",
+ part->info->uuid);
printk("%s%s %10llu %s %s", is_part0 ? "" : " ",
bdevt_str(part_devt(part), devt_buf),
(unsigned long long)part->nr_sects >> 1,
- disk_name(disk, part->partno, name_buf), uuid);
+ disk_name(disk, part->partno, name_buf),
+ uuid_buf);
if (is_part0) {
if (disk->driverfs_dev != NULL &&
disk->driverfs_dev->driver != NULL)
char buf[BDEVNAME_SIZE];
/* Don't show non-partitionable removeable devices or empty devices */
- if (!get_capacity(sgp) || (!disk_partitionable(sgp) &&
+ if (!get_capacity(sgp) || (!disk_max_parts(sgp) &&
(sgp->flags & GENHD_FL_REMOVABLE)))
return 0;
if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
NULL
};
-static void disk_free_ptbl_rcu_cb(struct rcu_head *head)
-{
- struct disk_part_tbl *ptbl =
- container_of(head, struct disk_part_tbl, rcu_head);
-
- kfree(ptbl);
-}
-
/**
* disk_replace_part_tbl - replace disk->part_tbl in RCU-safe way
* @disk: disk to replace part_tbl for
if (old_ptbl) {
rcu_assign_pointer(old_ptbl->last_lookup, NULL);
- call_rcu(&old_ptbl->rcu_head, disk_free_ptbl_rcu_cb);
+ kfree_rcu(old_ptbl, rcu_head);
}
}
disk_replace_part_tbl(disk, NULL);
free_part_stats(&disk->part0);
free_part_info(&disk->part0);
+ if (disk->queue)
+ blk_put_queue(disk->queue);
kfree(disk);
}
+
+static int disk_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+ struct gendisk *disk = dev_to_disk(dev);
+ struct disk_part_iter piter;
+ struct hd_struct *part;
+ int cnt = 0;
+
+ disk_part_iter_init(&piter, disk, 0);
+ while((part = disk_part_iter_next(&piter)))
+ cnt++;
+ disk_part_iter_exit(&piter);
+ add_uevent_var(env, "NPARTS=%u", cnt);
+ return 0;
+}
+
struct class block_class = {
.name = "block",
};
-static char *block_devnode(struct device *dev, mode_t *mode)
+static char *block_devnode(struct device *dev, umode_t *mode)
{
struct gendisk *disk = dev_to_disk(dev);
.groups = disk_attr_groups,
.release = disk_release,
.devnode = block_devnode,
+ .uevent = disk_uevent,
};
#ifdef CONFIG_PROC_FS
"wsect wuse running use aveq"
"\n\n");
*/
-
+
disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0);
while ((hd = disk_part_iter_next(&piter))) {
cpu = part_stat_lock();
part_round_stats(cpu, hd);
part_stat_unlock();
- seq_printf(seqf, "%4d %7d %s %lu %lu %llu "
- "%u %lu %lu %llu %u %u %u %u\n",
+ seq_printf(seqf, "%4d %7d %s %lu %lu %lu "
+ "%u %lu %lu %lu %u %u %u %u\n",
MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
disk_name(gp, hd->partno, buf),
- part_stat_read(hd, ios[0]),
- part_stat_read(hd, merges[0]),
- (unsigned long long)part_stat_read(hd, sectors[0]),
- jiffies_to_msecs(part_stat_read(hd, ticks[0])),
- part_stat_read(hd, ios[1]),
- part_stat_read(hd, merges[1]),
- (unsigned long long)part_stat_read(hd, sectors[1]),
- jiffies_to_msecs(part_stat_read(hd, ticks[1])),
+ part_stat_read(hd, ios[READ]),
+ part_stat_read(hd, merges[READ]),
+ part_stat_read(hd, sectors[READ]),
+ jiffies_to_msecs(part_stat_read(hd, ticks[READ])),
+ part_stat_read(hd, ios[WRITE]),
+ part_stat_read(hd, merges[WRITE]),
+ part_stat_read(hd, sectors[WRITE]),
+ jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])),
part_in_flight(hd),
jiffies_to_msecs(part_stat_read(hd, io_ticks)),
jiffies_to_msecs(part_stat_read(hd, time_in_queue))
);
}
disk_part_iter_exit(&piter);
-
+
return 0;
}
struct block_device *bdev = bdget_disk(disk, partno);
if (bdev) {
fsync_bdev(bdev);
- res = __invalidate_device(bdev);
+ res = __invalidate_device(bdev, true);
bdput(bdev);
}
return res;
struct gendisk *disk; /* the associated disk */
spinlock_t lock;
+ struct mutex block_mutex; /* protects blocking */
int block; /* event blocking depth */
unsigned int pending; /* events already sent out */
unsigned int clearing; /* events being cleared */
return msecs_to_jiffies(intv_msecs);
}
-static void __disk_block_events(struct gendisk *disk, bool sync)
+/**
+ * disk_block_events - block and flush disk event checking
+ * @disk: disk to block events for
+ *
+ * On return from this function, it is guaranteed that event checking
+ * isn't in progress and won't happen until unblocked by
+ * disk_unblock_events(). Events blocking is counted and the actual
+ * unblocking happens after the matching number of unblocks are done.
+ *
+ * Note that this intentionally does not block event checking from
+ * disk_clear_events().
+ *
+ * CONTEXT:
+ * Might sleep.
+ */
+void disk_block_events(struct gendisk *disk)
{
struct disk_events *ev = disk->ev;
unsigned long flags;
bool cancel;
+ if (!ev)
+ return;
+
+ /*
+ * Outer mutex ensures that the first blocker completes canceling
+ * the event work before further blockers are allowed to finish.
+ */
+ mutex_lock(&ev->block_mutex);
+
spin_lock_irqsave(&ev->lock, flags);
cancel = !ev->block++;
spin_unlock_irqrestore(&ev->lock, flags);
- if (cancel) {
- if (sync)
- cancel_delayed_work_sync(&disk->ev->dwork);
- else
- cancel_delayed_work(&disk->ev->dwork);
- }
+ if (cancel)
+ cancel_delayed_work_sync(&disk->ev->dwork);
+
+ mutex_unlock(&ev->block_mutex);
}
static void __disk_unblock_events(struct gendisk *disk, bool check_now)
intv = disk_events_poll_jiffies(disk);
set_timer_slack(&ev->dwork.timer, intv / 4);
if (check_now)
- queue_delayed_work(system_nrt_wq, &ev->dwork, 0);
+ queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, 0);
else if (intv)
- queue_delayed_work(system_nrt_wq, &ev->dwork, intv);
+ queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, intv);
out_unlock:
spin_unlock_irqrestore(&ev->lock, flags);
}
-/**
- * disk_block_events - block and flush disk event checking
- * @disk: disk to block events for
- *
- * On return from this function, it is guaranteed that event checking
- * isn't in progress and won't happen until unblocked by
- * disk_unblock_events(). Events blocking is counted and the actual
- * unblocking happens after the matching number of unblocks are done.
- *
- * Note that this intentionally does not block event checking from
- * disk_clear_events().
- *
- * CONTEXT:
- * Might sleep.
- */
-void disk_block_events(struct gendisk *disk)
-{
- if (disk->ev)
- __disk_block_events(disk, true);
-}
-
/**
* disk_unblock_events - unblock disk event checking
* @disk: disk to unblock events for
void disk_unblock_events(struct gendisk *disk)
{
if (disk->ev)
- __disk_unblock_events(disk, true);
+ __disk_unblock_events(disk, false);
}
/**
- * disk_check_events - schedule immediate event checking
- * @disk: disk to check events for
+ * disk_flush_events - schedule immediate event checking and flushing
+ * @disk: disk to check and flush events for
+ * @mask: events to flush
*
- * Schedule immediate event checking on @disk if not blocked.
+ * Schedule immediate event checking on @disk if not blocked. Events in
+ * @mask are scheduled to be cleared from the driver. Note that this
+ * doesn't clear the events from @disk->ev.
*
* CONTEXT:
- * Don't care. Safe to call from irq context.
+ * If @mask is non-zero must be called with bdev->bd_mutex held.
*/
-void disk_check_events(struct gendisk *disk)
+void disk_flush_events(struct gendisk *disk, unsigned int mask)
{
- if (disk->ev) {
- __disk_block_events(disk, false);
- __disk_unblock_events(disk, true);
+ struct disk_events *ev = disk->ev;
+
+ if (!ev)
+ return;
+
+ spin_lock_irq(&ev->lock);
+ ev->clearing |= mask;
+ if (!ev->block) {
+ cancel_delayed_work(&ev->dwork);
+ queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, 0);
}
+ spin_unlock_irq(&ev->lock);
}
-EXPORT_SYMBOL_GPL(disk_check_events);
/**
* disk_clear_events - synchronously check, clear and return pending events
const struct block_device_operations *bdops = disk->fops;
struct disk_events *ev = disk->ev;
unsigned int pending;
+ unsigned int clearing = mask;
if (!ev) {
/* for drivers still using the old ->media_changed method */
return 0;
}
- /* tell the workfn about the events being cleared */
+ disk_block_events(disk);
+
+ /*
+ * store the union of mask and ev->clearing on the stack so that the
+ * race with disk_flush_events does not cause ambiguity (ev->clearing
+ * can still be modified even if events are blocked).
+ */
spin_lock_irq(&ev->lock);
- ev->clearing |= mask;
+ clearing |= ev->clearing;
+ ev->clearing = 0;
spin_unlock_irq(&ev->lock);
- /* uncondtionally schedule event check and wait for it to finish */
- __disk_block_events(disk, true);
- queue_delayed_work(system_nrt_wq, &ev->dwork, 0);
- flush_delayed_work(&ev->dwork);
- __disk_unblock_events(disk, false);
+ disk_check_events(ev, &clearing);
+ /*
+ * if ev->clearing is not 0, the disk_flush_events got called in the
+ * middle of this function, so we want to run the workfn without delay.
+ */
+ __disk_unblock_events(disk, ev->clearing ? true : false);
/* then, fetch and clear pending events */
spin_lock_irq(&ev->lock);
- WARN_ON_ONCE(ev->clearing & mask); /* cleared by workfn */
pending = ev->pending & mask;
ev->pending &= ~mask;
spin_unlock_irq(&ev->lock);
+ WARN_ON_ONCE(clearing & mask);
return pending;
}
+/*
+ * Separate this part out so that a different pointer for clearing_ptr can be
+ * passed in for disk_clear_events.
+ */
static void disk_events_workfn(struct work_struct *work)
{
struct delayed_work *dwork = to_delayed_work(work);
struct disk_events *ev = container_of(dwork, struct disk_events, dwork);
+
+ disk_check_events(ev, &ev->clearing);
+}
+
+static void disk_check_events(struct disk_events *ev,
+ unsigned int *clearing_ptr)
+{
struct gendisk *disk = ev->disk;
char *envp[ARRAY_SIZE(disk_uevents) + 1] = { };
- unsigned int clearing = ev->clearing;
+ unsigned int clearing = *clearing_ptr;
unsigned int events;
unsigned long intv;
int nr_events = 0, i;
events &= ~ev->pending;
ev->pending |= events;
- ev->clearing &= ~clearing;
+ *clearing_ptr &= ~clearing;
intv = disk_events_poll_jiffies(disk);
if (!ev->block && intv)
- queue_delayed_work(system_nrt_wq, &ev->dwork, intv);
+ queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, intv);
spin_unlock_irq(&ev->lock);
- /* tell userland about new events */
+ /*
+ * Tell userland about new events. Only the events listed in
+ * @disk->events are reported. Unlisted events are processed the
+ * same internally but never get reported to userland.
+ */
for (i = 0; i < ARRAY_SIZE(disk_uevents); i++)
- if (events & (1 << i))
+ if (events & disk->events & (1 << i))
envp[nr_events++] = disk_uevents[i];
if (nr_events)
if (intv < 0 && intv != -1)
return -EINVAL;
- __disk_block_events(disk, true);
+ disk_block_events(disk);
disk->ev->poll_msecs = intv;
__disk_unblock_events(disk, true);
mutex_lock(&disk_events_mutex);
list_for_each_entry(ev, &disk_events, node)
- disk_check_events(ev->disk);
+ disk_flush_events(ev->disk, 0);
mutex_unlock(&disk_events_mutex);
&disk_events_dfl_poll_msecs, 0644);
/*
- * disk_{add|del|release}_events - initialize and destroy disk_events.
+ * disk_{alloc|add|del|release}_events - initialize and destroy disk_events.
*/
-static void disk_add_events(struct gendisk *disk)
+static void disk_alloc_events(struct gendisk *disk)
{
struct disk_events *ev;
- if (!disk->fops->check_events || !(disk->events | disk->async_events))
+ if (!disk->fops->check_events)
return;
ev = kzalloc(sizeof(*ev), GFP_KERNEL);
return;
}
- if (sysfs_create_files(&disk_to_dev(disk)->kobj,
- disk_events_attrs) < 0) {
- pr_warn("%s: failed to create sysfs files for events\n",
- disk->disk_name);
- kfree(ev);
- return;
- }
-
- disk->ev = ev;
-
INIT_LIST_HEAD(&ev->node);
ev->disk = disk;
spin_lock_init(&ev->lock);
+ mutex_init(&ev->block_mutex);
ev->block = 1;
ev->poll_msecs = -1;
INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn);
+ disk->ev = ev;
+}
+
+static void disk_add_events(struct gendisk *disk)
+{
+ if (!disk->ev)
+ return;
+
+ /* FIXME: error handling */
+ if (sysfs_create_files(&disk_to_dev(disk)->kobj, disk_events_attrs) < 0)
+ pr_warn("%s: failed to create sysfs files for events\n",
+ disk->disk_name);
+
mutex_lock(&disk_events_mutex);
- list_add_tail(&ev->node, &disk_events);
+ list_add_tail(&disk->ev->node, &disk_events);
mutex_unlock(&disk_events_mutex);
/*
if (!disk->ev)
return;
- __disk_block_events(disk, true);
+ disk_block_events(disk);
mutex_lock(&disk_events_mutex);
list_del_init(&disk->ev->node);