Merge tag 'xfs-5.8-merge-9' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
[linux-block.git] / block / genhd.c
index 9c2e13ce0d19554a01eb72c03d5c0fcba7a67a5a..1a765932766441e277c4667bb619bfbe8fc52776 100644 (file)
@@ -4,6 +4,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/ctype.h>
 #include <linux/fs.h>
 #include <linux/genhd.h>
 #include <linux/kdev_t.h>
@@ -26,7 +27,7 @@
 #include "blk.h"
 
 static DEFINE_MUTEX(block_class_lock);
-struct kobject *block_depr;
+static struct kobject *block_depr;
 
 /* for extended dynamic devt allocation, currently only one major is used */
 #define NR_EXT_DEVT            (1 << MINORBITS)
@@ -46,36 +47,77 @@ static void disk_add_events(struct gendisk *disk);
 static void disk_del_events(struct gendisk *disk);
 static void disk_release_events(struct gendisk *disk);
 
-void part_inc_in_flight(struct request_queue *q, struct hd_struct *part, int rw)
+/*
+ * Set disk capacity and send a RESIZE uevent if the size changes and
+ * neither the old nor the new size is zero
+ */
+void set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size,
+                                       bool revalidate)
 {
-       if (queue_is_mq(q))
-               return;
+       sector_t capacity = get_capacity(disk);
+
+       set_capacity(disk, size);
+
+       if (revalidate)
+               revalidate_disk(disk);
 
-       part_stat_local_inc(part, in_flight[rw]);
-       if (part->partno)
-               part_stat_local_inc(&part_to_disk(part)->part0, in_flight[rw]);
+       if (capacity != size && capacity != 0 && size != 0) {
+               char *envp[] = { "RESIZE=1", NULL };
+
+               kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
+       }
 }
 
-void part_dec_in_flight(struct request_queue *q, struct hd_struct *part, int rw)
+EXPORT_SYMBOL_GPL(set_capacity_revalidate_and_notify);
+
+/*
+ * Format the device name of the indicated disk into the supplied buffer and
+ * return a pointer to that same buffer for convenience.
+ */
+char *disk_name(struct gendisk *hd, int partno, char *buf)
 {
-       if (queue_is_mq(q))
-               return;
+       if (!partno)
+               snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name);
+       else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1]))
+               snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno);
+       else
+               snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno);
 
-       part_stat_local_dec(part, in_flight[rw]);
-       if (part->partno)
-               part_stat_local_dec(&part_to_disk(part)->part0, in_flight[rw]);
+       return buf;
 }
 
-unsigned int part_in_flight(struct request_queue *q, struct hd_struct *part)
+const char *bdevname(struct block_device *bdev, char *buf)
+{
+       return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf);
+}
+EXPORT_SYMBOL(bdevname);
+
+static void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat)
 {
        int cpu;
-       unsigned int inflight;
 
-       if (queue_is_mq(q)) {
-               return blk_mq_in_flight(q, part);
+       memset(stat, 0, sizeof(struct disk_stats));
+       for_each_possible_cpu(cpu) {
+               struct disk_stats *ptr = per_cpu_ptr(part->dkstats, cpu);
+               int group;
+
+               for (group = 0; group < NR_STAT_GROUPS; group++) {
+                       stat->nsecs[group] += ptr->nsecs[group];
+                       stat->sectors[group] += ptr->sectors[group];
+                       stat->ios[group] += ptr->ios[group];
+                       stat->merges[group] += ptr->merges[group];
+               }
+
+               stat->io_ticks += ptr->io_ticks;
        }
+}
+
+static unsigned int part_in_flight(struct request_queue *q,
+               struct hd_struct *part)
+{
+       unsigned int inflight = 0;
+       int cpu;
 
-       inflight = 0;
        for_each_possible_cpu(cpu) {
                inflight += part_stat_local_read_cpu(part, in_flight[0], cpu) +
                            part_stat_local_read_cpu(part, in_flight[1], cpu);
@@ -86,16 +128,11 @@ unsigned int part_in_flight(struct request_queue *q, struct hd_struct *part)
        return inflight;
 }
 
-void part_in_flight_rw(struct request_queue *q, struct hd_struct *part,
-                      unsigned int inflight[2])
+static void part_in_flight_rw(struct request_queue *q, struct hd_struct *part,
+               unsigned int inflight[2])
 {
        int cpu;
 
-       if (queue_is_mq(q)) {
-               blk_mq_in_flight_rw(q, part, inflight);
-               return;
-       }
-
        inflight[0] = 0;
        inflight[1] = 0;
        for_each_possible_cpu(cpu) {
@@ -143,7 +180,6 @@ struct hd_struct *disk_get_part(struct gendisk *disk, int partno)
 
        return part;
 }
-EXPORT_SYMBOL_GPL(disk_get_part);
 
 /**
  * disk_part_iter_init - initialize partition iterator
@@ -271,11 +307,13 @@ static inline int sector_in_part(struct hd_struct *part, sector_t sector)
  * primarily used for stats accounting.
  *
  * CONTEXT:
- * RCU read locked.  The returned partition pointer is valid only
- * while preemption is disabled.
+ * RCU read locked.  The returned partition pointer is always valid
+ * because its refcount is grabbed, except for part0, whose lifetime
+ * is the same as the disk's.
  *
  * RETURNS:
  * Found partition on success, part0 is returned if no partition matches
+ * or the matched partition is being deleted.
  */
 struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
 {
@@ -283,23 +321,34 @@ struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
        struct hd_struct *part;
        int i;
 
+       rcu_read_lock();
        ptbl = rcu_dereference(disk->part_tbl);
 
        part = rcu_dereference(ptbl->last_lookup);
-       if (part && sector_in_part(part, sector))
-               return part;
+       if (part && sector_in_part(part, sector) && hd_struct_try_get(part))
+               goto out_unlock;
 
        for (i = 1; i < ptbl->len; i++) {
                part = rcu_dereference(ptbl->part[i]);
 
                if (part && sector_in_part(part, sector)) {
+                       /*
+                        * only a live partition may be cached for lookup,
+                        * so that use-after-free on a cached partition that
+                        * is being deleted is avoided
+                        */
+                       if (!hd_struct_try_get(part))
+                               break;
                        rcu_assign_pointer(ptbl->last_lookup, part);
-                       return part;
+                       goto out_unlock;
                }
        }
-       return &disk->part0;
+
+       part = &disk->part0;
+out_unlock:
+       rcu_read_unlock();
+       return part;
 }
-EXPORT_SYMBOL_GPL(disk_map_sector_rcu);
 
 /**
  * disk_has_partitions
@@ -768,13 +817,15 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
                disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
                disk->flags |= GENHD_FL_NO_PART_SCAN;
        } else {
+               struct backing_dev_info *bdi = disk->queue->backing_dev_info;
+               struct device *dev = disk_to_dev(disk);
                int ret;
 
                /* Register BDI before referencing it from bdev */
-               disk_to_dev(disk)->devt = devt;
-               ret = bdi_register_owner(disk->queue->backing_dev_info,
-                                               disk_to_dev(disk));
+               dev->devt = devt;
+               ret = bdi_register(bdi, "%u:%u", MAJOR(devt), MINOR(devt));
                WARN_ON(ret);
+               bdi_set_owner(bdi, dev);
                blk_register_region(disk_devt(disk), disk->minors, NULL,
                                    exact_match, exact_lock, disk);
        }
@@ -806,6 +857,25 @@ void device_add_disk_no_queue_reg(struct device *parent, struct gendisk *disk)
 }
 EXPORT_SYMBOL(device_add_disk_no_queue_reg);
 
+static void invalidate_partition(struct gendisk *disk, int partno)
+{
+       struct block_device *bdev;
+
+       bdev = bdget_disk(disk, partno);
+       if (!bdev)
+               return;
+
+       fsync_bdev(bdev);
+       __invalidate_device(bdev, true);
+
+       /*
+        * Unhash the bdev inode for this device so that it gets evicted as soon
+        * as the last inode reference is dropped.
+        */
+       remove_inode_hash(bdev->bd_inode);
+       bdput(bdev);
+}
+
 void del_gendisk(struct gendisk *disk)
 {
        struct disk_part_iter piter;
@@ -824,13 +894,11 @@ void del_gendisk(struct gendisk *disk)
                             DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
        while ((part = disk_part_iter_next(&piter))) {
                invalidate_partition(disk, part->partno);
-               bdev_unhash_inode(part_devt(part));
-               delete_partition(disk, part->partno);
+               delete_partition(disk, part);
        }
        disk_part_iter_exit(&piter);
 
        invalidate_partition(disk, 0);
-       bdev_unhash_inode(disk_devt(disk));
        set_capacity(disk, 0);
        disk->flags &= ~GENHD_FL_UP;
        up_write(&disk->lookup_sem);
@@ -944,7 +1012,6 @@ struct gendisk *get_gendisk(dev_t devt, int *partno)
        }
        return disk;
 }
-EXPORT_SYMBOL(get_gendisk);
 
 /**
  * bdget_disk - do bdget() by gendisk and partition number
@@ -1190,6 +1257,74 @@ static ssize_t disk_ro_show(struct device *dev,
        return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0);
 }
 
+ssize_t part_size_show(struct device *dev,
+                      struct device_attribute *attr, char *buf)
+{
+       struct hd_struct *p = dev_to_part(dev);
+
+       return sprintf(buf, "%llu\n",
+               (unsigned long long)part_nr_sects_read(p));
+}
+
+ssize_t part_stat_show(struct device *dev,
+                      struct device_attribute *attr, char *buf)
+{
+       struct hd_struct *p = dev_to_part(dev);
+       struct request_queue *q = part_to_disk(p)->queue;
+       struct disk_stats stat;
+       unsigned int inflight;
+
+       part_stat_read_all(p, &stat);
+       if (queue_is_mq(q))
+               inflight = blk_mq_in_flight(q, p);
+       else
+               inflight = part_in_flight(q, p);
+
+       return sprintf(buf,
+               "%8lu %8lu %8llu %8u "
+               "%8lu %8lu %8llu %8u "
+               "%8u %8u %8u "
+               "%8lu %8lu %8llu %8u "
+               "%8lu %8u"
+               "\n",
+               stat.ios[STAT_READ],
+               stat.merges[STAT_READ],
+               (unsigned long long)stat.sectors[STAT_READ],
+               (unsigned int)div_u64(stat.nsecs[STAT_READ], NSEC_PER_MSEC),
+               stat.ios[STAT_WRITE],
+               stat.merges[STAT_WRITE],
+               (unsigned long long)stat.sectors[STAT_WRITE],
+               (unsigned int)div_u64(stat.nsecs[STAT_WRITE], NSEC_PER_MSEC),
+               inflight,
+               jiffies_to_msecs(stat.io_ticks),
+               (unsigned int)div_u64(stat.nsecs[STAT_READ] +
+                                     stat.nsecs[STAT_WRITE] +
+                                     stat.nsecs[STAT_DISCARD] +
+                                     stat.nsecs[STAT_FLUSH],
+                                               NSEC_PER_MSEC),
+               stat.ios[STAT_DISCARD],
+               stat.merges[STAT_DISCARD],
+               (unsigned long long)stat.sectors[STAT_DISCARD],
+               (unsigned int)div_u64(stat.nsecs[STAT_DISCARD], NSEC_PER_MSEC),
+               stat.ios[STAT_FLUSH],
+               (unsigned int)div_u64(stat.nsecs[STAT_FLUSH], NSEC_PER_MSEC));
+}
+
+ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
+                          char *buf)
+{
+       struct hd_struct *p = dev_to_part(dev);
+       struct request_queue *q = part_to_disk(p)->queue;
+       unsigned int inflight[2];
+
+       if (queue_is_mq(q))
+               blk_mq_in_flight_rw(q, p, inflight);
+       else
+               part_in_flight_rw(q, p, inflight);
+
+       return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]);
+}
+
 static ssize_t disk_capability_show(struct device *dev,
                                    struct device_attribute *attr, char *buf)
 {
@@ -1228,10 +1363,33 @@ static DEVICE_ATTR(capability, 0444, disk_capability_show, NULL);
 static DEVICE_ATTR(stat, 0444, part_stat_show, NULL);
 static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
 static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store);
+
 #ifdef CONFIG_FAIL_MAKE_REQUEST
+ssize_t part_fail_show(struct device *dev,
+                      struct device_attribute *attr, char *buf)
+{
+       struct hd_struct *p = dev_to_part(dev);
+
+       return sprintf(buf, "%d\n", p->make_it_fail);
+}
+
+ssize_t part_fail_store(struct device *dev,
+                       struct device_attribute *attr,
+                       const char *buf, size_t count)
+{
+       struct hd_struct *p = dev_to_part(dev);
+       int i;
+
+       if (count > 0 && sscanf(buf, "%d", &i) > 0)
+               p->make_it_fail = (i == 0) ? 0 : 1;
+
+       return count;
+}
+
 static struct device_attribute dev_attr_fail =
        __ATTR(make-it-fail, 0644, part_fail_show, part_fail_store);
-#endif
+#endif /* CONFIG_FAIL_MAKE_REQUEST */
+
 #ifdef CONFIG_FAIL_IO_TIMEOUT
 static struct device_attribute dev_attr_fail_timeout =
        __ATTR(io-timeout-fail, 0644, part_timeout_show, part_timeout_store);
@@ -1378,8 +1536,8 @@ static char *block_devnode(struct device *dev, umode_t *mode,
 {
        struct gendisk *disk = dev_to_disk(dev);
 
-       if (disk->devnode)
-               return disk->devnode(disk, mode);
+       if (disk->fops->devnode)
+               return disk->fops->devnode(disk, mode);
        return NULL;
 }
 
@@ -1405,6 +1563,7 @@ static int diskstats_show(struct seq_file *seqf, void *v)
        struct hd_struct *hd;
        char buf[BDEVNAME_SIZE];
        unsigned int inflight;
+       struct disk_stats stat;
 
        /*
        if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next)
@@ -1416,7 +1575,12 @@ static int diskstats_show(struct seq_file *seqf, void *v)
 
        disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0);
        while ((hd = disk_part_iter_next(&piter))) {
-               inflight = part_in_flight(gp->queue, hd);
+               part_stat_read_all(hd, &stat);
+               if (queue_is_mq(gp->queue))
+                       inflight = blk_mq_in_flight(gp->queue, hd);
+               else
+                       inflight = part_in_flight(gp->queue, hd);
+
                seq_printf(seqf, "%4d %7d %s "
                           "%lu %lu %lu %u "
                           "%lu %lu %lu %u "
@@ -1426,23 +1590,31 @@ static int diskstats_show(struct seq_file *seqf, void *v)
                           "\n",
                           MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
                           disk_name(gp, hd->partno, buf),
-                          part_stat_read(hd, ios[STAT_READ]),
-                          part_stat_read(hd, merges[STAT_READ]),
-                          part_stat_read(hd, sectors[STAT_READ]),
-                          (unsigned int)part_stat_read_msecs(hd, STAT_READ),
-                          part_stat_read(hd, ios[STAT_WRITE]),
-                          part_stat_read(hd, merges[STAT_WRITE]),
-                          part_stat_read(hd, sectors[STAT_WRITE]),
-                          (unsigned int)part_stat_read_msecs(hd, STAT_WRITE),
+                          stat.ios[STAT_READ],
+                          stat.merges[STAT_READ],
+                          stat.sectors[STAT_READ],
+                          (unsigned int)div_u64(stat.nsecs[STAT_READ],
+                                                       NSEC_PER_MSEC),
+                          stat.ios[STAT_WRITE],
+                          stat.merges[STAT_WRITE],
+                          stat.sectors[STAT_WRITE],
+                          (unsigned int)div_u64(stat.nsecs[STAT_WRITE],
+                                                       NSEC_PER_MSEC),
                           inflight,
-                          jiffies_to_msecs(part_stat_read(hd, io_ticks)),
-                          jiffies_to_msecs(part_stat_read(hd, time_in_queue)),
-                          part_stat_read(hd, ios[STAT_DISCARD]),
-                          part_stat_read(hd, merges[STAT_DISCARD]),
-                          part_stat_read(hd, sectors[STAT_DISCARD]),
-                          (unsigned int)part_stat_read_msecs(hd, STAT_DISCARD),
-                          part_stat_read(hd, ios[STAT_FLUSH]),
-                          (unsigned int)part_stat_read_msecs(hd, STAT_FLUSH)
+                          jiffies_to_msecs(stat.io_ticks),
+                          (unsigned int)div_u64(stat.nsecs[STAT_READ] +
+                                                stat.nsecs[STAT_WRITE] +
+                                                stat.nsecs[STAT_DISCARD] +
+                                                stat.nsecs[STAT_FLUSH],
+                                                       NSEC_PER_MSEC),
+                          stat.ios[STAT_DISCARD],
+                          stat.merges[STAT_DISCARD],
+                          stat.sectors[STAT_DISCARD],
+                          (unsigned int)div_u64(stat.nsecs[STAT_DISCARD],
+                                                NSEC_PER_MSEC),
+                          stat.ios[STAT_FLUSH],
+                          (unsigned int)div_u64(stat.nsecs[STAT_FLUSH],
+                                                NSEC_PER_MSEC)
                        );
        }
        disk_part_iter_exit(&piter);
@@ -1499,7 +1671,6 @@ dev_t blk_lookup_devt(const char *name, int partno)
        class_dev_iter_exit(&iter);
        return devt;
 }
-EXPORT_SYMBOL(blk_lookup_devt);
 
 struct gendisk *__alloc_disk_node(int minors, int node_id)
 {
@@ -1515,14 +1686,15 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
 
        disk = kzalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id);
        if (disk) {
-               if (!init_part_stats(&disk->part0)) {
+               disk->part0.dkstats = alloc_percpu(struct disk_stats);
+               if (!disk->part0.dkstats) {
                        kfree(disk);
                        return NULL;
                }
                init_rwsem(&disk->lookup_sem);
                disk->node_id = node_id;
                if (disk_expand_part_tbl(disk, 0)) {
-                       free_part_stats(&disk->part0);
+                       free_percpu(disk->part0.dkstats);
                        kfree(disk);
                        return NULL;
                }
@@ -1538,7 +1710,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
                 * TODO: Ideally set_capacity() and get_capacity() should be
                 * converted to make use of bd_mutex and sequence counters.
                 */
-               seqcount_init(&disk->part0.nr_sects_seq);
+               hd_sects_seq_init(&disk->part0);
                if (hd_ref_init(&disk->part0)) {
                        hd_free_part(&disk->part0);
                        kfree(disk);
@@ -1641,20 +1813,6 @@ int bdev_read_only(struct block_device *bdev)
 
 EXPORT_SYMBOL(bdev_read_only);
 
-int invalidate_partition(struct gendisk *disk, int partno)
-{
-       int res = 0;
-       struct block_device *bdev = bdget_disk(disk, partno);
-       if (bdev) {
-               fsync_bdev(bdev);
-               res = __invalidate_device(bdev, true);
-               bdput(bdev);
-       }
-       return res;
-}
-
-EXPORT_SYMBOL(invalidate_partition);
-
 /*
  * Disk events - monitor disk events like media change and eject request.
  */