block: Expose discard granularity
author    Martin K. Petersen <martin.petersen@oracle.com>
          Tue, 10 Nov 2009 10:50:21 +0000 (11:50 +0100)
committer Jens Axboe <jens.axboe@oracle.com>
          Tue, 10 Nov 2009 10:50:21 +0000 (11:50 +0100)
While SSDs track block usage on a per-sector basis, RAID arrays often
have allocation blocks that are bigger.  Allow the discard granularity
and alignment to be set and teach the topology stacking logic how to
handle them.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
block/blk-settings.c
block/blk-sysfs.c
block/genhd.c
fs/partitions/check.c
include/linux/blkdev.h
include/linux/genhd.h

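For illustration only (not part of the patch): a hypothetical driver whose backing store frees space in 64 KiB allocation blocks might advertise the new limits at probe time roughly as follows. The function name and values are invented; blk_queue_max_discard_sectors() predates this change, while the granularity and alignment fields introduced here are assigned directly.

#include <linux/blkdev.h>

/* Hypothetical driver helper, for illustration only. */
static void example_setup_discard(struct request_queue *q)
{
	/* largest single discard the device accepts: 8192 sectors = 4 MiB */
	blk_queue_max_discard_sectors(q, 8192);

	/* internal allocation unit of the (assumed) backing store, in bytes */
	q->limits.discard_granularity = 64 * 1024;

	/* allocation units are assumed to start at LBA 0 */
	q->limits.discard_alignment = 0;
}
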
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 66d4aa8799b7517de258e3e334a69372154580a9..7f986cafacd58c37d33c855f54b9aa164003fb82 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -96,7 +96,10 @@ void blk_set_default_limits(struct queue_limits *lim)
        lim->max_segment_size = MAX_SEGMENT_SIZE;
        lim->max_sectors = BLK_DEF_MAX_SECTORS;
        lim->max_hw_sectors = INT_MAX;
-       lim->max_discard_sectors = SAFE_MAX_SECTORS;
+       lim->max_discard_sectors = 0;
+       lim->discard_granularity = 0;
+       lim->discard_alignment = 0;
+       lim->discard_misaligned = 0;
        lim->logical_block_size = lim->physical_block_size = lim->io_min = 512;
        lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT);
        lim->alignment_offset = 0;
@@ -488,6 +491,16 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
 }
 EXPORT_SYMBOL(blk_queue_stack_limits);
 
+static unsigned int lcm(unsigned int a, unsigned int b)
+{
+       if (a && b)
+               return (a * b) / gcd(a, b);
+       else if (b)
+               return b;
+
+       return a;
+}
+
 /**
  * blk_stack_limits - adjust queue_limits for stacked devices
  * @t: the stacking driver limits (top)
@@ -502,6 +515,10 @@ EXPORT_SYMBOL(blk_queue_stack_limits);
 int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
                     sector_t offset)
 {
+       int ret;
+
+       ret = 0;
+
        t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
        t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
        t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn);
@@ -531,7 +548,13 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
        if (offset &&
            (offset & (b->physical_block_size - 1)) != b->alignment_offset) {
                t->misaligned = 1;
-               return -1;
+               ret = -1;
+       }
+
+       if (offset &&
+           (offset & (b->discard_granularity - 1)) != b->discard_alignment) {
+               t->discard_misaligned = 1;
+               ret = -1;
        }
 
        /* If top has no alignment offset, inherit from bottom */
@@ -539,23 +562,26 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
                t->alignment_offset =
                        b->alignment_offset & (b->physical_block_size - 1);
 
+       if (!t->discard_alignment)
+               t->discard_alignment =
+                       b->discard_alignment & (b->discard_granularity - 1);
+
        /* Top device aligned on logical block boundary? */
        if (t->alignment_offset & (t->logical_block_size - 1)) {
                t->misaligned = 1;
-               return -1;
+               ret = -1;
        }
 
-       /* Find lcm() of optimal I/O size */
-       if (t->io_opt && b->io_opt)
-               t->io_opt = (t->io_opt * b->io_opt) / gcd(t->io_opt, b->io_opt);
-       else if (b->io_opt)
-               t->io_opt = b->io_opt;
+       /* Find lcm() of optimal I/O size and granularity */
+       t->io_opt = lcm(t->io_opt, b->io_opt);
+       t->discard_granularity = lcm(t->discard_granularity,
+                                    b->discard_granularity);
 
        /* Verify that optimal I/O size is a multiple of io_min */
        if (t->io_min && t->io_opt % t->io_min)
-               return -1;
+               ret = -1;
 
-       return 0;
+       return ret;
 }
 EXPORT_SYMBOL(blk_stack_limits);
 
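As a standalone illustration (not part of the patch), the stacking rules above can be exercised in userspace: granularities are combined with lcm(), and an offset that is not a multiple of the bottom granularity marks the top device discard_misaligned. All device values below are invented.

#include <stdio.h>

static unsigned int gcd(unsigned int a, unsigned int b)
{
	while (b) {
		unsigned int t = a % b;
		a = b;
		b = t;
	}
	return a;
}

static unsigned int lcm(unsigned int a, unsigned int b)
{
	if (a && b)
		return (a * b) / gcd(a, b);
	else if (b)
		return b;
	return a;
}

int main(void)
{
	unsigned int t_granularity = 0;		/* top device, default limits */
	unsigned int b1_granularity = 4096;	/* e.g. an SSD */
	unsigned int b2_granularity = 65536;	/* e.g. a RAID allocation block */

	t_granularity = lcm(t_granularity, b1_granularity);
	t_granularity = lcm(t_granularity, b2_granularity);
	printf("stacked discard_granularity: %u bytes\n", t_granularity);

	/* a component starting at sector 7 on the 64 KiB device */
	unsigned long long offset = 7ULL * 512;

	if (offset && (offset & (b2_granularity - 1)) != 0)
		printf("offset %llu would set discard_misaligned\n", offset);

	return 0;
}
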
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 8a6d81afb284ebbf7111653393b6594d91924491..3147145edc15f72fad3aa4cb83ba03c1920039e2 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -126,6 +126,16 @@ static ssize_t queue_io_opt_show(struct request_queue *q, char *page)
        return queue_var_show(queue_io_opt(q), page);
 }
 
+static ssize_t queue_discard_granularity_show(struct request_queue *q, char *page)
+{
+       return queue_var_show(q->limits.discard_granularity, page);
+}
+
+static ssize_t queue_discard_max_show(struct request_queue *q, char *page)
+{
+       return queue_var_show(q->limits.max_discard_sectors << 9, page);
+}
+
 static ssize_t
 queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
 {
@@ -293,6 +303,16 @@ static struct queue_sysfs_entry queue_io_opt_entry = {
        .show = queue_io_opt_show,
 };
 
+static struct queue_sysfs_entry queue_discard_granularity_entry = {
+       .attr = {.name = "discard_granularity", .mode = S_IRUGO },
+       .show = queue_discard_granularity_show,
+};
+
+static struct queue_sysfs_entry queue_discard_max_entry = {
+       .attr = {.name = "discard_max_bytes", .mode = S_IRUGO },
+       .show = queue_discard_max_show,
+};
+
 static struct queue_sysfs_entry queue_nonrot_entry = {
        .attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR },
        .show = queue_nonrot_show,
@@ -328,6 +348,8 @@ static struct attribute *default_attrs[] = {
        &queue_physical_block_size_entry.attr,
        &queue_io_min_entry.attr,
        &queue_io_opt_entry.attr,
+       &queue_discard_granularity_entry.attr,
+       &queue_discard_max_entry.attr,
        &queue_nonrot_entry.attr,
        &queue_nomerges_entry.attr,
        &queue_rq_affinity_entry.attr,
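
For illustration only (not part of the patch), the two read-only queue attributes added above can be inspected from userspace; the device name "sda" is an assumption.

#include <stdio.h>

int main(void)
{
	const char *attrs[] = {
		"/sys/block/sda/queue/discard_granularity",
		"/sys/block/sda/queue/discard_max_bytes",
	};

	for (int i = 0; i < 2; i++) {
		FILE *f = fopen(attrs[i], "r");
		unsigned long long val;

		if (!f) {
			perror(attrs[i]);
			continue;
		}
		if (fscanf(f, "%llu", &val) == 1)
			printf("%s: %llu\n", attrs[i], val);
		fclose(f);
	}
	return 0;
}
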
diff --git a/block/genhd.c b/block/genhd.c
index 517e4332cb37481dcad965e8f9986f029b42e64a..b11a4ad7d571fcfa71a69a2ab8e9cfe5a4d82673 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -861,12 +861,23 @@ static ssize_t disk_alignment_offset_show(struct device *dev,
        return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue));
 }
 
+static ssize_t disk_discard_alignment_show(struct device *dev,
+                                          struct device_attribute *attr,
+                                          char *buf)
+{
+       struct gendisk *disk = dev_to_disk(dev);
+
+       return sprintf(buf, "%u\n", queue_discard_alignment(disk->queue));
+}
+
 static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
 static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL);
 static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
 static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL);
 static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
 static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL);
+static DEVICE_ATTR(discard_alignment, S_IRUGO, disk_discard_alignment_show,
+                  NULL);
 static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
 static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
 static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
@@ -887,6 +898,7 @@ static struct attribute *disk_attrs[] = {
        &dev_attr_ro.attr,
        &dev_attr_size.attr,
        &dev_attr_alignment_offset.attr,
+       &dev_attr_discard_alignment.attr,
        &dev_attr_capability.attr,
        &dev_attr_stat.attr,
        &dev_attr_inflight.attr,
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 7b685e10cbad8cc9b052cec48737fbf7bf6f7afb..64bc8998ac9aa22f5604ed7ccccc644e0b4fb563 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -226,6 +226,13 @@ ssize_t part_alignment_offset_show(struct device *dev,
        return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset);
 }
 
+ssize_t part_discard_alignment_show(struct device *dev,
+                                  struct device_attribute *attr, char *buf)
+{
+       struct hd_struct *p = dev_to_part(dev);
+       return sprintf(buf, "%u\n", p->discard_alignment);
+}
+
 ssize_t part_stat_show(struct device *dev,
                       struct device_attribute *attr, char *buf)
 {
@@ -288,6 +295,8 @@ static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL);
 static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL);
 static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
 static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL);
+static DEVICE_ATTR(discard_alignment, S_IRUGO, part_discard_alignment_show,
+                  NULL);
 static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
 static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
@@ -300,6 +309,7 @@ static struct attribute *part_attrs[] = {
        &dev_attr_start.attr,
        &dev_attr_size.attr,
        &dev_attr_alignment_offset.attr,
+       &dev_attr_discard_alignment.attr,
        &dev_attr_stat.attr,
        &dev_attr_inflight.attr,
 #ifdef CONFIG_FAIL_MAKE_REQUEST
@@ -403,6 +413,8 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
 
        p->start_sect = start;
        p->alignment_offset = queue_sector_alignment_offset(disk->queue, start);
+       p->discard_alignment = queue_sector_discard_alignment(disk->queue,
+                                                             start);
        p->nr_sects = len;
        p->partno = partno;
        p->policy = get_disk_ro(disk);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 39c601f783a077c96864435ca50e194501502b2e..1cc02972fbe22cc0fda12e186cf16d6dcbeedc3c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -312,12 +312,15 @@ struct queue_limits {
        unsigned int            io_min;
        unsigned int            io_opt;
        unsigned int            max_discard_sectors;
+       unsigned int            discard_granularity;
+       unsigned int            discard_alignment;
 
        unsigned short          logical_block_size;
        unsigned short          max_hw_segments;
        unsigned short          max_phys_segments;
 
        unsigned char           misaligned;
+       unsigned char           discard_misaligned;
        unsigned char           no_cluster;
 };
 
@@ -1121,6 +1124,21 @@ static inline int bdev_alignment_offset(struct block_device *bdev)
        return q->limits.alignment_offset;
 }
 
+static inline int queue_discard_alignment(struct request_queue *q)
+{
+       if (q->limits.discard_misaligned)
+               return -1;
+
+       return q->limits.discard_alignment;
+}
+
+static inline int queue_sector_discard_alignment(struct request_queue *q,
+                                                sector_t sector)
+{
+       return ((sector << 9) - q->limits.discard_alignment)
+               & (q->limits.discard_granularity - 1);
+}
+
 static inline int queue_dma_alignment(struct request_queue *q)
 {
        return q ? q->dma_alignment : 511;
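
For illustration only (not part of the patch), the value that add_partition() stores in p->discard_alignment can be reproduced in userspace with the queue_sector_discard_alignment() arithmetic above; the granularity must be a power of two for the mask to work, and the numbers below are invented.

#include <stdio.h>

int main(void)
{
	unsigned int granularity = 65536;	/* bytes per allocation block */
	unsigned int alignment = 0;		/* device discard_alignment, bytes */
	unsigned long long start_sect = 63;	/* classic DOS partition start */

	unsigned int part_alignment =
		((start_sect << 9) - alignment) & (granularity - 1);

	printf("partition discard alignment: %u bytes\n", part_alignment);
	return 0;
}
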
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 297df45ffd0ad27e8219aaf3715a00d030f5bf54..c6c0c41af35fda29c5f9e1429bd3db4784780bf8 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -91,6 +91,7 @@ struct hd_struct {
        sector_t start_sect;
        sector_t nr_sects;
        sector_t alignment_offset;
+       unsigned int discard_alignment;
        struct device __dev;
        struct kobject *holder_dir;
        int policy, partno;