dax: Arrange for dax_supported check to span multiple devices
author Dan Williams <dan.j.williams@intel.com>
Thu, 16 May 2019 20:26:29 +0000 (13:26 -0700)
committer Dan Williams <dan.j.williams@intel.com>
Mon, 20 May 2019 22:02:08 +0000 (15:02 -0700)
Pankaj reports that starting with commit ad428cdb525a "dax: Check the
end of the block-device capacity with dax_direct_access()" device-mapper
no longer allows dax operation. This results from the stricter checks in
__bdev_dax_supported() that validate that the start and end of a
block-device map to the same 'pagemap' instance.

Teach the dax-core and device-mapper to validate the 'pagemap' on a
per-target basis. This is accomplished by refactoring the
bdev_dax_supported() internals into generic_fsdax_supported() which
takes a sector range to validate. Consequently generic_fsdax_supported()
is suitable to be used in a device-mapper ->iterate_devices() callback.
A new ->dax_supported() operation is added to allow composite devices to
split and route upper-level bdev_dax_supported() requests.

Fixes: ad428cdb525a ("dax: Check the end of the block-device...")
Cc: <stable@vger.kernel.org>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Keith Busch <keith.busch@intel.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reported-by: Pankaj Gupta <pagupta@redhat.com>
Reviewed-by: Pankaj Gupta <pagupta@redhat.com>
Tested-by: Pankaj Gupta <pagupta@redhat.com>
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
drivers/dax/super.c
drivers/md/dm-table.c
drivers/md/dm.c
drivers/md/dm.h
drivers/nvdimm/pmem.c
drivers/s390/block/dcssblk.c
include/linux/dax.h

index bbd57ca0634a1314e79a4d762e1e1e68b99ff3d2..3a7b0a0bf469d63663fd5fbdd2dc16ad138d9b5a 100644 (file)
@@ -73,22 +73,12 @@ struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
 EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev);
 #endif
 
-/**
- * __bdev_dax_supported() - Check if the device supports dax for filesystem
- * @bdev: block device to check
- * @blocksize: The block size of the device
- *
- * This is a library function for filesystems to check if the block device
- * can be mounted with dax option.
- *
- * Return: true if supported, false if unsupported
- */
-bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
+bool __generic_fsdax_supported(struct dax_device *dax_dev,
+               struct block_device *bdev, int blocksize, sector_t start,
+               sector_t sectors)
 {
-       struct dax_device *dax_dev;
        bool dax_enabled = false;
        pgoff_t pgoff, pgoff_end;
-       struct request_queue *q;
        char buf[BDEVNAME_SIZE];
        void *kaddr, *end_kaddr;
        pfn_t pfn, end_pfn;
@@ -102,21 +92,14 @@ bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
                return false;
        }
 
-       q = bdev_get_queue(bdev);
-       if (!q || !blk_queue_dax(q)) {
-               pr_debug("%s: error: request queue doesn't support dax\n",
-                               bdevname(bdev, buf));
-               return false;
-       }
-
-       err = bdev_dax_pgoff(bdev, 0, PAGE_SIZE, &pgoff);
+       err = bdev_dax_pgoff(bdev, start, PAGE_SIZE, &pgoff);
        if (err) {
                pr_debug("%s: error: unaligned partition for dax\n",
                                bdevname(bdev, buf));
                return false;
        }
 
-       last_page = PFN_DOWN(i_size_read(bdev->bd_inode) - 1) * 8;
+       last_page = PFN_DOWN((start + sectors - 1) * 512) * PAGE_SIZE / 512;
        err = bdev_dax_pgoff(bdev, last_page, PAGE_SIZE, &pgoff_end);
        if (err) {
                pr_debug("%s: error: unaligned partition for dax\n",
@@ -124,20 +107,11 @@ bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
                return false;
        }
 
-       dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
-       if (!dax_dev) {
-               pr_debug("%s: error: device does not support dax\n",
-                               bdevname(bdev, buf));
-               return false;
-       }
-
        id = dax_read_lock();
        len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn);
        len2 = dax_direct_access(dax_dev, pgoff_end, 1, &end_kaddr, &end_pfn);
        dax_read_unlock(id);
 
-       put_dax(dax_dev);
-
        if (len < 1 || len2 < 1) {
                pr_debug("%s: error: dax access failed (%ld)\n",
                                bdevname(bdev, buf), len < 1 ? len : len2);
@@ -178,6 +152,49 @@ bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
        }
        return true;
 }
+EXPORT_SYMBOL_GPL(__generic_fsdax_supported);
+
+/**
+ * __bdev_dax_supported() - Check if the device supports dax for filesystem
+ * @bdev: block device to check
+ * @blocksize: The block size of the device
+ *
+ * This is a library function for filesystems to check if the block device
+ * can be mounted with dax option.
+ *
+ * Return: true if supported, false if unsupported
+ */
+bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
+{
+       struct dax_device *dax_dev;
+       struct request_queue *q;
+       char buf[BDEVNAME_SIZE];
+       bool ret;
+       int id;
+
+       q = bdev_get_queue(bdev);
+       if (!q || !blk_queue_dax(q)) {
+               pr_debug("%s: error: request queue doesn't support dax\n",
+                               bdevname(bdev, buf));
+               return false;
+       }
+
+       dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
+       if (!dax_dev) {
+               pr_debug("%s: error: device does not support dax\n",
+                               bdevname(bdev, buf));
+               return false;
+       }
+
+       id = dax_read_lock();
+       ret = dax_supported(dax_dev, bdev, blocksize, 0,
+                       i_size_read(bdev->bd_inode) / 512);
+       dax_read_unlock(id);
+
+       put_dax(dax_dev);
+
+       return ret;
+}
 EXPORT_SYMBOL_GPL(__bdev_dax_supported);
 #endif
 
@@ -303,6 +320,15 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
 }
 EXPORT_SYMBOL_GPL(dax_direct_access);
 
+bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
+               int blocksize, sector_t start, sector_t len)
+{
+       if (!dax_alive(dax_dev))
+               return false;
+
+       return dax_dev->ops->dax_supported(dax_dev, bdev, blocksize, start, len);
+}
+
 size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
                size_t bytes, struct iov_iter *i)
 {
index cde3b49b2a9107abafd76d190c9fc61209141f7b..350cf045145628cc37ca0dfef3b1a3d1dabe7dc7 100644 (file)
@@ -880,13 +880,17 @@ void dm_table_set_type(struct dm_table *t, enum dm_queue_mode type)
 }
 EXPORT_SYMBOL_GPL(dm_table_set_type);
 
+/* validate the dax capability of the target device span */
 static int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
-                              sector_t start, sector_t len, void *data)
+                                      sector_t start, sector_t len, void *data)
 {
-       return bdev_dax_supported(dev->bdev, PAGE_SIZE);
+       int blocksize = *(int *) data;
+
+       return generic_fsdax_supported(dev->dax_dev, dev->bdev, blocksize,
+                       start, len);
 }
 
-static bool dm_table_supports_dax(struct dm_table *t)
+bool dm_table_supports_dax(struct dm_table *t, int blocksize)
 {
        struct dm_target *ti;
        unsigned i;
@@ -899,7 +903,8 @@ static bool dm_table_supports_dax(struct dm_table *t)
                        return false;
 
                if (!ti->type->iterate_devices ||
-                   !ti->type->iterate_devices(ti, device_supports_dax, NULL))
+                   !ti->type->iterate_devices(ti, device_supports_dax,
+                           &blocksize))
                        return false;
        }
 
@@ -979,7 +984,7 @@ static int dm_table_determine_type(struct dm_table *t)
 verify_bio_based:
                /* We must use this table as bio-based */
                t->type = DM_TYPE_BIO_BASED;
-               if (dm_table_supports_dax(t) ||
+               if (dm_table_supports_dax(t, PAGE_SIZE) ||
                    (list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) {
                        t->type = DM_TYPE_DAX_BIO_BASED;
                } else {
@@ -1905,7 +1910,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
        }
        blk_queue_write_cache(q, wc, fua);
 
-       if (dm_table_supports_dax(t))
+       if (dm_table_supports_dax(t, PAGE_SIZE))
                blk_queue_flag_set(QUEUE_FLAG_DAX, q);
        else
                blk_queue_flag_clear(QUEUE_FLAG_DAX, q);
index 1fb1333fefec12b881ec5e32f0a12bad8af6108c..b7c0ad01084dddd7073f3e67cf9ee53c7a25faf5 100644 (file)
@@ -1107,6 +1107,25 @@ static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
        return ret;
 }
 
+static bool dm_dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
+               int blocksize, sector_t start, sector_t len)
+{
+       struct mapped_device *md = dax_get_private(dax_dev);
+       struct dm_table *map;
+       int srcu_idx;
+       bool ret;
+
+       map = dm_get_live_table(md, &srcu_idx);
+       if (!map)
+               return false;
+
+       ret = dm_table_supports_dax(map, blocksize);
+
+       dm_put_live_table(md, srcu_idx);
+
+       return ret;
+}
+
 static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
                                    void *addr, size_t bytes, struct iov_iter *i)
 {
@@ -3192,6 +3211,7 @@ static const struct block_device_operations dm_blk_dops = {
 
 static const struct dax_operations dm_dax_ops = {
        .direct_access = dm_dax_direct_access,
+       .dax_supported = dm_dax_supported,
        .copy_from_iter = dm_dax_copy_from_iter,
        .copy_to_iter = dm_dax_copy_to_iter,
 };
index 2d539b82ec08ff2d4f2cfd7479989d4fc9ac3aee..17e3db54404ca792c13f4d2e2b4db011ebe5a17d 100644 (file)
@@ -72,6 +72,7 @@ bool dm_table_bio_based(struct dm_table *t);
 bool dm_table_request_based(struct dm_table *t);
 void dm_table_free_md_mempools(struct dm_table *t);
 struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
+bool dm_table_supports_dax(struct dm_table *t, int blocksize);
 
 void dm_lock_md_type(struct mapped_device *md);
 void dm_unlock_md_type(struct mapped_device *md);
index 0279eb1da3ef5ae40c5ab80ef6940732dca03bf0..845c5b430cdd6446f4b20806a01a9452f9ecd3b7 100644 (file)
@@ -295,6 +295,7 @@ static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
 
 static const struct dax_operations pmem_dax_ops = {
        .direct_access = pmem_dax_direct_access,
+       .dax_supported = generic_fsdax_supported,
        .copy_from_iter = pmem_copy_from_iter,
        .copy_to_iter = pmem_copy_to_iter,
 };
index 4e8aedd50cb0d6ecf55f67d1071a8b39004f5c9a..d04d4378ca50fe4ca18712217441697dfa408ae6 100644 (file)
@@ -59,6 +59,7 @@ static size_t dcssblk_dax_copy_to_iter(struct dax_device *dax_dev,
 
 static const struct dax_operations dcssblk_dax_ops = {
        .direct_access = dcssblk_dax_direct_access,
+       .dax_supported = generic_fsdax_supported,
        .copy_from_iter = dcssblk_dax_copy_from_iter,
        .copy_to_iter = dcssblk_dax_copy_to_iter,
 };
index 0dd316a74a295132ea6b6c04f914356c5c4064d6..becaea5f4488880d54d7eb9a51716fba5d58d3f2 100644 (file)
@@ -19,6 +19,12 @@ struct dax_operations {
         */
        long (*direct_access)(struct dax_device *, pgoff_t, long,
                        void **, pfn_t *);
+       /*
+        * Validate whether this device is usable as an fsdax backing
+        * device.
+        */
+       bool (*dax_supported)(struct dax_device *, struct block_device *, int,
+                       sector_t, sector_t);
        /* copy_from_iter: required operation for fs-dax direct-i/o */
        size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t,
                        struct iov_iter *);
@@ -75,6 +81,17 @@ static inline bool bdev_dax_supported(struct block_device *bdev, int blocksize)
        return __bdev_dax_supported(bdev, blocksize);
 }
 
+bool __generic_fsdax_supported(struct dax_device *dax_dev,
+               struct block_device *bdev, int blocksize, sector_t start,
+               sector_t sectors);
+static inline bool generic_fsdax_supported(struct dax_device *dax_dev,
+               struct block_device *bdev, int blocksize, sector_t start,
+               sector_t sectors)
+{
+       return __generic_fsdax_supported(dax_dev, bdev, blocksize, start,
+                       sectors);
+}
+
 static inline struct dax_device *fs_dax_get_by_host(const char *host)
 {
        return dax_get_by_host(host);
@@ -99,6 +116,13 @@ static inline bool bdev_dax_supported(struct block_device *bdev,
        return false;
 }
 
+static inline bool generic_fsdax_supported(struct dax_device *dax_dev,
+               struct block_device *bdev, int blocksize, sector_t start,
+               sector_t sectors)
+{
+       return false;
+}
+
 static inline struct dax_device *fs_dax_get_by_host(const char *host)
 {
        return NULL;
@@ -142,6 +166,8 @@ bool dax_alive(struct dax_device *dax_dev);
 void *dax_get_private(struct dax_device *dax_dev);
 long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
                void **kaddr, pfn_t *pfn);
+bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
+               int blocksize, sector_t start, sector_t len);
 size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
                size_t bytes, struct iov_iter *i);
 size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,