Merge tag 'dm-3.19-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device...
authorLinus Torvalds <torvalds@linux-foundation.org>
Mon, 26 Jan 2015 01:25:01 +0000 (17:25 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Mon, 26 Jan 2015 01:25:01 +0000 (17:25 -0800)
Pull device mapper fixes from Mike Snitzer:
 "Two stable fixes for dm-cache and one 3.19 DM core fix:

   - fix potential for dm-cache metadata corruption via stale metadata
     buffers being used when switching an inactive cache table to
     active; this could occur due to each table having it's own bufio
     client rather than sharing the client between tables.

   - fix dm-cache target to properly account for discard IO while
     suspending otherwise IO quiescing could complete prematurely.

   - fix DM core's handling of multiple internal suspends by maintaining
     an 'internal_suspend_count' and only resuming the device when this
     count drops to zero"

* tag 'dm-3.19-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm: fix handling of multiple internal suspends
  dm cache: fix problematic dual use of a single migration count variable
  dm cache: share cache-metadata object across inactive and active DM tables

drivers/md/dm-cache-metadata.c
drivers/md/dm-cache-target.c
drivers/md/dm.c

index 9fc616c2755ed752a50930e1095df8a7a34303f2..21b156242e42668b5f8a1b43910374fe2c7a77f6 100644 (file)
@@ -94,6 +94,9 @@ struct cache_disk_superblock {
 } __packed;
 
 struct dm_cache_metadata {
+       atomic_t ref_count;
+       struct list_head list;
+
        struct block_device *bdev;
        struct dm_block_manager *bm;
        struct dm_space_map *metadata_sm;
@@ -669,10 +672,10 @@ static void unpack_value(__le64 value_le, dm_oblock_t *block, unsigned *flags)
 
 /*----------------------------------------------------------------*/
 
-struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev,
-                                                sector_t data_block_size,
-                                                bool may_format_device,
-                                                size_t policy_hint_size)
+static struct dm_cache_metadata *metadata_open(struct block_device *bdev,
+                                              sector_t data_block_size,
+                                              bool may_format_device,
+                                              size_t policy_hint_size)
 {
        int r;
        struct dm_cache_metadata *cmd;
@@ -683,6 +686,7 @@ struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev,
                return NULL;
        }
 
+       atomic_set(&cmd->ref_count, 1);
        init_rwsem(&cmd->root_lock);
        cmd->bdev = bdev;
        cmd->data_block_size = data_block_size;
@@ -705,10 +709,95 @@ struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev,
        return cmd;
 }
 
+/*
+ * We keep a little list of ref counted metadata objects to prevent two
+ * different target instances creating separate bufio instances.  This is
+ * an issue if a table is reloaded before the suspend.
+ */
+static DEFINE_MUTEX(table_lock);
+static LIST_HEAD(table);
+
+static struct dm_cache_metadata *lookup(struct block_device *bdev)
+{
+       struct dm_cache_metadata *cmd;
+
+       list_for_each_entry(cmd, &table, list)
+               if (cmd->bdev == bdev) {
+                       atomic_inc(&cmd->ref_count);
+                       return cmd;
+               }
+
+       return NULL;
+}
+
+static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev,
+                                               sector_t data_block_size,
+                                               bool may_format_device,
+                                               size_t policy_hint_size)
+{
+       struct dm_cache_metadata *cmd, *cmd2;
+
+       mutex_lock(&table_lock);
+       cmd = lookup(bdev);
+       mutex_unlock(&table_lock);
+
+       if (cmd)
+               return cmd;
+
+       cmd = metadata_open(bdev, data_block_size, may_format_device, policy_hint_size);
+       if (cmd) {
+               mutex_lock(&table_lock);
+               cmd2 = lookup(bdev);
+               if (cmd2) {
+                       mutex_unlock(&table_lock);
+                       __destroy_persistent_data_objects(cmd);
+                       kfree(cmd);
+                       return cmd2;
+               }
+               list_add(&cmd->list, &table);
+               mutex_unlock(&table_lock);
+       }
+
+       return cmd;
+}
+
+static bool same_params(struct dm_cache_metadata *cmd, sector_t data_block_size)
+{
+       if (cmd->data_block_size != data_block_size) {
+               DMERR("data_block_size (%llu) different from that in metadata (%llu)\n",
+                     (unsigned long long) data_block_size,
+                     (unsigned long long) cmd->data_block_size);
+               return false;
+       }
+
+       return true;
+}
+
+struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev,
+                                                sector_t data_block_size,
+                                                bool may_format_device,
+                                                size_t policy_hint_size)
+{
+       struct dm_cache_metadata *cmd = lookup_or_open(bdev, data_block_size,
+                                                      may_format_device, policy_hint_size);
+       if (cmd && !same_params(cmd, data_block_size)) {
+               dm_cache_metadata_close(cmd);
+               return NULL;
+       }
+
+       return cmd;
+}
+
 void dm_cache_metadata_close(struct dm_cache_metadata *cmd)
 {
-       __destroy_persistent_data_objects(cmd);
-       kfree(cmd);
+       if (atomic_dec_and_test(&cmd->ref_count)) {
+               mutex_lock(&table_lock);
+               list_del(&cmd->list);
+               mutex_unlock(&table_lock);
+
+               __destroy_persistent_data_objects(cmd);
+               kfree(cmd);
+       }
 }
 
 /*
index 1e96d7889f51eaa08b7d65b04c1a43e063931708..e1650539cc2f826d9efe7f878352570bcc31e101 100644 (file)
@@ -221,7 +221,13 @@ struct cache {
        struct list_head need_commit_migrations;
        sector_t migration_threshold;
        wait_queue_head_t migration_wait;
-       atomic_t nr_migrations;
+       atomic_t nr_allocated_migrations;
+
+       /*
+        * The number of in flight migrations that are performing
+        * background io. eg, promotion, writeback.
+        */
+       atomic_t nr_io_migrations;
 
        wait_queue_head_t quiescing_wait;
        atomic_t quiescing;
@@ -258,7 +264,6 @@ struct cache {
        struct dm_deferred_set *all_io_ds;
 
        mempool_t *migration_pool;
-       struct dm_cache_migration *next_migration;
 
        struct dm_cache_policy *policy;
        unsigned policy_nr_args;
@@ -350,10 +355,31 @@ static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell *cel
        dm_bio_prison_free_cell(cache->prison, cell);
 }
 
+static struct dm_cache_migration *alloc_migration(struct cache *cache)
+{
+       struct dm_cache_migration *mg;
+
+       mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT);
+       if (mg) {
+               mg->cache = cache;
+               atomic_inc(&mg->cache->nr_allocated_migrations);
+       }
+
+       return mg;
+}
+
+static void free_migration(struct dm_cache_migration *mg)
+{
+       if (atomic_dec_and_test(&mg->cache->nr_allocated_migrations))
+               wake_up(&mg->cache->migration_wait);
+
+       mempool_free(mg, mg->cache->migration_pool);
+}
+
 static int prealloc_data_structs(struct cache *cache, struct prealloc *p)
 {
        if (!p->mg) {
-               p->mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT);
+               p->mg = alloc_migration(cache);
                if (!p->mg)
                        return -ENOMEM;
        }
@@ -382,7 +408,7 @@ static void prealloc_free_structs(struct cache *cache, struct prealloc *p)
                free_prison_cell(cache, p->cell1);
 
        if (p->mg)
-               mempool_free(p->mg, cache->migration_pool);
+               free_migration(p->mg);
 }
 
 static struct dm_cache_migration *prealloc_get_migration(struct prealloc *p)
@@ -854,24 +880,14 @@ static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio,
  * Migration covers moving data from the origin device to the cache, or
  * vice versa.
  *--------------------------------------------------------------*/
-static void free_migration(struct dm_cache_migration *mg)
-{
-       mempool_free(mg, mg->cache->migration_pool);
-}
-
-static void inc_nr_migrations(struct cache *cache)
+static void inc_io_migrations(struct cache *cache)
 {
-       atomic_inc(&cache->nr_migrations);
+       atomic_inc(&cache->nr_io_migrations);
 }
 
-static void dec_nr_migrations(struct cache *cache)
+static void dec_io_migrations(struct cache *cache)
 {
-       atomic_dec(&cache->nr_migrations);
-
-       /*
-        * Wake the worker in case we're suspending the target.
-        */
-       wake_up(&cache->migration_wait);
+       atomic_dec(&cache->nr_io_migrations);
 }
 
 static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell,
@@ -894,11 +910,10 @@ static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell,
        wake_worker(cache);
 }
 
-static void cleanup_migration(struct dm_cache_migration *mg)
+static void free_io_migration(struct dm_cache_migration *mg)
 {
-       struct cache *cache = mg->cache;
+       dec_io_migrations(mg->cache);
        free_migration(mg);
-       dec_nr_migrations(cache);
 }
 
 static void migration_failure(struct dm_cache_migration *mg)
@@ -923,7 +938,7 @@ static void migration_failure(struct dm_cache_migration *mg)
                cell_defer(cache, mg->new_ocell, true);
        }
 
-       cleanup_migration(mg);
+       free_io_migration(mg);
 }
 
 static void migration_success_pre_commit(struct dm_cache_migration *mg)
@@ -934,7 +949,7 @@ static void migration_success_pre_commit(struct dm_cache_migration *mg)
        if (mg->writeback) {
                clear_dirty(cache, mg->old_oblock, mg->cblock);
                cell_defer(cache, mg->old_ocell, false);
-               cleanup_migration(mg);
+               free_io_migration(mg);
                return;
 
        } else if (mg->demote) {
@@ -944,14 +959,14 @@ static void migration_success_pre_commit(struct dm_cache_migration *mg)
                                             mg->old_oblock);
                        if (mg->promote)
                                cell_defer(cache, mg->new_ocell, true);
-                       cleanup_migration(mg);
+                       free_io_migration(mg);
                        return;
                }
        } else {
                if (dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock)) {
                        DMWARN_LIMIT("promotion failed; couldn't update on disk metadata");
                        policy_remove_mapping(cache->policy, mg->new_oblock);
-                       cleanup_migration(mg);
+                       free_io_migration(mg);
                        return;
                }
        }
@@ -984,7 +999,7 @@ static void migration_success_post_commit(struct dm_cache_migration *mg)
                } else {
                        if (mg->invalidate)
                                policy_remove_mapping(cache->policy, mg->old_oblock);
-                       cleanup_migration(mg);
+                       free_io_migration(mg);
                }
 
        } else {
@@ -999,7 +1014,7 @@ static void migration_success_post_commit(struct dm_cache_migration *mg)
                        bio_endio(mg->new_ocell->holder, 0);
                        cell_defer(cache, mg->new_ocell, false);
                }
-               cleanup_migration(mg);
+               free_io_migration(mg);
        }
 }
 
@@ -1251,7 +1266,7 @@ static void promote(struct cache *cache, struct prealloc *structs,
        mg->new_ocell = cell;
        mg->start_jiffies = jiffies;
 
-       inc_nr_migrations(cache);
+       inc_io_migrations(cache);
        quiesce_migration(mg);
 }
 
@@ -1275,7 +1290,7 @@ static void writeback(struct cache *cache, struct prealloc *structs,
        mg->new_ocell = NULL;
        mg->start_jiffies = jiffies;
 
-       inc_nr_migrations(cache);
+       inc_io_migrations(cache);
        quiesce_migration(mg);
 }
 
@@ -1302,7 +1317,7 @@ static void demote_then_promote(struct cache *cache, struct prealloc *structs,
        mg->new_ocell = new_ocell;
        mg->start_jiffies = jiffies;
 
-       inc_nr_migrations(cache);
+       inc_io_migrations(cache);
        quiesce_migration(mg);
 }
 
@@ -1330,7 +1345,7 @@ static void invalidate(struct cache *cache, struct prealloc *structs,
        mg->new_ocell = NULL;
        mg->start_jiffies = jiffies;
 
-       inc_nr_migrations(cache);
+       inc_io_migrations(cache);
        quiesce_migration(mg);
 }
 
@@ -1412,7 +1427,7 @@ static void process_discard_bio(struct cache *cache, struct prealloc *structs,
 
 static bool spare_migration_bandwidth(struct cache *cache)
 {
-       sector_t current_volume = (atomic_read(&cache->nr_migrations) + 1) *
+       sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) *
                cache->sectors_per_block;
        return current_volume < cache->migration_threshold;
 }
@@ -1764,7 +1779,7 @@ static void stop_quiescing(struct cache *cache)
 
 static void wait_for_migrations(struct cache *cache)
 {
-       wait_event(cache->migration_wait, !atomic_read(&cache->nr_migrations));
+       wait_event(cache->migration_wait, !atomic_read(&cache->nr_allocated_migrations));
 }
 
 static void stop_worker(struct cache *cache)
@@ -1876,9 +1891,6 @@ static void destroy(struct cache *cache)
 {
        unsigned i;
 
-       if (cache->next_migration)
-               mempool_free(cache->next_migration, cache->migration_pool);
-
        if (cache->migration_pool)
                mempool_destroy(cache->migration_pool);
 
@@ -2424,7 +2436,8 @@ static int cache_create(struct cache_args *ca, struct cache **result)
        INIT_LIST_HEAD(&cache->quiesced_migrations);
        INIT_LIST_HEAD(&cache->completed_migrations);
        INIT_LIST_HEAD(&cache->need_commit_migrations);
-       atomic_set(&cache->nr_migrations, 0);
+       atomic_set(&cache->nr_allocated_migrations, 0);
+       atomic_set(&cache->nr_io_migrations, 0);
        init_waitqueue_head(&cache->migration_wait);
 
        init_waitqueue_head(&cache->quiescing_wait);
@@ -2487,8 +2500,6 @@ static int cache_create(struct cache_args *ca, struct cache **result)
                goto bad;
        }
 
-       cache->next_migration = NULL;
-
        cache->need_tick_bio = true;
        cache->sized = false;
        cache->invalidate = false;
index b98cd9d84435fe15ea1cb202850508f83b83204b..2caf5b374649afaecff37a5ab5153d7ccc5ec437 100644 (file)
@@ -206,6 +206,9 @@ struct mapped_device {
        /* zero-length flush that will be cloned and submitted to targets */
        struct bio flush_bio;
 
+       /* the number of internal suspends */
+       unsigned internal_suspend_count;
+
        struct dm_stats stats;
 };
 
@@ -2928,7 +2931,7 @@ static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_fla
 {
        struct dm_table *map = NULL;
 
-       if (dm_suspended_internally_md(md))
+       if (md->internal_suspend_count++)
                return; /* nested internal suspend */
 
        if (dm_suspended_md(md)) {
@@ -2953,7 +2956,9 @@ static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_fla
 
 static void __dm_internal_resume(struct mapped_device *md)
 {
-       if (!dm_suspended_internally_md(md))
+       BUG_ON(!md->internal_suspend_count);
+
+       if (--md->internal_suspend_count)
                return; /* resume from nested internal suspend */
 
        if (dm_suspended_md(md))