Merge branch 'for-3.20/core' into for-3.20/drivers
author Jens Axboe <axboe@fb.com>
Fri, 23 Jan 2015 21:18:49 +0000 (14:18 -0700)
committer Jens Axboe <axboe@fb.com>
Fri, 23 Jan 2015 21:18:49 +0000 (14:18 -0700)
We need the tagging changes for the libata conversion.

arch/powerpc/sysdev/axonram.c
drivers/block/brd.c
drivers/block/loop.c
drivers/block/loop.h
drivers/block/null_blk.c
drivers/block/nvme-core.c

index 20f8afe855d10fd0bdf2ce5797b5cf6946bf0cf2..ee90db17b0972af4ba3c085cc770786b68f51559 100644 (file)
@@ -147,7 +147,7 @@ axon_ram_direct_access(struct block_device *device, sector_t sector,
        loff_t offset = (loff_t)sector << AXON_RAM_SECTOR_SHIFT;
 
        *kaddr = (void *)(bank->ph_addr + offset);
-       *pfn = virt_to_phys(kaddr) >> PAGE_SHIFT;
+       *pfn = virt_to_phys(*kaddr) >> PAGE_SHIFT;
 
        return bank->size - offset;
 }
index 89e90ec52f28f25b06421ec4a66f06fb7767751f..c01b921b1b4a8a492188dee63c9ace3665aa2498 100644 (file)
@@ -438,19 +438,18 @@ static const struct block_device_operations brd_fops = {
 /*
  * And now the modules code and kernel interface.
  */
-static int rd_nr;
-int rd_size = CONFIG_BLK_DEV_RAM_SIZE;
-static int max_part;
-static int part_shift;
-static int part_show = 0;
+static int rd_nr = CONFIG_BLK_DEV_RAM_COUNT;
 module_param(rd_nr, int, S_IRUGO);
 MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices");
+
+int rd_size = CONFIG_BLK_DEV_RAM_SIZE;
 module_param(rd_size, int, S_IRUGO);
 MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes.");
+
+static int max_part = 1;
 module_param(max_part, int, S_IRUGO);
-MODULE_PARM_DESC(max_part, "Maximum number of partitions per RAM disk");
-module_param(part_show, int, S_IRUGO);
-MODULE_PARM_DESC(part_show, "Control RAM disk visibility in /proc/partitions");
+MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices");
+
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR);
 MODULE_ALIAS("rd");
@@ -487,25 +486,33 @@ static struct brd_device *brd_alloc(int i)
        brd->brd_queue = blk_alloc_queue(GFP_KERNEL);
        if (!brd->brd_queue)
                goto out_free_dev;
+
        blk_queue_make_request(brd->brd_queue, brd_make_request);
        blk_queue_max_hw_sectors(brd->brd_queue, 1024);
        blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY);
 
+       /* This is so fdisk will align partitions on 4k, because of
+        * direct_access API needing 4k alignment, returning a PFN
+        * (This is only a problem on very small devices <= 4M,
+        *  otherwise fdisk will align on 1M. Regardless this call
+        *  is harmless)
+        */
+       blk_queue_physical_block_size(brd->brd_queue, PAGE_SIZE);
+
        brd->brd_queue->limits.discard_granularity = PAGE_SIZE;
        brd->brd_queue->limits.max_discard_sectors = UINT_MAX;
        brd->brd_queue->limits.discard_zeroes_data = 1;
        queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, brd->brd_queue);
 
-       disk = brd->brd_disk = alloc_disk(1 << part_shift);
+       disk = brd->brd_disk = alloc_disk(max_part);
        if (!disk)
                goto out_free_queue;
        disk->major             = RAMDISK_MAJOR;
-       disk->first_minor       = i << part_shift;
+       disk->first_minor       = i * max_part;
        disk->fops              = &brd_fops;
        disk->private_data      = brd;
        disk->queue             = brd->brd_queue;
-       if (!part_show)
-               disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
+       disk->flags             = GENHD_FL_EXT_DEVT;
        sprintf(disk->disk_name, "ram%d", i);
        set_capacity(disk, rd_size * 2);
 
@@ -527,10 +534,11 @@ static void brd_free(struct brd_device *brd)
        kfree(brd);
 }
 
-static struct brd_device *brd_init_one(int i)
+static struct brd_device *brd_init_one(int i, bool *new)
 {
        struct brd_device *brd;
 
+       *new = false;
        list_for_each_entry(brd, &brd_devices, brd_list) {
                if (brd->brd_number == i)
                        goto out;
@@ -541,6 +549,7 @@ static struct brd_device *brd_init_one(int i)
                add_disk(brd->brd_disk);
                list_add_tail(&brd->brd_list, &brd_devices);
        }
+       *new = true;
 out:
        return brd;
 }
@@ -556,70 +565,46 @@ static struct kobject *brd_probe(dev_t dev, int *part, void *data)
 {
        struct brd_device *brd;
        struct kobject *kobj;
+       bool new;
 
        mutex_lock(&brd_devices_mutex);
-       brd = brd_init_one(MINOR(dev) >> part_shift);
+       brd = brd_init_one(MINOR(dev) / max_part, &new);
        kobj = brd ? get_disk(brd->brd_disk) : NULL;
        mutex_unlock(&brd_devices_mutex);
 
-       *part = 0;
+       if (new)
+               *part = 0;
+
        return kobj;
 }
 
 static int __init brd_init(void)
 {
-       int i, nr;
-       unsigned long range;
        struct brd_device *brd, *next;
+       int i;
 
        /*
         * brd module now has a feature to instantiate underlying device
         * structure on-demand, provided that there is an access dev node.
-        * However, this will not work well with user space tool that doesn't
-        * know about such "feature".  In order to not break any existing
-        * tool, we do the following:
         *
-        * (1) if rd_nr is specified, create that many upfront, and this
-        *     also becomes a hard limit.
-        * (2) if rd_nr is not specified, create CONFIG_BLK_DEV_RAM_COUNT
-        *     (default 16) rd device on module load, user can further
-        *     extend brd device by create dev node themselves and have
-        *     kernel automatically instantiate actual device on-demand.
+        * (1) if rd_nr is specified, create that many upfront. else
+        *     it defaults to CONFIG_BLK_DEV_RAM_COUNT
+        * (2) User can further extend brd devices by create dev node themselves
+        *     and have kernel automatically instantiate actual device
+        *     on-demand. Example:
+        *              mknod /path/devnod_name b 1 X   # 1 is the rd major
+        *              fdisk -l /path/devnod_name
+        *      If (X / max_part) was not already created it will be created
+        *      dynamically.
         */
 
-       part_shift = 0;
-       if (max_part > 0) {
-               part_shift = fls(max_part);
-
-               /*
-                * Adjust max_part according to part_shift as it is exported
-                * to user space so that user can decide correct minor number
-                * if [s]he want to create more devices.
-                *
-                * Note that -1 is required because partition 0 is reserved
-                * for the whole disk.
-                */
-               max_part = (1UL << part_shift) - 1;
-       }
-
-       if ((1UL << part_shift) > DISK_MAX_PARTS)
-               return -EINVAL;
-
-       if (rd_nr > 1UL << (MINORBITS - part_shift))
-               return -EINVAL;
-
-       if (rd_nr) {
-               nr = rd_nr;
-               range = rd_nr << part_shift;
-       } else {
-               nr = CONFIG_BLK_DEV_RAM_COUNT;
-               range = 1UL << MINORBITS;
-       }
-
        if (register_blkdev(RAMDISK_MAJOR, "ramdisk"))
                return -EIO;
 
-       for (i = 0; i < nr; i++) {
+       if (unlikely(!max_part))
+               max_part = 1;
+
+       for (i = 0; i < rd_nr; i++) {
                brd = brd_alloc(i);
                if (!brd)
                        goto out_free;
@@ -631,10 +616,10 @@ static int __init brd_init(void)
        list_for_each_entry(brd, &brd_devices, brd_list)
                add_disk(brd->brd_disk);
 
-       blk_register_region(MKDEV(RAMDISK_MAJOR, 0), range,
+       blk_register_region(MKDEV(RAMDISK_MAJOR, 0), 1UL << MINORBITS,
                                  THIS_MODULE, brd_probe, NULL, NULL);
 
-       printk(KERN_INFO "brd: module loaded\n");
+       pr_info("brd: module loaded\n");
        return 0;
 
 out_free:
@@ -644,21 +629,21 @@ out_free:
        }
        unregister_blkdev(RAMDISK_MAJOR, "ramdisk");
 
+       pr_info("brd: module NOT loaded !!!\n");
        return -ENOMEM;
 }
 
 static void __exit brd_exit(void)
 {
-       unsigned long range;
        struct brd_device *brd, *next;
 
-       range = rd_nr ? rd_nr << part_shift : 1UL << MINORBITS;
-
        list_for_each_entry_safe(brd, next, &brd_devices, brd_list)
                brd_del_one(brd);
 
-       blk_unregister_region(MKDEV(RAMDISK_MAJOR, 0), range);
+       blk_unregister_region(MKDEV(RAMDISK_MAJOR, 0), 1UL << MINORBITS);
        unregister_blkdev(RAMDISK_MAJOR, "ramdisk");
+
+       pr_info("brd: module unloaded\n");
 }
 
 module_init(brd_init);
index 6cb1beb47c25d1d2a7db113ca9f173a9ef8b68d3..d1f168b73634321dadb6c57571c0cd8a9de8304b 100644 (file)
@@ -85,6 +85,8 @@ static DEFINE_MUTEX(loop_index_mutex);
 static int max_part;
 static int part_shift;
 
+static struct workqueue_struct *loop_wq;
+
 /*
  * Transfer functions
  */
@@ -284,12 +286,12 @@ static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec,
        return ret;
 }
 
-static int lo_send(struct loop_device *lo, struct bio *bio, loff_t pos)
+static int lo_send(struct loop_device *lo, struct request *rq, loff_t pos)
 {
        int (*do_lo_send)(struct loop_device *, struct bio_vec *, loff_t,
                        struct page *page);
        struct bio_vec bvec;
-       struct bvec_iter iter;
+       struct req_iterator iter;
        struct page *page = NULL;
        int ret = 0;
 
@@ -303,7 +305,7 @@ static int lo_send(struct loop_device *lo, struct bio *bio, loff_t pos)
                do_lo_send = do_lo_send_direct_write;
        }
 
-       bio_for_each_segment(bvec, bio, iter) {
+       rq_for_each_segment(bvec, rq, iter) {
                ret = do_lo_send(lo, &bvec, pos, page);
                if (ret < 0)
                        break;
@@ -391,19 +393,22 @@ do_lo_receive(struct loop_device *lo,
 }
 
 static int
-lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
+lo_receive(struct loop_device *lo, struct request *rq, int bsize, loff_t pos)
 {
        struct bio_vec bvec;
-       struct bvec_iter iter;
+       struct req_iterator iter;
        ssize_t s;
 
-       bio_for_each_segment(bvec, bio, iter) {
+       rq_for_each_segment(bvec, rq, iter) {
                s = do_lo_receive(lo, &bvec, bsize, pos);
                if (s < 0)
                        return s;
 
                if (s != bvec.bv_len) {
-                       zero_fill_bio(bio);
+                       struct bio *bio;
+
+                       __rq_for_each_bio(bio, rq)
+                               zero_fill_bio(bio);
                        break;
                }
                pos += bvec.bv_len;
@@ -411,106 +416,58 @@ lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
        return 0;
 }
 
-static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
+static int lo_discard(struct loop_device *lo, struct request *rq, loff_t pos)
 {
-       loff_t pos;
+       /*
+        * We use punch hole to reclaim the free space used by the
+        * image a.k.a. discard. However we do not support discard if
+        * encryption is enabled, because it may give an attacker
+        * useful information.
+        */
+       struct file *file = lo->lo_backing_file;
+       int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
        int ret;
 
-       pos = ((loff_t) bio->bi_iter.bi_sector << 9) + lo->lo_offset;
-
-       if (bio_rw(bio) == WRITE) {
-               struct file *file = lo->lo_backing_file;
-
-               if (bio->bi_rw & REQ_FLUSH) {
-                       ret = vfs_fsync(file, 0);
-                       if (unlikely(ret && ret != -EINVAL)) {
-                               ret = -EIO;
-                               goto out;
-                       }
-               }
-
-               /*
-                * We use punch hole to reclaim the free space used by the
-                * image a.k.a. discard. However we do not support discard if
-                * encryption is enabled, because it may give an attacker
-                * useful information.
-                */
-               if (bio->bi_rw & REQ_DISCARD) {
-                       struct file *file = lo->lo_backing_file;
-                       int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
-
-                       if ((!file->f_op->fallocate) ||
-                           lo->lo_encrypt_key_size) {
-                               ret = -EOPNOTSUPP;
-                               goto out;
-                       }
-                       ret = file->f_op->fallocate(file, mode, pos,
-                                                   bio->bi_iter.bi_size);
-                       if (unlikely(ret && ret != -EINVAL &&
-                                    ret != -EOPNOTSUPP))
-                               ret = -EIO;
-                       goto out;
-               }
-
-               ret = lo_send(lo, bio, pos);
-
-               if ((bio->bi_rw & REQ_FUA) && !ret) {
-                       ret = vfs_fsync(file, 0);
-                       if (unlikely(ret && ret != -EINVAL))
-                               ret = -EIO;
-               }
-       } else
-               ret = lo_receive(lo, bio, lo->lo_blocksize, pos);
+       if ((!file->f_op->fallocate) || lo->lo_encrypt_key_size) {
+               ret = -EOPNOTSUPP;
+               goto out;
+       }
 
-out:
+       ret = file->f_op->fallocate(file, mode, pos, blk_rq_bytes(rq));
+       if (unlikely(ret && ret != -EINVAL && ret != -EOPNOTSUPP))
+               ret = -EIO;
+ out:
        return ret;
 }
 
-/*
- * Add bio to back of pending list
- */
-static void loop_add_bio(struct loop_device *lo, struct bio *bio)
+static int lo_req_flush(struct loop_device *lo, struct request *rq)
 {
-       lo->lo_bio_count++;
-       bio_list_add(&lo->lo_bio_list, bio);
-}
+       struct file *file = lo->lo_backing_file;
+       int ret = vfs_fsync(file, 0);
+       if (unlikely(ret && ret != -EINVAL))
+               ret = -EIO;
 
-/*
- * Grab first pending buffer
- */
-static struct bio *loop_get_bio(struct loop_device *lo)
-{
-       lo->lo_bio_count--;
-       return bio_list_pop(&lo->lo_bio_list);
+       return ret;
 }
 
-static void loop_make_request(struct request_queue *q, struct bio *old_bio)
+static int do_req_filebacked(struct loop_device *lo, struct request *rq)
 {
-       struct loop_device *lo = q->queuedata;
-       int rw = bio_rw(old_bio);
-
-       if (rw == READA)
-               rw = READ;
+       loff_t pos;
+       int ret;
 
-       BUG_ON(!lo || (rw != READ && rw != WRITE));
+       pos = ((loff_t) blk_rq_pos(rq) << 9) + lo->lo_offset;
 
-       spin_lock_irq(&lo->lo_lock);
-       if (lo->lo_state != Lo_bound)
-               goto out;
-       if (unlikely(rw == WRITE && (lo->lo_flags & LO_FLAGS_READ_ONLY)))
-               goto out;
-       if (lo->lo_bio_count >= q->nr_congestion_on)
-               wait_event_lock_irq(lo->lo_req_wait,
-                                   lo->lo_bio_count < q->nr_congestion_off,
-                                   lo->lo_lock);
-       loop_add_bio(lo, old_bio);
-       wake_up(&lo->lo_event);
-       spin_unlock_irq(&lo->lo_lock);
-       return;
+       if (rq->cmd_flags & REQ_WRITE) {
+               if (rq->cmd_flags & REQ_FLUSH)
+                       ret = lo_req_flush(lo, rq);
+               else if (rq->cmd_flags & REQ_DISCARD)
+                       ret = lo_discard(lo, rq, pos);
+               else
+                       ret = lo_send(lo, rq, pos);
+       } else
+               ret = lo_receive(lo, rq, lo->lo_blocksize, pos);
 
-out:
-       spin_unlock_irq(&lo->lo_lock);
-       bio_io_error(old_bio);
+       return ret;
 }
 
 struct switch_request {
@@ -518,57 +475,26 @@ struct switch_request {
        struct completion wait;
 };
 
-static void do_loop_switch(struct loop_device *, struct switch_request *);
-
-static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio)
-{
-       if (unlikely(!bio->bi_bdev)) {
-               do_loop_switch(lo, bio->bi_private);
-               bio_put(bio);
-       } else {
-               int ret = do_bio_filebacked(lo, bio);
-               bio_endio(bio, ret);
-       }
-}
-
 /*
- * worker thread that handles reads/writes to file backed loop devices,
- * to avoid blocking in our make_request_fn. it also does loop decrypting
- * on reads for block backed loop, as that is too heavy to do from
- * b_end_io context where irqs may be disabled.
- *
- * Loop explanation:  loop_clr_fd() sets lo_state to Lo_rundown before
- * calling kthread_stop().  Therefore once kthread_should_stop() is
- * true, make_request will not place any more requests.  Therefore
- * once kthread_should_stop() is true and lo_bio is NULL, we are
- * done with the loop.
+ * Do the actual switch; called from the BIO completion routine
  */
-static int loop_thread(void *data)
+static void do_loop_switch(struct loop_device *lo, struct switch_request *p)
 {
-       struct loop_device *lo = data;
-       struct bio *bio;
-
-       set_user_nice(current, MIN_NICE);
-
-       while (!kthread_should_stop() || !bio_list_empty(&lo->lo_bio_list)) {
-
-               wait_event_interruptible(lo->lo_event,
-                               !bio_list_empty(&lo->lo_bio_list) ||
-                               kthread_should_stop());
-
-               if (bio_list_empty(&lo->lo_bio_list))
-                       continue;
-               spin_lock_irq(&lo->lo_lock);
-               bio = loop_get_bio(lo);
-               if (lo->lo_bio_count < lo->lo_queue->nr_congestion_off)
-                       wake_up(&lo->lo_req_wait);
-               spin_unlock_irq(&lo->lo_lock);
+       struct file *file = p->file;
+       struct file *old_file = lo->lo_backing_file;
+       struct address_space *mapping;
 
-               BUG_ON(!bio);
-               loop_handle_bio(lo, bio);
-       }
+       /* if no new file, only flush of queued bios requested */
+       if (!file)
+               return;
 
-       return 0;
+       mapping = file->f_mapping;
+       mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
+       lo->lo_backing_file = file;
+       lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ?
+               mapping->host->i_bdev->bd_block_size : PAGE_SIZE;
+       lo->old_gfp_mask = mapping_gfp_mask(mapping);
+       mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
 }
 
 /*
@@ -579,15 +505,18 @@ static int loop_thread(void *data)
 static int loop_switch(struct loop_device *lo, struct file *file)
 {
        struct switch_request w;
-       struct bio *bio = bio_alloc(GFP_KERNEL, 0);
-       if (!bio)
-               return -ENOMEM;
-       init_completion(&w.wait);
+
        w.file = file;
-       bio->bi_private = &w;
-       bio->bi_bdev = NULL;
-       loop_make_request(lo->lo_queue, bio);
-       wait_for_completion(&w.wait);
+
+       /* freeze queue and wait for completion of scheduled requests */
+       blk_mq_freeze_queue(lo->lo_queue);
+
+       /* do the switch action */
+       do_loop_switch(lo, &w);
+
+       /* unfreeze */
+       blk_mq_unfreeze_queue(lo->lo_queue);
+
        return 0;
 }
 
@@ -596,38 +525,9 @@ static int loop_switch(struct loop_device *lo, struct file *file)
  */
 static int loop_flush(struct loop_device *lo)
 {
-       /* loop not yet configured, no running thread, nothing to flush */
-       if (!lo->lo_thread)
-               return 0;
-
        return loop_switch(lo, NULL);
 }
 
-/*
- * Do the actual switch; called from the BIO completion routine
- */
-static void do_loop_switch(struct loop_device *lo, struct switch_request *p)
-{
-       struct file *file = p->file;
-       struct file *old_file = lo->lo_backing_file;
-       struct address_space *mapping;
-
-       /* if no new file, only flush of queued bios requested */
-       if (!file)
-               goto out;
-
-       mapping = file->f_mapping;
-       mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
-       lo->lo_backing_file = file;
-       lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ?
-               mapping->host->i_bdev->bd_block_size : PAGE_SIZE;
-       lo->old_gfp_mask = mapping_gfp_mask(mapping);
-       mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
-out:
-       complete(&p->wait);
-}
-
-
 /*
  * loop_change_fd switched the backing store of a loopback device to
  * a new file. This is useful for operating system installers to free up
@@ -889,12 +789,9 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
        lo->transfer = transfer_none;
        lo->ioctl = NULL;
        lo->lo_sizelimit = 0;
-       lo->lo_bio_count = 0;
        lo->old_gfp_mask = mapping_gfp_mask(mapping);
        mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
 
-       bio_list_init(&lo->lo_bio_list);
-
        if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
                blk_queue_flush(lo->lo_queue, REQ_FLUSH);
 
@@ -906,14 +803,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 
        set_blocksize(bdev, lo_blocksize);
 
-       lo->lo_thread = kthread_create(loop_thread, lo, "loop%d",
-                                               lo->lo_number);
-       if (IS_ERR(lo->lo_thread)) {
-               error = PTR_ERR(lo->lo_thread);
-               goto out_clr;
-       }
        lo->lo_state = Lo_bound;
-       wake_up_process(lo->lo_thread);
        if (part_shift)
                lo->lo_flags |= LO_FLAGS_PARTSCAN;
        if (lo->lo_flags & LO_FLAGS_PARTSCAN)
@@ -925,18 +815,6 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
        bdgrab(bdev);
        return 0;
 
-out_clr:
-       loop_sysfs_exit(lo);
-       lo->lo_thread = NULL;
-       lo->lo_device = NULL;
-       lo->lo_backing_file = NULL;
-       lo->lo_flags = 0;
-       set_capacity(lo->lo_disk, 0);
-       invalidate_bdev(bdev);
-       bd_set_size(bdev, 0);
-       kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
-       mapping_set_gfp_mask(mapping, lo->old_gfp_mask);
-       lo->lo_state = Lo_unbound;
  out_putf:
        fput(file);
  out:
@@ -1012,11 +890,6 @@ static int loop_clr_fd(struct loop_device *lo)
 
        spin_lock_irq(&lo->lo_lock);
        lo->lo_state = Lo_rundown;
-       spin_unlock_irq(&lo->lo_lock);
-
-       kthread_stop(lo->lo_thread);
-
-       spin_lock_irq(&lo->lo_lock);
        lo->lo_backing_file = NULL;
        spin_unlock_irq(&lo->lo_lock);
 
@@ -1028,7 +901,6 @@ static int loop_clr_fd(struct loop_device *lo)
        lo->lo_offset = 0;
        lo->lo_sizelimit = 0;
        lo->lo_encrypt_key_size = 0;
-       lo->lo_thread = NULL;
        memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
        memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
        memset(lo->lo_file_name, 0, LO_NAME_SIZE);
@@ -1601,6 +1473,105 @@ int loop_unregister_transfer(int number)
 EXPORT_SYMBOL(loop_register_transfer);
 EXPORT_SYMBOL(loop_unregister_transfer);
 
+static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
+               const struct blk_mq_queue_data *bd)
+{
+       struct loop_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
+
+       blk_mq_start_request(bd->rq);
+
+       if (cmd->rq->cmd_flags & REQ_WRITE) {
+               struct loop_device *lo = cmd->rq->q->queuedata;
+               bool need_sched = true;
+
+               spin_lock_irq(&lo->lo_lock);
+               if (lo->write_started)
+                       need_sched = false;
+               else
+                       lo->write_started = true;
+               list_add_tail(&cmd->list, &lo->write_cmd_head);
+               spin_unlock_irq(&lo->lo_lock);
+
+               if (need_sched)
+                       queue_work(loop_wq, &lo->write_work);
+       } else {
+               queue_work(loop_wq, &cmd->read_work);
+       }
+
+       return BLK_MQ_RQ_QUEUE_OK;
+}
+
+static void loop_handle_cmd(struct loop_cmd *cmd)
+{
+       const bool write = cmd->rq->cmd_flags & REQ_WRITE;
+       struct loop_device *lo = cmd->rq->q->queuedata;
+       int ret = -EIO;
+
+       if (lo->lo_state != Lo_bound)
+               goto failed;
+
+       if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY))
+               goto failed;
+
+       ret = do_req_filebacked(lo, cmd->rq);
+
+ failed:
+       if (ret)
+               cmd->rq->errors = -EIO;
+       blk_mq_complete_request(cmd->rq);
+}
+
+static void loop_queue_write_work(struct work_struct *work)
+{
+       struct loop_device *lo =
+               container_of(work, struct loop_device, write_work);
+       LIST_HEAD(cmd_list);
+
+       spin_lock_irq(&lo->lo_lock);
+ repeat:
+       list_splice_init(&lo->write_cmd_head, &cmd_list);
+       spin_unlock_irq(&lo->lo_lock);
+
+       while (!list_empty(&cmd_list)) {
+               struct loop_cmd *cmd = list_first_entry(&cmd_list,
+                               struct loop_cmd, list);
+               list_del_init(&cmd->list);
+               loop_handle_cmd(cmd);
+       }
+
+       spin_lock_irq(&lo->lo_lock);
+       if (!list_empty(&lo->write_cmd_head))
+               goto repeat;
+       lo->write_started = false;
+       spin_unlock_irq(&lo->lo_lock);
+}
+
+static void loop_queue_read_work(struct work_struct *work)
+{
+       struct loop_cmd *cmd =
+               container_of(work, struct loop_cmd, read_work);
+
+       loop_handle_cmd(cmd);
+}
+
+static int loop_init_request(void *data, struct request *rq,
+               unsigned int hctx_idx, unsigned int request_idx,
+               unsigned int numa_node)
+{
+       struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq);
+
+       cmd->rq = rq;
+       INIT_WORK(&cmd->read_work, loop_queue_read_work);
+
+       return 0;
+}
+
+static struct blk_mq_ops loop_mq_ops = {
+       .queue_rq       = loop_queue_rq,
+       .map_queue      = blk_mq_map_queue,
+       .init_request   = loop_init_request,
+};
+
 static int loop_add(struct loop_device **l, int i)
 {
        struct loop_device *lo;
@@ -1627,16 +1598,28 @@ static int loop_add(struct loop_device **l, int i)
        i = err;
 
        err = -ENOMEM;
-       lo->lo_queue = blk_alloc_queue(GFP_KERNEL);
-       if (!lo->lo_queue)
+       lo->tag_set.ops = &loop_mq_ops;
+       lo->tag_set.nr_hw_queues = 1;
+       lo->tag_set.queue_depth = 128;
+       lo->tag_set.numa_node = NUMA_NO_NODE;
+       lo->tag_set.cmd_size = sizeof(struct loop_cmd);
+       lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
+       lo->tag_set.driver_data = lo;
+
+       err = blk_mq_alloc_tag_set(&lo->tag_set);
+       if (err)
                goto out_free_idr;
 
-       /*
-        * set queue make_request_fn
-        */
-       blk_queue_make_request(lo->lo_queue, loop_make_request);
+       lo->lo_queue = blk_mq_init_queue(&lo->tag_set);
+       if (IS_ERR_OR_NULL(lo->lo_queue)) {
+               err = PTR_ERR(lo->lo_queue);
+               goto out_cleanup_tags;
+       }
        lo->lo_queue->queuedata = lo;
 
+       INIT_LIST_HEAD(&lo->write_cmd_head);
+       INIT_WORK(&lo->write_work, loop_queue_write_work);
+
        disk = lo->lo_disk = alloc_disk(1 << part_shift);
        if (!disk)
                goto out_free_queue;
@@ -1664,9 +1647,6 @@ static int loop_add(struct loop_device **l, int i)
        disk->flags |= GENHD_FL_EXT_DEVT;
        mutex_init(&lo->lo_ctl_mutex);
        lo->lo_number           = i;
-       lo->lo_thread           = NULL;
-       init_waitqueue_head(&lo->lo_event);
-       init_waitqueue_head(&lo->lo_req_wait);
        spin_lock_init(&lo->lo_lock);
        disk->major             = LOOP_MAJOR;
        disk->first_minor       = i << part_shift;
@@ -1680,6 +1660,8 @@ static int loop_add(struct loop_device **l, int i)
 
 out_free_queue:
        blk_cleanup_queue(lo->lo_queue);
+out_cleanup_tags:
+       blk_mq_free_tag_set(&lo->tag_set);
 out_free_idr:
        idr_remove(&loop_index_idr, i);
 out_free_dev:
@@ -1692,6 +1674,7 @@ static void loop_remove(struct loop_device *lo)
 {
        del_gendisk(lo->lo_disk);
        blk_cleanup_queue(lo->lo_queue);
+       blk_mq_free_tag_set(&lo->tag_set);
        put_disk(lo->lo_disk);
        kfree(lo);
 }
@@ -1875,6 +1858,13 @@ static int __init loop_init(void)
                goto misc_out;
        }
 
+       loop_wq = alloc_workqueue("kloopd",
+                       WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 0);
+       if (!loop_wq) {
+               err = -ENOMEM;
+               goto misc_out;
+       }
+
        blk_register_region(MKDEV(LOOP_MAJOR, 0), range,
                                  THIS_MODULE, loop_probe, NULL, NULL);
 
@@ -1912,6 +1902,8 @@ static void __exit loop_exit(void)
        blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range);
        unregister_blkdev(LOOP_MAJOR, "loop");
 
+       destroy_workqueue(loop_wq);
+
        misc_deregister(&loop_misc);
 }
 
index 90df5d6485b696bf897dbdf204fa4fea3c414b63..301c27f8323ffd9d53c563ed6ee7785a38b26eaa 100644 (file)
 
 #include <linux/bio.h>
 #include <linux/blkdev.h>
+#include <linux/blk-mq.h>
 #include <linux/spinlock.h>
 #include <linux/mutex.h>
+#include <linux/workqueue.h>
 #include <uapi/linux/loop.h>
 
 /* Possible states of device */
@@ -52,19 +54,23 @@ struct loop_device {
        gfp_t           old_gfp_mask;
 
        spinlock_t              lo_lock;
-       struct bio_list         lo_bio_list;
-       unsigned int            lo_bio_count;
+       struct list_head        write_cmd_head;
+       struct work_struct      write_work;
+       bool                    write_started;
        int                     lo_state;
        struct mutex            lo_ctl_mutex;
-       struct task_struct      *lo_thread;
-       wait_queue_head_t       lo_event;
-       /* wait queue for incoming requests */
-       wait_queue_head_t       lo_req_wait;
 
        struct request_queue    *lo_queue;
+       struct blk_mq_tag_set   tag_set;
        struct gendisk          *lo_disk;
 };
 
+struct loop_cmd {
+       struct work_struct read_work;
+       struct request *rq;
+       struct list_head list;
+};
+
 /* Support for loadable transfer modules */
 struct loop_func_table {
        int number;     /* filter type */ 
index aa2224aa7caa34d5854aebfb7ceaf4cebd29eccc..65cd61a4145ed2049944621c50b374cf742041ca 100644 (file)
@@ -579,7 +579,7 @@ static int null_add_dev(void)
        sector_div(size, bs);
        set_capacity(disk, size);
 
-       disk->flags |= GENHD_FL_EXT_DEVT;
+       disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO;
        disk->major             = null_major;
        disk->first_minor       = nullb->index;
        disk->fops              = &null_fops;
index f7d083bb3bd5c0e437e6ce8cb6f640b2c65b7dda..f4aa6416083889dfdac0d60c5d07a752deff7128 100644 (file)
@@ -1108,21 +1108,14 @@ static void nvme_free_queue(struct nvme_queue *nvmeq)
 
 static void nvme_free_queues(struct nvme_dev *dev, int lowest)
 {
-       LLIST_HEAD(q_list);
-       struct nvme_queue *nvmeq, *next;
-       struct llist_node *entry;
        int i;
 
        for (i = dev->queue_count - 1; i >= lowest; i--) {
                struct nvme_queue *nvmeq = dev->queues[i];
-               llist_add(&nvmeq->node, &q_list);
                dev->queue_count--;
                dev->queues[i] = NULL;
-       }
-       synchronize_rcu();
-       entry = llist_del_all(&q_list);
-       llist_for_each_entry_safe(nvmeq, next, entry, node)
                nvme_free_queue(nvmeq);
+       }
 }
 
 /**