block: loop: don't hold lo_ctl_mutex in lo_open
[linux-2.6-block.git] / drivers / block / loop.c
index c4fd1e45ce1e82a8f303aacda953539dba99b3be..b3e294e529ecc11ac8b982f9859e655eb0bc0783 100644 (file)
@@ -86,30 +86,6 @@ static DEFINE_MUTEX(loop_index_mutex);
 static int max_part;
 static int part_shift;
 
-static struct workqueue_struct *loop_wq;
-
-/*
- * Transfer functions
- */
-static int transfer_none(struct loop_device *lo, int cmd,
-                        struct page *raw_page, unsigned raw_off,
-                        struct page *loop_page, unsigned loop_off,
-                        int size, sector_t real_block)
-{
-       char *raw_buf = kmap_atomic(raw_page) + raw_off;
-       char *loop_buf = kmap_atomic(loop_page) + loop_off;
-
-       if (cmd == READ)
-               memcpy(loop_buf, raw_buf, size);
-       else
-               memcpy(raw_buf, loop_buf, size);
-
-       kunmap_atomic(loop_buf);
-       kunmap_atomic(raw_buf);
-       cond_resched();
-       return 0;
-}
-
 static int transfer_xor(struct loop_device *lo, int cmd,
                        struct page *raw_page, unsigned raw_off,
                        struct page *loop_page, unsigned loop_off,
@@ -148,14 +124,13 @@ static int xor_init(struct loop_device *lo, const struct loop_info64 *info)
 
 static struct loop_func_table none_funcs = {
        .number = LO_CRYPT_NONE,
-       .transfer = transfer_none,
-};     
+}; 
 
 static struct loop_func_table xor_funcs = {
        .number = LO_CRYPT_XOR,
        .transfer = transfer_xor,
        .init = xor_init
-};     
+}; 
 
 /* xfer_funcs[0] is special - its release function is never called */
 static struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = {
@@ -215,207 +190,169 @@ lo_do_transfer(struct loop_device *lo, int cmd,
               struct page *lpage, unsigned loffs,
               int size, sector_t rblock)
 {
-       if (unlikely(!lo->transfer))
+       int ret;
+
+       ret = lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock);
+       if (likely(!ret))
                return 0;
 
-       return lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock);
+       printk_ratelimited(KERN_ERR
+               "loop: Transfer error at byte offset %llu, length %i.\n",
+               (unsigned long long)rblock << 9, size);
+       return ret;
 }
 
-/**
- * __do_lo_send_write - helper for writing data to a loop device
- *
- * This helper just factors out common code between do_lo_send_direct_write()
- * and do_lo_send_write().
- */
-static int __do_lo_send_write(struct file *file,
-               u8 *buf, const int len, loff_t pos)
+static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos) /* write one bvec to @file at *ppos; returns 0 or a negative errno */
 {
-       struct kvec kvec = {.iov_base = buf, .iov_len = len};
-       struct iov_iter from;
+       struct iov_iter i;
        ssize_t bw;
 
-       iov_iter_kvec(&from, ITER_KVEC | WRITE, &kvec, 1, len);
+       iov_iter_bvec(&i, ITER_BVEC, bvec, 1, bvec->bv_len); /* one-segment iterator covering all of @bvec */
 
        file_start_write(file);
-       bw = vfs_iter_write(file, &from, &pos);
+       bw = vfs_iter_write(file, &i, ppos);
        file_end_write(file);
-       if (likely(bw == len))
+       /* Only a write of the complete bv_len bytes counts as success. */
+       if (likely(bw ==  bvec->bv_len))
                return 0;
-       printk_ratelimited(KERN_ERR "loop: Write error at byte offset %llu, length %i.\n",
-                       (unsigned long long)pos, len);
+       /* Log, then map a short write (bw >= 0) to -EIO; a negative bw is returned unchanged. */
+       printk_ratelimited(KERN_ERR
+               "loop: Write error at byte offset %llu, length %i.\n",
+               (unsigned long long)*ppos, bvec->bv_len); /* NOTE(review): *ppos is read after the write, presumably already advanced on a short write - confirm */
        if (bw >= 0)
                bw = -EIO;
        return bw;
 }
 
-/**
- * do_lo_send_direct_write - helper for writing data to a loop device
- *
- * This is the fast, non-transforming version that does not need double
- * buffering.
- */
-static int do_lo_send_direct_write(struct loop_device *lo,
-               struct bio_vec *bvec, loff_t pos, struct page *page)
+static int lo_write_simple(struct loop_device *lo, struct request *rq,
+               loff_t pos) /* write every segment of @rq to the backing file as is; returns 0 or a negative errno */
 {
-       ssize_t bw = __do_lo_send_write(lo->lo_backing_file,
-                       kmap(bvec->bv_page) + bvec->bv_offset,
-                       bvec->bv_len, pos);
-       kunmap(bvec->bv_page);
-       cond_resched();
-       return bw;
+       struct bio_vec bvec;
+       struct req_iterator iter;
+       int ret = 0;
+       /* Stop at the first failing segment; its error code is what gets returned. */
+       rq_for_each_segment(bvec, rq, iter) {
+               ret = lo_write_bvec(lo->lo_backing_file, &bvec, &pos); /* pos is passed by address; presumably advanced by the write - confirm */
+               if (ret < 0)
+                       break;
+               cond_resched();
+       }
+
+       return ret;
 }
 
-/**
- * do_lo_send_write - helper for writing data to a loop device
- *
+/*
  * This is the slow, transforming version that needs to double buffer the
  * data as it cannot do the transformations in place without having direct
  * access to the destination pages of the backing file.
  */
-static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec,
-               loff_t pos, struct page *page)
-{
-       int ret = lo_do_transfer(lo, WRITE, page, 0, bvec->bv_page,
-                       bvec->bv_offset, bvec->bv_len, pos >> 9);
-       if (likely(!ret))
-               return __do_lo_send_write(lo->lo_backing_file,
-                               page_address(page), bvec->bv_len,
-                               pos);
-       printk_ratelimited(KERN_ERR "loop: Transfer error at byte offset %llu, "
-                       "length %i.\n", (unsigned long long)pos, bvec->bv_len);
-       if (ret > 0)
-               ret = -EIO;
-       return ret;
-}
-
-static int lo_send(struct loop_device *lo, struct request *rq, loff_t pos)
+static int lo_write_transfer(struct loop_device *lo, struct request *rq,
+               loff_t pos) /* returns 0, -ENOMEM, or the first transfer/write error */
 {
-       int (*do_lo_send)(struct loop_device *, struct bio_vec *, loff_t,
-                       struct page *page);
-       struct bio_vec bvec;
+       struct bio_vec bvec, b; /* b describes the bounce page for lo_write_bvec() */
        struct req_iterator iter;
-       struct page *page = NULL;
+       struct page *page; /* bounce page, reused for every segment */
        int ret = 0;
 
-       if (lo->transfer != transfer_none) {
-               page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
-               if (unlikely(!page))
-                       goto fail;
-               kmap(page);
-               do_lo_send = do_lo_send_write;
-       } else {
-               do_lo_send = do_lo_send_direct_write;
-       }
+       page = alloc_page(GFP_NOIO);
+       if (unlikely(!page))
+               return -ENOMEM;
 
        rq_for_each_segment(bvec, rq, iter) {
-               ret = do_lo_send(lo, &bvec, pos, page);
+               ret = lo_do_transfer(lo, WRITE, page, 0, bvec.bv_page,
+                       bvec.bv_offset, bvec.bv_len, pos >> 9); /* pos >> 9: byte offset expressed in 512-byte sectors */
+               if (unlikely(ret))
+                       break;
+               /* Write the bounce page out in place of the request's own page. */
+               b.bv_page = page;
+               b.bv_offset = 0;
+               b.bv_len = bvec.bv_len;
+               ret = lo_write_bvec(lo->lo_backing_file, &b, &pos);
                if (ret < 0)
                        break;
-               pos += bvec.bv_len;
-       }
-       if (page) {
-               kunmap(page);
-               __free_page(page);
        }
-out:
+       /* Every loop exit lands here, so the bounce page is always freed. */
+       __free_page(page);
        return ret;
-fail:
-       printk_ratelimited(KERN_ERR "loop: Failed to allocate temporary page for write.\n");
-       ret = -ENOMEM;
-       goto out;
 }
 
-struct lo_read_data {
-       struct loop_device *lo;
-       struct page *page;
-       unsigned offset;
-       int bsize;
-};
+static int lo_read_simple(struct loop_device *lo, struct request *rq,
+               loff_t pos) /* read each segment of @rq straight from the backing file; returns 0 or a negative errno */
+{
+       struct bio_vec bvec;
+       struct req_iterator iter;
+       struct iov_iter i;
+       ssize_t len;
 
-static int
-lo_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
-               struct splice_desc *sd)
-{
-       struct lo_read_data *p = sd->u.data;
-       struct loop_device *lo = p->lo;
-       struct page *page = buf->page;
-       sector_t IV;
-       int size;
-
-       IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9)) +
-                                                       (buf->offset >> 9);
-       size = sd->len;
-       if (size > p->bsize)
-               size = p->bsize;
-
-       if (lo_do_transfer(lo, READ, page, buf->offset, p->page, p->offset, size, IV)) {
-               printk_ratelimited(KERN_ERR "loop: transfer error block %ld\n",
-                      page->index);
-               size = -EINVAL;
-       }
+       rq_for_each_segment(bvec, rq, iter) {
+               iov_iter_bvec(&i, ITER_BVEC, &bvec, 1, bvec.bv_len); /* one-segment iterator over the request's own page */
+               len = vfs_iter_read(lo->lo_backing_file, &i, &pos);
+               if (len < 0)
+                       return len; /* read error: propagate the negative value */
 
-       flush_dcache_page(p->page);
+               flush_dcache_page(bvec.bv_page);
 
-       if (size > 0)
-               p->offset += size;
+               if (len != bvec.bv_len) { /* short read: zero-fill every bio of @rq and stop reading */
+                       struct bio *bio;
 
-       return size;
-}
+                       __rq_for_each_bio(bio, rq)
+                               zero_fill_bio(bio);
+                       break;
+               }
+               cond_resched();
+       }
 
-static int
-lo_direct_splice_actor(struct pipe_inode_info *pipe, struct splice_desc *sd)
-{
-       return __splice_from_pipe(pipe, sd, lo_splice_actor);
+       return 0; /* a short read is still reported as success */
 }
 
-static ssize_t
-do_lo_receive(struct loop_device *lo,
-             struct bio_vec *bvec, int bsize, loff_t pos)
+static int lo_read_transfer(struct loop_device *lo, struct request *rq,
+               loff_t pos) /* read @rq via a bounce page and lo->transfer; returns 0, -ENOMEM, or the first read/transfer error */
 {
-       struct lo_read_data cookie;
-       struct splice_desc sd;
-       struct file *file;
-       ssize_t retval;
+       struct bio_vec bvec, b; /* b describes the bounce page for the read */
+       struct req_iterator iter;
+       struct iov_iter i;
+       struct page *page; /* bounce page, reused for every segment */
+       ssize_t len;
+       int ret = 0;
 
-       cookie.lo = lo;
-       cookie.page = bvec->bv_page;
-       cookie.offset = bvec->bv_offset;
-       cookie.bsize = bsize;
+       page = alloc_page(GFP_NOIO);
+       if (unlikely(!page))
+               return -ENOMEM;
 
-       sd.len = 0;
-       sd.total_len = bvec->bv_len;
-       sd.flags = 0;
-       sd.pos = pos;
-       sd.u.data = &cookie;
+       rq_for_each_segment(bvec, rq, iter) {
+               loff_t offset = pos; /* position before the read; supplies the sector number for the transfer */
 
-       file = lo->lo_backing_file;
-       retval = splice_direct_to_actor(file, &sd, lo_direct_splice_actor);
+               b.bv_page = page;
+               b.bv_offset = 0;
+               b.bv_len = bvec.bv_len;
 
-       return retval;
-}
+               iov_iter_bvec(&i, ITER_BVEC, &b, 1, b.bv_len);
+               len = vfs_iter_read(lo->lo_backing_file, &i, &pos);
+               if (len < 0) {
+                       ret = len;
+                       goto out_free_page;
+               }
 
-static int
-lo_receive(struct loop_device *lo, struct request *rq, int bsize, loff_t pos)
-{
-       struct bio_vec bvec;
-       struct req_iterator iter;
-       ssize_t s;
+               ret = lo_do_transfer(lo, READ, page, 0, bvec.bv_page,
+                       bvec.bv_offset, len, offset >> 9); /* only the len bytes actually read are transferred */
+               if (ret)
+                       goto out_free_page;
 
-       rq_for_each_segment(bvec, rq, iter) {
-               s = do_lo_receive(lo, &bvec, bsize, pos);
-               if (s < 0)
-                       return s;
+               flush_dcache_page(bvec.bv_page);
 
-               if (s != bvec.bv_len) {
+               if (len != bvec.bv_len) { /* short read: zero-fill every bio of @rq and stop reading */
                        struct bio *bio;
 
                        __rq_for_each_bio(bio, rq)
                                zero_fill_bio(bio);
                        break;
                }
-               pos += bvec.bv_len;
        }
-       return 0;
+       /* Normal loop exit, including the short-read break: report success. */
+       ret = 0;
+out_free_page:
+       __free_page(page);
+       return ret;
 }
 
 static int lo_discard(struct loop_device *lo, struct request *rq, loff_t pos)
@@ -464,10 +401,17 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
                        ret = lo_req_flush(lo, rq);
                else if (rq->cmd_flags & REQ_DISCARD)
                        ret = lo_discard(lo, rq, pos);
+               else if (lo->transfer)
+                       ret = lo_write_transfer(lo, rq, pos);
                else
-                       ret = lo_send(lo, rq, pos);
-       } else
-               ret = lo_receive(lo, rq, lo->lo_blocksize, pos);
+                       ret = lo_write_simple(lo, rq, pos);
+
+       } else {
+               if (lo->transfer)
+                       ret = lo_read_transfer(lo, rq, pos);
+               else
+                       ret = lo_read_simple(lo, rq, pos);
+       }
 
        return ret;
 }
@@ -779,6 +723,12 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
        size = get_loop_size(lo, file);
        if ((loff_t)(sector_t)size != size)
                goto out_putf;
+       error = -ENOMEM;
+       lo->wq = alloc_workqueue("kloopd%d",
+                       WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 16,
+                       lo->lo_number);
+       if (!lo->wq)
+               goto out_putf;
 
        error = 0;
 
@@ -788,7 +738,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
        lo->lo_device = bdev;
        lo->lo_flags = lo_flags;
        lo->lo_backing_file = file;
-       lo->transfer = transfer_none;
+       lo->transfer = NULL;
        lo->ioctl = NULL;
        lo->lo_sizelimit = 0;
        lo->old_gfp_mask = mapping_gfp_mask(mapping);
@@ -881,7 +831,7 @@ static int loop_clr_fd(struct loop_device *lo)
         * <dev>/do something like mkfs/losetup -d <dev> causing the losetup -d
         * command to fail with EBUSY.
         */
-       if (lo->lo_refcnt > 1) {
+       if (atomic_read(&lo->lo_refcnt) > 1) {
                lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
                mutex_unlock(&lo->lo_ctl_mutex);
                return 0;
@@ -890,6 +840,9 @@ static int loop_clr_fd(struct loop_device *lo)
        if (filp == NULL)
                return -EINVAL;
 
+       /* freeze request queue during the transition */
+       blk_mq_freeze_queue(lo->lo_queue);
+
        spin_lock_irq(&lo->lo_lock);
        lo->lo_state = Lo_rundown;
        lo->lo_backing_file = NULL;
@@ -921,11 +874,15 @@ static int loop_clr_fd(struct loop_device *lo)
        lo->lo_state = Lo_unbound;
        /* This is safe: open() is still holding a reference. */
        module_put(THIS_MODULE);
+       blk_mq_unfreeze_queue(lo->lo_queue);
+
        if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev)
                ioctl_by_bdev(bdev, BLKRRPART, 0);
        lo->lo_flags = 0;
        if (!part_shift)
                lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
+       destroy_workqueue(lo->wq);
+       lo->wq = NULL;
        mutex_unlock(&lo->lo_ctl_mutex);
        /*
         * Need not hold lo_ctl_mutex to fput backing file.
@@ -1007,7 +964,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
                memcpy(lo->lo_encrypt_key, info->lo_encrypt_key,
                       info->lo_encrypt_key_size);
                lo->lo_key_owner = uid;
-       }       
+       }
 
        return 0;
 }
@@ -1378,9 +1335,7 @@ static int lo_open(struct block_device *bdev, fmode_t mode)
                goto out;
        }
 
-       mutex_lock(&lo->lo_ctl_mutex);
-       lo->lo_refcnt++;
-       mutex_unlock(&lo->lo_ctl_mutex);
+       atomic_inc(&lo->lo_refcnt);
 out:
        mutex_unlock(&loop_index_mutex);
        return err;
@@ -1391,11 +1346,10 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
        struct loop_device *lo = disk->private_data;
        int err;
 
-       mutex_lock(&lo->lo_ctl_mutex);
-
-       if (--lo->lo_refcnt)
-               goto out;
+       if (atomic_dec_return(&lo->lo_refcnt))
+               return;
 
+       mutex_lock(&lo->lo_ctl_mutex);
        if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
                /*
                 * In autoclear mode, stop the loop thread
@@ -1479,9 +1433,13 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
                const struct blk_mq_queue_data *bd)
 {
        struct loop_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
+       struct loop_device *lo = cmd->rq->q->queuedata; /* loop device owning this request's queue */
 
        blk_mq_start_request(bd->rq);
 
+       if (lo->lo_state != Lo_bound) /* device is not in the bound state: fail the request here instead of queueing work */
+               return BLK_MQ_RQ_QUEUE_ERROR; /* ->queue_rq() reports BLK_MQ_RQ_QUEUE_* codes (cf. BLK_MQ_RQ_QUEUE_OK below), not an errno */
+
        if (cmd->rq->cmd_flags & REQ_WRITE) {
                struct loop_device *lo = cmd->rq->q->queuedata;
                bool need_sched = true;
@@ -1495,9 +1453,9 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
                spin_unlock_irq(&lo->lo_lock);
 
                if (need_sched)
-                       queue_work(loop_wq, &lo->write_work);
+                       queue_work(lo->wq, &lo->write_work);
        } else {
-               queue_work(loop_wq, &cmd->read_work);
+               queue_work(lo->wq, &cmd->read_work);
        }
 
        return BLK_MQ_RQ_QUEUE_OK;
@@ -1509,9 +1467,6 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
        struct loop_device *lo = cmd->rq->q->queuedata;
        int ret = -EIO;
 
-       if (lo->lo_state != Lo_bound)
-               goto failed;
-
        if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY))
                goto failed;
 
@@ -1648,6 +1603,7 @@ static int loop_add(struct loop_device **l, int i)
                disk->flags |= GENHD_FL_NO_PART_SCAN;
        disk->flags |= GENHD_FL_EXT_DEVT;
        mutex_init(&lo->lo_ctl_mutex);
+       atomic_set(&lo->lo_refcnt, 0);
        lo->lo_number           = i;
        spin_lock_init(&lo->lo_lock);
        disk->major             = LOOP_MAJOR;
@@ -1765,7 +1721,7 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd,
                        mutex_unlock(&lo->lo_ctl_mutex);
                        break;
                }
-               if (lo->lo_refcnt > 0) {
+               if (atomic_read(&lo->lo_refcnt) > 0) {
                        ret = -EBUSY;
                        mutex_unlock(&lo->lo_ctl_mutex);
                        break;
@@ -1860,13 +1816,6 @@ static int __init loop_init(void)
                goto misc_out;
        }
 
-       loop_wq = alloc_workqueue("kloopd",
-                       WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 0);
-       if (!loop_wq) {
-               err = -ENOMEM;
-               goto misc_out;
-       }
-
        blk_register_region(MKDEV(LOOP_MAJOR, 0), range,
                                  THIS_MODULE, loop_probe, NULL, NULL);
 
@@ -1904,8 +1853,6 @@ static void __exit loop_exit(void)
        blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range);
        unregister_blkdev(LOOP_MAJOR, "loop");
 
-       destroy_workqueue(loop_wq);
-
        misc_deregister(&loop_misc);
 }