Merge branch 'for-4.7/drivers' of git://git.kernel.dk/linux-block

author Linus Torvalds <torvalds@linux-foundation.org>

Tue, 17 May 2016 23:03:32 +0000 (16:03 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Tue, 17 May 2016 23:03:32 +0000 (16:03 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 17 May 2016 23:03:32 +0000 (16:03 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 17 May 2016 23:03:32 +0000 (16:03 -0700)
diff --combined drivers/block/loop.c

index 80cf8add46ff3667d896fca88aaea3fbf338ad27,7e5e27ac45bbea7c946e7f0ee8da777c11fa447c..1fa8cc235977f404bc995d73659714fbccad7066
--- 1/drivers/block/loop.c
--- 2/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@@ -488,12 -488,6 +488,12 @@@ static int lo_rw_aio(struct loop_devic
         bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
         iov_iter_bvec(&iter, ITER_BVEC | rw, bvec,
                       bio_segments(bio), blk_rq_bytes(cmd->rq));
+ +      /*
+ +       * This bio may start in the middle of the 'bvec' because
+ +       * of bio splitting, so the offset into the bvec must be
+ +       * passed to the iov iterator.
+ +       */
+ +      iter.iov_offset = bio->bi_iter.bi_bvec_done;
   
         cmd->iocb.ki_pos = pos;
         cmd->iocb.ki_filp = file;
@@@ -943,7 -937,7 +943,7 @@@ static int loop_set_fd(struct loop_devi
         mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
   
         if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
-               blk_queue_flush(lo->lo_queue, REQ_FLUSH);
+               blk_queue_write_cache(lo->lo_queue, true, false);
   
         loop_update_dio(lo);
         set_capacity(lo->lo_disk, size);
diff --combined drivers/md/md.c

index 14d3b37944df031214c2c6951ed15c46da104842,5d61e76cec343d1895c870aa95776e7a69e4d0a8..c9a475c33cc7401dc067716e6cf3aaf648c46d93
--- 1/drivers/md/md.c
--- 2/drivers/md/md.c
+++ b/drivers/md/md.c
@@@ -284,8 -284,6 +284,8 @@@ static blk_qc_t md_make_request(struct 
          * go away inside make_request
          */
         sectors = bio_sectors(bio);
+ +      /* bio could be mergeable after passing to the underlying layer */
+ +      bio->bi_rw &= ~REQ_NOMERGE;
         mddev->pers->make_request(mddev, bio);
   
         cpu = part_stat_lock();
@@@ -5039,7 -5037,7 +5039,7 @@@ static int md_alloc(dev_t dev, char *na
         disk->fops = &md_fops;
         disk->private_data = mddev;
         disk->queue = mddev->queue;
-       blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);
+       blk_queue_write_cache(mddev->queue, true, true);
         /* Allow extended partitions.  This makes the
          * 'mdp' device redundant, but we can't really
          * remove it now.
diff --combined drivers/mmc/card/block.c

index 5f2a3d69344f2b8652635afffa7ab566d1bd6a44,32daf433a9fb2622f28b97d8348d3e17c1f93c62..ddc96206288a1f281fa8b0663ef033b3fed774bb
--- 1/drivers/mmc/card/block.c
--- 2/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@@ -35,7 -35,6 +35,7 @@@
   #include <linux/capability.h>
   #include <linux/compat.h>
   #include <linux/pm_runtime.h>
+ +#include <linux/idr.h>
   
   #include <linux/mmc/ioctl.h>
   #include <linux/mmc/card.h>
@@@ -79,14 -78,15 +79,14 @@@ static int perdev_minors = CONFIG_MMC_B
   /*
    * We've only got one major, so number of mmcblk devices is
    * limited to (1 << 20) / number of minors per device.  It is also
- - * currently limited by the size of the static bitmaps below.
+ + * limited by the MAX_DEVICES below.
    */
   static int max_devices;
   
   #define MAX_DEVICES 256
   
- -/* TODO: Replace these with struct ida */
- -static DECLARE_BITMAP(dev_use, MAX_DEVICES);
- -static DECLARE_BITMAP(name_use, MAX_DEVICES);
+ +static DEFINE_IDA(mmc_blk_ida);
+ +static DEFINE_SPINLOCK(mmc_blk_lock);
   
   /*
    * There is one mmc_blk_data per slot.
@@@ -105,6 -105,7 +105,6 @@@ struct mmc_blk_data 
         unsigned int    usage;
         unsigned int    read_only;
         unsigned int    part_type;
- -      unsigned int    name_idx;
         unsigned int    reset_done;
   #define MMC_BLK_READ          BIT(0)
   #define MMC_BLK_WRITE         BIT(1)
@@@ -179,9 -180,7 +179,9 @@@ static void mmc_blk_put(struct mmc_blk_
                 int devidx = mmc_get_devidx(md->disk);
                 blk_cleanup_queue(md->queue.queue);
   
- -              __clear_bit(devidx, dev_use);
+ +              spin_lock(&mmc_blk_lock);
+ +              ida_remove(&mmc_blk_ida, devidx);
+ +              spin_unlock(&mmc_blk_lock);
   
                 put_disk(md->disk);
                 kfree(md);
@@@ -948,22 -947,16 +948,22 @@@ static int mmc_blk_cmd_error(struct req
                         req->rq_disk->disk_name, "timed out", name, status);
   
                 /* If the status cmd initially failed, retry the r/w cmd */
- -              if (!status_valid)
+ +              if (!status_valid) {
+ +                      pr_err("%s: status not valid, retrying timeout\n",
+ +                              req->rq_disk->disk_name);
                         return ERR_RETRY;
+ +              }
   
                 /*
                  * If it was a r/w cmd crc error, or illegal command
                  * (eg, issued in wrong state) then retry - we should
                  * have corrected the state problem above.
                  */
- -              if (status & (R1_COM_CRC_ERROR | R1_ILLEGAL_COMMAND))
+ +              if (status & (R1_COM_CRC_ERROR | R1_ILLEGAL_COMMAND)) {
+ +                      pr_err("%s: command error, retrying timeout\n",
+ +                              req->rq_disk->disk_name);
                         return ERR_RETRY;
+ +              }
   
                 /* Otherwise abort the command */
                 return ERR_ABORT;
@@@ -2198,23 -2191,10 +2198,23 @@@ static struct mmc_blk_data *mmc_blk_all
         struct mmc_blk_data *md;
         int devidx, ret;
   
- -      devidx = find_first_zero_bit(dev_use, max_devices);
- -      if (devidx >= max_devices)
- -              return ERR_PTR(-ENOSPC);
- -      __set_bit(devidx, dev_use);
+ +again:
+ +      if (!ida_pre_get(&mmc_blk_ida, GFP_KERNEL))
+ +              return ERR_PTR(-ENOMEM);
+ +
+ +      spin_lock(&mmc_blk_lock);
+ +      ret = ida_get_new(&mmc_blk_ida, &devidx);
+ +      spin_unlock(&mmc_blk_lock);
+ +
+ +      if (ret == -EAGAIN)
+ +              goto again;
+ +      else if (ret)
+ +              return ERR_PTR(ret);
+ +
+ +      if (devidx >= max_devices) {
+ +              ret = -ENOSPC;
+ +              goto out;
+ +      }
   
         md = kzalloc(sizeof(struct mmc_blk_data), GFP_KERNEL);
         if (!md) {
@@@ -2222,6 -2202,19 +2222,6 @@@
                 goto out;
         }
   
- -      /*
- -       * !subname implies we are creating main mmc_blk_data that will be
- -       * associated with mmc_card with dev_set_drvdata. Due to device
- -       * partitions, devidx will not coincide with a per-physical card
- -       * index anymore so we keep track of a name index.
- -       */
- -      if (!subname) {
- -              md->name_idx = find_first_zero_bit(name_use, max_devices);
- -              __set_bit(md->name_idx, name_use);
- -      } else
- -              md->name_idx = ((struct mmc_blk_data *)
- -                              dev_to_disk(parent)->private_data)->name_idx;
- -
         md->area_type = area_type;
   
         /*
@@@ -2271,7 -2264,7 +2271,7 @@@
          */
   
         snprintf(md->disk->disk_name, sizeof(md->disk->disk_name),
- -               "mmcblk%u%s", md->name_idx, subname ? subname : "");
+ +               "mmcblk%u%s", card->host->index, subname ? subname : "");
   
         if (mmc_card_mmc(card))
                 blk_queue_logical_block_size(md->queue.queue,
@@@ -2293,7 -2286,7 +2293,7 @@@
             ((card->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN) ||
              card->ext_csd.rel_sectors)) {
                 md->flags |= MMC_BLK_REL_WR;
-               blk_queue_flush(md->queue.queue, REQ_FLUSH | REQ_FUA);
+               blk_queue_write_cache(md->queue.queue, true, true);
         }
   
         if (mmc_card_mmc(card) &&
@@@ -2311,9 -2304,6 +2311,9 @@@
    err_kfree:
         kfree(md);
    out:
+ +      spin_lock(&mmc_blk_lock);
+ +      ida_remove(&mmc_blk_ida, devidx);
+ +      spin_unlock(&mmc_blk_lock);
         return ERR_PTR(ret);
   }
   
@@@ -2428,6 -2418,7 +2428,6 @@@ static void mmc_blk_remove_parts(struc
         struct list_head *pos, *q;
         struct mmc_blk_data *part_md;
   
- -      __clear_bit(md->name_idx, name_use);
         list_for_each_safe(pos, q, &md->part) {
                 part_md = list_entry(pos, struct mmc_blk_data, part);
                 list_del(pos);
diff --combined drivers/nvme/host/pci.c

index 4fd733ff72b1cb7cf38023701d640a059b4e4670,fb741d09831aaa6536e3ccdb4e69b7de486d448d..0f093f14d3482394630b66c8586da5beb455d8c4
--- 1/drivers/nvme/host/pci.c
--- 2/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@@ -54,8 -54,7 +54,7 @@@
    * We handle AEN commands ourselves and don't even let the
    * block layer know about them.
    */
- #define NVME_NR_AEN_COMMANDS  1
- #define NVME_AQ_BLKMQ_DEPTH   (NVME_AQ_DEPTH - NVME_NR_AEN_COMMANDS)
+ #define NVME_AQ_BLKMQ_DEPTH   (NVME_AQ_DEPTH - NVME_NR_AERS)
   
   static int use_threaded_interrupts;
   module_param(use_threaded_interrupts, int, 0);
@@@ -92,9 -91,7 +91,7 @@@ struct nvme_dev 
         struct msix_entry *entry;
         void __iomem *bar;
         struct work_struct reset_work;
-       struct work_struct scan_work;
         struct work_struct remove_work;
-       struct work_struct async_work;
         struct timer_list watchdog_timer;
         struct mutex shutdown_lock;
         bool subsystem;
@@@ -102,11 -99,6 +99,6 @@@
         dma_addr_t cmb_dma_addr;
         u64 cmb_size;
         u32 cmbsz;
-       unsigned long flags;
- 
- #define NVME_CTRL_RESETTING    0
- #define NVME_CTRL_REMOVING     1
- 
         struct nvme_ctrl ctrl;
         struct completion ioq_wait;
   };
@@@ -271,40 -263,6 +263,6 @@@ static int nvme_init_request(void *data
         return 0;
   }
   
- static void nvme_queue_scan(struct nvme_dev *dev)
- {
-       /*
-        * Do not queue new scan work when a controller is reset during
-        * removal.
-        */
-       if (test_bit(NVME_CTRL_REMOVING, &dev->flags))
-               return;
-       queue_work(nvme_workq, &dev->scan_work);
- }
- 
- static void nvme_complete_async_event(struct nvme_dev *dev,
-               struct nvme_completion *cqe)
- {
-       u16 status = le16_to_cpu(cqe->status) >> 1;
-       u32 result = le32_to_cpu(cqe->result);
- 
-       if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ) {
-               ++dev->ctrl.event_limit;
-               queue_work(nvme_workq, &dev->async_work);
-       }
- 
-       if (status != NVME_SC_SUCCESS)
-               return;
- 
-       switch (result & 0xff07) {
-       case NVME_AER_NOTICE_NS_CHANGED:
-               dev_info(dev->ctrl.device, "rescanning\n");
-               nvme_queue_scan(dev);
-       default:
-               dev_warn(dev->ctrl.device, "async event result %08x\n", result);
-       }
- }
- 
   /**
    * __nvme_submit_cmd() - Copy a command into a queue and ring the doorbell
    * @nvmeq: The queue to use
@@@ -334,16 -292,11 +292,11 @@@ static __le64 **iod_list(struct reques
         return (__le64 **)(iod->sg + req->nr_phys_segments);
   }
   
- static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
+ static int nvme_init_iod(struct request *rq, unsigned size,
+               struct nvme_dev *dev)
   {
         struct nvme_iod *iod = blk_mq_rq_to_pdu(rq);
         int nseg = rq->nr_phys_segments;
-       unsigned size;
- 
-       if (rq->cmd_flags & REQ_DISCARD)
-               size = sizeof(struct nvme_dsm_range);
-       else
-               size = blk_rq_bytes(rq);
   
         if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) {
                 iod->sg = kmalloc(nvme_iod_alloc_size(dev, size, nseg), GFP_ATOMIC);
@@@ -368,6 -321,8 +321,8 @@@ static void nvme_free_iod(struct nvme_d
         __le64 **list = iod_list(req);
         dma_addr_t prp_dma = iod->first_dma;
   
+       nvme_cleanup_cmd(req);
+ 
         if (iod->npages == 0)
                 dma_pool_free(dev->prp_small_pool, list[0], prp_dma);
         for (i = 0; i < iod->npages; i++) {
@@@ -529,7 -484,7 +484,7 @@@ static bool nvme_setup_prps(struct nvme
   }
   
   static int nvme_map_data(struct nvme_dev *dev, struct request *req,
-               struct nvme_command *cmnd)
+               unsigned size, struct nvme_command *cmnd)
   {
         struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
         struct request_queue *q = req->q;
@@@ -546,7 -501,7 +501,7 @@@
         if (!dma_map_sg(dev->dev, iod->sg, iod->nents, dma_dir))
                 goto out;
   
-       if (!nvme_setup_prps(dev, req, blk_rq_bytes(req)))
+       if (!nvme_setup_prps(dev, req, size))
                 goto out_unmap;
   
         ret = BLK_MQ_RQ_QUEUE_ERROR;
@@@ -595,37 -550,6 +550,6 @@@ static void nvme_unmap_data(struct nvme
         nvme_free_iod(dev, req);
   }
   
- /*
-  * We reuse the small pool to allocate the 16-byte range here as it is not
-  * worth having a special pool for these or additional cases to handle freeing
-  * the iod.
-  */
- static int nvme_setup_discard(struct nvme_queue *nvmeq, struct nvme_ns *ns,
-               struct request *req, struct nvme_command *cmnd)
- {
-       struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
-       struct nvme_dsm_range *range;
- 
-       range = dma_pool_alloc(nvmeq->dev->prp_small_pool, GFP_ATOMIC,
-                                               &iod->first_dma);
-       if (!range)
-               return BLK_MQ_RQ_QUEUE_BUSY;
-       iod_list(req)[0] = (__le64 *)range;
-       iod->npages = 0;
- 
-       range->cattr = cpu_to_le32(0);
-       range->nlb = cpu_to_le32(blk_rq_bytes(req) >> ns->lba_shift);
-       range->slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
- 
-       memset(cmnd, 0, sizeof(*cmnd));
-       cmnd->dsm.opcode = nvme_cmd_dsm;
-       cmnd->dsm.nsid = cpu_to_le32(ns->ns_id);
-       cmnd->dsm.prp1 = cpu_to_le64(iod->first_dma);
-       cmnd->dsm.nr = 0;
-       cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
-       return BLK_MQ_RQ_QUEUE_OK;
- }
- 
   /*
    * NOTE: ns is NULL when called on the admin queue.
    */
@@@ -637,6 -561,7 +561,7 @@@ static int nvme_queue_rq(struct blk_mq_
         struct nvme_dev *dev = nvmeq->dev;
         struct request *req = bd->rq;
         struct nvme_command cmnd;
+       unsigned map_len;
         int ret = BLK_MQ_RQ_QUEUE_OK;
   
         /*
@@@ -652,23 -577,17 +577,17 @@@
                 }
         }
   
-       ret = nvme_init_iod(req, dev);
+       map_len = nvme_map_len(req);
+       ret = nvme_init_iod(req, map_len, dev);
         if (ret)
                 return ret;
   
-       if (req->cmd_flags & REQ_DISCARD) {
-               ret = nvme_setup_discard(nvmeq, ns, req, &cmnd);
-       } else {
-               if (req->cmd_type == REQ_TYPE_DRV_PRIV)
-                       memcpy(&cmnd, req->cmd, sizeof(cmnd));
-               else if (req->cmd_flags & REQ_FLUSH)
-                       nvme_setup_flush(ns, &cmnd);
-               else
-                       nvme_setup_rw(ns, req, &cmnd);
+       ret = nvme_setup_cmd(ns, req, &cmnd);
+       if (ret)
+               goto out;
   
-               if (req->nr_phys_segments)
-                       ret = nvme_map_data(dev, req, &cmnd);
-       }
+       if (req->nr_phys_segments)
+               ret = nvme_map_data(dev, req, map_len, &cmnd);
   
         if (ret)
                 goto out;
@@@ -764,7 -683,7 +683,7 @@@ static void __nvme_process_cq(struct nv
                  */
                 if (unlikely(nvmeq->qid == 0 &&
                                 cqe.command_id >= NVME_AQ_BLKMQ_DEPTH)) {
-                       nvme_complete_async_event(nvmeq->dev, &cqe);
+                       nvme_complete_async_event(&nvmeq->dev->ctrl, &cqe);
                         continue;
                 }
   
@@@ -833,21 -752,18 +752,18 @@@ static int nvme_poll(struct blk_mq_hw_c
         return 0;
   }
   
- static void nvme_async_event_work(struct work_struct *work)
+ static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl, int aer_idx)
   {
-       struct nvme_dev *dev = container_of(work, struct nvme_dev, async_work);
+       struct nvme_dev *dev = to_nvme_dev(ctrl);
         struct nvme_queue *nvmeq = dev->queues[0];
         struct nvme_command c;
   
         memset(&c, 0, sizeof(c));
         c.common.opcode = nvme_admin_async_event;
+       c.common.command_id = NVME_AQ_BLKMQ_DEPTH + aer_idx;
   
         spin_lock_irq(&nvmeq->q_lock);
-       while (dev->ctrl.event_limit > 0) {
-               c.common.command_id = NVME_AQ_BLKMQ_DEPTH +
-                       --dev->ctrl.event_limit;
-               __nvme_submit_cmd(nvmeq, &c);
-       }
+       __nvme_submit_cmd(nvmeq, &c);
         spin_unlock_irq(&nvmeq->q_lock);
   }
   
@@@ -939,7 -855,7 +855,7 @@@ static enum blk_eh_timer_return nvme_ti
          * cancellation error. All outstanding requests are completed on
          * shutdown, so we return BLK_EH_HANDLED.
          */
-       if (test_bit(NVME_CTRL_RESETTING, &dev->flags)) {
+       if (dev->ctrl.state == NVME_CTRL_RESETTING) {
                 dev_warn(dev->ctrl.device,
                          "I/O %d QID %d timeout, disable controller\n",
                          req->tag, nvmeq->qid);
@@@ -1003,16 -919,15 +919,15 @@@
         return BLK_EH_RESET_TIMER;
   }
   
- static void nvme_cancel_queue_ios(struct request *req, void *data, bool reserved)
+ static void nvme_cancel_io(struct request *req, void *data, bool reserved)
   {
-       struct nvme_queue *nvmeq = data;
         int status;
   
         if (!blk_mq_request_started(req))
                 return;
   
-       dev_dbg_ratelimited(nvmeq->dev->ctrl.device,
-                "Cancelling I/O %d QID %d\n", req->tag, nvmeq->qid);
+       dev_dbg_ratelimited(((struct nvme_dev *) data)->ctrl.device,
+                               "Cancelling I/O %d", req->tag);
   
         status = NVME_SC_ABORT_REQ;
         if (blk_queue_dying(req->q))
@@@ -1069,14 -984,6 +984,6 @@@ static int nvme_suspend_queue(struct nv
         return 0;
   }
   
- static void nvme_clear_queue(struct nvme_queue *nvmeq)
- {
-       spin_lock_irq(&nvmeq->q_lock);
-       if (nvmeq->tags && *nvmeq->tags)
-               blk_mq_all_tag_busy_iter(*nvmeq->tags, nvme_cancel_queue_ios, nvmeq);
-       spin_unlock_irq(&nvmeq->q_lock);
- }
- 
   static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
   {
         struct nvme_queue *nvmeq = dev->queues[0];
@@@ -1350,22 -1257,44 +1257,44 @@@ static int nvme_configure_admin_queue(s
         return result;
   }
   
+ static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
+ {
+ 
+       /* If true, indicates loss of adapter communication, possibly by an
+        * NVMe Subsystem reset.
+        */
+       bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
+ 
+       /* If there is a reset ongoing, we shouldn't reset again. */
+       if (work_busy(&dev->reset_work))
+               return false;
+ 
+       /* We shouldn't reset unless the controller is in a fatal error state
+        * _or_ if we lost the communication with it.
+        */
+       if (!(csts & NVME_CSTS_CFS) && !nssro)
+               return false;
+ 
+       /* If PCI error recovery process is happening, we cannot reset or
+        * the recovery mechanism will surely fail.
+        */
+       if (pci_channel_offline(to_pci_dev(dev->dev)))
+               return false;
+ 
+       return true;
+ }
+ 
   static void nvme_watchdog_timer(unsigned long data)
   {
         struct nvme_dev *dev = (struct nvme_dev *)data;
         u32 csts = readl(dev->bar + NVME_REG_CSTS);
   
-       /*
-        * Skip controllers currently under reset.
-        */
-       if (!work_pending(&dev->reset_work) && !work_busy(&dev->reset_work) &&
-           ((csts & NVME_CSTS_CFS) ||
-            (dev->subsystem && (csts & NVME_CSTS_NSSRO)))) {
-               if (queue_work(nvme_workq, &dev->reset_work)) {
+       /* Reset the controller only if nvme_should_reset() allows it. */
+       if (nvme_should_reset(dev, csts)) {
+               if (queue_work(nvme_workq, &dev->reset_work))
                         dev_warn(dev->dev,
                                 "Failed status: 0x%x, reset controller.\n",
                                 csts);
-               }
                 return;
         }
   
@@@ -1551,8 -1480,9 +1480,9 @@@ static int nvme_setup_io_queues(struct 
         return result;
   }
   
- static void nvme_set_irq_hints(struct nvme_dev *dev)
+ static void nvme_pci_post_scan(struct nvme_ctrl *ctrl)
   {
+       struct nvme_dev *dev = to_nvme_dev(ctrl);
         struct nvme_queue *nvmeq;
         int i;
   
@@@ -1567,16 -1497,6 +1497,6 @@@
         }
   }
   
- static void nvme_dev_scan(struct work_struct *work)
- {
-       struct nvme_dev *dev = container_of(work, struct nvme_dev, scan_work);
- 
-       if (!dev->tagset.tags)
-               return;
-       nvme_scan_namespaces(&dev->ctrl);
-       nvme_set_irq_hints(dev);
- }
- 
   static void nvme_del_queue_end(struct request *req, int error)
   {
         struct nvme_queue *nvmeq = req->end_io_data;
@@@ -1592,7 -1512,13 +1512,13 @@@ static void nvme_del_cq_end(struct requ
         if (!error) {
                 unsigned long flags;
   
-               spin_lock_irqsave(&nvmeq->q_lock, flags);
+               /*
+                * We might be called with the AQ q_lock held
+                * and the I/O queue q_lock should always
+                * nest inside the AQ one.
+                */
+               spin_lock_irqsave_nested(&nvmeq->q_lock, flags,
+                                       SINGLE_DEPTH_NESTING);
                 nvme_process_cq(nvmeq);
                 spin_unlock_irqrestore(&nvmeq->q_lock, flags);
         }
@@@ -1684,7 -1610,6 +1610,6 @@@ static int nvme_dev_add(struct nvme_de
                 nvme_free_queues(dev, dev->online_queues);
         }
   
-       nvme_queue_scan(dev);
         return 0;
   }
   
@@@ -1797,8 -1722,8 +1722,8 @@@ static void nvme_dev_disable(struct nvm
         }
         nvme_pci_disable(dev);
   
-       for (i = dev->queue_count - 1; i >= 0; i--)
-               nvme_clear_queue(dev->queues[i]);
+       blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_io, dev);
+       blk_mq_tagset_busy_iter(&dev->admin_tagset, nvme_cancel_io, dev);
         mutex_unlock(&dev->shutdown_lock);
   }
   
@@@ -1854,7 -1779,7 +1779,7 @@@ static void nvme_reset_work(struct work
         struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work);
         int result = -ENODEV;
   
-       if (WARN_ON(test_bit(NVME_CTRL_RESETTING, &dev->flags)))
+       if (WARN_ON(dev->ctrl.state == NVME_CTRL_RESETTING))
                 goto out;
   
         /*
@@@ -1864,11 -1789,9 +1789,9 @@@
         if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
                 nvme_dev_disable(dev, false);
   
-       if (test_bit(NVME_CTRL_REMOVING, &dev->flags))
+       if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING))
                 goto out;
   
-       set_bit(NVME_CTRL_RESETTING, &dev->flags);
- 
         result = nvme_pci_enable(dev);
         if (result)
                 goto out;
@@@ -1890,8 -1813,14 +1813,14 @@@
         if (result)
                 goto out;
   
-       dev->ctrl.event_limit = NVME_NR_AEN_COMMANDS;
-       queue_work(nvme_workq, &dev->async_work);
+       /*
+        * A controller that can not execute IO typically requires user
+        * intervention to correct. For such degraded controllers, the driver
+        * should not submit commands the user did not request, so skip
+        * registering for asynchronous event notification on this condition.
+        */
+       if (dev->online_queues > 1)
+               nvme_queue_async_events(&dev->ctrl);
   
         mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ));
   
@@@ -1901,13 -1830,20 +1830,20 @@@
          */
         if (dev->online_queues < 2) {
                 dev_warn(dev->ctrl.device, "IO queues not created\n");
+               nvme_kill_queues(&dev->ctrl);
                 nvme_remove_namespaces(&dev->ctrl);
         } else {
                 nvme_start_queues(&dev->ctrl);
                 nvme_dev_add(dev);
         }
   
-       clear_bit(NVME_CTRL_RESETTING, &dev->flags);
+       if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) {
+               dev_warn(dev->ctrl.device, "failed to mark controller live\n");
+               goto out;
+       }
+ 
+       if (dev->online_queues > 1)
+               nvme_queue_scan(&dev->ctrl);
         return;
   
    out:
@@@ -1955,13 -1891,6 +1891,6 @@@ static int nvme_pci_reg_read64(struct n
         return 0;
   }
   
- static bool nvme_pci_io_incapable(struct nvme_ctrl *ctrl)
- {
-       struct nvme_dev *dev = to_nvme_dev(ctrl);
- 
-       return !dev->bar || dev->online_queues < 2;
- }
- 
   static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl)
   {
         return nvme_reset(to_nvme_dev(ctrl));
@@@ -1972,9 -1901,10 +1901,10 @@@ static const struct nvme_ctrl_ops nvme_
         .reg_read32             = nvme_pci_reg_read32,
         .reg_write32            = nvme_pci_reg_write32,
         .reg_read64             = nvme_pci_reg_read64,
-       .io_incapable           = nvme_pci_io_incapable,
         .reset_ctrl             = nvme_pci_reset_ctrl,
         .free_ctrl              = nvme_pci_free_ctrl,
+       .post_scan              = nvme_pci_post_scan,
+       .submit_async_event     = nvme_pci_submit_async_event,
   };
   
   static int nvme_dev_map(struct nvme_dev *dev)
@@@ -2026,10 -1956,8 +1956,8 @@@ static int nvme_probe(struct pci_dev *p
         if (result)
                 goto free;
   
-       INIT_WORK(&dev->scan_work, nvme_dev_scan);
         INIT_WORK(&dev->reset_work, nvme_reset_work);
         INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
-       INIT_WORK(&dev->async_work, nvme_async_event_work);
         setup_timer(&dev->watchdog_timer, nvme_watchdog_timer,
                 (unsigned long)dev);
         mutex_init(&dev->shutdown_lock);
@@@ -2086,15 -2014,13 +2014,12 @@@ static void nvme_remove(struct pci_dev 
   {
         struct nvme_dev *dev = pci_get_drvdata(pdev);
   
-       set_bit(NVME_CTRL_REMOVING, &dev->flags);
+       nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
+ 
         pci_set_drvdata(pdev, NULL);
-       flush_work(&dev->async_work);
         flush_work(&dev->reset_work);
-       flush_work(&dev->scan_work);
-       nvme_remove_namespaces(&dev->ctrl);
         nvme_uninit_ctrl(&dev->ctrl);
         nvme_dev_disable(dev, true);
--      flush_work(&dev->reset_work);
         nvme_dev_remove_admin(dev);
         nvme_free_queues(dev, 0);
         nvme_release_cmb(dev);
diff --combined include/linux/blkdev.h

index b79131acf6c0cf76cb096a4d2721779c061d0277,57c085917da69c87d79faa0c6ad70d669fabcfcf..1fd8fdff2f813305fd7d4adb37a6d716b59aecc5
--- 1/include/linux/blkdev.h
--- 2/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@@ -433,8 -433,6 +433,6 @@@ struct request_queue 
         /*
          * for flush operations
          */
-       unsigned int            flush_flags;
-       unsigned int            flush_not_queueable:1;
         struct blk_flush_queue  *fq;
   
         struct list_head        requeue_list;
@@@ -493,6 -491,7 +491,7 @@@
   #define QUEUE_FLAG_POLL              22       /* IO polling enabled if set */
   #define QUEUE_FLAG_WC        23       /* Write back caching */
   #define QUEUE_FLAG_FUA               24       /* device supports FUA writes */
+ #define QUEUE_FLAG_FLUSH_NQ    25     /* flush not queueable */
   
   #define QUEUE_FLAG_DEFAULT    ((1 << QUEUE_FLAG_IO_STAT) |            \
                                  (1 << QUEUE_FLAG_STACKABLE)    |       \
@@@ -1009,7 -1008,6 +1008,6 @@@ extern void blk_queue_update_dma_alignm
   extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
   extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
   extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
- extern void blk_queue_flush(struct request_queue *q, unsigned int flush);
   extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable);
   extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua);
   extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
@@@ -1131,8 -1129,6 +1129,8 @@@ static inline struct request *blk_map_q
   extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *);
   extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
                 sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
+ +extern int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
+ +              sector_t nr_sects, gfp_t gfp_mask, int type, struct bio **biop);
   extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
                 sector_t nr_sects, gfp_t gfp_mask, struct page *page);
   extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
@@@ -1368,7 -1364,7 +1366,7 @@@ static inline unsigned int block_size(s
   
   static inline bool queue_flush_queueable(struct request_queue *q)
   {
-       return !q->flush_not_queueable;
+       return !test_bit(QUEUE_FLAG_FLUSH_NQ, &q->queue_flags);
   }
   
   typedef struct {struct page *v;} Sector;
author	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 17 May 2016 23:03:32 +0000 (16:03 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 17 May 2016 23:03:32 +0000 (16:03 -0700)
		1	2
drivers/block/loop.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/md/md.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/mmc/card/block.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/nvme/host/pci.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/blkdev.h	patch \|	diff1 \|	diff2 \|	blob \| history