Pull block fixes from Jens Axboe:
- NVMe pull request from Christoph, fixing namespace locking when
dealing with the effects log, and a rapid add/remove issue (Keith)
- blktrace tweak, ensuring requests with -1 sectors are shown (Jan)
- link power management quirk for a Samsung SSD (Hans)
- m68k nfblock dynamic major number fix (Chengguang)
- series fixing blk-iolatency inflight counter issue (Liu)
- ensure that we clear ->private when setting up the aio kiocb (Mike)
- __find_get_block_slow() rate limit print (Tetsuo)
* tag 'for-linus-20190209' of git://git.kernel.dk/linux-block:
blk-mq: remove duplicated definition of blk_mq_freeze_queue
Blk-iolatency: warn on negative inflight IO counter
blk-iolatency: fix IO hang due to negative inflight counter
blktrace: Show requests without sector
fs: ratelimit __find_get_block_slow() failure message.
m68k: set proper major_num when specifying module param major_num
libata: Add NOLPM quirk for SAMSUNG MZ7TE512HMHP-000L1 SSD
nvme-pci: fix rapid add remove sequence
nvme: lock NS list changes while handling command effects
aio: initialize kiocb private in case any filesystems expect it.
static int __init nfhd_init(void)
{
u32 blocks, bsize;
+ int ret;
int i;
nfhd_id = nf_get_id("XHDI");
if (!nfhd_id)
return -ENODEV;
- major_num = register_blkdev(major_num, "nfhd");
- if (major_num <= 0) {
+ ret = register_blkdev(major_num, "nfhd");
+ if (ret < 0) {
pr_warn("nfhd: unable to get major number\n");
- return major_num;
+ return ret;
}
+ if (!major_num)
+ major_num = ret;
+
for (i = NFHD_DEV_OFFSET; i < 24; i++) {
if (nfhd_get_capacity(i, 0, &blocks, &bsize))
continue;
#include <linux/sched/loadavg.h>
#include <linux/sched/signal.h>
#include <trace/events/block.h>
+#include <linux/blk-mq.h>
#include "blk-rq-qos.h"
#include "blk-stat.h"
u64 now = ktime_to_ns(ktime_get());
bool issue_as_root = bio_issue_as_root_blkg(bio);
bool enabled = false;
+ int inflight = 0;
blkg = bio->bi_blkg;
if (!blkg || !bio_flagged(bio, BIO_TRACKED))
return;
enabled = blk_iolatency_enabled(iolat->blkiolat);
+ if (!enabled)
+ return;
+
while (blkg && blkg->parent) {
iolat = blkg_to_lat(blkg);
if (!iolat) {
}
rqw = &iolat->rq_wait;
- atomic_dec(&rqw->inflight);
- if (!enabled || iolat->min_lat_nsec == 0)
+ inflight = atomic_dec_return(&rqw->inflight);
+ WARN_ON_ONCE(inflight < 0);
+ if (iolat->min_lat_nsec == 0)
goto next;
iolatency_record_time(iolat, &bio->bi_issue, now,
issue_as_root);
return 0;
}
-static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
+/*
+ * return 1 for enabling iolatency, return -1 for disabling iolatency, otherwise
+ * return 0.
+ */
+static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
{
struct iolatency_grp *iolat = blkg_to_lat(blkg);
- struct blk_iolatency *blkiolat = iolat->blkiolat;
u64 oldval = iolat->min_lat_nsec;
iolat->min_lat_nsec = val;
BLKIOLATENCY_MAX_WIN_SIZE);
if (!oldval && val)
- atomic_inc(&blkiolat->enabled);
+ return 1;
if (oldval && !val)
- atomic_dec(&blkiolat->enabled);
+ return -1;
+ return 0;
}
static void iolatency_clear_scaling(struct blkcg_gq *blkg)
u64 lat_val = 0;
u64 oldval;
int ret;
+ int enable = 0;
ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx);
if (ret)
blkg = ctx.blkg;
oldval = iolat->min_lat_nsec;
- iolatency_set_min_lat_nsec(blkg, lat_val);
+ enable = iolatency_set_min_lat_nsec(blkg, lat_val);
+ if (enable) {
+ WARN_ON_ONCE(!blk_get_queue(blkg->q));
+ blkg_get(blkg);
+ }
+
if (oldval != iolat->min_lat_nsec) {
iolatency_clear_scaling(blkg);
}
ret = 0;
out:
blkg_conf_finish(&ctx);
+ if (ret == 0 && enable) {
+ struct iolatency_grp *tmp = blkg_to_lat(blkg);
+ struct blk_iolatency *blkiolat = tmp->blkiolat;
+
+ blk_mq_freeze_queue(blkg->q);
+
+ if (enable == 1)
+ atomic_inc(&blkiolat->enabled);
+ else if (enable == -1)
+ atomic_dec(&blkiolat->enabled);
+ else
+ WARN_ON_ONCE(1);
+
+ blk_mq_unfreeze_queue(blkg->q);
+
+ blkg_put(blkg);
+ blk_put_queue(blkg->q);
+ }
return ret ?: nbytes;
}
{
struct iolatency_grp *iolat = pd_to_lat(pd);
struct blkcg_gq *blkg = lat_to_blkg(iolat);
+ struct blk_iolatency *blkiolat = iolat->blkiolat;
+ int ret;
- iolatency_set_min_lat_nsec(blkg, 0);
+ ret = iolatency_set_min_lat_nsec(blkg, 0);
+ if (ret == 1)
+ atomic_inc(&blkiolat->enabled);
+ if (ret == -1)
+ atomic_dec(&blkiolat->enabled);
iolatency_clear_scaling(blkg);
}
struct kobject kobj;
} ____cacheline_aligned_in_smp;
-void blk_mq_freeze_queue(struct request_queue *q);
void blk_mq_free_queue(struct request_queue *q);
int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
void blk_mq_wake_waiters(struct request_queue *q);
{ "SAMSUNG MZMPC128HBFU-000MV", "CXM14M1Q", ATA_HORKAGE_NOLPM, },
{ "SAMSUNG SSD PM830 mSATA *", "CXM13D1Q", ATA_HORKAGE_NOLPM, },
{ "SAMSUNG MZ7TD256HAFV-000L9", NULL, ATA_HORKAGE_NOLPM, },
+ { "SAMSUNG MZ7TE512HMHP-000L1", "EXT06L0Q", ATA_HORKAGE_NOLPM, },
/* devices that don't properly handle queued TRIM commands */
{ "Micron_M500IT_*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
* effects say only one namespace is affected.
*/
if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) {
+ mutex_lock(&ctrl->scan_lock);
nvme_start_freeze(ctrl);
nvme_wait_freeze(ctrl);
}
*/
if (effects & NVME_CMD_EFFECTS_LBCC)
nvme_update_formats(ctrl);
- if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK))
+ if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) {
nvme_unfreeze(ctrl);
+ mutex_unlock(&ctrl->scan_lock);
+ }
if (effects & NVME_CMD_EFFECTS_CCC)
nvme_init_identify(ctrl);
if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC))
if (nvme_identify_ctrl(ctrl, &id))
return;
+ mutex_lock(&ctrl->scan_lock);
nn = le32_to_cpu(id->nn);
if (ctrl->vs >= NVME_VS(1, 1, 0) &&
!(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
}
nvme_scan_ns_sequential(ctrl, nn);
out_free_id:
+ mutex_unlock(&ctrl->scan_lock);
kfree(id);
down_write(&ctrl->namespaces_rwsem);
list_sort(NULL, &ctrl->namespaces, ns_cmp);
ctrl->state = NVME_CTRL_NEW;
spin_lock_init(&ctrl->lock);
+ mutex_init(&ctrl->scan_lock);
INIT_LIST_HEAD(&ctrl->namespaces);
init_rwsem(&ctrl->namespaces_rwsem);
ctrl->dev = dev;
enum nvme_ctrl_state state;
bool identified;
spinlock_t lock;
+ struct mutex scan_lock;
const struct nvme_ctrl_ops *ops;
struct request_queue *admin_q;
struct request_queue *connect_q;
if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
nvme_dev_disable(dev, false);
- /*
- * Introduce CONNECTING state from nvme-fc/rdma transports to mark the
- * initializing procedure here.
- */
- if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) {
- dev_warn(dev->ctrl.device,
- "failed to mark controller CONNECTING\n");
- goto out;
- }
-
+ mutex_lock(&dev->shutdown_lock);
result = nvme_pci_enable(dev);
if (result)
goto out;
*/
dev->ctrl.max_hw_sectors = NVME_MAX_KB_SZ << 1;
dev->ctrl.max_segments = NVME_MAX_SEGS;
+ mutex_unlock(&dev->shutdown_lock);
+
+ /*
+ * Introduce CONNECTING state from nvme-fc/rdma transports to mark the
+ * initializing procedure here.
+ */
+ if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) {
+ dev_warn(dev->ctrl.device,
+ "failed to mark controller CONNECTING\n");
+ goto out;
+ }
result = nvme_init_identify(&dev->ctrl);
if (result)
if (unlikely(!req->ki_filp))
return -EBADF;
req->ki_complete = aio_complete_rw;
+ req->private = NULL;
req->ki_pos = iocb->aio_offset;
req->ki_flags = iocb_flags(req->ki_filp);
if (iocb->aio_flags & IOCB_FLAG_RESFD)
struct buffer_head *head;
struct page *page;
int all_mapped = 1;
+ static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1);
index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED);
* file io on the block device and getblk. It gets dealt with
* elsewhere, don't buffer_error if we had some unmapped buffers
*/
- if (all_mapped) {
- printk("__find_get_block_slow() failed. "
- "block=%llu, b_blocknr=%llu\n",
- (unsigned long long)block,
- (unsigned long long)bh->b_blocknr);
- printk("b_state=0x%08lx, b_size=%zu\n",
- bh->b_state, bh->b_size);
- printk("device %pg blocksize: %d\n", bdev,
- 1 << bd_inode->i_blkbits);
+ ratelimit_set_flags(&last_warned, RATELIMIT_MSG_ON_RELEASE);
+ if (all_mapped && __ratelimit(&last_warned)) {
+ printk("__find_get_block_slow() failed. block=%llu, "
+ "b_blocknr=%llu, b_state=0x%08lx, b_size=%zu, "
+ "device %pg blocksize: %d\n",
+ (unsigned long long)block,
+ (unsigned long long)bh->b_blocknr,
+ bh->b_state, bh->b_size, bdev,
+ 1 << bd_inode->i_blkbits);
}
out_unlock:
spin_unlock(&bd_mapping->private_lock);
static inline sector_t blk_rq_trace_sector(struct request *rq)
{
- return blk_rq_is_passthrough(rq) ? 0 : blk_rq_pos(rq);
+ /*
+ * Tracing should ignore starting sector for passthrough requests and
+ * requests where starting sector didn't get set.
+ */
+ if (blk_rq_is_passthrough(rq) || blk_rq_pos(rq) == (sector_t)-1)
+ return 0;
+ return blk_rq_pos(rq);
}
static inline unsigned int blk_rq_trace_nr_sectors(struct request *rq)