NVMe: Use IDA for namespace disk naming
drivers/nvme/host/core.c (linux-2.6-block.git)
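The change captured by this listing is namespace disk naming via an IDA: nvme_alloc_ns() takes ns->instance from ida_simple_get() and builds the "nvme%dn%d" disk name from it, and nvme_free_ns() only returns the number through ida_simple_remove() when the last reference drops, which appears intended to keep names unique when namespaces are detached and re-attached. A minimal sketch of that allocate/name/release pattern follows; it is illustrative only, and my_ctrl/my_ns are hypothetical stand-ins for the real structures defined in nvme.h and used in the file below.

#include <linux/idr.h>
#include <linux/kernel.h>

/* Hypothetical stand-ins for the nvme_ctrl/nvme_ns fields used below. */
struct my_ctrl { struct ida ns_ida; int instance; };
struct my_ns { int instance; char name[16]; };

static int my_ns_set_name(struct my_ctrl *ctrl, struct my_ns *ns)
{
	/* Mirrors nvme_alloc_ns(): grab a per-controller instance number... */
	ns->instance = ida_simple_get(&ctrl->ns_ida, 1, 0, GFP_KERNEL);
	if (ns->instance < 0)
		return ns->instance;
	/* ...and derive the disk name from it rather than from the NSID. */
	snprintf(ns->name, sizeof(ns->name), "nvme%dn%d",
		 ctrl->instance, ns->instance);
	return 0;
}

static void my_ns_put_name(struct my_ctrl *ctrl, struct my_ns *ns)
{
	/* Mirrors nvme_free_ns(): return the number once the last ref is gone. */
	ida_simple_remove(&ctrl->ns_ida, ns->instance);
}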
/*
 * NVM Express device driver
 * Copyright (c) 2011-2014, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/hdreg.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/list_sort.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/pr.h>
#include <linux/ptrace.h>
#include <linux/nvme_ioctl.h>
#include <linux/t10-pi.h>
#include <scsi/sg.h>
#include <asm/unaligned.h>

#include "nvme.h"

#define NVME_MINORS		(1U << MINORBITS)

static int nvme_major;
module_param(nvme_major, int, 0);

static int nvme_char_major;
module_param(nvme_char_major, int, 0);

static LIST_HEAD(nvme_ctrl_list);
DEFINE_SPINLOCK(dev_list_lock);

static struct class *nvme_class;

static void nvme_free_ns(struct kref *kref)
{
	struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref);

	if (ns->type == NVME_NS_LIGHTNVM)
		nvme_nvm_unregister(ns->queue, ns->disk->disk_name);

	spin_lock(&dev_list_lock);
	ns->disk->private_data = NULL;
	spin_unlock(&dev_list_lock);

	put_disk(ns->disk);
	ida_simple_remove(&ns->ctrl->ns_ida, ns->instance);
	nvme_put_ctrl(ns->ctrl);
	kfree(ns);
}

static void nvme_put_ns(struct nvme_ns *ns)
{
	kref_put(&ns->kref, nvme_free_ns);
}

static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk)
{
	struct nvme_ns *ns;

	spin_lock(&dev_list_lock);
	ns = disk->private_data;
	if (ns && !kref_get_unless_zero(&ns->kref))
		ns = NULL;
	spin_unlock(&dev_list_lock);

	return ns;
}

void nvme_requeue_req(struct request *req)
{
	unsigned long flags;

	blk_mq_requeue_request(req);
	spin_lock_irqsave(req->q->queue_lock, flags);
	if (!blk_queue_stopped(req->q))
		blk_mq_kick_requeue_list(req->q);
	spin_unlock_irqrestore(req->q->queue_lock, flags);
}

struct request *nvme_alloc_request(struct request_queue *q,
		struct nvme_command *cmd, unsigned int flags)
{
	bool write = cmd->common.opcode & 1;
	struct request *req;

	req = blk_mq_alloc_request(q, write, flags);
	if (IS_ERR(req))
		return req;

	req->cmd_type = REQ_TYPE_DRV_PRIV;
	req->cmd_flags |= REQ_FAILFAST_DRIVER;
	req->__data_len = 0;
	req->__sector = (sector_t) -1;
	req->bio = req->biotail = NULL;

	req->cmd = (unsigned char *)cmd;
	req->cmd_len = sizeof(struct nvme_command);
	req->special = (void *)0;

	return req;
}

/*
 * Returns 0 on success. If the result is negative, it's a Linux error code;
 * if the result is positive, it's an NVM Express status code
 */
int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
		void *buffer, unsigned bufflen, u32 *result, unsigned timeout)
{
	struct request *req;
	int ret;

	req = nvme_alloc_request(q, cmd, 0);
	if (IS_ERR(req))
		return PTR_ERR(req);

	req->timeout = timeout ? timeout : ADMIN_TIMEOUT;

	if (buffer && bufflen) {
		ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL);
		if (ret)
			goto out;
	}

	blk_execute_rq(req->q, NULL, req, 0);
	if (result)
		*result = (u32)(uintptr_t)req->special;
	ret = req->errors;
 out:
	blk_mq_free_request(req);
	return ret;
}

int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
		void *buffer, unsigned bufflen)
{
	return __nvme_submit_sync_cmd(q, cmd, buffer, bufflen, NULL, 0);
}

int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
		void __user *ubuffer, unsigned bufflen,
		void __user *meta_buffer, unsigned meta_len, u32 meta_seed,
		u32 *result, unsigned timeout)
{
	bool write = cmd->common.opcode & 1;
	struct nvme_ns *ns = q->queuedata;
	struct gendisk *disk = ns ? ns->disk : NULL;
	struct request *req;
	struct bio *bio = NULL;
	void *meta = NULL;
	int ret;

	req = nvme_alloc_request(q, cmd, 0);
	if (IS_ERR(req))
		return PTR_ERR(req);

	req->timeout = timeout ? timeout : ADMIN_TIMEOUT;

	if (ubuffer && bufflen) {
		ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen,
				GFP_KERNEL);
		if (ret)
			goto out;
		bio = req->bio;

		if (!disk)
			goto submit;
		bio->bi_bdev = bdget_disk(disk, 0);
		if (!bio->bi_bdev) {
			ret = -ENODEV;
			goto out_unmap;
		}

		if (meta_buffer) {
			struct bio_integrity_payload *bip;

			meta = kmalloc(meta_len, GFP_KERNEL);
			if (!meta) {
				ret = -ENOMEM;
				goto out_unmap;
			}

			if (write) {
				if (copy_from_user(meta, meta_buffer,
						meta_len)) {
					ret = -EFAULT;
					goto out_free_meta;
				}
			}

			bip = bio_integrity_alloc(bio, GFP_KERNEL, 1);
			if (IS_ERR(bip)) {
				ret = PTR_ERR(bip);
				goto out_free_meta;
			}

			bip->bip_iter.bi_size = meta_len;
			bip->bip_iter.bi_sector = meta_seed;

			ret = bio_integrity_add_page(bio, virt_to_page(meta),
					meta_len, offset_in_page(meta));
			if (ret != meta_len) {
				ret = -ENOMEM;
				goto out_free_meta;
			}
		}
	}
 submit:
	blk_execute_rq(req->q, disk, req, 0);
	ret = req->errors;
	if (result)
		*result = (u32)(uintptr_t)req->special;
	if (meta && !ret && !write) {
		if (copy_to_user(meta_buffer, meta, meta_len))
			ret = -EFAULT;
	}
 out_free_meta:
	kfree(meta);
 out_unmap:
	if (bio) {
		if (disk && bio->bi_bdev)
			bdput(bio->bi_bdev);
		blk_rq_unmap_user(bio);
	}
 out:
	blk_mq_free_request(req);
	return ret;
}

int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
		void __user *ubuffer, unsigned bufflen, u32 *result,
		unsigned timeout)
{
	return __nvme_submit_user_cmd(q, cmd, ubuffer, bufflen, NULL, 0, 0,
			result, timeout);
}

int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
{
	struct nvme_command c = { };
	int error;

	/* gcc-4.4.4 (at least) has issues with initializers and anon unions */
	c.identify.opcode = nvme_admin_identify;
	c.identify.cns = cpu_to_le32(1);

	*id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
	if (!*id)
		return -ENOMEM;

	error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
			sizeof(struct nvme_id_ctrl));
	if (error)
		kfree(*id);
	return error;
}

static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *ns_list)
{
	struct nvme_command c = { };

	c.identify.opcode = nvme_admin_identify;
	c.identify.cns = cpu_to_le32(2);
	c.identify.nsid = cpu_to_le32(nsid);
	return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000);
}

int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
		struct nvme_id_ns **id)
{
	struct nvme_command c = { };
	int error;

	/* gcc-4.4.4 (at least) has issues with initializers and anon unions */
	c.identify.opcode = nvme_admin_identify,
	c.identify.nsid = cpu_to_le32(nsid),

	*id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL);
	if (!*id)
		return -ENOMEM;

	error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
			sizeof(struct nvme_id_ns));
	if (error)
		kfree(*id);
	return error;
}

int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
		dma_addr_t dma_addr, u32 *result)
{
	struct nvme_command c;

	memset(&c, 0, sizeof(c));
	c.features.opcode = nvme_admin_get_features;
	c.features.nsid = cpu_to_le32(nsid);
	c.features.prp1 = cpu_to_le64(dma_addr);
	c.features.fid = cpu_to_le32(fid);

	return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0, result, 0);
}

int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
		dma_addr_t dma_addr, u32 *result)
{
	struct nvme_command c;

	memset(&c, 0, sizeof(c));
	c.features.opcode = nvme_admin_set_features;
	c.features.prp1 = cpu_to_le64(dma_addr);
	c.features.fid = cpu_to_le32(fid);
	c.features.dword11 = cpu_to_le32(dword11);

	return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0, result, 0);
}

int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log)
{
	struct nvme_command c = { };
	int error;

	c.common.opcode = nvme_admin_get_log_page,
	c.common.nsid = cpu_to_le32(0xFFFFFFFF),
	c.common.cdw10[0] = cpu_to_le32(
			(((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
			NVME_LOG_SMART),

	*log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
	if (!*log)
		return -ENOMEM;

	error = nvme_submit_sync_cmd(dev->admin_q, &c, *log,
			sizeof(struct nvme_smart_log));
	if (error)
		kfree(*log);
	return error;
}

int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
{
	u32 q_count = (*count - 1) | ((*count - 1) << 16);
	u32 result;
	int status, nr_io_queues;

	status = nvme_set_features(ctrl, NVME_FEAT_NUM_QUEUES, q_count, 0,
			&result);
	if (status)
		return status;

	nr_io_queues = min(result & 0xffff, result >> 16) + 1;
	*count = min(*count, nr_io_queues);
	return 0;
}

static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
{
	struct nvme_user_io io;
	struct nvme_command c;
	unsigned length, meta_len;
	void __user *metadata;

	if (copy_from_user(&io, uio, sizeof(io)))
		return -EFAULT;

	switch (io.opcode) {
	case nvme_cmd_write:
	case nvme_cmd_read:
	case nvme_cmd_compare:
		break;
	default:
		return -EINVAL;
	}

	length = (io.nblocks + 1) << ns->lba_shift;
	meta_len = (io.nblocks + 1) * ns->ms;
	metadata = (void __user *)(uintptr_t)io.metadata;

	if (ns->ext) {
		length += meta_len;
		meta_len = 0;
	} else if (meta_len) {
		if ((io.metadata & 3) || !io.metadata)
			return -EINVAL;
	}

	memset(&c, 0, sizeof(c));
	c.rw.opcode = io.opcode;
	c.rw.flags = io.flags;
	c.rw.nsid = cpu_to_le32(ns->ns_id);
	c.rw.slba = cpu_to_le64(io.slba);
	c.rw.length = cpu_to_le16(io.nblocks);
	c.rw.control = cpu_to_le16(io.control);
	c.rw.dsmgmt = cpu_to_le32(io.dsmgmt);
	c.rw.reftag = cpu_to_le32(io.reftag);
	c.rw.apptag = cpu_to_le16(io.apptag);
	c.rw.appmask = cpu_to_le16(io.appmask);

	return __nvme_submit_user_cmd(ns->queue, &c,
			(void __user *)(uintptr_t)io.addr, length,
			metadata, meta_len, io.slba, NULL, 0);
}

static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
			struct nvme_passthru_cmd __user *ucmd)
{
	struct nvme_passthru_cmd cmd;
	struct nvme_command c;
	unsigned timeout = 0;
	int status;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;
	if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
		return -EFAULT;

	memset(&c, 0, sizeof(c));
	c.common.opcode = cmd.opcode;
	c.common.flags = cmd.flags;
	c.common.nsid = cpu_to_le32(cmd.nsid);
	c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
	c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
	c.common.cdw10[0] = cpu_to_le32(cmd.cdw10);
	c.common.cdw10[1] = cpu_to_le32(cmd.cdw11);
	c.common.cdw10[2] = cpu_to_le32(cmd.cdw12);
	c.common.cdw10[3] = cpu_to_le32(cmd.cdw13);
	c.common.cdw10[4] = cpu_to_le32(cmd.cdw14);
	c.common.cdw10[5] = cpu_to_le32(cmd.cdw15);

	if (cmd.timeout_ms)
		timeout = msecs_to_jiffies(cmd.timeout_ms);

	status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
			(void __user *)(uintptr_t)cmd.addr, cmd.data_len,
			&cmd.result, timeout);
	if (status >= 0) {
		if (put_user(cmd.result, &ucmd->result))
			return -EFAULT;
	}

	return status;
}

static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
		unsigned int cmd, unsigned long arg)
{
	struct nvme_ns *ns = bdev->bd_disk->private_data;

	switch (cmd) {
	case NVME_IOCTL_ID:
		force_successful_syscall_return();
		return ns->ns_id;
	case NVME_IOCTL_ADMIN_CMD:
		return nvme_user_cmd(ns->ctrl, NULL, (void __user *)arg);
	case NVME_IOCTL_IO_CMD:
		return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg);
	case NVME_IOCTL_SUBMIT_IO:
		return nvme_submit_io(ns, (void __user *)arg);
#ifdef CONFIG_BLK_DEV_NVME_SCSI
	case SG_GET_VERSION_NUM:
		return nvme_sg_get_version_num((void __user *)arg);
	case SG_IO:
		return nvme_sg_io(ns, (void __user *)arg);
#endif
	default:
		return -ENOTTY;
	}
}

#ifdef CONFIG_COMPAT
static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
			unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case SG_IO:
		return -ENOIOCTLCMD;
	}
	return nvme_ioctl(bdev, mode, cmd, arg);
}
#else
#define nvme_compat_ioctl	NULL
#endif

static int nvme_open(struct block_device *bdev, fmode_t mode)
{
	return nvme_get_ns_from_disk(bdev->bd_disk) ? 0 : -ENXIO;
}

static void nvme_release(struct gendisk *disk, fmode_t mode)
{
	nvme_put_ns(disk->private_data);
}

static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
	/* some standard values */
	geo->heads = 1 << 6;
	geo->sectors = 1 << 5;
	geo->cylinders = get_capacity(bdev->bd_disk) >> 11;
	return 0;
}

#ifdef CONFIG_BLK_DEV_INTEGRITY
static void nvme_init_integrity(struct nvme_ns *ns)
{
	struct blk_integrity integrity;

	switch (ns->pi_type) {
	case NVME_NS_DPS_PI_TYPE3:
		integrity.profile = &t10_pi_type3_crc;
		break;
	case NVME_NS_DPS_PI_TYPE1:
	case NVME_NS_DPS_PI_TYPE2:
		integrity.profile = &t10_pi_type1_crc;
		break;
	default:
		integrity.profile = NULL;
		break;
	}
	integrity.tuple_size = ns->ms;
	blk_integrity_register(ns->disk, &integrity);
	blk_queue_max_integrity_segments(ns->queue, 1);
}
#else
static void nvme_init_integrity(struct nvme_ns *ns)
{
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */

static void nvme_config_discard(struct nvme_ns *ns)
{
	u32 logical_block_size = queue_logical_block_size(ns->queue);
	ns->queue->limits.discard_zeroes_data = 0;
	ns->queue->limits.discard_alignment = logical_block_size;
	ns->queue->limits.discard_granularity = logical_block_size;
	blk_queue_max_discard_sectors(ns->queue, 0xffffffff);
	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
}

static int nvme_revalidate_disk(struct gendisk *disk)
{
	struct nvme_ns *ns = disk->private_data;
	struct nvme_id_ns *id;
	u8 lbaf, pi_type;
	u16 old_ms;
	unsigned short bs;

	if (nvme_identify_ns(ns->ctrl, ns->ns_id, &id)) {
		dev_warn(ns->ctrl->dev, "%s: Identify failure nvme%dn%d\n",
				__func__, ns->ctrl->instance, ns->ns_id);
		return -ENODEV;
	}
	if (id->ncap == 0) {
		kfree(id);
		return -ENODEV;
	}

	if (nvme_nvm_ns_supported(ns, id) && ns->type != NVME_NS_LIGHTNVM) {
		if (nvme_nvm_register(ns->queue, disk->disk_name)) {
			dev_warn(ns->ctrl->dev,
				"%s: LightNVM init failure\n", __func__);
			kfree(id);
			return -ENODEV;
		}
		ns->type = NVME_NS_LIGHTNVM;
	}

	if (ns->ctrl->vs >= NVME_VS(1, 1))
		memcpy(ns->eui, id->eui64, sizeof(ns->eui));
	if (ns->ctrl->vs >= NVME_VS(1, 2))
		memcpy(ns->uuid, id->nguid, sizeof(ns->uuid));

	old_ms = ns->ms;
	lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
	ns->lba_shift = id->lbaf[lbaf].ds;
	ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
	ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);

	/*
	 * If identify namespace failed, use default 512 byte block size so
	 * block layer can use before failing read/write for 0 capacity.
	 */
	if (ns->lba_shift == 0)
		ns->lba_shift = 9;
	bs = 1 << ns->lba_shift;
	/* XXX: PI implementation requires metadata equal t10 pi tuple size */
	pi_type = ns->ms == sizeof(struct t10_pi_tuple) ?
					id->dps & NVME_NS_DPS_PI_MASK : 0;

	blk_mq_freeze_queue(disk->queue);
	if (blk_get_integrity(disk) && (ns->pi_type != pi_type ||
				ns->ms != old_ms ||
				bs != queue_logical_block_size(disk->queue) ||
				(ns->ms && ns->ext)))
		blk_integrity_unregister(disk);

	ns->pi_type = pi_type;
	blk_queue_logical_block_size(ns->queue, bs);

	if (ns->ms && !blk_get_integrity(disk) && !ns->ext)
		nvme_init_integrity(ns);
	if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
		set_capacity(disk, 0);
	else
		set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));

	if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM)
		nvme_config_discard(ns);
	blk_mq_unfreeze_queue(disk->queue);

	kfree(id);
	return 0;
}

static char nvme_pr_type(enum pr_type type)
{
	switch (type) {
	case PR_WRITE_EXCLUSIVE:
		return 1;
	case PR_EXCLUSIVE_ACCESS:
		return 2;
	case PR_WRITE_EXCLUSIVE_REG_ONLY:
		return 3;
	case PR_EXCLUSIVE_ACCESS_REG_ONLY:
		return 4;
	case PR_WRITE_EXCLUSIVE_ALL_REGS:
		return 5;
	case PR_EXCLUSIVE_ACCESS_ALL_REGS:
		return 6;
	default:
		return 0;
	}
};

static int nvme_pr_command(struct block_device *bdev, u32 cdw10,
				u64 key, u64 sa_key, u8 op)
{
	struct nvme_ns *ns = bdev->bd_disk->private_data;
	struct nvme_command c;
	u8 data[16] = { 0, };

	put_unaligned_le64(key, &data[0]);
	put_unaligned_le64(sa_key, &data[8]);

	memset(&c, 0, sizeof(c));
	c.common.opcode = op;
	c.common.nsid = cpu_to_le32(ns->ns_id);
	c.common.cdw10[0] = cpu_to_le32(cdw10);

	return nvme_submit_sync_cmd(ns->queue, &c, data, 16);
}

static int nvme_pr_register(struct block_device *bdev, u64 old,
		u64 new, unsigned flags)
{
	u32 cdw10;

	if (flags & ~PR_FL_IGNORE_KEY)
		return -EOPNOTSUPP;

	cdw10 = old ? 2 : 0;
	cdw10 |= (flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0;
	cdw10 |= (1 << 30) | (1 << 31); /* PTPL=1 */
	return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register);
}

static int nvme_pr_reserve(struct block_device *bdev, u64 key,
		enum pr_type type, unsigned flags)
{
	u32 cdw10;

	if (flags & ~PR_FL_IGNORE_KEY)
		return -EOPNOTSUPP;

	cdw10 = nvme_pr_type(type) << 8;
	cdw10 |= ((flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0);
	return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire);
}

static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new,
		enum pr_type type, bool abort)
{
	/* parenthesize the ternary: ?: binds looser than | */
	u32 cdw10 = nvme_pr_type(type) << 8 | (abort ? 2 : 1);
	return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire);
}

static int nvme_pr_clear(struct block_device *bdev, u64 key)
{
	u32 cdw10 = 1 | (key ? 1 << 3 : 0);
	return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_register);
}

static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
{
	u32 cdw10 = nvme_pr_type(type) << 8 | (key ? 1 << 3 : 0);
	return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
}

static const struct pr_ops nvme_pr_ops = {
	.pr_register	= nvme_pr_register,
	.pr_reserve	= nvme_pr_reserve,
	.pr_release	= nvme_pr_release,
	.pr_preempt	= nvme_pr_preempt,
	.pr_clear	= nvme_pr_clear,
};

static const struct block_device_operations nvme_fops = {
	.owner		= THIS_MODULE,
	.ioctl		= nvme_ioctl,
	.compat_ioctl	= nvme_compat_ioctl,
	.open		= nvme_open,
	.release	= nvme_release,
	.getgeo		= nvme_getgeo,
	.revalidate_disk= nvme_revalidate_disk,
	.pr_ops		= &nvme_pr_ops,
};

static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
{
	unsigned long timeout =
		((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
	u32 csts, bit = enabled ? NVME_CSTS_RDY : 0;
	int ret;

	while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) {
		if ((csts & NVME_CSTS_RDY) == bit)
			break;

		msleep(100);
		if (fatal_signal_pending(current))
			return -EINTR;
		if (time_after(jiffies, timeout)) {
			dev_err(ctrl->dev,
				"Device not ready; aborting %s\n", enabled ?
						"initialisation" : "reset");
			return -ENODEV;
		}
	}

	return ret;
}

/*
 * If the device has been passed off to us in an enabled state, just clear
 * the enabled bit. The spec says we should set the 'shutdown notification
 * bits', but doing so may cause the device to complete commands to the
 * admin queue ... and we don't know what memory that might be pointing at!
 */
int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
{
	int ret;

	ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
	ctrl->ctrl_config &= ~NVME_CC_ENABLE;

	ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
	if (ret)
		return ret;
	return nvme_wait_ready(ctrl, cap, false);
}

int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
{
	/*
	 * Default to a 4K page size, with the intention to update this
	 * path in the future to accommodate architectures with differing
	 * kernel and IO page sizes.
	 */
	unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12, page_shift = 12;
	int ret;

	if (page_shift < dev_page_min) {
		dev_err(ctrl->dev,
			"Minimum device page size %u too large for host (%u)\n",
			1 << dev_page_min, 1 << page_shift);
		return -ENODEV;
	}

	ctrl->page_size = 1 << page_shift;

	ctrl->ctrl_config = NVME_CC_CSS_NVM;
	ctrl->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
	ctrl->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
	ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
	ctrl->ctrl_config |= NVME_CC_ENABLE;

	ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
	if (ret)
		return ret;
	return nvme_wait_ready(ctrl, cap, true);
}

int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
{
	unsigned long timeout = SHUTDOWN_TIMEOUT + jiffies;
	u32 csts;
	int ret;

	ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
	ctrl->ctrl_config |= NVME_CC_SHN_NORMAL;

	ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
	if (ret)
		return ret;

	while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) {
		if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_CMPLT)
			break;

		msleep(100);
		if (fatal_signal_pending(current))
			return -EINTR;
		if (time_after(jiffies, timeout)) {
			dev_err(ctrl->dev,
				"Device shutdown incomplete; abort shutdown\n");
			return -ENODEV;
		}
	}

	return ret;
}

/*
 * Initialize the cached copies of the Identify data and various controller
 * registers in our nvme_ctrl structure. This should be called as soon as
 * the admin queue is fully up and running.
 */
int nvme_init_identify(struct nvme_ctrl *ctrl)
{
	struct nvme_id_ctrl *id;
	u64 cap;
	int ret, page_shift;

	ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs);
	if (ret) {
		dev_err(ctrl->dev, "Reading VS failed (%d)\n", ret);
		return ret;
	}

	ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &cap);
	if (ret) {
		dev_err(ctrl->dev, "Reading CAP failed (%d)\n", ret);
		return ret;
	}
	page_shift = NVME_CAP_MPSMIN(cap) + 12;

	if (ctrl->vs >= NVME_VS(1, 1))
		ctrl->subsystem = NVME_CAP_NSSRC(cap);

	ret = nvme_identify_ctrl(ctrl, &id);
	if (ret) {
		dev_err(ctrl->dev, "Identify Controller failed (%d)\n", ret);
		return -EIO;
	}

	ctrl->oncs = le16_to_cpup(&id->oncs);
	atomic_set(&ctrl->abort_limit, id->acl + 1);
	ctrl->vwc = id->vwc;
	memcpy(ctrl->serial, id->sn, sizeof(id->sn));
	memcpy(ctrl->model, id->mn, sizeof(id->mn));
	memcpy(ctrl->firmware_rev, id->fr, sizeof(id->fr));
	if (id->mdts)
		ctrl->max_hw_sectors = 1 << (id->mdts + page_shift - 9);
	else
		ctrl->max_hw_sectors = UINT_MAX;

	if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && id->vs[3]) {
		unsigned int max_hw_sectors;

		ctrl->stripe_size = 1 << (id->vs[3] + page_shift);
		max_hw_sectors = ctrl->stripe_size >> (page_shift - 9);
		if (ctrl->max_hw_sectors) {
			ctrl->max_hw_sectors = min(max_hw_sectors,
							ctrl->max_hw_sectors);
		} else {
			ctrl->max_hw_sectors = max_hw_sectors;
		}
	}

	kfree(id);
	return 0;
}

static int nvme_dev_open(struct inode *inode, struct file *file)
{
	struct nvme_ctrl *ctrl;
	int instance = iminor(inode);
	int ret = -ENODEV;

	spin_lock(&dev_list_lock);
	list_for_each_entry(ctrl, &nvme_ctrl_list, node) {
		if (ctrl->instance != instance)
			continue;

		if (!ctrl->admin_q) {
			ret = -EWOULDBLOCK;
			break;
		}
		if (!kref_get_unless_zero(&ctrl->kref))
			break;
		file->private_data = ctrl;
		ret = 0;
		break;
	}
	spin_unlock(&dev_list_lock);

	return ret;
}

static int nvme_dev_release(struct inode *inode, struct file *file)
{
	nvme_put_ctrl(file->private_data);
	return 0;
}

static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
{
	struct nvme_ns *ns;
	int ret;

	mutex_lock(&ctrl->namespaces_mutex);
	if (list_empty(&ctrl->namespaces)) {
		ret = -ENOTTY;
		goto out_unlock;
	}

	ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list);
	if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) {
		dev_warn(ctrl->dev,
			"NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n");
		ret = -EINVAL;
		goto out_unlock;
	}

	dev_warn(ctrl->dev,
		"using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n");
	kref_get(&ns->kref);
	mutex_unlock(&ctrl->namespaces_mutex);

	ret = nvme_user_cmd(ctrl, ns, argp);
	nvme_put_ns(ns);
	return ret;

out_unlock:
	mutex_unlock(&ctrl->namespaces_mutex);
	return ret;
}

static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
		unsigned long arg)
{
	struct nvme_ctrl *ctrl = file->private_data;
	void __user *argp = (void __user *)arg;

	switch (cmd) {
	case NVME_IOCTL_ADMIN_CMD:
		return nvme_user_cmd(ctrl, NULL, argp);
	case NVME_IOCTL_IO_CMD:
		return nvme_dev_user_cmd(ctrl, argp);
	case NVME_IOCTL_RESET:
		dev_warn(ctrl->dev, "resetting controller\n");
		return ctrl->ops->reset_ctrl(ctrl);
	case NVME_IOCTL_SUBSYS_RESET:
		return nvme_reset_subsystem(ctrl);
	default:
		return -ENOTTY;
	}
}

static const struct file_operations nvme_dev_fops = {
	.owner		= THIS_MODULE,
	.open		= nvme_dev_open,
	.release	= nvme_dev_release,
	.unlocked_ioctl	= nvme_dev_ioctl,
	.compat_ioctl	= nvme_dev_ioctl,
};

static ssize_t nvme_sysfs_reset(struct device *dev,
				struct device_attribute *attr, const char *buf,
				size_t count)
{
	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
	int ret;

	ret = ctrl->ops->reset_ctrl(ctrl);
	if (ret < 0)
		return ret;
	return count;
}
static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset);

static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
								char *buf)
{
	struct nvme_ns *ns = dev_to_disk(dev)->private_data;
	return sprintf(buf, "%pU\n", ns->uuid);
}
static DEVICE_ATTR(uuid, S_IRUGO, uuid_show, NULL);

static ssize_t eui_show(struct device *dev, struct device_attribute *attr,
								char *buf)
{
	struct nvme_ns *ns = dev_to_disk(dev)->private_data;
	return sprintf(buf, "%8phd\n", ns->eui);
}
static DEVICE_ATTR(eui, S_IRUGO, eui_show, NULL);

static ssize_t nsid_show(struct device *dev, struct device_attribute *attr,
								char *buf)
{
	struct nvme_ns *ns = dev_to_disk(dev)->private_data;
	return sprintf(buf, "%d\n", ns->ns_id);
}
static DEVICE_ATTR(nsid, S_IRUGO, nsid_show, NULL);

static struct attribute *nvme_ns_attrs[] = {
	&dev_attr_uuid.attr,
	&dev_attr_eui.attr,
	&dev_attr_nsid.attr,
	NULL,
};

static umode_t nvme_attrs_are_visible(struct kobject *kobj,
		struct attribute *a, int n)
{
	struct device *dev = container_of(kobj, struct device, kobj);
	struct nvme_ns *ns = dev_to_disk(dev)->private_data;

	if (a == &dev_attr_uuid.attr) {
		if (!memchr_inv(ns->uuid, 0, sizeof(ns->uuid)))
			return 0;
	}
	if (a == &dev_attr_eui.attr) {
		if (!memchr_inv(ns->eui, 0, sizeof(ns->eui)))
			return 0;
	}
	return a->mode;
}

static const struct attribute_group nvme_ns_attr_group = {
	.attrs		= nvme_ns_attrs,
	.is_visible	= nvme_attrs_are_visible,
};

#define nvme_show_function(field)						\
static ssize_t field##_show(struct device *dev,				\
			    struct device_attribute *attr, char *buf)		\
{										\
	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);				\
	return sprintf(buf, "%.*s\n", (int)sizeof(ctrl->field), ctrl->field);	\
}										\
static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);

nvme_show_function(model);
nvme_show_function(serial);
nvme_show_function(firmware_rev);

static struct attribute *nvme_dev_attrs[] = {
	&dev_attr_reset_controller.attr,
	&dev_attr_model.attr,
	&dev_attr_serial.attr,
	&dev_attr_firmware_rev.attr,
	NULL
};

static struct attribute_group nvme_dev_attrs_group = {
	.attrs = nvme_dev_attrs,
};

static const struct attribute_group *nvme_dev_attr_groups[] = {
	&nvme_dev_attrs_group,
	NULL,
};

static int ns_cmp(void *priv, struct list_head *a, struct list_head *b)
{
	struct nvme_ns *nsa = container_of(a, struct nvme_ns, list);
	struct nvme_ns *nsb = container_of(b, struct nvme_ns, list);

	return nsa->ns_id - nsb->ns_id;
}

static struct nvme_ns *nvme_find_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
	struct nvme_ns *ns;

	lockdep_assert_held(&ctrl->namespaces_mutex);

	list_for_each_entry(ns, &ctrl->namespaces, list) {
		if (ns->ns_id == nsid)
			return ns;
		if (ns->ns_id > nsid)
			break;
	}
	return NULL;
}

static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
	struct nvme_ns *ns;
	struct gendisk *disk;
	int node = dev_to_node(ctrl->dev);

	lockdep_assert_held(&ctrl->namespaces_mutex);

	ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
	if (!ns)
		return;

	ns->instance = ida_simple_get(&ctrl->ns_ida, 1, 0, GFP_KERNEL);
	if (ns->instance < 0)
		goto out_free_ns;

	ns->queue = blk_mq_init_queue(ctrl->tagset);
	if (IS_ERR(ns->queue))
		goto out_release_instance;
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue);
	ns->queue->queuedata = ns;
	ns->ctrl = ctrl;

	disk = alloc_disk_node(0, node);
	if (!disk)
		goto out_free_queue;

	kref_init(&ns->kref);
	ns->ns_id = nsid;
	ns->disk = disk;
	ns->lba_shift = 9; /* set to a default value for 512 until disk is validated */

	blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
	if (ctrl->max_hw_sectors) {
		blk_queue_max_hw_sectors(ns->queue, ctrl->max_hw_sectors);
		blk_queue_max_segments(ns->queue,
			(ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1);
	}
	if (ctrl->stripe_size)
		blk_queue_chunk_sectors(ns->queue, ctrl->stripe_size >> 9);
	if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
		blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA);
	blk_queue_virt_boundary(ns->queue, ctrl->page_size - 1);

	disk->major = nvme_major;
	disk->first_minor = 0;
	disk->fops = &nvme_fops;
	disk->private_data = ns;
	disk->queue = ns->queue;
	disk->driverfs_dev = ctrl->device;
	disk->flags = GENHD_FL_EXT_DEVT;
	sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance, ns->instance);

	if (nvme_revalidate_disk(ns->disk))
		goto out_free_disk;

	list_add_tail(&ns->list, &ctrl->namespaces);
	kref_get(&ctrl->kref);
	if (ns->type == NVME_NS_LIGHTNVM)
		return;

	add_disk(ns->disk);
	if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
					&nvme_ns_attr_group))
		pr_warn("%s: failed to create sysfs group for identification\n",
			ns->disk->disk_name);
	return;
 out_free_disk:
	kfree(disk);
 out_free_queue:
	blk_cleanup_queue(ns->queue);
 out_release_instance:
	ida_simple_remove(&ctrl->ns_ida, ns->instance);
 out_free_ns:
	kfree(ns);
}

static void nvme_ns_remove(struct nvme_ns *ns)
{
	bool kill = nvme_io_incapable(ns->ctrl) &&
			!blk_queue_dying(ns->queue);

	lockdep_assert_held(&ns->ctrl->namespaces_mutex);

	if (kill) {
		blk_set_queue_dying(ns->queue);

		/*
		 * The controller was shutdown first if we got here through
		 * device removal. The shutdown may requeue outstanding
		 * requests. These need to be aborted immediately so
		 * del_gendisk doesn't block indefinitely for their completion.
		 */
		blk_mq_abort_requeue_list(ns->queue);
	}
	if (ns->disk->flags & GENHD_FL_UP) {
		if (blk_get_integrity(ns->disk))
			blk_integrity_unregister(ns->disk);
		sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
					&nvme_ns_attr_group);
		del_gendisk(ns->disk);
	}
	if (kill || !blk_queue_dying(ns->queue)) {
		blk_mq_abort_requeue_list(ns->queue);
		blk_cleanup_queue(ns->queue);
	}
	list_del_init(&ns->list);
	nvme_put_ns(ns);
}

static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
	struct nvme_ns *ns;

	ns = nvme_find_ns(ctrl, nsid);
	if (ns) {
		if (revalidate_disk(ns->disk))
			nvme_ns_remove(ns);
	} else
		nvme_alloc_ns(ctrl, nsid);
}

static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn)
{
	struct nvme_ns *ns;
	__le32 *ns_list;
	unsigned i, j, nsid, prev = 0, num_lists = DIV_ROUND_UP(nn, 1024);
	int ret = 0;

	ns_list = kzalloc(0x1000, GFP_KERNEL);
	if (!ns_list)
		return -ENOMEM;

	for (i = 0; i < num_lists; i++) {
		ret = nvme_identify_ns_list(ctrl, prev, ns_list);
		if (ret)
			goto out;

		for (j = 0; j < min(nn, 1024U); j++) {
			nsid = le32_to_cpu(ns_list[j]);
			if (!nsid)
				goto out;

			nvme_validate_ns(ctrl, nsid);

			while (++prev < nsid) {
				ns = nvme_find_ns(ctrl, prev);
				if (ns)
					nvme_ns_remove(ns);
			}
		}
		nn -= j;
	}
 out:
	kfree(ns_list);
	return ret;
}

static void __nvme_scan_namespaces(struct nvme_ctrl *ctrl, unsigned nn)
{
	struct nvme_ns *ns, *next;
	unsigned i;

	lockdep_assert_held(&ctrl->namespaces_mutex);

	for (i = 1; i <= nn; i++)
		nvme_validate_ns(ctrl, i);

	list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) {
		if (ns->ns_id > nn)
			nvme_ns_remove(ns);
	}
}

void nvme_scan_namespaces(struct nvme_ctrl *ctrl)
{
	struct nvme_id_ctrl *id;
	unsigned nn;

	if (nvme_identify_ctrl(ctrl, &id))
		return;

	mutex_lock(&ctrl->namespaces_mutex);
	nn = le32_to_cpu(id->nn);
	if (ctrl->vs >= NVME_VS(1, 1) &&
	    !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
		if (!nvme_scan_ns_list(ctrl, nn))
			goto done;
	}
	__nvme_scan_namespaces(ctrl, le32_to_cpup(&id->nn));
 done:
	list_sort(NULL, &ctrl->namespaces, ns_cmp);
	mutex_unlock(&ctrl->namespaces_mutex);
	kfree(id);
}

void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
{
	struct nvme_ns *ns, *next;

	mutex_lock(&ctrl->namespaces_mutex);
	list_for_each_entry_safe(ns, next, &ctrl->namespaces, list)
		nvme_ns_remove(ns);
	mutex_unlock(&ctrl->namespaces_mutex);
}

static DEFINE_IDA(nvme_instance_ida);

static int nvme_set_instance(struct nvme_ctrl *ctrl)
{
	int instance, error;

	do {
		if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL))
			return -ENODEV;

		spin_lock(&dev_list_lock);
		error = ida_get_new(&nvme_instance_ida, &instance);
		spin_unlock(&dev_list_lock);
	} while (error == -EAGAIN);

	if (error)
		return -ENODEV;

	ctrl->instance = instance;
	return 0;
}

static void nvme_release_instance(struct nvme_ctrl *ctrl)
{
	spin_lock(&dev_list_lock);
	ida_remove(&nvme_instance_ida, ctrl->instance);
	spin_unlock(&dev_list_lock);
}

void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
{
	device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance));

	spin_lock(&dev_list_lock);
	list_del(&ctrl->node);
	spin_unlock(&dev_list_lock);
}

static void nvme_free_ctrl(struct kref *kref)
{
	struct nvme_ctrl *ctrl = container_of(kref, struct nvme_ctrl, kref);

	put_device(ctrl->device);
	nvme_release_instance(ctrl);
	ida_destroy(&ctrl->ns_ida);

	ctrl->ops->free_ctrl(ctrl);
}

void nvme_put_ctrl(struct nvme_ctrl *ctrl)
{
	kref_put(&ctrl->kref, nvme_free_ctrl);
}

/*
 * Initialize an NVMe controller structure. This needs to be called during
 * earliest initialization so that we have the initialized structure around
 * during probing.
 */
int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
		const struct nvme_ctrl_ops *ops, unsigned long quirks)
{
	int ret;

	INIT_LIST_HEAD(&ctrl->namespaces);
	mutex_init(&ctrl->namespaces_mutex);
	kref_init(&ctrl->kref);
	ctrl->dev = dev;
	ctrl->ops = ops;
	ctrl->quirks = quirks;

	ret = nvme_set_instance(ctrl);
	if (ret)
		goto out;

	ctrl->device = device_create_with_groups(nvme_class, ctrl->dev,
				MKDEV(nvme_char_major, ctrl->instance),
				dev, nvme_dev_attr_groups,
				"nvme%d", ctrl->instance);
	if (IS_ERR(ctrl->device)) {
		ret = PTR_ERR(ctrl->device);
		goto out_release_instance;
	}
	get_device(ctrl->device);
	dev_set_drvdata(ctrl->device, ctrl);
	ida_init(&ctrl->ns_ida);

	spin_lock(&dev_list_lock);
	list_add_tail(&ctrl->node, &nvme_ctrl_list);
	spin_unlock(&dev_list_lock);

	return 0;
out_release_instance:
	nvme_release_instance(ctrl);
out:
	return ret;
}

void nvme_stop_queues(struct nvme_ctrl *ctrl)
{
	struct nvme_ns *ns;

	mutex_lock(&ctrl->namespaces_mutex);
	list_for_each_entry(ns, &ctrl->namespaces, list) {
		spin_lock_irq(ns->queue->queue_lock);
		queue_flag_set(QUEUE_FLAG_STOPPED, ns->queue);
		spin_unlock_irq(ns->queue->queue_lock);

		blk_mq_cancel_requeue_work(ns->queue);
		blk_mq_stop_hw_queues(ns->queue);
	}
	mutex_unlock(&ctrl->namespaces_mutex);
}

void nvme_start_queues(struct nvme_ctrl *ctrl)
{
	struct nvme_ns *ns;

	mutex_lock(&ctrl->namespaces_mutex);
	list_for_each_entry(ns, &ctrl->namespaces, list) {
		queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, ns->queue);
		blk_mq_start_stopped_hw_queues(ns->queue, true);
		blk_mq_kick_requeue_list(ns->queue);
	}
	mutex_unlock(&ctrl->namespaces_mutex);
}

int __init nvme_core_init(void)
{
	int result;

	result = register_blkdev(nvme_major, "nvme");
	if (result < 0)
		return result;
	else if (result > 0)
		nvme_major = result;

	result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme",
							&nvme_dev_fops);
	if (result < 0)
		goto unregister_blkdev;
	else if (result > 0)
		nvme_char_major = result;

	nvme_class = class_create(THIS_MODULE, "nvme");
	if (IS_ERR(nvme_class)) {
		result = PTR_ERR(nvme_class);
		goto unregister_chrdev;
	}

	return 0;

 unregister_chrdev:
	__unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
 unregister_blkdev:
	unregister_blkdev(nvme_major, "nvme");
	return result;
}

void nvme_core_exit(void)
{
	unregister_blkdev(nvme_major, "nvme");
	class_destroy(nvme_class);
	__unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
}