// SPDX-License-Identifier: GPL-2.0
/*
 * Common code for the NVMe target.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/random.h>
#include <linux/rculist.h>
#include <linux/pci-p2pdma.h>
#include <linux/scatterlist.h>

#define CREATE_TRACE_POINTS
#include "trace.h"

#include "nvmet.h"

struct workqueue_struct *buffered_io_wq;
static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
static DEFINE_IDA(cntlid_ida);

/*
 * This read/write semaphore is used to synchronize access to configuration
 * information on a target system that will result in a discovery log page
 * information change for at least one host.
 * The full list of resources protected by this semaphore is:
 *
 *  - the subsystems list
 *  - the per-subsystem allowed hosts list
 *  - the allow_any_host subsystem attribute
 *  - nvmet_genctr
 *  - the nvmet_transports array
 *
 * When updating any of those lists/structures the write lock should be
 * obtained, while when reading (populating the discovery log page or checking
 * the host-subsystem link) the read lock is obtained to allow concurrent
 * reads.
 */
DECLARE_RWSEM(nvmet_config_sem);
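
/*
 * Illustrative sketch (not part of the original file): readers of the
 * configuration, for example code that walks a port's subsystem links much
 * like nvmet_port_send_ana_event() below, only need the read side of
 * nvmet_config_sem.  The helper name is hypothetical.
 */
static inline void example_for_each_port_subsys(struct nvmet_port *port,
		void (*fn)(struct nvmet_subsys *subsys))
{
	struct nvmet_subsys_link *p;

	down_read(&nvmet_config_sem);
	list_for_each_entry(p, &port->subsystems, entry)
		fn(p->subsys);
	up_read(&nvmet_config_sem);
}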

u32 nvmet_ana_group_enabled[NVMET_MAX_ANAGRPS + 1];
u64 nvmet_ana_chgcnt;
DECLARE_RWSEM(nvmet_ana_sem);

inline u16 errno_to_nvme_status(struct nvmet_req *req, int errno)
{
	u16 status;

	switch (errno) {
	case -ENOSPC:
		req->error_loc = offsetof(struct nvme_rw_command, length);
		status = NVME_SC_CAP_EXCEEDED | NVME_SC_DNR;
		break;
	case -EREMOTEIO:
		req->error_loc = offsetof(struct nvme_rw_command, slba);
		status = NVME_SC_LBA_RANGE | NVME_SC_DNR;
		break;
	case -EOPNOTSUPP:
		req->error_loc = offsetof(struct nvme_common_command, opcode);
		switch (req->cmd->common.opcode) {
		case nvme_cmd_dsm:
		case nvme_cmd_write_zeroes:
			status = NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR;
			break;
		default:
			status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
		}
		break;
	case -ENODATA:
		req->error_loc = offsetof(struct nvme_rw_command, nsid);
		status = NVME_SC_ACCESS_DENIED;
		break;
	case -EIO:
		/* FALLTHRU */
	default:
		req->error_loc = offsetof(struct nvme_common_command, opcode);
		status = NVME_SC_INTERNAL | NVME_SC_DNR;
	}

	return status;
}
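
/*
 * Illustrative sketch (not part of the original file): a backend completion
 * path would typically translate its (negative) errno into an NVMe status
 * with errno_to_nvme_status() before completing the request.  The helper
 * name is hypothetical.
 */
static inline void example_complete_with_errno(struct nvmet_req *req, int err)
{
	nvmet_req_complete(req, err ? errno_to_nvme_status(req, err) : 0);
}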

static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
		const char *subsysnqn);

u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
		size_t len)
{
	if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
	}
	return 0;
}

u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len)
{
	if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
	}
	return 0;
}

u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len)
{
	if (sg_zero_buffer(req->sg, req->sg_cnt, len, off) != len) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
	}
	return 0;
}
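
/*
 * Illustrative sketch (not part of the original file): a command handler
 * that builds a response buffer in kernel memory copies it out to the
 * host-facing SG list with nvmet_copy_to_sgl() and completes the request
 * with the resulting status.  The helper name is hypothetical.
 */
static inline void example_copy_and_complete(struct nvmet_req *req,
		const void *buf, size_t len)
{
	u16 status = nvmet_copy_to_sgl(req, 0, buf, len);

	nvmet_req_complete(req, status);
}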

static unsigned int nvmet_max_nsid(struct nvmet_subsys *subsys)
{
	struct nvmet_ns *ns;

	if (list_empty(&subsys->namespaces))
		return 0;

	ns = list_last_entry(&subsys->namespaces, struct nvmet_ns, dev_link);
	return ns->nsid;
}

static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
{
	return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16);
}

static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
{
	struct nvmet_req *req;

	while (1) {
		mutex_lock(&ctrl->lock);
		if (!ctrl->nr_async_event_cmds) {
			mutex_unlock(&ctrl->lock);
			return;
		}

		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
		mutex_unlock(&ctrl->lock);
		nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
	}
}

static void nvmet_async_event_work(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl =
		container_of(work, struct nvmet_ctrl, async_event_work);
	struct nvmet_async_event *aen;
	struct nvmet_req *req;

	while (1) {
		mutex_lock(&ctrl->lock);
		aen = list_first_entry_or_null(&ctrl->async_events,
				struct nvmet_async_event, entry);
		if (!aen || !ctrl->nr_async_event_cmds) {
			mutex_unlock(&ctrl->lock);
			return;
		}

		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
		nvmet_set_result(req, nvmet_async_event_result(aen));

		list_del(&aen->entry);
		kfree(aen);

		mutex_unlock(&ctrl->lock);
		nvmet_req_complete(req, 0);
	}
}

void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
		u8 event_info, u8 log_page)
{
	struct nvmet_async_event *aen;

	aen = kmalloc(sizeof(*aen), GFP_KERNEL);
	if (!aen)
		return;

	aen->event_type = event_type;
	aen->event_info = event_info;
	aen->log_page = log_page;

	mutex_lock(&ctrl->lock);
	list_add_tail(&aen->entry, &ctrl->async_events);
	mutex_unlock(&ctrl->lock);

	schedule_work(&ctrl->async_event_work);
}
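
/*
 * Illustrative sketch (not part of the original file): code that wants to
 * raise a notice-type AEN should first honour the per-controller AEN enable
 * mask, exactly as nvmet_ns_changed() does below.  The helper name and the
 * choice of AEN bit are hypothetical.
 */
static inline void example_queue_ns_attr_aen(struct nvmet_ctrl *ctrl)
{
	if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_NS_ATTR))
		return;
	nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
			NVME_AER_NOTICE_NS_CHANGED, NVME_LOG_CHANGED_NS);
}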

static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid)
{
	u32 i;

	mutex_lock(&ctrl->lock);
	if (ctrl->nr_changed_ns > NVME_MAX_CHANGED_NAMESPACES)
		goto out_unlock;

	for (i = 0; i < ctrl->nr_changed_ns; i++) {
		if (ctrl->changed_ns_list[i] == nsid)
			goto out_unlock;
	}

	if (ctrl->nr_changed_ns == NVME_MAX_CHANGED_NAMESPACES) {
		ctrl->changed_ns_list[0] = cpu_to_le32(0xffffffff);
		ctrl->nr_changed_ns = U32_MAX;
		goto out_unlock;
	}

	ctrl->changed_ns_list[ctrl->nr_changed_ns++] = nsid;
out_unlock:
	mutex_unlock(&ctrl->lock);
}

void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid)
{
	struct nvmet_ctrl *ctrl;

	lockdep_assert_held(&subsys->lock);

	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid));
		if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_NS_ATTR))
			continue;
		nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
				NVME_AER_NOTICE_NS_CHANGED,
				NVME_LOG_CHANGED_NS);
	}
}

void nvmet_send_ana_event(struct nvmet_subsys *subsys,
		struct nvmet_port *port)
{
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		if (port && ctrl->port != port)
			continue;
		if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_ANA_CHANGE))
			continue;
		nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
				NVME_AER_NOTICE_ANA, NVME_LOG_ANA);
	}
	mutex_unlock(&subsys->lock);
}

void nvmet_port_send_ana_event(struct nvmet_port *port)
{
	struct nvmet_subsys_link *p;

	down_read(&nvmet_config_sem);
	list_for_each_entry(p, &port->subsystems, entry)
		nvmet_send_ana_event(p->subsys, port);
	up_read(&nvmet_config_sem);
}

int nvmet_register_transport(const struct nvmet_fabrics_ops *ops)
{
	int ret = 0;

	down_write(&nvmet_config_sem);
	if (nvmet_transports[ops->type])
		ret = -EINVAL;
	else
		nvmet_transports[ops->type] = ops;
	up_write(&nvmet_config_sem);

	return ret;
}
EXPORT_SYMBOL_GPL(nvmet_register_transport);

void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops)
{
	down_write(&nvmet_config_sem);
	nvmet_transports[ops->type] = NULL;
	up_write(&nvmet_config_sem);
}
EXPORT_SYMBOL_GPL(nvmet_unregister_transport);
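
/*
 * Illustrative sketch (not part of the original file): a transport module
 * registers its nvmet_fabrics_ops from its module init path and removes it
 * again on exit.  All "example_*" names are hypothetical stubs; a real
 * transport supplies its own callbacks and transport type.
 */
static int example_add_port(struct nvmet_port *port) { return 0; }
static void example_remove_port(struct nvmet_port *port) { }
static void example_queue_response(struct nvmet_req *req) { }
static void example_delete_ctrl(struct nvmet_ctrl *ctrl) { }

static const struct nvmet_fabrics_ops example_transport_ops = {
	.owner		= THIS_MODULE,
	.type		= NVMF_TRTYPE_LOOP,	/* assumption: reuse the loop trtype for the sketch */
	.add_port	= example_add_port,
	.remove_port	= example_remove_port,
	.queue_response	= example_queue_response,
	.delete_ctrl	= example_delete_ctrl,
};

static int example_transport_register(void)
{
	return nvmet_register_transport(&example_transport_ops);
}

static void example_transport_unregister(void)
{
	nvmet_unregister_transport(&example_transport_ops);
}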

int nvmet_enable_port(struct nvmet_port *port)
{
	const struct nvmet_fabrics_ops *ops;
	int ret;

	lockdep_assert_held(&nvmet_config_sem);

	ops = nvmet_transports[port->disc_addr.trtype];
	if (!ops) {
		up_write(&nvmet_config_sem);
		request_module("nvmet-transport-%d", port->disc_addr.trtype);
		down_write(&nvmet_config_sem);
		ops = nvmet_transports[port->disc_addr.trtype];
		if (!ops) {
			pr_err("transport type %d not supported\n",
				port->disc_addr.trtype);
			return -EINVAL;
		}
	}

	if (!try_module_get(ops->owner))
		return -EINVAL;

	ret = ops->add_port(port);
	if (ret) {
		module_put(ops->owner);
		return ret;
	}

	/* If the transport didn't set inline_data_size, then disable it. */
	if (port->inline_data_size < 0)
		port->inline_data_size = 0;

	port->enabled = true;
	port->tr_ops = ops;
	return 0;
}

void nvmet_disable_port(struct nvmet_port *port)
{
	const struct nvmet_fabrics_ops *ops;

	lockdep_assert_held(&nvmet_config_sem);

	port->enabled = false;
	port->tr_ops = NULL;

	ops = nvmet_transports[port->disc_addr.trtype];
	ops->remove_port(port);
	module_put(ops->owner);
}

static void nvmet_keep_alive_timer(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work),
			struct nvmet_ctrl, ka_work);
	bool cmd_seen = ctrl->cmd_seen;

	ctrl->cmd_seen = false;
	if (cmd_seen) {
		pr_debug("ctrl %d reschedule traffic based keep-alive timer\n",
			ctrl->cntlid);
		schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
		return;
	}

	pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n",
		ctrl->cntlid, ctrl->kato);

	nvmet_ctrl_fatal_error(ctrl);
}

static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
	pr_debug("ctrl %d start keep-alive timer for %d secs\n",
		ctrl->cntlid, ctrl->kato);

	INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer);
	schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
}

static void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
	pr_debug("ctrl %d stop keep-alive\n", ctrl->cntlid);

	cancel_delayed_work_sync(&ctrl->ka_work);
}

static struct nvmet_ns *__nvmet_find_namespace(struct nvmet_ctrl *ctrl,
		__le32 nsid)
{
	struct nvmet_ns *ns;

	list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
		if (ns->nsid == le32_to_cpu(nsid))
			return ns;
	}

	return NULL;
}

struct nvmet_ns *nvmet_find_namespace(struct nvmet_ctrl *ctrl, __le32 nsid)
{
	struct nvmet_ns *ns;

	rcu_read_lock();
	ns = __nvmet_find_namespace(ctrl, nsid);
	if (ns)
		percpu_ref_get(&ns->ref);
	rcu_read_unlock();

	return ns;
}

static void nvmet_destroy_namespace(struct percpu_ref *ref)
{
	struct nvmet_ns *ns = container_of(ref, struct nvmet_ns, ref);

	complete(&ns->disable_done);
}

void nvmet_put_namespace(struct nvmet_ns *ns)
{
	percpu_ref_put(&ns->ref);
}
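
/*
 * Illustrative sketch (not part of the original file): lookups return the
 * namespace with its percpu reference elevated, so every successful
 * nvmet_find_namespace() must be paired with nvmet_put_namespace() once the
 * caller is done with it.  The helper name is hypothetical.
 */
static inline u16 example_check_nsid_exists(struct nvmet_req *req, __le32 nsid)
{
	struct nvmet_ns *ns = nvmet_find_namespace(req->sq->ctrl, nsid);

	if (!ns)
		return NVME_SC_INVALID_NS | NVME_SC_DNR;
	/* ... use ns->bdev / ns->file while the reference is held ... */
	nvmet_put_namespace(ns);
	return 0;
}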

static void nvmet_ns_dev_disable(struct nvmet_ns *ns)
{
	nvmet_bdev_ns_disable(ns);
	nvmet_file_ns_disable(ns);
}

static int nvmet_p2pmem_ns_enable(struct nvmet_ns *ns)
{
	int ret;
	struct pci_dev *p2p_dev;

	if (!ns->use_p2pmem)
		return 0;

	if (!ns->bdev) {
		pr_err("peer-to-peer DMA is not supported by non-block device namespaces\n");
		return -EINVAL;
	}

	if (!blk_queue_pci_p2pdma(ns->bdev->bd_queue)) {
		pr_err("peer-to-peer DMA is not supported by the driver of %s\n",
		       ns->device_path);
		return -EINVAL;
	}

	if (ns->p2p_dev) {
		ret = pci_p2pdma_distance(ns->p2p_dev, nvmet_ns_dev(ns), true);
		if (ret < 0)
			return -EINVAL;
	} else {
		/*
		 * Right now we just check that there is p2pmem available so
		 * we can report an error to the user right away if there
		 * is not. We'll find the actual device to use once we
		 * set up the controller when the port's device is available.
		 */

		p2p_dev = pci_p2pmem_find(nvmet_ns_dev(ns));
		if (!p2p_dev) {
			pr_err("no peer-to-peer memory is available for %s\n",
			       ns->device_path);
			return -EINVAL;
		}

		pci_dev_put(p2p_dev);
	}

	return 0;
}

/*
 * Note: ctrl->subsys->lock should be held when calling this function
 */
static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl,
		struct nvmet_ns *ns)
{
	struct device *clients[2];
	struct pci_dev *p2p_dev;
	int ret;

	if (!ctrl->p2p_client || !ns->use_p2pmem)
		return;

	if (ns->p2p_dev) {
		ret = pci_p2pdma_distance(ns->p2p_dev, ctrl->p2p_client, true);
		if (ret < 0)
			return;

		p2p_dev = pci_dev_get(ns->p2p_dev);
	} else {
		clients[0] = ctrl->p2p_client;
		clients[1] = nvmet_ns_dev(ns);

		p2p_dev = pci_p2pmem_find_many(clients, ARRAY_SIZE(clients));
		if (!p2p_dev) {
			pr_err("no peer-to-peer memory is available that's supported by %s and %s\n",
			       dev_name(ctrl->p2p_client), ns->device_path);
			return;
		}
	}

	ret = radix_tree_insert(&ctrl->p2p_ns_map, ns->nsid, p2p_dev);
	if (ret < 0)
		pci_dev_put(p2p_dev);

	pr_info("using p2pmem on %s for nsid %d\n", pci_name(p2p_dev),
		ns->nsid);
}

int nvmet_ns_enable(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;
	struct nvmet_ctrl *ctrl;
	int ret;

	mutex_lock(&subsys->lock);
	ret = 0;
	if (ns->enabled)
		goto out_unlock;

	ret = -EMFILE;
	if (subsys->nr_namespaces == NVMET_MAX_NAMESPACES)
		goto out_unlock;

	ret = nvmet_bdev_ns_enable(ns);
	if (ret == -ENOTBLK)
		ret = nvmet_file_ns_enable(ns);
	if (ret)
		goto out_unlock;

	ret = nvmet_p2pmem_ns_enable(ns);
	if (ret)
		goto out_dev_disable;

	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		nvmet_p2pmem_ns_add_p2p(ctrl, ns);

	ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace,
				0, GFP_KERNEL);
	if (ret)
		goto out_dev_put;

	if (ns->nsid > subsys->max_nsid)
		subsys->max_nsid = ns->nsid;

	/*
	 * The namespaces list needs to be sorted to simplify the
	 * implementation of the Identify Namespace List subcommand.
	 */
	if (list_empty(&subsys->namespaces)) {
		list_add_tail_rcu(&ns->dev_link, &subsys->namespaces);
	} else {
		struct nvmet_ns *old;

		list_for_each_entry_rcu(old, &subsys->namespaces, dev_link) {
			BUG_ON(ns->nsid == old->nsid);
			if (ns->nsid < old->nsid)
				break;
		}

		list_add_tail_rcu(&ns->dev_link, &old->dev_link);
	}
	subsys->nr_namespaces++;

	nvmet_ns_changed(subsys, ns->nsid);
	ns->enabled = true;
	ret = 0;
out_unlock:
	mutex_unlock(&subsys->lock);
	return ret;
out_dev_put:
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));
out_dev_disable:
	nvmet_ns_dev_disable(ns);
	goto out_unlock;
}
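
/*
 * Illustrative sketch (not part of the original file): because the list is
 * kept sorted by NSID, an Identify Namespace List handler can emit all
 * entries greater than the requested NSID in a single in-order walk.  The
 * helper name is hypothetical; the real handler lives in the admin command
 * code.
 */
static inline void example_fill_ns_list(struct nvmet_subsys *subsys,
		__le32 *list, size_t nr_entries, u32 min_nsid)
{
	struct nvmet_ns *ns;
	size_t i = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(ns, &subsys->namespaces, dev_link) {
		if (ns->nsid <= min_nsid)
			continue;
		list[i++] = cpu_to_le32(ns->nsid);
		if (i == nr_entries)
			break;
	}
	rcu_read_unlock();
}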

void nvmet_ns_disable(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	if (!ns->enabled)
		goto out_unlock;

	ns->enabled = false;
	list_del_rcu(&ns->dev_link);
	if (ns->nsid == subsys->max_nsid)
		subsys->max_nsid = nvmet_max_nsid(subsys);

	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));

	mutex_unlock(&subsys->lock);

	/*
	 * Now that we removed the namespace from the lookup list, we
	 * can kill the percpu ref and wait for any remaining references
	 * to be dropped, as well as an RCU grace period for anyone only
	 * using the namespace under rcu_read_lock().  Note that we can't
	 * use call_rcu here as we need to ensure the namespaces have
	 * been fully destroyed before unloading the module.
	 */
	percpu_ref_kill(&ns->ref);
	synchronize_rcu();
	wait_for_completion(&ns->disable_done);
	percpu_ref_exit(&ns->ref);

	mutex_lock(&subsys->lock);

	subsys->nr_namespaces--;
	nvmet_ns_changed(subsys, ns->nsid);
	nvmet_ns_dev_disable(ns);
out_unlock:
	mutex_unlock(&subsys->lock);
}

void nvmet_ns_free(struct nvmet_ns *ns)
{
	nvmet_ns_disable(ns);

	down_write(&nvmet_ana_sem);
	nvmet_ana_group_enabled[ns->anagrpid]--;
	up_write(&nvmet_ana_sem);

	kfree(ns->device_path);
	kfree(ns);
}

struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
{
	struct nvmet_ns *ns;

	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
	if (!ns)
		return NULL;

	INIT_LIST_HEAD(&ns->dev_link);
	init_completion(&ns->disable_done);

	ns->nsid = nsid;
	ns->subsys = subsys;

	down_write(&nvmet_ana_sem);
	ns->anagrpid = NVMET_DEFAULT_ANA_GRPID;
	nvmet_ana_group_enabled[ns->anagrpid]++;
	up_write(&nvmet_ana_sem);

	uuid_gen(&ns->uuid);
	ns->buffered_io = false;

	return ns;
}

static void nvmet_update_sq_head(struct nvmet_req *req)
{
	if (req->sq->size) {
		u32 old_sqhd, new_sqhd;

		do {
			old_sqhd = req->sq->sqhd;
			new_sqhd = (old_sqhd + 1) % req->sq->size;
		} while (cmpxchg(&req->sq->sqhd, old_sqhd, new_sqhd) !=
				old_sqhd);
	}
	req->cqe->sq_head = cpu_to_le16(req->sq->sqhd & 0x0000FFFF);
}

static void nvmet_set_error(struct nvmet_req *req, u16 status)
{
	struct nvmet_ctrl *ctrl = req->sq->ctrl;
	struct nvme_error_slot *new_error_slot;
	unsigned long flags;

	req->cqe->status = cpu_to_le16(status << 1);

	if (!ctrl || req->error_loc == NVMET_NO_ERROR_LOC)
		return;

	spin_lock_irqsave(&ctrl->error_lock, flags);
	ctrl->err_counter++;
	new_error_slot =
		&ctrl->slots[ctrl->err_counter % NVMET_ERROR_LOG_SLOTS];

	new_error_slot->error_count = cpu_to_le64(ctrl->err_counter);
	new_error_slot->sqid = cpu_to_le16(req->sq->qid);
	new_error_slot->cmdid = cpu_to_le16(req->cmd->common.command_id);
	new_error_slot->status_field = cpu_to_le16(status << 1);
	new_error_slot->param_error_location = cpu_to_le16(req->error_loc);
	new_error_slot->lba = cpu_to_le64(req->error_slba);
	new_error_slot->nsid = req->cmd->common.nsid;
	spin_unlock_irqrestore(&ctrl->error_lock, flags);

	/* set the more bit for this request */
	req->cqe->status |= cpu_to_le16(1 << 14);
}

static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
{
	if (!req->sq->sqhd_disabled)
		nvmet_update_sq_head(req);
	req->cqe->sq_id = cpu_to_le16(req->sq->qid);
	req->cqe->command_id = req->cmd->common.command_id;

	if (unlikely(status))
		nvmet_set_error(req, status);

	trace_nvmet_req_complete(req);

	if (req->ns)
		nvmet_put_namespace(req->ns);
	req->ops->queue_response(req);
}

void nvmet_req_complete(struct nvmet_req *req, u16 status)
{
	__nvmet_req_complete(req, status);
	percpu_ref_put(&req->sq->ref);
}
EXPORT_SYMBOL_GPL(nvmet_req_complete);

void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
		u16 qid, u16 size)
{
	cq->qid = qid;
	cq->size = size;

	ctrl->cqs[qid] = cq;
}

void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
		u16 qid, u16 size)
{
	sq->sqhd = 0;
	sq->qid = qid;
	sq->size = size;

	ctrl->sqs[qid] = sq;
}

static void nvmet_confirm_sq(struct percpu_ref *ref)
{
	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);

	complete(&sq->confirm_done);
}

void nvmet_sq_destroy(struct nvmet_sq *sq)
{
	/*
	 * If this is the admin queue, complete all AERs so that our
	 * queue doesn't have outstanding requests on it.
	 */
	if (sq->ctrl && sq->ctrl->sqs && sq->ctrl->sqs[0] == sq)
		nvmet_async_events_free(sq->ctrl);
	percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
	wait_for_completion(&sq->confirm_done);
	wait_for_completion(&sq->free_done);
	percpu_ref_exit(&sq->ref);

	if (sq->ctrl) {
		nvmet_ctrl_put(sq->ctrl);
		sq->ctrl = NULL; /* allows reusing the queue later */
	}
}
EXPORT_SYMBOL_GPL(nvmet_sq_destroy);

static void nvmet_sq_free(struct percpu_ref *ref)
{
	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);

	complete(&sq->free_done);
}

int nvmet_sq_init(struct nvmet_sq *sq)
{
	int ret;

	ret = percpu_ref_init(&sq->ref, nvmet_sq_free, 0, GFP_KERNEL);
	if (ret) {
		pr_err("percpu_ref init failed!\n");
		return ret;
	}
	init_completion(&sq->free_done);
	init_completion(&sq->confirm_done);

	return 0;
}
EXPORT_SYMBOL_GPL(nvmet_sq_init);

static inline u16 nvmet_check_ana_state(struct nvmet_port *port,
		struct nvmet_ns *ns)
{
	enum nvme_ana_state state = port->ana_state[ns->anagrpid];

	if (unlikely(state == NVME_ANA_INACCESSIBLE))
		return NVME_SC_ANA_INACCESSIBLE;
	if (unlikely(state == NVME_ANA_PERSISTENT_LOSS))
		return NVME_SC_ANA_PERSISTENT_LOSS;
	if (unlikely(state == NVME_ANA_CHANGE))
		return NVME_SC_ANA_TRANSITION;
	return 0;
}

static inline u16 nvmet_io_cmd_check_access(struct nvmet_req *req)
{
	if (unlikely(req->ns->readonly)) {
		switch (req->cmd->common.opcode) {
		case nvme_cmd_read:
		case nvme_cmd_flush:
			break;
		default:
			return NVME_SC_NS_WRITE_PROTECTED;
		}
	}

	return 0;
}

static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
{
	struct nvme_command *cmd = req->cmd;
	u16 ret;

	ret = nvmet_check_ctrl_status(req, cmd);
	if (unlikely(ret))
		return ret;

	req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
	if (unlikely(!req->ns)) {
		req->error_loc = offsetof(struct nvme_common_command, nsid);
		return NVME_SC_INVALID_NS | NVME_SC_DNR;
	}
	ret = nvmet_check_ana_state(req->port, req->ns);
	if (unlikely(ret)) {
		req->error_loc = offsetof(struct nvme_common_command, nsid);
		return ret;
	}
	ret = nvmet_io_cmd_check_access(req);
	if (unlikely(ret)) {
		req->error_loc = offsetof(struct nvme_common_command, nsid);
		return ret;
	}

	if (req->ns->file)
		return nvmet_file_parse_io_cmd(req);
	else
		return nvmet_bdev_parse_io_cmd(req);
}

bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
		struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops)
{
	u8 flags = req->cmd->common.flags;
	u16 status;

	req->cq = cq;
	req->sq = sq;
	req->ops = ops;
	req->sg = NULL;
	req->sg_cnt = 0;
	req->transfer_len = 0;
	req->cqe->status = 0;
	req->cqe->sq_head = 0;
	req->ns = NULL;
	req->error_loc = NVMET_NO_ERROR_LOC;
	req->error_slba = 0;

	trace_nvmet_req_init(req, req->cmd);

	/* no support for fused commands yet */
	if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
		req->error_loc = offsetof(struct nvme_common_command, flags);
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	/*
	 * For fabrics, PSDT field shall describe metadata pointer (MPTR) that
	 * contains an address of a single contiguous physical buffer that is
	 * byte aligned.
	 */
	if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF)) {
		req->error_loc = offsetof(struct nvme_common_command, flags);
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	if (unlikely(!req->sq->ctrl))
		/* will return an error for any Non-connect command: */
		status = nvmet_parse_connect_cmd(req);
	else if (likely(req->sq->qid != 0))
		status = nvmet_parse_io_cmd(req);
	else if (nvme_is_fabrics(req->cmd))
		status = nvmet_parse_fabrics_cmd(req);
	else if (req->sq->ctrl->subsys->type == NVME_NQN_DISC)
		status = nvmet_parse_discovery_cmd(req);
	else
		status = nvmet_parse_admin_cmd(req);

	if (status)
		goto fail;

	if (unlikely(!percpu_ref_tryget_live(&sq->ref))) {
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	if (sq->ctrl)
		sq->ctrl->cmd_seen = true;

	return true;

fail:
	__nvmet_req_complete(req, status);
	return false;
}
EXPORT_SYMBOL_GPL(nvmet_req_init);

void nvmet_req_uninit(struct nvmet_req *req)
{
	percpu_ref_put(&req->sq->ref);
	if (req->ns)
		nvmet_put_namespace(req->ns);
}
EXPORT_SYMBOL_GPL(nvmet_req_uninit);

void nvmet_req_execute(struct nvmet_req *req)
{
	if (unlikely(req->data_len != req->transfer_len)) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR);
	} else
		req->execute(req);
}
EXPORT_SYMBOL_GPL(nvmet_req_execute);
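
/*
 * Illustrative sketch (not part of the original file): a transport's receive
 * path initializes every command with nvmet_req_init(); a false return means
 * the request was already completed with an error status.  Otherwise the
 * transport maps any data buffers and hands the command to the core with
 * nvmet_req_execute().  The helper name is hypothetical.
 */
static inline void example_handle_command(struct nvmet_req *req,
		struct nvmet_cq *cq, struct nvmet_sq *sq,
		const struct nvmet_fabrics_ops *ops)
{
	if (!nvmet_req_init(req, cq, sq, ops))
		return;

	/* a real transport would set req->transfer_len and map data here */
	nvmet_req_execute(req);
}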

int nvmet_req_alloc_sgl(struct nvmet_req *req)
{
	struct pci_dev *p2p_dev = NULL;

	if (IS_ENABLED(CONFIG_PCI_P2PDMA)) {
		if (req->sq->ctrl && req->ns)
			p2p_dev = radix_tree_lookup(&req->sq->ctrl->p2p_ns_map,
						    req->ns->nsid);

		req->p2p_dev = NULL;
		if (req->sq->qid && p2p_dev) {
			req->sg = pci_p2pmem_alloc_sgl(p2p_dev, &req->sg_cnt,
						       req->transfer_len);
			if (req->sg) {
				req->p2p_dev = p2p_dev;
				return 0;
			}
		}

		/*
		 * If no P2P memory was available we fall back to using
		 * regular memory.
		 */
	}

	req->sg = sgl_alloc(req->transfer_len, GFP_KERNEL, &req->sg_cnt);
	if (!req->sg)
		return -ENOMEM;

	return 0;
}
EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgl);

void nvmet_req_free_sgl(struct nvmet_req *req)
{
	if (req->p2p_dev)
		pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
	else
		sgl_free(req->sg);

	req->sg = NULL;
	req->sg_cnt = 0;
}
EXPORT_SYMBOL_GPL(nvmet_req_free_sgl);
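
/*
 * Illustrative sketch (not part of the original file): a transport data path
 * allocates the scatterlist once the transfer length is known and frees it
 * again from its completion handler with nvmet_req_free_sgl().  The helper
 * name is hypothetical.
 */
static inline int example_map_data(struct nvmet_req *req)
{
	if (!req->transfer_len)
		return 0;

	if (nvmet_req_alloc_sgl(req) < 0)
		return -ENOMEM;

	/* ... DMA-map req->sg / build transport descriptors here ... */
	return 0;
}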

static inline bool nvmet_cc_en(u32 cc)
{
	return (cc >> NVME_CC_EN_SHIFT) & 0x1;
}

static inline u8 nvmet_cc_css(u32 cc)
{
	return (cc >> NVME_CC_CSS_SHIFT) & 0x7;
}

static inline u8 nvmet_cc_mps(u32 cc)
{
	return (cc >> NVME_CC_MPS_SHIFT) & 0xf;
}

static inline u8 nvmet_cc_ams(u32 cc)
{
	return (cc >> NVME_CC_AMS_SHIFT) & 0x7;
}

static inline u8 nvmet_cc_shn(u32 cc)
{
	return (cc >> NVME_CC_SHN_SHIFT) & 0x3;
}

static inline u8 nvmet_cc_iosqes(u32 cc)
{
	return (cc >> NVME_CC_IOSQES_SHIFT) & 0xf;
}

static inline u8 nvmet_cc_iocqes(u32 cc)
{
	return (cc >> NVME_CC_IOCQES_SHIFT) & 0xf;
}
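
/*
 * Worked example (illustrative, not part of the original file): a host
 * typically enables the controller with CC = 0x00460001, which decodes as
 * EN = 1, CSS/MPS/AMS/SHN = 0, IOSQES = 6 (64-byte SQEs) and IOCQES = 4
 * (16-byte CQEs), so every check in nvmet_start_ctrl() below passes.
 */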

static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
{
	lockdep_assert_held(&ctrl->lock);

	if (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES ||
	    nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES ||
	    nvmet_cc_mps(ctrl->cc) != 0 ||
	    nvmet_cc_ams(ctrl->cc) != 0 ||
	    nvmet_cc_css(ctrl->cc) != 0) {
		ctrl->csts = NVME_CSTS_CFS;
		return;
	}

	ctrl->csts = NVME_CSTS_RDY;

	/*
	 * Controllers that are not yet enabled should not really enforce the
	 * keep alive timeout, but we still want to track a timeout and clean
	 * up in case a host died before it enabled the controller.  Hence,
	 * simply reset the keep alive timer when the controller is enabled.
	 */
	mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ);
}

static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
{
	lockdep_assert_held(&ctrl->lock);

	/* XXX: tear down queues? */
	ctrl->csts &= ~NVME_CSTS_RDY;
	ctrl->cc = 0;
}

void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new)
{
	u32 old;

	mutex_lock(&ctrl->lock);
	old = ctrl->cc;
	ctrl->cc = new;

	if (nvmet_cc_en(new) && !nvmet_cc_en(old))
		nvmet_start_ctrl(ctrl);
	if (!nvmet_cc_en(new) && nvmet_cc_en(old))
		nvmet_clear_ctrl(ctrl);
	if (nvmet_cc_shn(new) && !nvmet_cc_shn(old)) {
		nvmet_clear_ctrl(ctrl);
		ctrl->csts |= NVME_CSTS_SHST_CMPLT;
	}
	if (!nvmet_cc_shn(new) && nvmet_cc_shn(old))
		ctrl->csts &= ~NVME_CSTS_SHST_CMPLT;
	mutex_unlock(&ctrl->lock);
}

static void nvmet_init_cap(struct nvmet_ctrl *ctrl)
{
	/* command sets supported: NVMe command set: */
	ctrl->cap = (1ULL << 37);
	/* CC.EN timeout in 500msec units: */
	ctrl->cap |= (15ULL << 24);
	/* maximum queue entries supported: */
	ctrl->cap |= NVMET_QUEUE_SIZE - 1;
}

u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid,
		struct nvmet_req *req, struct nvmet_ctrl **ret)
{
	struct nvmet_subsys *subsys;
	struct nvmet_ctrl *ctrl;
	u16 status = 0;

	subsys = nvmet_find_get_subsys(req->port, subsysnqn);
	if (!subsys) {
		pr_warn("connect request for invalid subsystem %s!\n",
			subsysnqn);
		req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
		return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	}

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		if (ctrl->cntlid == cntlid) {
			if (strncmp(hostnqn, ctrl->hostnqn, NVMF_NQN_SIZE)) {
				pr_warn("hostnqn mismatch.\n");
				continue;
			}
			if (!kref_get_unless_zero(&ctrl->ref))
				continue;

			*ret = ctrl;
			goto out;
		}
	}

	pr_warn("could not find controller %d for subsys %s / host %s\n",
		cntlid, subsysnqn, hostnqn);
	req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);
	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;

out:
	mutex_unlock(&subsys->lock);
	nvmet_subsys_put(subsys);
	return status;
}

u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd)
{
	if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
		pr_err("got cmd %d while CC.EN == 0 on qid = %d\n",
		       cmd->common.opcode, req->sq->qid);
		return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
	}

	if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
		pr_err("got cmd %d while CSTS.RDY == 0 on qid = %d\n",
		       cmd->common.opcode, req->sq->qid);
		return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
	}
	return 0;
}

bool nvmet_host_allowed(struct nvmet_subsys *subsys, const char *hostnqn)
{
	struct nvmet_host_link *p;

	lockdep_assert_held(&nvmet_config_sem);

	if (subsys->allow_any_host)
		return true;

	if (subsys->type == NVME_NQN_DISC) /* allow all access to disc subsys */
		return true;

	list_for_each_entry(p, &subsys->hosts, entry) {
		if (!strcmp(nvmet_host_name(p->host), hostnqn))
			return true;
	}

	return false;
}

/*
 * Note: ctrl->subsys->lock should be held when calling this function
 */
static void nvmet_setup_p2p_ns_map(struct nvmet_ctrl *ctrl,
		struct nvmet_req *req)
{
	struct nvmet_ns *ns;

	if (!req->p2p_client)
		return;

	ctrl->p2p_client = get_device(req->p2p_client);

	list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link)
		nvmet_p2pmem_ns_add_p2p(ctrl, ns);
}

/*
 * Note: ctrl->subsys->lock should be held when calling this function
 */
static void nvmet_release_p2p_ns_map(struct nvmet_ctrl *ctrl)
{
	struct radix_tree_iter iter;
	void __rcu **slot;

	radix_tree_for_each_slot(slot, &ctrl->p2p_ns_map, &iter, 0)
		pci_dev_put(radix_tree_deref_slot(slot));

	put_device(ctrl->p2p_client);
}

static void nvmet_fatal_error_handler(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl =
		container_of(work, struct nvmet_ctrl, fatal_err_work);

	pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid);
	ctrl->ops->delete_ctrl(ctrl);
}

u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
		struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp)
{
	struct nvmet_subsys *subsys;
	struct nvmet_ctrl *ctrl;
	int ret;
	u16 status;

	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	subsys = nvmet_find_get_subsys(req->port, subsysnqn);
	if (!subsys) {
		pr_warn("connect request for invalid subsystem %s!\n",
			subsysnqn);
		req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
		goto out;
	}

	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	down_read(&nvmet_config_sem);
	if (!nvmet_host_allowed(subsys, hostnqn)) {
		pr_info("connect by host %s for subsystem %s not allowed\n",
			hostnqn, subsysnqn);
		req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn);
		up_read(&nvmet_config_sem);
		status = NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR;
		goto out_put_subsystem;
	}
	up_read(&nvmet_config_sem);

	status = NVME_SC_INTERNAL;
	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
	if (!ctrl)
		goto out_put_subsystem;
	mutex_init(&ctrl->lock);

	nvmet_init_cap(ctrl);

	ctrl->port = req->port;

	INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
	INIT_LIST_HEAD(&ctrl->async_events);
	INIT_RADIX_TREE(&ctrl->p2p_ns_map, GFP_KERNEL);
	INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler);

	memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
	memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);

	kref_init(&ctrl->ref);
	ctrl->subsys = subsys;
	WRITE_ONCE(ctrl->aen_enabled, NVMET_AEN_CFG_OPTIONAL);

	ctrl->changed_ns_list = kmalloc_array(NVME_MAX_CHANGED_NAMESPACES,
			sizeof(__le32), GFP_KERNEL);
	if (!ctrl->changed_ns_list)
		goto out_free_ctrl;

	ctrl->cqs = kcalloc(subsys->max_qid + 1,
			sizeof(struct nvmet_cq *),
			GFP_KERNEL);
	if (!ctrl->cqs)
		goto out_free_changed_ns_list;

	ctrl->sqs = kcalloc(subsys->max_qid + 1,
			sizeof(struct nvmet_sq *),
			GFP_KERNEL);
	if (!ctrl->sqs)
		goto out_free_cqs;

	ret = ida_simple_get(&cntlid_ida,
			     NVME_CNTLID_MIN, NVME_CNTLID_MAX,
			     GFP_KERNEL);
	if (ret < 0) {
		status = NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
		goto out_free_sqs;
	}
	ctrl->cntlid = ret;

	ctrl->ops = req->ops;

	/*
	 * Discovery controllers may use some arbitrary high value
	 * in order to clean up stale discovery sessions
	 */
	if ((ctrl->subsys->type == NVME_NQN_DISC) && !kato)
		kato = NVMET_DISC_KATO_MS;

	/* keep-alive timeout in seconds */
	ctrl->kato = DIV_ROUND_UP(kato, 1000);

	ctrl->err_counter = 0;
	spin_lock_init(&ctrl->error_lock);

	nvmet_start_keep_alive_timer(ctrl);

	mutex_lock(&subsys->lock);
	list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
	nvmet_setup_p2p_ns_map(ctrl, req);
	mutex_unlock(&subsys->lock);

	*ctrlp = ctrl;
	return 0;

out_free_sqs:
	kfree(ctrl->sqs);
out_free_cqs:
	kfree(ctrl->cqs);
out_free_changed_ns_list:
	kfree(ctrl->changed_ns_list);
out_free_ctrl:
	kfree(ctrl);
out_put_subsystem:
	nvmet_subsys_put(subsys);
out:
	return status;
}

static void nvmet_ctrl_free(struct kref *ref)
{
	struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref);
	struct nvmet_subsys *subsys = ctrl->subsys;

	mutex_lock(&subsys->lock);
	nvmet_release_p2p_ns_map(ctrl);
	list_del(&ctrl->subsys_entry);
	mutex_unlock(&subsys->lock);

	nvmet_stop_keep_alive_timer(ctrl);

	flush_work(&ctrl->async_event_work);
	cancel_work_sync(&ctrl->fatal_err_work);

	ida_simple_remove(&cntlid_ida, ctrl->cntlid);

	kfree(ctrl->sqs);
	kfree(ctrl->cqs);
	kfree(ctrl->changed_ns_list);
	kfree(ctrl);

	nvmet_subsys_put(subsys);
}

void nvmet_ctrl_put(struct nvmet_ctrl *ctrl)
{
	kref_put(&ctrl->ref, nvmet_ctrl_free);
}

void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl)
{
	mutex_lock(&ctrl->lock);
	if (!(ctrl->csts & NVME_CSTS_CFS)) {
		ctrl->csts |= NVME_CSTS_CFS;
		schedule_work(&ctrl->fatal_err_work);
	}
	mutex_unlock(&ctrl->lock);
}
EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error);

static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
		const char *subsysnqn)
{
	struct nvmet_subsys_link *p;

	if (!port)
		return NULL;

	if (!strcmp(NVME_DISC_SUBSYS_NAME, subsysnqn)) {
		if (!kref_get_unless_zero(&nvmet_disc_subsys->ref))
			return NULL;
		return nvmet_disc_subsys;
	}

	down_read(&nvmet_config_sem);
	list_for_each_entry(p, &port->subsystems, entry) {
		if (!strncmp(p->subsys->subsysnqn, subsysnqn,
				NVMF_NQN_SIZE)) {
			if (!kref_get_unless_zero(&p->subsys->ref))
				break;
			up_read(&nvmet_config_sem);
			return p->subsys;
		}
	}
	up_read(&nvmet_config_sem);
	return NULL;
}

struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
		enum nvme_subsys_type type)
{
	struct nvmet_subsys *subsys;

	subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
	if (!subsys)
		return ERR_PTR(-ENOMEM);

	subsys->ver = NVME_VS(1, 3, 0); /* NVMe 1.3.0 */
	/* generate a random serial number as our controllers are ephemeral: */
	get_random_bytes(&subsys->serial, sizeof(subsys->serial));

	switch (type) {
	case NVME_NQN_NVME:
		subsys->max_qid = NVMET_NR_QUEUES;
		break;
	case NVME_NQN_DISC:
		subsys->max_qid = 0;
		break;
	default:
		pr_err("%s: Unknown Subsystem type - %d\n", __func__, type);
		kfree(subsys);
		return ERR_PTR(-EINVAL);
	}
	subsys->type = type;
	subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE,
			GFP_KERNEL);
	if (!subsys->subsysnqn) {
		kfree(subsys);
		return ERR_PTR(-ENOMEM);
	}

	kref_init(&subsys->ref);

	mutex_init(&subsys->lock);
	INIT_LIST_HEAD(&subsys->namespaces);
	INIT_LIST_HEAD(&subsys->ctrls);
	INIT_LIST_HEAD(&subsys->hosts);

	return subsys;
}

static void nvmet_subsys_free(struct kref *ref)
{
	struct nvmet_subsys *subsys =
		container_of(ref, struct nvmet_subsys, ref);

	WARN_ON_ONCE(!list_empty(&subsys->namespaces));

	kfree(subsys->subsysnqn);
	kfree(subsys);
}

void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys)
{
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		ctrl->ops->delete_ctrl(ctrl);
	mutex_unlock(&subsys->lock);
}

void nvmet_subsys_put(struct nvmet_subsys *subsys)
{
	kref_put(&subsys->ref, nvmet_subsys_free);
}

static int __init nvmet_init(void)
{
	int error;

	nvmet_ana_group_enabled[NVMET_DEFAULT_ANA_GRPID] = 1;

	buffered_io_wq = alloc_workqueue("nvmet-buffered-io-wq",
			WQ_MEM_RECLAIM, 0);
	if (!buffered_io_wq) {
		error = -ENOMEM;
		goto out;
	}

	error = nvmet_init_discovery();
	if (error)
		goto out_free_work_queue;

	error = nvmet_init_configfs();
	if (error)
		goto out_exit_discovery;
	return 0;

out_exit_discovery:
	nvmet_exit_discovery();
out_free_work_queue:
	destroy_workqueue(buffered_io_wq);
out:
	return error;
}

static void __exit nvmet_exit(void)
{
	nvmet_exit_configfs();
	nvmet_exit_discovery();
	ida_destroy(&cntlid_ida);
	destroy_workqueue(buffered_io_wq);

	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
}

module_init(nvmet_init);
module_exit(nvmet_exit);

MODULE_LICENSE("GPL v2");