drivers/nvme/target/core.c
77141dc6 1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Common code for the NVMe target.
4 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
5 */
6#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7#include <linux/module.h>
28b89118 8#include <linux/random.h>
b2d09103 9#include <linux/rculist.h>
c6925093 10#include <linux/pci-p2pdma.h>
a5dffbb6 11#include <linux/scatterlist.h>
b2d09103 12
13#include "nvmet.h"
14
55eb942e 15struct workqueue_struct *buffered_io_wq;
e929f06d 16static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
15fbad96 17static DEFINE_IDA(cntlid_ida);
18
19/*
20 * This read/write semaphore is used to synchronize access to configuration
21 * information on a target system that will result in discovery log page
22 * information change for at least one host.
 23 * The full list of resources protected by this semaphore is:
24 *
25 * - subsystems list
26 * - per-subsystem allowed hosts list
27 * - allow_any_host subsystem attribute
28 * - nvmet_genctr
29 * - the nvmet_transports array
30 *
 31 * When updating any of those lists/structures the write lock must be held;
 32 * when reading them (populating the discovery log page or checking a
 33 * host-subsystem link) the read lock is held to allow concurrent readers.
34 */
35DECLARE_RWSEM(nvmet_config_sem);
36
37u32 nvmet_ana_group_enabled[NVMET_MAX_ANAGRPS + 1];
38u64 nvmet_ana_chgcnt;
39DECLARE_RWSEM(nvmet_ana_sem);
40
41inline u16 errno_to_nvme_status(struct nvmet_req *req, int errno)
42{
43 u16 status;
44
45 switch (errno) {
46 case -ENOSPC:
47 req->error_loc = offsetof(struct nvme_rw_command, length);
48 status = NVME_SC_CAP_EXCEEDED | NVME_SC_DNR;
49 break;
50 case -EREMOTEIO:
51 req->error_loc = offsetof(struct nvme_rw_command, slba);
52 status = NVME_SC_LBA_RANGE | NVME_SC_DNR;
53 break;
54 case -EOPNOTSUPP:
55 req->error_loc = offsetof(struct nvme_common_command, opcode);
56 switch (req->cmd->common.opcode) {
57 case nvme_cmd_dsm:
58 case nvme_cmd_write_zeroes:
59 status = NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR;
60 break;
61 default:
62 status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
63 }
64 break;
65 case -ENODATA:
66 req->error_loc = offsetof(struct nvme_rw_command, nsid);
67 status = NVME_SC_ACCESS_DENIED;
68 break;
69 case -EIO:
70 /* FALLTHRU */
71 default:
72 req->error_loc = offsetof(struct nvme_common_command, opcode);
73 status = NVME_SC_INTERNAL | NVME_SC_DNR;
74 }
75
76 return status;
77}
78
79static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
80 const char *subsysnqn);
81
82u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
83 size_t len)
84{
85 if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len) {
86 req->error_loc = offsetof(struct nvme_common_command, dptr);
a07b4970 87 return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
e81446af 88 }
89 return 0;
90}
91
92u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len)
93{
94 if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len) {
95 req->error_loc = offsetof(struct nvme_common_command, dptr);
a07b4970 96 return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
e81446af 97 }
98 return 0;
99}
100
101u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len)
102{
103 if (sg_zero_buffer(req->sg, req->sg_cnt, len, off) != len) {
104 req->error_loc = offsetof(struct nvme_common_command, dptr);
c7759fff 105 return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
e81446af 106 }
107 return 0;
108}
109
110static unsigned int nvmet_max_nsid(struct nvmet_subsys *subsys)
111{
112 struct nvmet_ns *ns;
113
114 if (list_empty(&subsys->namespaces))
115 return 0;
116
117 ns = list_last_entry(&subsys->namespaces, struct nvmet_ns, dev_link);
118 return ns->nsid;
119}
120
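/*
 * nvmet_async_event_result() below packs completion Dword 0 for an
 * Asynchronous Event Request per the NVMe spec: event type in the low byte,
 * event information in bits 15:8 and the log page identifier in bits 23:16.
 * A namespace change notice, for example, becomes NVME_AER_TYPE_NOTICE |
 * (NVME_AER_NOTICE_NS_CHANGED << 8) | (NVME_LOG_CHANGED_NS << 16).
 */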
121static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
122{
123 return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16);
124}
125
126static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
127{
128 struct nvmet_req *req;
129
130 while (1) {
131 mutex_lock(&ctrl->lock);
132 if (!ctrl->nr_async_event_cmds) {
133 mutex_unlock(&ctrl->lock);
134 return;
135 }
136
137 req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
138 mutex_unlock(&ctrl->lock);
139 nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
140 }
141}
142
143static void nvmet_async_event_work(struct work_struct *work)
144{
145 struct nvmet_ctrl *ctrl =
146 container_of(work, struct nvmet_ctrl, async_event_work);
147 struct nvmet_async_event *aen;
148 struct nvmet_req *req;
149
150 while (1) {
151 mutex_lock(&ctrl->lock);
152 aen = list_first_entry_or_null(&ctrl->async_events,
153 struct nvmet_async_event, entry);
154 if (!aen || !ctrl->nr_async_event_cmds) {
155 mutex_unlock(&ctrl->lock);
156 return;
157 }
158
159 req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
160 nvmet_set_result(req, nvmet_async_event_result(aen));
161
162 list_del(&aen->entry);
163 kfree(aen);
164
165 mutex_unlock(&ctrl->lock);
166 nvmet_req_complete(req, 0);
167 }
168}
169
b662a078 170void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
171 u8 event_info, u8 log_page)
172{
173 struct nvmet_async_event *aen;
174
175 aen = kmalloc(sizeof(*aen), GFP_KERNEL);
176 if (!aen)
177 return;
178
179 aen->event_type = event_type;
180 aen->event_info = event_info;
181 aen->log_page = log_page;
182
183 mutex_lock(&ctrl->lock);
184 list_add_tail(&aen->entry, &ctrl->async_events);
185 mutex_unlock(&ctrl->lock);
186
187 schedule_work(&ctrl->async_event_work);
188}
189
190static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid)
191{
192 u32 i;
193
194 mutex_lock(&ctrl->lock);
195 if (ctrl->nr_changed_ns > NVME_MAX_CHANGED_NAMESPACES)
196 goto out_unlock;
197
198 for (i = 0; i < ctrl->nr_changed_ns; i++) {
199 if (ctrl->changed_ns_list[i] == nsid)
200 goto out_unlock;
201 }
202
203 if (ctrl->nr_changed_ns == NVME_MAX_CHANGED_NAMESPACES) {
204 ctrl->changed_ns_list[0] = cpu_to_le32(0xffffffff);
205 ctrl->nr_changed_ns = U32_MAX;
206 goto out_unlock;
207 }
208
209 ctrl->changed_ns_list[ctrl->nr_changed_ns++] = nsid;
210out_unlock:
211 mutex_unlock(&ctrl->lock);
212}
213
dedf0be5 214void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid)
215{
216 struct nvmet_ctrl *ctrl;
217
218 lockdep_assert_held(&subsys->lock);
219
220 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
221 nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid));
7114ddeb 222 if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_NS_ATTR))
c86b8f7b 223 continue;
224 nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
225 NVME_AER_NOTICE_NS_CHANGED,
226 NVME_LOG_CHANGED_NS);
227 }
228}
229
230void nvmet_send_ana_event(struct nvmet_subsys *subsys,
231 struct nvmet_port *port)
232{
233 struct nvmet_ctrl *ctrl;
234
235 mutex_lock(&subsys->lock);
236 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
237 if (port && ctrl->port != port)
238 continue;
7114ddeb 239 if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_ANA_CHANGE))
240 continue;
241 nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
242 NVME_AER_NOTICE_ANA, NVME_LOG_ANA);
243 }
244 mutex_unlock(&subsys->lock);
245}
246
247void nvmet_port_send_ana_event(struct nvmet_port *port)
248{
249 struct nvmet_subsys_link *p;
250
251 down_read(&nvmet_config_sem);
252 list_for_each_entry(p, &port->subsystems, entry)
253 nvmet_send_ana_event(p->subsys, port);
254 up_read(&nvmet_config_sem);
255}
256
e929f06d 257int nvmet_register_transport(const struct nvmet_fabrics_ops *ops)
258{
259 int ret = 0;
260
261 down_write(&nvmet_config_sem);
262 if (nvmet_transports[ops->type])
263 ret = -EINVAL;
264 else
265 nvmet_transports[ops->type] = ops;
266 up_write(&nvmet_config_sem);
267
268 return ret;
269}
270EXPORT_SYMBOL_GPL(nvmet_register_transport);
271
e929f06d 272void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops)
273{
274 down_write(&nvmet_config_sem);
275 nvmet_transports[ops->type] = NULL;
276 up_write(&nvmet_config_sem);
277}
278EXPORT_SYMBOL_GPL(nvmet_unregister_transport);
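/*
 * Illustrative sketch: a fabrics transport provides a struct
 * nvmet_fabrics_ops and registers it at module load time. The example_*
 * names below are hypothetical placeholders; only ops fields that the core
 * actually dereferences (owner, type, add_port, remove_port, queue_response,
 * delete_ctrl) are shown.
 */
#if 0
static const struct nvmet_fabrics_ops example_ops = {
	.owner		= THIS_MODULE,
	.type		= NVMF_TRTYPE_LOOP,
	.add_port	= example_add_port,
	.remove_port	= example_remove_port,
	.queue_response	= example_queue_response,
	.delete_ctrl	= example_delete_ctrl,
};

static int __init example_init(void)
{
	return nvmet_register_transport(&example_ops);
}

static void __exit example_exit(void)
{
	nvmet_unregister_transport(&example_ops);
}
#endif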
279
280int nvmet_enable_port(struct nvmet_port *port)
281{
e929f06d 282 const struct nvmet_fabrics_ops *ops;
283 int ret;
284
285 lockdep_assert_held(&nvmet_config_sem);
286
287 ops = nvmet_transports[port->disc_addr.trtype];
288 if (!ops) {
289 up_write(&nvmet_config_sem);
290 request_module("nvmet-transport-%d", port->disc_addr.trtype);
291 down_write(&nvmet_config_sem);
292 ops = nvmet_transports[port->disc_addr.trtype];
293 if (!ops) {
294 pr_err("transport type %d not supported\n",
295 port->disc_addr.trtype);
296 return -EINVAL;
297 }
298 }
299
300 if (!try_module_get(ops->owner))
301 return -EINVAL;
302
303 ret = ops->add_port(port);
304 if (ret) {
305 module_put(ops->owner);
306 return ret;
307 }
308
309 /* If the transport didn't set inline_data_size, then disable it. */
310 if (port->inline_data_size < 0)
311 port->inline_data_size = 0;
312
a07b4970 313 port->enabled = true;
9d09dd8d 314 port->tr_ops = ops;
315 return 0;
316}
317
318void nvmet_disable_port(struct nvmet_port *port)
319{
e929f06d 320 const struct nvmet_fabrics_ops *ops;
321
322 lockdep_assert_held(&nvmet_config_sem);
323
324 port->enabled = false;
9d09dd8d 325 port->tr_ops = NULL;
326
327 ops = nvmet_transports[port->disc_addr.trtype];
328 ops->remove_port(port);
329 module_put(ops->owner);
330}
331
332static void nvmet_keep_alive_timer(struct work_struct *work)
333{
334 struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work),
335 struct nvmet_ctrl, ka_work);
336 bool cmd_seen = ctrl->cmd_seen;
337
338 ctrl->cmd_seen = false;
339 if (cmd_seen) {
340 pr_debug("ctrl %d reschedule traffic based keep-alive timer\n",
341 ctrl->cntlid);
342 schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
343 return;
344 }
345
346 pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n",
347 ctrl->cntlid, ctrl->kato);
348
23a8ed4a 349 nvmet_ctrl_fatal_error(ctrl);
350}
351
352static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
353{
354 pr_debug("ctrl %d start keep-alive timer for %d secs\n",
355 ctrl->cntlid, ctrl->kato);
356
357 INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer);
358 schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
359}
360
361static void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
362{
363 pr_debug("ctrl %d stop keep-alive\n", ctrl->cntlid);
364
365 cancel_delayed_work_sync(&ctrl->ka_work);
366}
367
368static struct nvmet_ns *__nvmet_find_namespace(struct nvmet_ctrl *ctrl,
369 __le32 nsid)
370{
371 struct nvmet_ns *ns;
372
373 list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
374 if (ns->nsid == le32_to_cpu(nsid))
375 return ns;
376 }
377
378 return NULL;
379}
380
381struct nvmet_ns *nvmet_find_namespace(struct nvmet_ctrl *ctrl, __le32 nsid)
382{
383 struct nvmet_ns *ns;
384
385 rcu_read_lock();
386 ns = __nvmet_find_namespace(ctrl, nsid);
387 if (ns)
388 percpu_ref_get(&ns->ref);
389 rcu_read_unlock();
390
391 return ns;
392}
393
394static void nvmet_destroy_namespace(struct percpu_ref *ref)
395{
396 struct nvmet_ns *ns = container_of(ref, struct nvmet_ns, ref);
397
398 complete(&ns->disable_done);
399}
400
401void nvmet_put_namespace(struct nvmet_ns *ns)
402{
403 percpu_ref_put(&ns->ref);
404}
405
406static void nvmet_ns_dev_disable(struct nvmet_ns *ns)
407{
408 nvmet_bdev_ns_disable(ns);
409 nvmet_file_ns_disable(ns);
410}
411
412static int nvmet_p2pmem_ns_enable(struct nvmet_ns *ns)
413{
414 int ret;
415 struct pci_dev *p2p_dev;
416
417 if (!ns->use_p2pmem)
418 return 0;
419
420 if (!ns->bdev) {
421 pr_err("peer-to-peer DMA is not supported by non-block device namespaces\n");
422 return -EINVAL;
423 }
424
425 if (!blk_queue_pci_p2pdma(ns->bdev->bd_queue)) {
426 pr_err("peer-to-peer DMA is not supported by the driver of %s\n",
427 ns->device_path);
428 return -EINVAL;
429 }
430
431 if (ns->p2p_dev) {
432 ret = pci_p2pdma_distance(ns->p2p_dev, nvmet_ns_dev(ns), true);
433 if (ret < 0)
434 return -EINVAL;
435 } else {
436 /*
437 * Right now we just check that there is p2pmem available so
438 * we can report an error to the user right away if there
439 * is not. We'll find the actual device to use once we
 440 * set up the controller when the port's device is available.
441 */
442
443 p2p_dev = pci_p2pmem_find(nvmet_ns_dev(ns));
444 if (!p2p_dev) {
445 pr_err("no peer-to-peer memory is available for %s\n",
446 ns->device_path);
447 return -EINVAL;
448 }
449
450 pci_dev_put(p2p_dev);
451 }
452
453 return 0;
454}
455
456/*
457 * Note: ctrl->subsys->lock should be held when calling this function
458 */
459static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl,
460 struct nvmet_ns *ns)
461{
462 struct device *clients[2];
463 struct pci_dev *p2p_dev;
464 int ret;
465
21d3bbdd 466 if (!ctrl->p2p_client || !ns->use_p2pmem)
467 return;
468
469 if (ns->p2p_dev) {
470 ret = pci_p2pdma_distance(ns->p2p_dev, ctrl->p2p_client, true);
471 if (ret < 0)
472 return;
473
474 p2p_dev = pci_dev_get(ns->p2p_dev);
475 } else {
476 clients[0] = ctrl->p2p_client;
477 clients[1] = nvmet_ns_dev(ns);
478
479 p2p_dev = pci_p2pmem_find_many(clients, ARRAY_SIZE(clients));
480 if (!p2p_dev) {
481 pr_err("no peer-to-peer memory is available that's supported by %s and %s\n",
482 dev_name(ctrl->p2p_client), ns->device_path);
483 return;
484 }
485 }
486
487 ret = radix_tree_insert(&ctrl->p2p_ns_map, ns->nsid, p2p_dev);
488 if (ret < 0)
489 pci_dev_put(p2p_dev);
490
491 pr_info("using p2pmem on %s for nsid %d\n", pci_name(p2p_dev),
492 ns->nsid);
493}
494
495int nvmet_ns_enable(struct nvmet_ns *ns)
496{
497 struct nvmet_subsys *subsys = ns->subsys;
c6925093 498 struct nvmet_ctrl *ctrl;
793c7cfc 499 int ret;
500
501 mutex_lock(&subsys->lock);
793c7cfc 502 ret = 0;
e4fcf07c 503 if (ns->enabled)
504 goto out_unlock;
505
506 ret = -EMFILE;
507 if (subsys->nr_namespaces == NVMET_MAX_NAMESPACES)
508 goto out_unlock;
509
d5eff33e 510 ret = nvmet_bdev_ns_enable(ns);
405a7519 511 if (ret == -ENOTBLK)
512 ret = nvmet_file_ns_enable(ns);
513 if (ret)
a07b4970 514 goto out_unlock;
a07b4970 515
516 ret = nvmet_p2pmem_ns_enable(ns);
517 if (ret)
a536b497 518 goto out_dev_disable;
519
520 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
521 nvmet_p2pmem_ns_add_p2p(ctrl, ns);
522
523 ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace,
524 0, GFP_KERNEL);
525 if (ret)
d5eff33e 526 goto out_dev_put;
527
528 if (ns->nsid > subsys->max_nsid)
529 subsys->max_nsid = ns->nsid;
530
531 /*
532 * The namespaces list needs to be sorted to simplify the implementation
 533 * of the Identify Namespace List subcommand.
534 */
535 if (list_empty(&subsys->namespaces)) {
536 list_add_tail_rcu(&ns->dev_link, &subsys->namespaces);
537 } else {
538 struct nvmet_ns *old;
539
540 list_for_each_entry_rcu(old, &subsys->namespaces, dev_link) {
541 BUG_ON(ns->nsid == old->nsid);
542 if (ns->nsid < old->nsid)
543 break;
544 }
545
546 list_add_tail_rcu(&ns->dev_link, &old->dev_link);
547 }
793c7cfc 548 subsys->nr_namespaces++;
a07b4970 549
c16734ea 550 nvmet_ns_changed(subsys, ns->nsid);
e4fcf07c 551 ns->enabled = true;
552 ret = 0;
553out_unlock:
554 mutex_unlock(&subsys->lock);
555 return ret;
d5eff33e 556out_dev_put:
557 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
558 pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));
a536b497 559out_dev_disable:
d5eff33e 560 nvmet_ns_dev_disable(ns);
561 goto out_unlock;
562}
563
564void nvmet_ns_disable(struct nvmet_ns *ns)
565{
566 struct nvmet_subsys *subsys = ns->subsys;
c6925093 567 struct nvmet_ctrl *ctrl;
568
569 mutex_lock(&subsys->lock);
570 if (!ns->enabled)
571 goto out_unlock;
572
573 ns->enabled = false;
574 list_del_rcu(&ns->dev_link);
575 if (ns->nsid == subsys->max_nsid)
576 subsys->max_nsid = nvmet_max_nsid(subsys);
577
578 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
579 pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));
580
581 mutex_unlock(&subsys->lock);
582
583 /*
 584 * Now that we removed the namespace from the lookup list, we
 585 * can kill the per_cpu ref and wait for any remaining references
 586 * to be dropped, as well as an RCU grace period for anyone only
 587 * using the namespace under rcu_read_lock(). Note that we can't
588 * use call_rcu here as we need to ensure the namespaces have
589 * been fully destroyed before unloading the module.
590 */
591 percpu_ref_kill(&ns->ref);
592 synchronize_rcu();
593 wait_for_completion(&ns->disable_done);
594 percpu_ref_exit(&ns->ref);
595
596 mutex_lock(&subsys->lock);
c6925093 597
793c7cfc 598 subsys->nr_namespaces--;
c16734ea 599 nvmet_ns_changed(subsys, ns->nsid);
d5eff33e 600 nvmet_ns_dev_disable(ns);
e4fcf07c 601out_unlock:
602 mutex_unlock(&subsys->lock);
603}
604
605void nvmet_ns_free(struct nvmet_ns *ns)
606{
607 nvmet_ns_disable(ns);
608
609 down_write(&nvmet_ana_sem);
610 nvmet_ana_group_enabled[ns->anagrpid]--;
611 up_write(&nvmet_ana_sem);
612
613 kfree(ns->device_path);
614 kfree(ns);
615}
616
617struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
618{
619 struct nvmet_ns *ns;
620
621 ns = kzalloc(sizeof(*ns), GFP_KERNEL);
622 if (!ns)
623 return NULL;
624
625 INIT_LIST_HEAD(&ns->dev_link);
626 init_completion(&ns->disable_done);
627
628 ns->nsid = nsid;
629 ns->subsys = subsys;
630
631 down_write(&nvmet_ana_sem);
632 ns->anagrpid = NVMET_DEFAULT_ANA_GRPID;
633 nvmet_ana_group_enabled[ns->anagrpid]++;
634 up_write(&nvmet_ana_sem);
635
637dc0f3 636 uuid_gen(&ns->uuid);
55eb942e 637 ns->buffered_io = false;
638
639 return ns;
640}
641
e6a622fd 642static void nvmet_update_sq_head(struct nvmet_req *req)
a07b4970 643{
f9cf2a64 644 if (req->sq->size) {
645 u32 old_sqhd, new_sqhd;
646
647 do {
648 old_sqhd = req->sq->sqhd;
649 new_sqhd = (old_sqhd + 1) % req->sq->size;
650 } while (cmpxchg(&req->sq->sqhd, old_sqhd, new_sqhd) !=
651 old_sqhd);
652 }
fc6c9730 653 req->cqe->sq_head = cpu_to_le16(req->sq->sqhd & 0x0000FFFF);
654}
655
656static void nvmet_set_error(struct nvmet_req *req, u16 status)
657{
658 struct nvmet_ctrl *ctrl = req->sq->ctrl;
659 struct nvme_error_slot *new_error_slot;
660 unsigned long flags;
661
fc6c9730 662 req->cqe->status = cpu_to_le16(status << 1);
76574f37 663
5698b805 664 if (!ctrl || req->error_loc == NVMET_NO_ERROR_LOC)
665 return;
666
667 spin_lock_irqsave(&ctrl->error_lock, flags);
668 ctrl->err_counter++;
669 new_error_slot =
670 &ctrl->slots[ctrl->err_counter % NVMET_ERROR_LOG_SLOTS];
671
672 new_error_slot->error_count = cpu_to_le64(ctrl->err_counter);
673 new_error_slot->sqid = cpu_to_le16(req->sq->qid);
674 new_error_slot->cmdid = cpu_to_le16(req->cmd->common.command_id);
675 new_error_slot->status_field = cpu_to_le16(status << 1);
676 new_error_slot->param_error_location = cpu_to_le16(req->error_loc);
677 new_error_slot->lba = cpu_to_le64(req->error_slba);
678 new_error_slot->nsid = req->cmd->common.nsid;
679 spin_unlock_irqrestore(&ctrl->error_lock, flags);
680
681 /* set the more bit for this request */
fc6c9730 682 req->cqe->status |= cpu_to_le16(1 << 14);
683}
684
685static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
686{
687 if (!req->sq->sqhd_disabled)
688 nvmet_update_sq_head(req);
689 req->cqe->sq_id = cpu_to_le16(req->sq->qid);
690 req->cqe->command_id = req->cmd->common.command_id;
76574f37 691
cb019da3 692 if (unlikely(status))
76574f37 693 nvmet_set_error(req, status);
694 if (req->ns)
695 nvmet_put_namespace(req->ns);
696 req->ops->queue_response(req);
697}
698
699void nvmet_req_complete(struct nvmet_req *req, u16 status)
700{
701 __nvmet_req_complete(req, status);
702 percpu_ref_put(&req->sq->ref);
703}
704EXPORT_SYMBOL_GPL(nvmet_req_complete);
705
706void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
707 u16 qid, u16 size)
708{
709 cq->qid = qid;
710 cq->size = size;
711
712 ctrl->cqs[qid] = cq;
713}
714
715void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
716 u16 qid, u16 size)
717{
bb1cc747 718 sq->sqhd = 0;
719 sq->qid = qid;
720 sq->size = size;
721
722 ctrl->sqs[qid] = sq;
723}
724
725static void nvmet_confirm_sq(struct percpu_ref *ref)
726{
727 struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);
728
729 complete(&sq->confirm_done);
730}
731
732void nvmet_sq_destroy(struct nvmet_sq *sq)
733{
734 /*
735 * If this is the admin queue, complete all AERs so that our
736 * queue doesn't have outstanding requests on it.
737 */
738 if (sq->ctrl && sq->ctrl->sqs && sq->ctrl->sqs[0] == sq)
739 nvmet_async_events_free(sq->ctrl);
740 percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
741 wait_for_completion(&sq->confirm_done);
742 wait_for_completion(&sq->free_done);
743 percpu_ref_exit(&sq->ref);
744
745 if (sq->ctrl) {
746 nvmet_ctrl_put(sq->ctrl);
747 sq->ctrl = NULL; /* allows reusing the queue later */
748 }
749}
750EXPORT_SYMBOL_GPL(nvmet_sq_destroy);
751
752static void nvmet_sq_free(struct percpu_ref *ref)
753{
754 struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);
755
756 complete(&sq->free_done);
757}
758
759int nvmet_sq_init(struct nvmet_sq *sq)
760{
761 int ret;
762
763 ret = percpu_ref_init(&sq->ref, nvmet_sq_free, 0, GFP_KERNEL);
764 if (ret) {
765 pr_err("percpu_ref init failed!\n");
766 return ret;
767 }
768 init_completion(&sq->free_done);
427242ce 769 init_completion(&sq->confirm_done);
770
771 return 0;
772}
773EXPORT_SYMBOL_GPL(nvmet_sq_init);
774
775static inline u16 nvmet_check_ana_state(struct nvmet_port *port,
776 struct nvmet_ns *ns)
777{
778 enum nvme_ana_state state = port->ana_state[ns->anagrpid];
779
780 if (unlikely(state == NVME_ANA_INACCESSIBLE))
781 return NVME_SC_ANA_INACCESSIBLE;
782 if (unlikely(state == NVME_ANA_PERSISTENT_LOSS))
783 return NVME_SC_ANA_PERSISTENT_LOSS;
784 if (unlikely(state == NVME_ANA_CHANGE))
785 return NVME_SC_ANA_TRANSITION;
786 return 0;
787}
788
789static inline u16 nvmet_io_cmd_check_access(struct nvmet_req *req)
790{
791 if (unlikely(req->ns->readonly)) {
792 switch (req->cmd->common.opcode) {
793 case nvme_cmd_read:
794 case nvme_cmd_flush:
795 break;
796 default:
797 return NVME_SC_NS_WRITE_PROTECTED;
798 }
799 }
800
801 return 0;
802}
803
804static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
805{
806 struct nvme_command *cmd = req->cmd;
807 u16 ret;
808
809 ret = nvmet_check_ctrl_status(req, cmd);
810 if (unlikely(ret))
811 return ret;
812
813 req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
814 if (unlikely(!req->ns)) {
815 req->error_loc = offsetof(struct nvme_common_command, nsid);
d5eff33e 816 return NVME_SC_INVALID_NS | NVME_SC_DNR;
e81446af 817 }
72efd25d 818 ret = nvmet_check_ana_state(req->port, req->ns);
819 if (unlikely(ret)) {
820 req->error_loc = offsetof(struct nvme_common_command, nsid);
dedf0be5 821 return ret;
e81446af 822 }
dedf0be5 823 ret = nvmet_io_cmd_check_access(req);
824 if (unlikely(ret)) {
825 req->error_loc = offsetof(struct nvme_common_command, nsid);
72efd25d 826 return ret;
e81446af 827 }
828
829 if (req->ns->file)
830 return nvmet_file_parse_io_cmd(req);
831 else
832 return nvmet_bdev_parse_io_cmd(req);
833}
834
a07b4970 835bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
e929f06d 836 struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops)
837{
838 u8 flags = req->cmd->common.flags;
839 u16 status;
840
841 req->cq = cq;
842 req->sq = sq;
843 req->ops = ops;
844 req->sg = NULL;
845 req->sg_cnt = 0;
5e62d5c9 846 req->transfer_len = 0;
847 req->cqe->status = 0;
848 req->cqe->sq_head = 0;
423b4487 849 req->ns = NULL;
5698b805 850 req->error_loc = NVMET_NO_ERROR_LOC;
e4a97625 851 req->error_slba = 0;
852
853 /* no support for fused commands yet */
854 if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
e81446af 855 req->error_loc = offsetof(struct nvme_common_command, flags);
856 status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
857 goto fail;
858 }
859
860 /*
861 * For fabrics, PSDT field shall describe metadata pointer (MPTR) that
862 * contains an address of a single contiguous physical buffer that is
863 * byte aligned.
864 */
865 if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF)) {
e81446af 866 req->error_loc = offsetof(struct nvme_common_command, flags);
867 status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
868 goto fail;
869 }
870
871 if (unlikely(!req->sq->ctrl))
872 /* will return an error for any Non-connect command: */
873 status = nvmet_parse_connect_cmd(req);
874 else if (likely(req->sq->qid != 0))
875 status = nvmet_parse_io_cmd(req);
876 else if (req->cmd->common.opcode == nvme_fabrics_command)
877 status = nvmet_parse_fabrics_cmd(req);
878 else if (req->sq->ctrl->subsys->type == NVME_NQN_DISC)
879 status = nvmet_parse_discovery_cmd(req);
880 else
881 status = nvmet_parse_admin_cmd(req);
882
883 if (status)
884 goto fail;
885
886 if (unlikely(!percpu_ref_tryget_live(&sq->ref))) {
887 status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
888 goto fail;
889 }
890
891 if (sq->ctrl)
892 sq->ctrl->cmd_seen = true;
893
894 return true;
895
896fail:
897 __nvmet_req_complete(req, status);
898 return false;
899}
900EXPORT_SYMBOL_GPL(nvmet_req_init);
901
902void nvmet_req_uninit(struct nvmet_req *req)
903{
904 percpu_ref_put(&req->sq->ref);
905 if (req->ns)
906 nvmet_put_namespace(req->ns);
907}
908EXPORT_SYMBOL_GPL(nvmet_req_uninit);
909
910void nvmet_req_execute(struct nvmet_req *req)
911{
912 if (unlikely(req->data_len != req->transfer_len)) {
913 req->error_loc = offsetof(struct nvme_common_command, dptr);
5e62d5c9 914 nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR);
e81446af 915 } else
916 req->execute(req);
917}
918EXPORT_SYMBOL_GPL(nvmet_req_execute);
919
920int nvmet_req_alloc_sgl(struct nvmet_req *req)
921{
922 struct pci_dev *p2p_dev = NULL;
923
924 if (IS_ENABLED(CONFIG_PCI_P2PDMA)) {
925 if (req->sq->ctrl && req->ns)
926 p2p_dev = radix_tree_lookup(&req->sq->ctrl->p2p_ns_map,
927 req->ns->nsid);
928
929 req->p2p_dev = NULL;
930 if (req->sq->qid && p2p_dev) {
931 req->sg = pci_p2pmem_alloc_sgl(p2p_dev, &req->sg_cnt,
932 req->transfer_len);
933 if (req->sg) {
934 req->p2p_dev = p2p_dev;
935 return 0;
936 }
937 }
938
939 /*
 940 * If no P2P memory was available we fall back to using
 941 * regular memory.
942 */
943 }
944
945 req->sg = sgl_alloc(req->transfer_len, GFP_KERNEL, &req->sg_cnt);
946 if (!req->sg)
947 return -ENOMEM;
948
949 return 0;
950}
951EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgl);
952
953void nvmet_req_free_sgl(struct nvmet_req *req)
954{
955 if (req->p2p_dev)
956 pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
957 else
958 sgl_free(req->sg);
959
960 req->sg = NULL;
961 req->sg_cnt = 0;
962}
963EXPORT_SYMBOL_GPL(nvmet_req_free_sgl);
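/*
 * Illustrative sketch of the per-command flow a transport follows with the
 * request helpers above; example_handle_cmd() and its error handling are
 * hypothetical, only the nvmet_* calls are taken from this file.
 */
#if 0
static void example_handle_cmd(struct nvmet_req *req, struct nvmet_cq *cq,
		struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops)
{
	if (!nvmet_req_init(req, cq, sq, ops))
		return;	/* nvmet_req_init() already sent an error response */

	if (req->transfer_len && nvmet_req_alloc_sgl(req)) {
		nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
		return;
	}

	/* map req->sg for DMA as needed, then run the parsed handler */
	nvmet_req_execute(req);

	/*
	 * The handler eventually calls nvmet_req_complete(), which invokes
	 * ops->queue_response(); once the response is sent the transport
	 * frees any data SGL with nvmet_req_free_sgl().
	 */
}
#endif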
964
965static inline bool nvmet_cc_en(u32 cc)
966{
ad4e05b2 967 return (cc >> NVME_CC_EN_SHIFT) & 0x1;
968}
969
970static inline u8 nvmet_cc_css(u32 cc)
971{
ad4e05b2 972 return (cc >> NVME_CC_CSS_SHIFT) & 0x7;
973}
974
975static inline u8 nvmet_cc_mps(u32 cc)
976{
ad4e05b2 977 return (cc >> NVME_CC_MPS_SHIFT) & 0xf;
978}
979
980static inline u8 nvmet_cc_ams(u32 cc)
981{
ad4e05b2 982 return (cc >> NVME_CC_AMS_SHIFT) & 0x7;
983}
984
985static inline u8 nvmet_cc_shn(u32 cc)
986{
ad4e05b2 987 return (cc >> NVME_CC_SHN_SHIFT) & 0x3;
988}
989
990static inline u8 nvmet_cc_iosqes(u32 cc)
991{
ad4e05b2 992 return (cc >> NVME_CC_IOSQES_SHIFT) & 0xf;
993}
994
995static inline u8 nvmet_cc_iocqes(u32 cc)
996{
ad4e05b2 997 return (cc >> NVME_CC_IOCQES_SHIFT) & 0xf;
998}
999
1000static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
1001{
1002 lockdep_assert_held(&ctrl->lock);
1003
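	/*
	 * Only the default/fabrics-mandated settings are accepted here:
	 * IOSQES/IOCQES must match the NVM command set entry sizes (64-byte
	 * SQEs, 16-byte CQEs), and MPS, AMS and CSS must be zero (4K memory
	 * page size, round-robin arbitration, NVM command set).
	 */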
1004 if (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES ||
1005 nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES ||
1006 nvmet_cc_mps(ctrl->cc) != 0 ||
1007 nvmet_cc_ams(ctrl->cc) != 0 ||
1008 nvmet_cc_css(ctrl->cc) != 0) {
1009 ctrl->csts = NVME_CSTS_CFS;
1010 return;
1011 }
1012
1013 ctrl->csts = NVME_CSTS_RDY;
1014
1015 /*
1016 * Controllers that are not yet enabled should not really enforce the
 1017 * keep alive timeout, but we still want to track a timeout and clean up
1018 * in case a host died before it enabled the controller. Hence, simply
1019 * reset the keep alive timer when the controller is enabled.
1020 */
1021 mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ);
1022}
1023
1024static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
1025{
1026 lockdep_assert_held(&ctrl->lock);
1027
1028 /* XXX: tear down queues? */
1029 ctrl->csts &= ~NVME_CSTS_RDY;
1030 ctrl->cc = 0;
1031}
1032
1033void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new)
1034{
1035 u32 old;
1036
1037 mutex_lock(&ctrl->lock);
1038 old = ctrl->cc;
1039 ctrl->cc = new;
1040
1041 if (nvmet_cc_en(new) && !nvmet_cc_en(old))
1042 nvmet_start_ctrl(ctrl);
1043 if (!nvmet_cc_en(new) && nvmet_cc_en(old))
1044 nvmet_clear_ctrl(ctrl);
1045 if (nvmet_cc_shn(new) && !nvmet_cc_shn(old)) {
1046 nvmet_clear_ctrl(ctrl);
1047 ctrl->csts |= NVME_CSTS_SHST_CMPLT;
1048 }
1049 if (!nvmet_cc_shn(new) && nvmet_cc_shn(old))
1050 ctrl->csts &= ~NVME_CSTS_SHST_CMPLT;
1051 mutex_unlock(&ctrl->lock);
1052}
1053
1054static void nvmet_init_cap(struct nvmet_ctrl *ctrl)
1055{
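	/*
	 * CAP layout used below (NVMe 1.3): bit 37 is CSS.NVM (NVM command
	 * set supported), bits 31:24 are TO (worst-case enable/disable time
	 * in 500ms units, so 15 means 7.5s) and bits 15:0 are MQES (maximum
	 * queue entries supported, zero's based).
	 */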
1056 /* command sets supported: NVMe command set: */
1057 ctrl->cap = (1ULL << 37);
1058 /* CC.EN timeout in 500msec units: */
1059 ctrl->cap |= (15ULL << 24);
1060 /* maximum queue entries supported: */
1061 ctrl->cap |= NVMET_QUEUE_SIZE - 1;
1062}
1063
1064u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid,
1065 struct nvmet_req *req, struct nvmet_ctrl **ret)
1066{
1067 struct nvmet_subsys *subsys;
1068 struct nvmet_ctrl *ctrl;
1069 u16 status = 0;
1070
1071 subsys = nvmet_find_get_subsys(req->port, subsysnqn);
1072 if (!subsys) {
1073 pr_warn("connect request for invalid subsystem %s!\n",
1074 subsysnqn);
fc6c9730 1075 req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
1076 return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
1077 }
1078
1079 mutex_lock(&subsys->lock);
1080 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
1081 if (ctrl->cntlid == cntlid) {
1082 if (strncmp(hostnqn, ctrl->hostnqn, NVMF_NQN_SIZE)) {
1083 pr_warn("hostnqn mismatch.\n");
1084 continue;
1085 }
1086 if (!kref_get_unless_zero(&ctrl->ref))
1087 continue;
1088
1089 *ret = ctrl;
1090 goto out;
1091 }
1092 }
1093
1094 pr_warn("could not find controller %d for subsys %s / host %s\n",
1095 cntlid, subsysnqn, hostnqn);
fc6c9730 1096 req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);
1097 status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
1098
1099out:
1100 mutex_unlock(&subsys->lock);
1101 nvmet_subsys_put(subsys);
1102 return status;
1103}
1104
1105u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd)
1106{
1107 if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
b40b83e3 1108 pr_err("got cmd %d while CC.EN == 0 on qid = %d\n",
1109 cmd->common.opcode, req->sq->qid);
1110 return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
1111 }
1112
1113 if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
b40b83e3 1114 pr_err("got cmd %d while CSTS.RDY == 0 on qid = %d\n",
64a0ca88 1115 cmd->common.opcode, req->sq->qid);
1116 return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
1117 }
1118 return 0;
1119}
1120
253928ee 1121bool nvmet_host_allowed(struct nvmet_subsys *subsys, const char *hostnqn)
1122{
1123 struct nvmet_host_link *p;
1124
1125 lockdep_assert_held(&nvmet_config_sem);
1126
1127 if (subsys->allow_any_host)
1128 return true;
1129
1130 if (subsys->type == NVME_NQN_DISC) /* allow all access to disc subsys */
1131 return true;
1132
1133 list_for_each_entry(p, &subsys->hosts, entry) {
1134 if (!strcmp(nvmet_host_name(p->host), hostnqn))
1135 return true;
1136 }
1137
1138 return false;
1139}
1140
1141/*
1142 * Note: ctrl->subsys->lock should be held when calling this function
1143 */
1144static void nvmet_setup_p2p_ns_map(struct nvmet_ctrl *ctrl,
1145 struct nvmet_req *req)
1146{
1147 struct nvmet_ns *ns;
1148
1149 if (!req->p2p_client)
1150 return;
1151
1152 ctrl->p2p_client = get_device(req->p2p_client);
1153
1154 list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link)
1155 nvmet_p2pmem_ns_add_p2p(ctrl, ns);
1156}
1157
1158/*
1159 * Note: ctrl->subsys->lock should be held when calling this function
1160 */
1161static void nvmet_release_p2p_ns_map(struct nvmet_ctrl *ctrl)
1162{
1163 struct radix_tree_iter iter;
1164 void __rcu **slot;
1165
1166 radix_tree_for_each_slot(slot, &ctrl->p2p_ns_map, &iter, 0)
1167 pci_dev_put(radix_tree_deref_slot(slot));
1168
1169 put_device(ctrl->p2p_client);
1170}
1171
1172static void nvmet_fatal_error_handler(struct work_struct *work)
1173{
1174 struct nvmet_ctrl *ctrl =
1175 container_of(work, struct nvmet_ctrl, fatal_err_work);
1176
1177 pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid);
1178 ctrl->ops->delete_ctrl(ctrl);
1179}
1180
1181u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
1182 struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp)
1183{
1184 struct nvmet_subsys *subsys;
1185 struct nvmet_ctrl *ctrl;
1186 int ret;
1187 u16 status;
1188
1189 status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
1190 subsys = nvmet_find_get_subsys(req->port, subsysnqn);
1191 if (!subsys) {
1192 pr_warn("connect request for invalid subsystem %s!\n",
1193 subsysnqn);
fc6c9730 1194 req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
1195 goto out;
1196 }
1197
1198 status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
1199 down_read(&nvmet_config_sem);
253928ee 1200 if (!nvmet_host_allowed(subsys, hostnqn)) {
1201 pr_info("connect by host %s for subsystem %s not allowed\n",
1202 hostnqn, subsysnqn);
fc6c9730 1203 req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn);
a07b4970 1204 up_read(&nvmet_config_sem);
130c24b5 1205 status = NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR;
1206 goto out_put_subsystem;
1207 }
1208 up_read(&nvmet_config_sem);
1209
1210 status = NVME_SC_INTERNAL;
1211 ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
1212 if (!ctrl)
1213 goto out_put_subsystem;
1214 mutex_init(&ctrl->lock);
1215
1216 nvmet_init_cap(ctrl);
1217
1218 ctrl->port = req->port;
1219
1220 INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
1221 INIT_LIST_HEAD(&ctrl->async_events);
c6925093 1222 INIT_RADIX_TREE(&ctrl->p2p_ns_map, GFP_KERNEL);
d11de63f 1223 INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler);
1224
1225 memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
1226 memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);
1227
1228 kref_init(&ctrl->ref);
1229 ctrl->subsys = subsys;
c86b8f7b 1230 WRITE_ONCE(ctrl->aen_enabled, NVMET_AEN_CFG_OPTIONAL);
a07b4970 1231
1232 ctrl->changed_ns_list = kmalloc_array(NVME_MAX_CHANGED_NAMESPACES,
1233 sizeof(__le32), GFP_KERNEL);
1234 if (!ctrl->changed_ns_list)
1235 goto out_free_ctrl;
1236
1237 ctrl->cqs = kcalloc(subsys->max_qid + 1,
1238 sizeof(struct nvmet_cq *),
1239 GFP_KERNEL);
1240 if (!ctrl->cqs)
c16734ea 1241 goto out_free_changed_ns_list;
1242
1243 ctrl->sqs = kcalloc(subsys->max_qid + 1,
1244 sizeof(struct nvmet_sq *),
1245 GFP_KERNEL);
1246 if (!ctrl->sqs)
1247 goto out_free_cqs;
1248
15fbad96 1249 ret = ida_simple_get(&cntlid_ida,
1250 NVME_CNTLID_MIN, NVME_CNTLID_MAX,
1251 GFP_KERNEL);
1252 if (ret < 0) {
1253 status = NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
1254 goto out_free_sqs;
1255 }
1256 ctrl->cntlid = ret;
1257
1258 ctrl->ops = req->ops;
a07b4970 1259
1260 /*
1261 * Discovery controllers may use some arbitrary high value
 1262 * in order to clean up stale discovery sessions
1263 */
1264 if ((ctrl->subsys->type == NVME_NQN_DISC) && !kato)
1265 kato = NVMET_DISC_KATO_MS;
1266
1267 /* keep-alive timeout in seconds */
1268 ctrl->kato = DIV_ROUND_UP(kato, 1000);
1269
1270 ctrl->err_counter = 0;
1271 spin_lock_init(&ctrl->error_lock);
1272
1273 nvmet_start_keep_alive_timer(ctrl);
1274
1275 mutex_lock(&subsys->lock);
1276 list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
c6925093 1277 nvmet_setup_p2p_ns_map(ctrl, req);
1278 mutex_unlock(&subsys->lock);
1279
1280 *ctrlp = ctrl;
1281 return 0;
1282
1283out_free_sqs:
1284 kfree(ctrl->sqs);
1285out_free_cqs:
1286 kfree(ctrl->cqs);
1287out_free_changed_ns_list:
1288 kfree(ctrl->changed_ns_list);
1289out_free_ctrl:
1290 kfree(ctrl);
1291out_put_subsystem:
1292 nvmet_subsys_put(subsys);
1293out:
1294 return status;
1295}
1296
1297static void nvmet_ctrl_free(struct kref *ref)
1298{
1299 struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref);
1300 struct nvmet_subsys *subsys = ctrl->subsys;
1301
a07b4970 1302 mutex_lock(&subsys->lock);
c6925093 1303 nvmet_release_p2p_ns_map(ctrl);
1304 list_del(&ctrl->subsys_entry);
1305 mutex_unlock(&subsys->lock);
1306
1307 nvmet_stop_keep_alive_timer(ctrl);
1308
1309 flush_work(&ctrl->async_event_work);
1310 cancel_work_sync(&ctrl->fatal_err_work);
1311
15fbad96 1312 ida_simple_remove(&cntlid_ida, ctrl->cntlid);
1313
1314 kfree(ctrl->sqs);
1315 kfree(ctrl->cqs);
c16734ea 1316 kfree(ctrl->changed_ns_list);
a07b4970 1317 kfree(ctrl);
1318
1319 nvmet_subsys_put(subsys);
1320}
1321
1322void nvmet_ctrl_put(struct nvmet_ctrl *ctrl)
1323{
1324 kref_put(&ctrl->ref, nvmet_ctrl_free);
1325}
1326
1327void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl)
1328{
1329 mutex_lock(&ctrl->lock);
1330 if (!(ctrl->csts & NVME_CSTS_CFS)) {
1331 ctrl->csts |= NVME_CSTS_CFS;
1332 schedule_work(&ctrl->fatal_err_work);
1333 }
1334 mutex_unlock(&ctrl->lock);
1335}
1336EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error);
1337
1338static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
1339 const char *subsysnqn)
1340{
1341 struct nvmet_subsys_link *p;
1342
1343 if (!port)
1344 return NULL;
1345
43a6f8fb 1346 if (!strcmp(NVME_DISC_SUBSYS_NAME, subsysnqn)) {
1347 if (!kref_get_unless_zero(&nvmet_disc_subsys->ref))
1348 return NULL;
1349 return nvmet_disc_subsys;
1350 }
1351
1352 down_read(&nvmet_config_sem);
1353 list_for_each_entry(p, &port->subsystems, entry) {
1354 if (!strncmp(p->subsys->subsysnqn, subsysnqn,
1355 NVMF_NQN_SIZE)) {
1356 if (!kref_get_unless_zero(&p->subsys->ref))
1357 break;
1358 up_read(&nvmet_config_sem);
1359 return p->subsys;
1360 }
1361 }
1362 up_read(&nvmet_config_sem);
1363 return NULL;
1364}
1365
1366struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
1367 enum nvme_subsys_type type)
1368{
1369 struct nvmet_subsys *subsys;
1370
1371 subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
1372 if (!subsys)
6b7e631b 1373 return ERR_PTR(-ENOMEM);
a07b4970 1374
637dc0f3 1375 subsys->ver = NVME_VS(1, 3, 0); /* NVMe 1.3.0 */
1376 /* generate a random serial number as our controllers are ephemeral: */
1377 get_random_bytes(&subsys->serial, sizeof(subsys->serial));
1378
1379 switch (type) {
1380 case NVME_NQN_NVME:
1381 subsys->max_qid = NVMET_NR_QUEUES;
1382 break;
1383 case NVME_NQN_DISC:
1384 subsys->max_qid = 0;
1385 break;
1386 default:
1387 pr_err("%s: Unknown Subsystem type - %d\n", __func__, type);
1388 kfree(subsys);
6b7e631b 1389 return ERR_PTR(-EINVAL);
1390 }
1391 subsys->type = type;
1392 subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE,
1393 GFP_KERNEL);
69555af2 1394 if (!subsys->subsysnqn) {
a07b4970 1395 kfree(subsys);
6b7e631b 1396 return ERR_PTR(-ENOMEM);
1397 }
1398
1399 kref_init(&subsys->ref);
1400
1401 mutex_init(&subsys->lock);
1402 INIT_LIST_HEAD(&subsys->namespaces);
1403 INIT_LIST_HEAD(&subsys->ctrls);
1404 INIT_LIST_HEAD(&subsys->hosts);
1405
1406 return subsys;
1407}
1408
1409static void nvmet_subsys_free(struct kref *ref)
1410{
1411 struct nvmet_subsys *subsys =
1412 container_of(ref, struct nvmet_subsys, ref);
1413
1414 WARN_ON_ONCE(!list_empty(&subsys->namespaces));
1415
1416 kfree(subsys->subsysnqn);
1417 kfree(subsys);
1418}
1419
1420void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys)
1421{
1422 struct nvmet_ctrl *ctrl;
1423
1424 mutex_lock(&subsys->lock);
1425 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
1426 ctrl->ops->delete_ctrl(ctrl);
1427 mutex_unlock(&subsys->lock);
1428}
1429
1430void nvmet_subsys_put(struct nvmet_subsys *subsys)
1431{
1432 kref_put(&subsys->ref, nvmet_subsys_free);
1433}
1434
1435static int __init nvmet_init(void)
1436{
1437 int error;
1438
1439 nvmet_ana_group_enabled[NVMET_DEFAULT_ANA_GRPID] = 1;
1440
1441 buffered_io_wq = alloc_workqueue("nvmet-buffered-io-wq",
1442 WQ_MEM_RECLAIM, 0);
1443 if (!buffered_io_wq) {
1444 error = -ENOMEM;
1445 goto out;
1446 }
72efd25d 1447
1448 error = nvmet_init_discovery();
1449 if (error)
04db0e5e 1450 goto out_free_work_queue;
1451
1452 error = nvmet_init_configfs();
1453 if (error)
1454 goto out_exit_discovery;
1455 return 0;
1456
1457out_exit_discovery:
1458 nvmet_exit_discovery();
1459out_free_work_queue:
1460 destroy_workqueue(buffered_io_wq);
1461out:
1462 return error;
1463}
1464
1465static void __exit nvmet_exit(void)
1466{
1467 nvmet_exit_configfs();
1468 nvmet_exit_discovery();
15fbad96 1469 ida_destroy(&cntlid_ida);
55eb942e 1470 destroy_workqueue(buffered_io_wq);
1471
1472 BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
1473 BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
1474}
1475
1476module_init(nvmet_init);
1477module_exit(nvmet_exit);
1478
1479MODULE_LICENSE("GPL v2");