Merge tag 'media/v6.12-1' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab...
[linux-2.6-block.git] / drivers / nvme / host / nvme.h
1 /* SPDX-License-Identifier: GPL-2.0 */
2 /*
3  * Copyright (c) 2011-2014, Intel Corporation.
4  */
5
6 #ifndef _NVME_H
7 #define _NVME_H
8
9 #include <linux/nvme.h>
10 #include <linux/cdev.h>
11 #include <linux/pci.h>
12 #include <linux/kref.h>
13 #include <linux/blk-mq.h>
14 #include <linux/sed-opal.h>
15 #include <linux/fault-inject.h>
16 #include <linux/rcupdate.h>
17 #include <linux/wait.h>
18 #include <linux/t10-pi.h>
19 #include <linux/ratelimit_types.h>
20
21 #include <trace/events/block.h>
22
/* Operations table defined outside this header (presumably persistent reservations — confirm). */
extern const struct pr_ops nvme_pr_ops;

/*
 * I/O timeout tunable.  NVME_IO_TIMEOUT multiplies it by HZ, so the raw
 * value is presumably expressed in seconds (confirm at the definition).
 */
extern unsigned int nvme_io_timeout;
#define NVME_IO_TIMEOUT (nvme_io_timeout * HZ)

/* Admin timeout tunable; NVME_ADMIN_TIMEOUT scales it by HZ in the same way. */
extern unsigned int admin_timeout;
#define NVME_ADMIN_TIMEOUT      (admin_timeout * HZ)

/* Default keep-alive timeout (KATO); unit is not visible here — presumably seconds. */
#define NVME_DEFAULT_KATO       5
32
/*
 * Inline scatter/gather entry counts for data and metadata.  Both are 0
 * when CONFIG_ARCH_NO_SG_CHAIN is set, otherwise 2 and 1 respectively.
 * NOTE(review): presumably these size per-request inline SG tables in
 * the transports — confirm against the users.
 */
#ifdef CONFIG_ARCH_NO_SG_CHAIN
#define  NVME_INLINE_SG_CNT  0
#define  NVME_INLINE_METADATA_SG_CNT  0
#else
#define  NVME_INLINE_SG_CNT  2
#define  NVME_INLINE_METADATA_SG_CNT  1
#endif
40
/*
 * Controller page size used by the driver: fixed at 4K (1 << 12).
 * The intention is to update this in the future to accommodate
 * architectures whose kernel and I/O page sizes differ.
 */
#define NVME_CTRL_PAGE_SHIFT    12
#define NVME_CTRL_PAGE_SIZE     (1 << NVME_CTRL_PAGE_SHIFT)
48
/*
 * Workqueues and the subsystem-list lock shared across the driver; only
 * declared here, defined elsewhere.  Judging by the names there is one
 * general queue plus dedicated queues for reset and delete work
 * (confirm at the definitions).
 */
extern struct workqueue_struct *nvme_wq;
extern struct workqueue_struct *nvme_reset_wq;
extern struct workqueue_struct *nvme_delete_wq;
extern struct mutex nvme_subsystems_lock;
53
/*
 * List of workarounds for devices that require behavior not specified in
 * the standard.  Each quirk is a distinct bit so that several can be
 * OR-ed together (struct nvme_ctrl stores them in ->quirks).
 */
enum nvme_quirks {
        /*
         * Prefers I/O aligned to a stripe size specified in a vendor
         * specific Identify field.
         */
        NVME_QUIRK_STRIPE_SIZE                  = (1 << 0),

        /*
         * The controller doesn't correctly handle Identify CNS values
         * other than 0 or 1.
         */
        NVME_QUIRK_IDENTIFY_CNS                 = (1 << 1),

        /*
         * The controller deterministically returns 0's on reads to
         * logical blocks that deallocate was called on.
         */
        NVME_QUIRK_DEALLOCATE_ZEROES            = (1 << 2),

        /*
         * The controller needs a delay before the driver starts checking
         * the device readiness, which is done by reading the
         * NVME_CSTS_RDY bit.
         */
        NVME_QUIRK_DELAY_BEFORE_CHK_RDY         = (1 << 3),

        /*
         * APST should not be used.
         */
        NVME_QUIRK_NO_APST                      = (1 << 4),

        /*
         * The deepest sleep state should not be used.
         */
        NVME_QUIRK_NO_DEEPEST_PS                = (1 << 5),

        /*
         * Problems seen with concurrent commands.
         */
        NVME_QUIRK_QDEPTH_ONE                   = (1 << 6),

        /*
         * Set MEDIUM priority on SQ creation.
         */
        NVME_QUIRK_MEDIUM_PRIO_SQ               = (1 << 7),

        /*
         * Ignore device provided subnqn.
         */
        NVME_QUIRK_IGNORE_DEV_SUBNQN            = (1 << 8),

        /*
         * Broken Write Zeroes.
         */
        NVME_QUIRK_DISABLE_WRITE_ZEROES         = (1 << 9),

        /*
         * Force simple suspend/resume path.
         */
        NVME_QUIRK_SIMPLE_SUSPEND               = (1 << 10),

        /*
         * Use only one interrupt vector for all queues.
         */
        NVME_QUIRK_SINGLE_VECTOR                = (1 << 11),

        /*
         * Use non-standard 128 bytes SQEs.
         */
        NVME_QUIRK_128_BYTES_SQES               = (1 << 12),

        /*
         * Prevent tag overlap between queues.
         */
        NVME_QUIRK_SHARED_TAGS                  = (1 << 13),

        /*
         * Don't change the value of the temperature threshold feature.
         */
        NVME_QUIRK_NO_TEMP_THRESH_CHANGE        = (1 << 14),

        /*
         * The controller doesn't handle the Identify Namespace
         * Identification Descriptor list subcommand despite claiming
         * NVMe 1.3 compliance.
         */
        NVME_QUIRK_NO_NS_DESC_LIST              = (1 << 15),

        /*
         * The controller does not properly handle DMA addresses over
         * 48 bits.
         */
        NVME_QUIRK_DMA_ADDRESS_BITS_48          = (1 << 16),

        /*
         * The controller requires the command_id value be limited, so skip
         * encoding the generation sequence number.
         */
        NVME_QUIRK_SKIP_CID_GEN                 = (1 << 17),

        /*
         * Reports garbage in the namespace identifiers (eui64, nguid, uuid).
         */
        NVME_QUIRK_BOGUS_NID                    = (1 << 18),

        /*
         * No temperature thresholds for channels other than 0 (Composite).
         */
        NVME_QUIRK_NO_SECONDARY_TEMP_THRESH     = (1 << 19),

        /*
         * Disables simple suspend/resume path.
         */
        NVME_QUIRK_FORCE_NO_SIMPLE_SUSPEND      = (1 << 20),

        /*
         * MSI (but not MSI-X) interrupts are broken and never fire.
         */
        NVME_QUIRK_BROKEN_MSI                   = (1 << 21),
};
177
/*
 * Common request structure for NVMe passthrough.  All drivers must have
 * this structure as the first member of their request-private data;
 * nvme_req() relies on that by treating the blk-mq PDU pointer as a
 * struct nvme_request pointer.
 */
struct nvme_request {
        struct nvme_command     *cmd;
        /* result/status are filled in by nvme_try_complete_req() */
        union nvme_result       result;
        /* generation counter; nvme_cid() puts its low 4 bits in the command id */
        u8                      genctr;
        u8                      retries;
        /* presumably NVME_REQ_* / NVME_MPATH_* bits — confirm against users */
        u8                      flags;
        /* CQE status converted to CPU order and shifted right by one */
        u16                     status;
#ifdef CONFIG_NVME_MULTIPATH
        unsigned long           start_time;
#endif
        struct nvme_ctrl        *ctrl;
};
194
/*
 * Mark a bio as coming in through the mpath node.  Aliases the block
 * layer's driver-private REQ_DRV flag.
 */
#define REQ_NVME_MPATH          REQ_DRV

/*
 * Single-bit flag values.  NOTE(review): presumably stored in
 * struct nvme_request::flags — confirm against the users.
 */
enum {
        NVME_REQ_CANCELLED              = (1 << 0),
        NVME_REQ_USERCMD                = (1 << 1),
        NVME_MPATH_IO_STATS             = (1 << 2),
        NVME_MPATH_CNT_ACTIVE           = (1 << 3),
};
206
/*
 * Return the NVMe request data kept in the blk-mq per-request payload
 * (PDU) of @req.
 */
static inline struct nvme_request *nvme_req(struct request *req)
{
        struct nvme_request *nvme_rq = blk_mq_rq_to_pdu(req);

        return nvme_rq;
}
211
212 static inline u16 nvme_req_qid(struct request *req)
213 {
214         if (!req->q->queuedata)
215                 return 0;
216
217         return req->mq_hctx->queue_num + 1;
218 }
219
/*
 * Delay (in ms) needed before checking readiness on
 * PCI_DEVICE(0x1c58, 0x0003), which requires the
 * NVME_QUIRK_DELAY_BEFORE_CHK_RDY quirk.  The value was found
 * empirically.
 */
#define NVME_QUIRK_DELAY_AMOUNT         2300
226
/*
 * enum nvme_ctrl_state: Controller state
 *
 * @NVME_CTRL_NEW:              New controller just allocated, initial state
 * @NVME_CTRL_LIVE:             Controller is connected and I/O capable
 * @NVME_CTRL_RESETTING:        Controller is resetting (or scheduled reset)
 * @NVME_CTRL_CONNECTING:       Controller is disconnected, now connecting the
 *                              transport
 * @NVME_CTRL_DELETING:         Controller is deleting (or scheduled deletion)
 * @NVME_CTRL_DELETING_NOIO:    Controller is deleting and I/O is not
 *                              disabled/failed immediately. This state comes
 *                              after all async event processing took place and
 *                              before ns removal and the controller deletion
 *                              progress
 * @NVME_CTRL_DEAD:             Controller is non-present/unresponsive during
 *                              shutdown or removal. In this case we forcibly
 *                              kill all inflight I/O as they have no chance to
 *                              complete
 *
 * nvme_state_terminal() treats DELETING, DELETING_NOIO and DEAD as sink
 * states that never transition back to LIVE.
 */
enum nvme_ctrl_state {
        NVME_CTRL_NEW,
        NVME_CTRL_LIVE,
        NVME_CTRL_RESETTING,
        NVME_CTRL_CONNECTING,
        NVME_CTRL_DELETING,
        NVME_CTRL_DELETING_NOIO,
        NVME_CTRL_DEAD,
};
255
/*
 * Fault-injection state embedded in struct nvme_ctrl and struct nvme_ns.
 * The structure is empty unless CONFIG_FAULT_INJECTION_DEBUG_FS is set.
 */
struct nvme_fault_inject {
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
        struct fault_attr attr;
        struct dentry *parent;
        bool dont_retry;        /* DNR, do not retry */
        u16 status;             /* status code */
#endif
};
264
/*
 * Controller flags.  The values are consecutive small integers rather
 * than masks.  NOTE(review): presumably bit numbers for bitops on
 * struct nvme_ctrl::flags — confirm against the users.
 */
enum nvme_ctrl_flags {
        NVME_CTRL_FAILFAST_EXPIRED      = 0,
        NVME_CTRL_ADMIN_Q_STOPPED       = 1,
        NVME_CTRL_STARTED_ONCE          = 2,
        NVME_CTRL_STOPPED               = 3,
        NVME_CTRL_SKIP_ID_CNS_CS        = 4,
        NVME_CTRL_DIRTY_CAPABILITY      = 5,
        NVME_CTRL_FROZEN                = 6,
};
274
/*
 * Per-controller state.  Only the members whose use is visible in this
 * header are annotated; the rest are described where they are used.
 */
struct nvme_ctrl {
        bool comp_seen;
        bool identified;
        bool passthru_err_log_enabled;
        /* read locklessly through nvme_ctrl_state(), which uses READ_ONCE() */
        enum nvme_ctrl_state state;
        spinlock_t lock;
        struct mutex scan_lock;
        /* transport callbacks; ops->flags carries the NVME_F_* bits */
        const struct nvme_ctrl_ops *ops;
        struct request_queue *admin_q;
        struct request_queue *connect_q;
        struct request_queue *fabrics_q;
        struct device *dev;
        int instance;
        int numa_node;
        struct blk_mq_tag_set *tagset;
        struct blk_mq_tag_set *admin_tagset;
        struct list_head namespaces;
        struct mutex namespaces_lock;
        struct srcu_struct srcu;
        struct device ctrl_device;
        /* nvme_get_ctrl()/nvme_put_ctrl() take and drop references on this */
        struct device *device;  /* char device */
#ifdef CONFIG_NVME_HWMON
        struct device *hwmon_device;
#endif
        struct cdev cdev;
        struct work_struct reset_work;
        struct work_struct delete_work;
        wait_queue_head_t state_wq;

        struct nvme_subsystem *subsys;
        struct list_head subsys_entry;

        struct opal_dev *opal_dev;

        u16 cntlid;

        u16 mtfa;
        u32 ctrl_config;
        u32 queue_count;

        u64 cap;
        u32 max_hw_sectors;
        u32 max_segments;
        u32 max_integrity_segments;
        u32 max_zeroes_sectors;
#ifdef CONFIG_BLK_DEV_ZONED
        u32 max_zone_append;
#endif
        u16 crdt[3];
        u16 oncs;
        u8 dmrl;
        u32 dmrsl;
        /* tested against NVME_CTRL_OACS_NS_MNGT_SUPP in nvme_is_unique_nsid() */
        u16 oacs;
        u16 sqsize;
        u32 max_namespaces;
        atomic_t abort_limit;
        u8 vwc;
        u32 vs;
        u32 sgls;
        u16 kas;
        u8 npss;
        u8 apsta;
        u16 wctemp;
        u16 cctemp;
        u32 oaes;
        u32 aen_result;
        /* tested against NVME_CTRL_CTRATT_NVM_SETS in nvme_is_unique_nsid() */
        u32 ctratt;
        unsigned int shutdown_timeout;
        unsigned int kato;
        /* must be true for nvme_reset_subsystem() to call ops->subsystem_reset */
        bool subsystem;
        /* NVME_QUIRK_* bits, see enum nvme_quirks */
        unsigned long quirks;
        struct nvme_id_power_state psd[32];
        struct nvme_effects_log *effects;
        struct xarray cels;
        struct work_struct scan_work;
        struct work_struct async_event_work;
        struct delayed_work ka_work;
        struct delayed_work failfast_work;
        struct nvme_command ka_cmd;
        unsigned long ka_last_check_time;
        struct work_struct fw_act_work;
        unsigned long events;

#ifdef CONFIG_NVME_MULTIPATH
        /* asymmetric namespace access: */
        u8 anacap;
        u8 anatt;
        u32 anagrpmax;
        u32 nanagrpid;
        struct mutex ana_lock;
        struct nvme_ana_rsp_hdr *ana_log_buf;
        size_t ana_log_size;
        struct timer_list anatt_timer;
        struct work_struct ana_work;
        atomic_t nr_active;
#endif

#ifdef CONFIG_NVME_HOST_AUTH
        struct work_struct dhchap_auth_work;
        struct mutex dhchap_auth_mutex;
        struct nvme_dhchap_queue_context *dhchap_ctxs;
        struct nvme_dhchap_key *host_key;
        struct nvme_dhchap_key *ctrl_key;
        u16 transaction;
#endif
        key_serial_t tls_pskid;

        /* Power saving configuration */
        u64 ps_max_latency_us;
        bool apst_enabled;

        /* PCIe only: */
        u16 hmmaxd;
        u32 hmpre;
        u32 hmmin;
        u32 hmminds;

        /* Fabrics only */
        u32 ioccsz;
        u32 iorcsz;
        u16 icdoff;
        u16 maxcmd;
        int nr_reconnects;
        /* NOTE(review): presumably enum nvme_ctrl_flags bit numbers — confirm */
        unsigned long flags;
        struct nvmf_ctrl_options *opts;

        struct page *discard_page;
        unsigned long discard_page_busy;

        struct nvme_fault_inject fault_inject;

        enum nvme_ctrl_type cntrltype;
        enum nvme_dctype dctype;
};
409
410 static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl)
411 {
412         return READ_ONCE(ctrl->state);
413 }
414
/*
 * I/O policy selector; stored in struct nvme_subsystem::iopolicy when
 * CONFIG_NVME_MULTIPATH is enabled.
 * NOTE(review): the names suggest NUMA-aware, round-robin and
 * queue-depth path selection — confirm against the multipath code.
 */
enum nvme_iopolicy {
        NVME_IOPOLICY_NUMA,
        NVME_IOPOLICY_RR,
        NVME_IOPOLICY_QD,
};
420
/*
 * One NVMe subsystem.  The string members are fixed-width fields;
 * nvme_print_device_info() prints model and firmware_rev with
 * nvme_strlen(), i.e. with trailing space padding removed.
 */
struct nvme_subsystem {
        int                     instance;
        struct device           dev;
        /*
         * Because we unregister the device on the last put we need
         * a separate refcount.
         */
        struct kref             ref;
        struct list_head        entry;
        struct mutex            lock;
        struct list_head        ctrls;
        struct list_head        nsheads;
        char                    subnqn[NVMF_NQN_SIZE];
        char                    serial[20];
        char                    model[40];
        char                    firmware_rev[8];
        /* tested against NVME_CTRL_CMIC_ANA in nvme_is_unique_nsid() */
        u8                      cmic;
        enum nvme_subsys_type   subtype;
        u16                     vendor_id;
        u16                     awupf;  /* 0's based awupf value. */
        struct ida              ns_ida;
#ifdef CONFIG_NVME_MULTIPATH
        enum nvme_iopolicy      iopolicy;
#endif
};
446
/*
 * Container structure for unique namespace identifiers.
 */
struct nvme_ns_ids {
        u8      eui64[8];
        u8      nguid[16];
        uuid_t  uuid;
        u8      csi;
};
456
/*
 * Anchor structure for namespaces.  There is one for each namespace in a
 * NVMe subsystem that any of our controllers can see, and the namespace
 * structure for each controller is chained off it.  For private namespaces
 * there is a 1:1 relation to our namespace structures, that is ->list
 * only ever has a single entry for private namespaces.
 */
struct nvme_ns_head {
        struct list_head        list;
        struct srcu_struct      srcu;
        struct nvme_subsystem   *subsys;
        struct nvme_ns_ids      ids;
        /* nvme_sect_to_lba()/nvme_lba_to_sect() shift by lba_shift - SECTOR_SHIFT */
        u8                      lba_shift;
        /* nvme_ns_has_pi(): true when pi_type is non-zero and ms == pi_size */
        u16                     ms;
        u16                     pi_size;
        u8                      pi_type;
        u8                      guard_type;
        struct list_head        entry;
        struct kref             ref;
        /* a shared head always counts as having a unique NSID */
        bool                    shared;
        bool                    passthru_err_log_enabled;
        struct nvme_effects_log *effects;
        u64                     nuse;
        unsigned                ns_id;
        int                     instance;
#ifdef CONFIG_BLK_DEV_ZONED
        u64                     zsze;
#endif
        /* NOTE(review): presumably enum nvme_ns_features bits — confirm */
        unsigned long           features;

        struct ratelimit_state  rs_nuse;

        struct cdev             cdev;
        struct device           cdev_device;

        /* non-NULL disk is what nvme_ns_head_multipath() tests for */
        struct gendisk          *disk;
#ifdef CONFIG_NVME_MULTIPATH
        struct bio_list         requeue_list;
        spinlock_t              requeue_lock;
        struct work_struct      requeue_work;
        struct mutex            lock;
        unsigned long           flags;
#define NVME_NSHEAD_DISK_LIVE   0
        /* flexible array; element count is decided by the allocator (not visible here) */
        struct nvme_ns __rcu    *current_path[];
#endif
};
503
504 static inline bool nvme_ns_head_multipath(struct nvme_ns_head *head)
505 {
506         return IS_ENABLED(CONFIG_NVME_MULTIPATH) && head->disk;
507 }
508
/* Per-namespace feature bits. */
enum nvme_ns_features {
        NVME_NS_EXT_LBAS = 1 << 0, /* support extended LBA format */
        NVME_NS_METADATA_SUPPORTED = 1 << 1, /* support getting generated md */
        NVME_NS_DEAC = 1 << 2,          /* DEAC bit in Write Zeroes supported */
};
514
/*
 * Per-controller view of a namespace.  It points back to its controller
 * (->ctrl) and to the anchor struct nvme_ns_head (->head).
 */
struct nvme_ns {
        struct list_head list;

        struct nvme_ctrl *ctrl;
        struct request_queue *queue;
        struct gendisk *disk;
#ifdef CONFIG_NVME_MULTIPATH
        enum nvme_ana_state ana_state;
        u32 ana_grpid;
#endif
        struct list_head siblings;
        struct kref kref;
        struct nvme_ns_head *head;

        /* the NVME_NS_* values below are small integers, presumably bit numbers in ->flags */
        unsigned long flags;
#define NVME_NS_REMOVING        0
#define NVME_NS_ANA_PENDING     2
#define NVME_NS_FORCE_RO        3
#define NVME_NS_READY           4

        struct cdev             cdev;
        struct device           cdev_device;

        struct nvme_fault_inject fault_inject;
};
540
541 /* NVMe ns supports metadata actions by the controller (generate/strip) */
542 static inline bool nvme_ns_has_pi(struct nvme_ns_head *head)
543 {
544         return head->pi_type && head->ms == head->pi_size;
545 }
546
/*
 * Transport-provided controller operations.  As seen in this header,
 * ->subsystem_reset and ->print_device_info are optional: their callers
 * here check for NULL first.
 */
struct nvme_ctrl_ops {
        const char *name;
        struct module *module;
        /* NVME_F_* bits below */
        unsigned int flags;
#define NVME_F_FABRICS                  (1 << 0)
#define NVME_F_METADATA_SUPPORTED       (1 << 1)
#define NVME_F_BLOCKING                 (1 << 2)

        const struct attribute_group **dev_attr_groups;
        int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val);
        int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val);
        int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
        void (*free_ctrl)(struct nvme_ctrl *ctrl);
        void (*submit_async_event)(struct nvme_ctrl *ctrl);
        /* optional: nvme_reset_subsystem() returns -ENOTTY when unset */
        int (*subsystem_reset)(struct nvme_ctrl *ctrl);
        void (*delete_ctrl)(struct nvme_ctrl *ctrl);
        void (*stop_ctrl)(struct nvme_ctrl *ctrl);
        int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
        /* optional: nvme_print_device_info() has a generic fallback */
        void (*print_device_info)(struct nvme_ctrl *ctrl);
        bool (*supports_pci_p2pdma)(struct nvme_ctrl *ctrl);
};
568
/*
 * nvme command_id is constructed as such:
 * | xxxx | xxxxxxxxxxxx |
 *   gen    request tag
 *
 * The upper 4 bits carry the generation counter, the lower 12 bits the
 * blk-mq request tag.  Every macro argument is parenthesized so that an
 * expression argument (e.g. "a | b") is masked as a whole instead of
 * being split by operator precedence.
 */
#define nvme_genctr_mask(gen)                   ((gen) & 0xf)
#define nvme_cid_install_genctr(gen)            (nvme_genctr_mask(gen) << 12)
#define nvme_genctr_from_cid(cid)               (((cid) & 0xf000) >> 12)
#define nvme_tag_from_cid(cid)                  ((cid) & 0xfff)
578
579 static inline u16 nvme_cid(struct request *rq)
580 {
581         return nvme_cid_install_genctr(nvme_req(rq)->genctr) | rq->tag;
582 }
583
584 static inline struct request *nvme_find_rq(struct blk_mq_tags *tags,
585                 u16 command_id)
586 {
587         u8 genctr = nvme_genctr_from_cid(command_id);
588         u16 tag = nvme_tag_from_cid(command_id);
589         struct request *rq;
590
591         rq = blk_mq_tag_to_rq(tags, tag);
592         if (unlikely(!rq)) {
593                 pr_err("could not locate request for tag %#x\n",
594                         tag);
595                 return NULL;
596         }
597         if (unlikely(nvme_genctr_mask(nvme_req(rq)->genctr) != genctr)) {
598                 dev_err(nvme_req(rq)->ctrl->device,
599                         "request %#x genctr mismatch (got %#x expected %#x)\n",
600                         tag, genctr, nvme_genctr_mask(nvme_req(rq)->genctr));
601                 return NULL;
602         }
603         return rq;
604 }
605
606 static inline struct request *nvme_cid_to_rq(struct blk_mq_tags *tags,
607                 u16 command_id)
608 {
609         return blk_mq_tag_to_rq(tags, nvme_tag_from_cid(command_id));
610 }
611
/*
 * Return the length of the string without the trailing space padding.
 *
 * @s:   fixed-width, space-padded field (need not be NUL-terminated)
 * @len: width of the field in bytes
 *
 * Returns 0 for a field that is empty or consists only of spaces.  The
 * "len > 0" guard stops the scan at the start of the buffer; without it
 * an all-space field would make the loop read s[-1].
 */
static inline int nvme_strlen(char *s, int len)
{
        while (len > 0 && s[len - 1] == ' ')
                len--;
        return len;
}
621
622 static inline void nvme_print_device_info(struct nvme_ctrl *ctrl)
623 {
624         struct nvme_subsystem *subsys = ctrl->subsys;
625
626         if (ctrl->ops->print_device_info) {
627                 ctrl->ops->print_device_info(ctrl);
628                 return;
629         }
630
631         dev_err(ctrl->device,
632                 "VID:%04x model:%.*s firmware:%.*s\n", subsys->vendor_id,
633                 nvme_strlen(subsys->model, sizeof(subsys->model)),
634                 subsys->model, nvme_strlen(subsys->firmware_rev,
635                                            sizeof(subsys->firmware_rev)),
636                 subsys->firmware_rev);
637 }
638
/*
 * Fault-injection hooks.  With CONFIG_FAULT_INJECTION_DEBUG_FS the real
 * implementations live outside this header; without it all three
 * collapse to empty inline stubs so callers need no #ifdefs.
 */
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
void nvme_fault_inject_init(struct nvme_fault_inject *fault_inj,
                            const char *dev_name);
void nvme_fault_inject_fini(struct nvme_fault_inject *fault_inject);
void nvme_should_fail(struct request *req);
#else
static inline void nvme_fault_inject_init(struct nvme_fault_inject *fault_inj,
                                          const char *dev_name)
{
}
static inline void nvme_fault_inject_fini(struct nvme_fault_inject *fault_inj)
{
}
static inline void nvme_should_fail(struct request *req) {}
#endif
654
/* Reset helpers; declared here, defined outside this header. */
bool nvme_wait_reset(struct nvme_ctrl *ctrl);
int nvme_try_sched_reset(struct nvme_ctrl *ctrl);
657
658 static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl)
659 {
660         if (!ctrl->subsystem || !ctrl->ops->subsystem_reset)
661                 return -ENOTTY;
662         return ctrl->ops->subsystem_reset(ctrl);
663 }
664
665 /*
666  * Convert a 512B sector number to a device logical block number.
667  */
668 static inline u64 nvme_sect_to_lba(struct nvme_ns_head *head, sector_t sector)
669 {
670         return sector >> (head->lba_shift - SECTOR_SHIFT);
671 }
672
673 /*
674  * Convert a device logical block number to a 512B sector number.
675  */
676 static inline sector_t nvme_lba_to_sect(struct nvme_ns_head *head, u64 lba)
677 {
678         return lba << (head->lba_shift - SECTOR_SHIFT);
679 }
680
681 /*
682  * Convert byte length to nvme's 0-based num dwords
683  */
684 static inline u32 nvme_bytes_to_numd(size_t len)
685 {
686         return (len >> 2) - 1;
687 }
688
689 static inline bool nvme_is_ana_error(u16 status)
690 {
691         switch (status & NVME_SCT_SC_MASK) {
692         case NVME_SC_ANA_TRANSITION:
693         case NVME_SC_ANA_INACCESSIBLE:
694         case NVME_SC_ANA_PERSISTENT_LOSS:
695                 return true;
696         default:
697                 return false;
698         }
699 }
700
701 static inline bool nvme_is_path_error(u16 status)
702 {
703         /* check for a status code type of 'path related status' */
704         return (status & NVME_SCT_MASK) == NVME_SCT_PATH;
705 }
706
707 /*
708  * Fill in the status and result information from the CQE, and then figure out
709  * if blk-mq will need to use IPI magic to complete the request, and if yes do
710  * so.  If not let the caller complete the request without an indirect function
711  * call.
712  */
713 static inline bool nvme_try_complete_req(struct request *req, __le16 status,
714                 union nvme_result result)
715 {
716         struct nvme_request *rq = nvme_req(req);
717         struct nvme_ctrl *ctrl = rq->ctrl;
718
719         if (!(ctrl->quirks & NVME_QUIRK_SKIP_CID_GEN))
720                 rq->genctr++;
721
722         rq->status = le16_to_cpu(status) >> 1;
723         rq->result = result;
724         /* inject error when permitted by fault injection framework */
725         nvme_should_fail(req);
726         if (unlikely(blk_should_fake_timeout(req->q)))
727                 return true;
728         return blk_mq_complete_request_remote(req);
729 }
730
731 static inline void nvme_get_ctrl(struct nvme_ctrl *ctrl)
732 {
733         get_device(ctrl->device);
734 }
735
736 static inline void nvme_put_ctrl(struct nvme_ctrl *ctrl)
737 {
738         put_device(ctrl->device);
739 }
740
741 static inline bool nvme_is_aen_req(u16 qid, __u16 command_id)
742 {
743         return !qid &&
744                 nvme_tag_from_cid(command_id) >= NVME_AQ_BLK_MQ_DEPTH;
745 }
746
747 /*
748  * Returns true for sink states that can't ever transition back to live.
749  */
750 static inline bool nvme_state_terminal(struct nvme_ctrl *ctrl)
751 {
752         switch (nvme_ctrl_state(ctrl)) {
753         case NVME_CTRL_NEW:
754         case NVME_CTRL_LIVE:
755         case NVME_CTRL_RESETTING:
756         case NVME_CTRL_CONNECTING:
757                 return false;
758         case NVME_CTRL_DELETING:
759         case NVME_CTRL_DELETING_NOIO:
760         case NVME_CTRL_DEAD:
761                 return true;
762         default:
763                 WARN_ONCE(1, "Unhandled ctrl state:%d", ctrl->state);
764                 return true;
765         }
766 }
767
/* Request completion entry points; declared here, defined outside this header. */
void nvme_end_req(struct request *req);
void nvme_complete_rq(struct request *req);
void nvme_complete_batch_req(struct request *req);
771
772 static __always_inline void nvme_complete_batch(struct io_comp_batch *iob,
773                                                 void (*fn)(struct request *rq))
774 {
775         struct request *req;
776
777         rq_list_for_each(&iob->req_list, req) {
778                 fn(req);
779                 nvme_complete_batch_req(req);
780         }
781         blk_mq_end_request_batch(iob);
782 }
783
/*
 * Controller-level API shared by the transports.  Everything below is a
 * declaration only; the definitions are outside this header.  The group
 * comments follow the function names.
 */

/* Request failure / cancellation */
blk_status_t nvme_host_path_error(struct request *req);
bool nvme_cancel_request(struct request *req, void *data);
void nvme_cancel_tagset(struct nvme_ctrl *ctrl);
void nvme_cancel_admin_tagset(struct nvme_ctrl *ctrl);

/* Controller state and lifecycle */
bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
                enum nvme_ctrl_state new_state);
int nvme_disable_ctrl(struct nvme_ctrl *ctrl, bool shutdown);
int nvme_enable_ctrl(struct nvme_ctrl *ctrl);
int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
                const struct nvme_ctrl_ops *ops, unsigned long quirks);
int nvme_add_ctrl(struct nvme_ctrl *ctrl);
void nvme_uninit_ctrl(struct nvme_ctrl *ctrl);
void nvme_start_ctrl(struct nvme_ctrl *ctrl);
void nvme_stop_ctrl(struct nvme_ctrl *ctrl);
int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl, bool was_suspended);

/* Admin and I/O tag sets */
int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
                const struct blk_mq_ops *ops, unsigned int cmd_size);
void nvme_remove_admin_tag_set(struct nvme_ctrl *ctrl);
int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
                const struct blk_mq_ops *ops, unsigned int nr_maps,
                unsigned int cmd_size);
void nvme_remove_io_tag_set(struct nvme_ctrl *ctrl);

void nvme_remove_namespaces(struct nvme_ctrl *ctrl);

void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
                volatile union nvme_result *res);

/* Queue quiesce / sync / freeze */
void nvme_quiesce_io_queues(struct nvme_ctrl *ctrl);
void nvme_unquiesce_io_queues(struct nvme_ctrl *ctrl);
void nvme_quiesce_admin_queue(struct nvme_ctrl *ctrl);
void nvme_unquiesce_admin_queue(struct nvme_ctrl *ctrl);
void nvme_mark_namespaces_dead(struct nvme_ctrl *ctrl);
void nvme_sync_queues(struct nvme_ctrl *ctrl);
void nvme_sync_io_queues(struct nvme_ctrl *ctrl);
void nvme_unfreeze(struct nvme_ctrl *ctrl);
void nvme_wait_freeze(struct nvme_ctrl *ctrl);
int nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout);
void nvme_start_freeze(struct nvme_ctrl *ctrl);
823
824 static inline enum req_op nvme_req_op(struct nvme_command *cmd)
825 {
826         return nvme_is_write(cmd) ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN;
827 }
828
/*
 * Wildcard queue id.  NOTE(review): presumably the value passed as
 * "qid" to __nvme_submit_sync_cmd() when any queue will do — confirm.
 */
#define NVME_QID_ANY -1

/* Command setup/teardown helpers; declared here, defined outside this header. */
void nvme_init_request(struct request *req, struct nvme_command *cmd);
void nvme_cleanup_cmd(struct request *req);
blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req);
blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl,
                struct request *req);
/* slow path of nvme_check_ready(), taken for any non-LIVE state it doesn't resolve itself */
bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
                bool queue_live, enum nvme_ctrl_state state);
837
838 static inline bool nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
839                 bool queue_live)
840 {
841         enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
842
843         if (likely(state == NVME_CTRL_LIVE))
844                 return true;
845         if (ctrl->ops->flags & NVME_F_FABRICS && state == NVME_CTRL_DELETING)
846                 return queue_live;
847         return __nvme_check_ready(ctrl, rq, queue_live, state);
848 }
849
850 /*
851  * NSID shall be unique for all shared namespaces, or if at least one of the
852  * following conditions is met:
853  *   1. Namespace Management is supported by the controller
854  *   2. ANA is supported by the controller
855  *   3. NVM Set are supported by the controller
856  *
857  * In other case, private namespace are not required to report a unique NSID.
858  */
859 static inline bool nvme_is_unique_nsid(struct nvme_ctrl *ctrl,
860                 struct nvme_ns_head *head)
861 {
862         return head->shared ||
863                 (ctrl->oacs & NVME_CTRL_OACS_NS_MNGT_SUPP) ||
864                 (ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA) ||
865                 (ctrl->ctratt & NVME_CTRL_CTRATT_NVM_SETS);
866 }
867
/*
 * Flags for __nvme_submit_sync_cmd().  The type is __bitwise so sparse
 * can flag accidental mixing with plain integers; each value is a
 * distinct bit.
 */
typedef __u32 __bitwise nvme_submit_flags_t;

enum {
        /* Insert request at the head of the queue */
        NVME_SUBMIT_AT_HEAD  = (__force nvme_submit_flags_t)(1 << 0),
        /* Set BLK_MQ_REQ_NOWAIT when allocating request */
        NVME_SUBMIT_NOWAIT = (__force nvme_submit_flags_t)(1 << 1),
        /* Set BLK_MQ_REQ_RESERVED when allocating request */
        NVME_SUBMIT_RESERVED = (__force nvme_submit_flags_t)(1 << 2),
        /* Retry command when NVME_STATUS_DNR is not set in the result */
        NVME_SUBMIT_RETRY = (__force nvme_submit_flags_t)(1 << 3),
};
883
/*
 * Core driver API.  Declarations only; the definitions are outside this
 * header.  The group comments follow the function names.
 */

/* Synchronous command submission and feature access */
int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
                void *buf, unsigned bufflen);
int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
                union nvme_result *result, void *buffer, unsigned bufflen,
                int qid, nvme_submit_flags_t flags);
int nvme_set_features(struct nvme_ctrl *dev, unsigned int fid,
                      unsigned int dword11, void *buffer, size_t buflen,
                      u32 *result);
int nvme_get_features(struct nvme_ctrl *dev, unsigned int fid,
                      unsigned int dword11, void *buffer, size_t buflen,
                      u32 *result);
int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);

/* Keep-alive, reset, delete, scan, log pages */
void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl);
int nvme_delete_ctrl(struct nvme_ctrl *ctrl);
void nvme_queue_scan(struct nvme_ctrl *ctrl);
int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi,
                void *log, size_t size, u64 offset);

/* Namespace head references and char-device helpers */
bool nvme_tryget_ns_head(struct nvme_ns_head *head);
void nvme_put_ns_head(struct nvme_ns_head *head);
int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device,
                const struct file_operations *fops, struct module *owner);
void nvme_cdev_del(struct cdev *cdev, struct device *cdev_device);

/* ioctl and io_uring command entry points */
int nvme_ioctl(struct block_device *bdev, blk_mode_t mode,
                unsigned int cmd, unsigned long arg);
long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
int nvme_ns_head_ioctl(struct block_device *bdev, blk_mode_t mode,
                unsigned int cmd, unsigned long arg);
long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
                unsigned long arg);
long nvme_dev_ioctl(struct file *file, unsigned int cmd,
                unsigned long arg);
int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
                struct io_comp_batch *iob, unsigned int poll_flags);
int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd,
                unsigned int issue_flags);
int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd,
                unsigned int issue_flags);

/* Identify / geometry */
int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid,
                struct nvme_id_ns **id);
int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo);
int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
927
/*
 * Attribute groups and block-device operation tables shared across the
 * driver.  nvme_pr_ops is intentionally not listed here: it is already
 * declared near the top of this header, and repeating the declaration adds
 * nothing.
 */
extern const struct attribute_group *nvme_ns_attr_groups[];
extern const struct block_device_operations nvme_ns_head_ops;
extern const struct attribute_group nvme_dev_attrs_group;
extern const struct attribute_group *nvme_subsys_attrs_groups[];
extern const struct attribute_group *nvme_dev_attr_groups[];
extern const struct block_device_operations nvme_bdev_ops;

void nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl);
struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);
938 #ifdef CONFIG_NVME_MULTIPATH
939 static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
940 {
941         return ctrl->ana_log_buf != NULL;
942 }
943
944 void nvme_mpath_unfreeze(struct nvme_subsystem *subsys);
945 void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys);
946 void nvme_mpath_start_freeze(struct nvme_subsystem *subsys);
947 void nvme_mpath_default_iopolicy(struct nvme_subsystem *subsys);
948 void nvme_failover_req(struct request *req);
949 void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
950 int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
951 void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid);
952 void nvme_mpath_remove_disk(struct nvme_ns_head *head);
953 int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
954 void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl);
955 void nvme_mpath_update(struct nvme_ctrl *ctrl);
956 void nvme_mpath_uninit(struct nvme_ctrl *ctrl);
957 void nvme_mpath_stop(struct nvme_ctrl *ctrl);
958 bool nvme_mpath_clear_current_path(struct nvme_ns *ns);
959 void nvme_mpath_revalidate_paths(struct nvme_ns *ns);
960 void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl);
961 void nvme_mpath_shutdown_disk(struct nvme_ns_head *head);
962 void nvme_mpath_start_request(struct request *rq);
963 void nvme_mpath_end_request(struct request *rq);
964
965 static inline void nvme_trace_bio_complete(struct request *req)
966 {
967         struct nvme_ns *ns = req->q->queuedata;
968
969         if ((req->cmd_flags & REQ_NVME_MPATH) && req->bio)
970                 trace_block_bio_complete(ns->head->disk->queue, req->bio);
971 }
972
/*
 * Multipath-only globals.  When CONFIG_NVME_MULTIPATH is disabled,
 * "multipath" is a macro expanding to false instead of a variable.
 */
extern bool multipath;

/* Device attributes declared here and defined elsewhere in the driver. */
extern struct device_attribute dev_attr_ana_grpid;
extern struct device_attribute dev_attr_ana_state;
extern struct device_attribute subsys_attr_iopolicy;
977
978 static inline bool nvme_disk_is_ns_head(struct gendisk *disk)
979 {
980         return disk->fops == &nvme_ns_head_ops;
981 }
982 #else
983 #define multipath false
984 static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
985 {
986         return false;
987 }
988 static inline void nvme_failover_req(struct request *req)
989 {
990 }
991 static inline void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
992 {
993 }
994 static inline int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,
995                 struct nvme_ns_head *head)
996 {
997         return 0;
998 }
999 static inline void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid)
1000 {
1001 }
1002 static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head)
1003 {
1004 }
1005 static inline bool nvme_mpath_clear_current_path(struct nvme_ns *ns)
1006 {
1007         return false;
1008 }
1009 static inline void nvme_mpath_revalidate_paths(struct nvme_ns *ns)
1010 {
1011 }
1012 static inline void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
1013 {
1014 }
1015 static inline void nvme_mpath_shutdown_disk(struct nvme_ns_head *head)
1016 {
1017 }
1018 static inline void nvme_trace_bio_complete(struct request *req)
1019 {
1020 }
1021 static inline void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl)
1022 {
1023 }
1024 static inline int nvme_mpath_init_identify(struct nvme_ctrl *ctrl,
1025                 struct nvme_id_ctrl *id)
1026 {
1027         if (ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA)
1028                 dev_warn(ctrl->device,
1029 "Please enable CONFIG_NVME_MULTIPATH for full support of multi-port devices.\n");
1030         return 0;
1031 }
/* Remaining no-op hooks for builds without CONFIG_NVME_MULTIPATH. */

static inline void nvme_mpath_update(struct nvme_ctrl *ctrl)
{
}

static inline void nvme_mpath_uninit(struct nvme_ctrl *ctrl)
{
}

static inline void nvme_mpath_stop(struct nvme_ctrl *ctrl)
{
}

static inline void nvme_mpath_unfreeze(struct nvme_subsystem *subsys)
{
}

static inline void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys)
{
}

static inline void nvme_mpath_start_freeze(struct nvme_subsystem *subsys)
{
}

static inline void nvme_mpath_default_iopolicy(struct nvme_subsystem *subsys)
{
}

static inline void nvme_mpath_start_request(struct request *rq)
{
}

static inline void nvme_mpath_end_request(struct request *rq)
{
}

/* No namespace-head disks exist in this configuration. */
static inline bool nvme_disk_is_ns_head(struct gendisk *disk)
{
        return false;
}
1063 #endif /* CONFIG_NVME_MULTIPATH */
1064
1065 int nvme_ns_get_unique_id(struct nvme_ns *ns, u8 id[16],
1066                 enum blk_unique_id type);
1067
1068 struct nvme_zone_info {
1069         u64 zone_size;
1070         unsigned int max_open_zones;
1071         unsigned int max_active_zones;
1072 };
1073
1074 int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
1075                 unsigned int nr_zones, report_zones_cb cb, void *data);
1076 int nvme_query_zone_info(struct nvme_ns *ns, unsigned lbaf,
1077                 struct nvme_zone_info *zi);
1078 void nvme_update_zone_info(struct nvme_ns *ns, struct queue_limits *lim,
1079                 struct nvme_zone_info *zi);
1080 #ifdef CONFIG_BLK_DEV_ZONED
1081 blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
1082                                        struct nvme_command *cmnd,
1083                                        enum nvme_zone_mgmt_action action);
1084 #else
1085 static inline blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns,
1086                 struct request *req, struct nvme_command *cmnd,
1087                 enum nvme_zone_mgmt_action action)
1088 {
1089         return BLK_STS_NOTSUPP;
1090 }
1091 #endif
1092
1093 static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
1094 {
1095         struct gendisk *disk = dev_to_disk(dev);
1096
1097         WARN_ON(nvme_disk_is_ns_head(disk));
1098         return disk->private_data;
1099 }
1100
/*
 * hwmon hooks.  Without CONFIG_NVME_HWMON, init is a stub that returns 0 and
 * exit does nothing.
 */
#ifdef CONFIG_NVME_HWMON
int nvme_hwmon_init(struct nvme_ctrl *ctrl);
void nvme_hwmon_exit(struct nvme_ctrl *ctrl);
#else
static inline int nvme_hwmon_init(struct nvme_ctrl *ctrl)
{
        return 0;
}

static inline void nvme_hwmon_exit(struct nvme_ctrl *ctrl)
{
}
#endif /* CONFIG_NVME_HWMON */
1114
1115 static inline void nvme_start_request(struct request *rq)
1116 {
1117         if (rq->cmd_flags & REQ_NVME_MPATH)
1118                 nvme_mpath_start_request(rq);
1119         blk_mq_start_request(rq);
1120 }
1121
1122 static inline bool nvme_ctrl_sgl_supported(struct nvme_ctrl *ctrl)
1123 {
1124         return ctrl->sgls & ((1 << 0) | (1 << 1));
1125 }
1126
1127 #ifdef CONFIG_NVME_HOST_AUTH
1128 int __init nvme_init_auth(void);
1129 void __exit nvme_exit_auth(void);
1130 int nvme_auth_init_ctrl(struct nvme_ctrl *ctrl);
1131 void nvme_auth_stop(struct nvme_ctrl *ctrl);
1132 int nvme_auth_negotiate(struct nvme_ctrl *ctrl, int qid);
1133 int nvme_auth_wait(struct nvme_ctrl *ctrl, int qid);
1134 void nvme_auth_free(struct nvme_ctrl *ctrl);
1135 #else
1136 static inline int nvme_auth_init_ctrl(struct nvme_ctrl *ctrl)
1137 {
1138         return 0;
1139 }
1140 static inline int __init nvme_init_auth(void)
1141 {
1142         return 0;
1143 }
1144 static inline void __exit nvme_exit_auth(void)
1145 {
1146 }
1147 static inline void nvme_auth_stop(struct nvme_ctrl *ctrl) {};
1148 static inline int nvme_auth_negotiate(struct nvme_ctrl *ctrl, int qid)
1149 {
1150         return -EPROTONOSUPPORT;
1151 }
1152 static inline int nvme_auth_wait(struct nvme_ctrl *ctrl, int qid)
1153 {
1154         return -EPROTONOSUPPORT;
1155 }
1156 static inline void nvme_auth_free(struct nvme_ctrl *ctrl) {};
1157 #endif
1158
1159 u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
1160                          u8 opcode);
1161 u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode);
1162 int nvme_execute_rq(struct request *rq, bool at_head);
1163 void nvme_passthru_end(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u32 effects,
1164                        struct nvme_command *cmd, int status);
1165 struct nvme_ctrl *nvme_ctrl_from_file(struct file *file);
1166 struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid);
1167 bool nvme_get_ns(struct nvme_ns *ns);
1168 void nvme_put_ns(struct nvme_ns *ns);
1169
1170 static inline bool nvme_multi_css(struct nvme_ctrl *ctrl)
1171 {
1172         return (ctrl->ctrl_config & NVME_CC_CSS_MASK) == NVME_CC_CSS_CSI;
1173 }
1174
1175 #ifdef CONFIG_NVME_VERBOSE_ERRORS
1176 const char *nvme_get_error_status_str(u16 status);
1177 const char *nvme_get_opcode_str(u8 opcode);
1178 const char *nvme_get_admin_opcode_str(u8 opcode);
1179 const char *nvme_get_fabrics_opcode_str(u8 opcode);
1180 #else /* CONFIG_NVME_VERBOSE_ERRORS */
1181 static inline const char *nvme_get_error_status_str(u16 status)
1182 {
1183         return "I/O Error";
1184 }
1185 static inline const char *nvme_get_opcode_str(u8 opcode)
1186 {
1187         return "I/O Cmd";
1188 }
1189 static inline const char *nvme_get_admin_opcode_str(u8 opcode)
1190 {
1191         return "Admin Cmd";
1192 }
1193
1194 static inline const char *nvme_get_fabrics_opcode_str(u8 opcode)
1195 {
1196         return "Fabrics Cmd";
1197 }
1198 #endif /* CONFIG_NVME_VERBOSE_ERRORS */
1199
1200 static inline const char *nvme_opcode_str(int qid, u8 opcode)
1201 {
1202         return qid ? nvme_get_opcode_str(opcode) :
1203                 nvme_get_admin_opcode_str(opcode);
1204 }
1205
1206 static inline const char *nvme_fabrics_opcode_str(
1207                 int qid, const struct nvme_command *cmd)
1208 {
1209         if (nvme_is_fabrics(cmd))
1210                 return nvme_get_fabrics_opcode_str(cmd->fabrics.fctype);
1211
1212         return nvme_opcode_str(qid, cmd->common.opcode);
1213 }
1214 #endif /* _NVME_H */