/*
 * Copyright (c) 2017 Christoph Hellwig.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#include <linux/moduleparam.h>
#include "nvme.h"

static bool multipath = true;
module_param(multipath, bool, 0644);
MODULE_PARM_DESC(multipath,
        "turn on native support for multiple controllers per subsystem");
void nvme_failover_req(struct request *req)
{
        struct nvme_ns *ns = req->q->queuedata;
        unsigned long flags;

        spin_lock_irqsave(&ns->head->requeue_lock, flags);
        blk_steal_bios(&ns->head->requeue_list, req);
        spin_unlock_irqrestore(&ns->head->requeue_lock, flags);
        blk_mq_end_request(req, 0);

        nvme_reset_ctrl(ns->ctrl);
        kblockd_schedule_work(&ns->head->requeue_work);
}
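
/*
 * Only requests submitted through the multipath node (and thus tagged
 * REQ_NVME_MPATH) are candidates for failover; blk_path_error() then
 * filters out target-side failures (e.g. medium or protection errors)
 * that retrying on another path cannot fix.
 */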
bool nvme_req_needs_failover(struct request *req, blk_status_t error)
{
        if (!(req->cmd_flags & REQ_NVME_MPATH))
                return false;
        return blk_path_error(error);
}
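
/*
 * Re-run the requeue lists of every shared namespace head on this
 * controller, typically once the controller has come back to LIVE and can
 * serve as a path again.
 */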
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
{
        struct nvme_ns *ns;

        mutex_lock(&ctrl->namespaces_mutex);
        list_for_each_entry(ns, &ctrl->namespaces, list) {
                if (ns->head->disk)
                        kblockd_schedule_work(&ns->head->requeue_work);
        }
        mutex_unlock(&ctrl->namespaces_mutex);
}
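
/*
 * Path selection is deliberately simple: take the first sibling namespace
 * whose controller is LIVE and cache it in head->current_path.  Callers
 * hold the head->srcu read lock; rcu_assign_pointer() publishes the new
 * path so that concurrent readers see a fully initialized namespace.
 */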
static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head)
{
        struct nvme_ns *ns;

        list_for_each_entry_rcu(ns, &head->list, siblings) {
                if (ns->ctrl->state == NVME_CTRL_LIVE) {
                        rcu_assign_pointer(head->current_path, ns);
                        return ns;
                }
        }

        return NULL;
}
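
/*
 * Fast path for submission: reuse the cached current_path while its
 * controller is still LIVE, and only fall back to a full sibling scan when
 * the cache is empty or the cached controller has gone away.
 */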
inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
{
        struct nvme_ns *ns = srcu_dereference(head->current_path, &head->srcu);

        if (unlikely(!ns || ns->ctrl->state != NVME_CTRL_LIVE))
                ns = __nvme_find_path(head);
        return ns;
}
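
/*
 * A bio submitted to the multipath node takes one of three routes: it is
 * forwarded to a live path (tagged REQ_NVME_MPATH so a later failure can
 * be failed over), parked on the requeue list when paths exist but none is
 * currently usable, or failed outright when no path is registered at all.
 */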
static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
                struct bio *bio)
{
        struct nvme_ns_head *head = q->queuedata;
        struct device *dev = disk_to_dev(head->disk);
        struct nvme_ns *ns;
        blk_qc_t ret = BLK_QC_T_NONE;
        int srcu_idx;

        srcu_idx = srcu_read_lock(&head->srcu);
        ns = nvme_find_path(head);
        if (likely(ns)) {
                bio->bi_disk = ns->disk;
                bio->bi_opf |= REQ_NVME_MPATH;
                ret = direct_make_request(bio);
        } else if (!list_empty_careful(&head->list)) {
                dev_warn_ratelimited(dev, "no path available - requeuing I/O\n");

                spin_lock_irq(&head->requeue_lock);
                bio_list_add(&head->requeue_list, bio);
                spin_unlock_irq(&head->requeue_lock);
        } else {
                dev_warn_ratelimited(dev, "no path - failing I/O\n");

                bio->bi_status = BLK_STS_IOERR;
                bio_endio(bio);
        }

        srcu_read_unlock(&head->srcu, srcu_idx);
        return ret;
}
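
/*
 * Polling on the multipath node is simply forwarded to the poll_fn of the
 * queue behind the cached current path; if that path is gone or not LIVE
 * there is nothing to poll, so report that no completion was found.
 */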
static bool nvme_ns_head_poll(struct request_queue *q, blk_qc_t qc)
{
        struct nvme_ns_head *head = q->queuedata;
        struct nvme_ns *ns;
        bool found = false;
        int srcu_idx;

        srcu_idx = srcu_read_lock(&head->srcu);
        ns = srcu_dereference(head->current_path, &head->srcu);
        if (likely(ns && ns->ctrl->state == NVME_CTRL_LIVE))
                found = ns->queue->poll_fn(q, qc);
        srcu_read_unlock(&head->srcu, srcu_idx);
        return found;
}
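
/*
 * The requeue work drains the bio list under the lock, then resubmits each
 * bio with generic_make_request().  Pointing bi_disk back at the multipath
 * node ensures the resubmission runs through nvme_ns_head_make_request()
 * and therefore through path selection again.
 */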
static void nvme_requeue_work(struct work_struct *work)
{
        struct nvme_ns_head *head =
                container_of(work, struct nvme_ns_head, requeue_work);
        struct bio *bio, *next;

        spin_lock_irq(&head->requeue_lock);
        next = bio_list_get(&head->requeue_list);
        spin_unlock_irq(&head->requeue_lock);

        while ((bio = next) != NULL) {
                next = bio->bi_next;
                bio->bi_next = NULL;

                /*
                 * Reset disk to the mpath node and resubmit to select a new
                 * path.
                 */
                bio->bi_disk = head->disk;
                generic_make_request(bio);
        }
}
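
/*
 * Set up the per-subsystem multipath request queue and gendisk.  CMIC bit 1
 * is the Identify Controller flag indicating that the subsystem may contain
 * two or more controllers; without it (or with the multipath module
 * parameter off) no multipath node is created and the namespace is driven
 * directly.
 */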
int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
{
        struct request_queue *q;
        bool vwc = false;

        bio_list_init(&head->requeue_list);
        spin_lock_init(&head->requeue_lock);
        INIT_WORK(&head->requeue_work, nvme_requeue_work);

        /*
         * Add a multipath node if the subsystem supports multiple controllers.
         * We also do this for private namespaces as the namespace sharing data
         * could change after a rescan.
         */
        if (!(ctrl->subsys->cmic & (1 << 1)) || !multipath)
                return 0;

        q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE);
        if (!q)
                goto out;
        q->queuedata = head;
        blk_queue_make_request(q, nvme_ns_head_make_request);
        q->poll_fn = nvme_ns_head_poll;
        queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
        /* set to a default value of 512 until the disk is validated */
        blk_queue_logical_block_size(q, 512);

        /* we need to propagate up the VWC setting */
        if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
                vwc = true;
        blk_queue_write_cache(q, vwc, vwc);

        head->disk = alloc_disk(0);
        if (!head->disk)
                goto out_cleanup_queue;
        head->disk->fops = &nvme_ns_head_ops;
        head->disk->private_data = head;
        head->disk->queue = q;
        head->disk->flags = GENHD_FL_EXT_DEVT;
        sprintf(head->disk->disk_name, "nvme%dn%d",
                        ctrl->subsys->instance, head->instance);
        return 0;

out_cleanup_queue:
        blk_cleanup_queue(q);
out:
        return -ENOMEM;
}
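
/*
 * Registration of the multipath disk is deferred until the first path
 * shows up and is serialized by the subsystem lock, with GENHD_FL_UP
 * serving as the "already registered" marker so concurrent paths cannot
 * add the disk twice.
 */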
void nvme_mpath_add_disk(struct nvme_ns_head *head)
{
        if (!head->disk)
                return;

        mutex_lock(&head->subsys->lock);
        if (!(head->disk->flags & GENHD_FL_UP)) {
                device_add_disk(&head->subsys->dev, head->disk);
                if (sysfs_create_group(&disk_to_dev(head->disk)->kobj,
                                &nvme_ns_id_attr_group))
                        pr_warn("%s: failed to create sysfs group for identification\n",
                                head->disk->disk_name);
        }
        mutex_unlock(&head->subsys->lock);
}
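
/*
 * Teardown order matters here: unregister the disk, mark the queue dying so
 * no new bios enter, then kick and flush the requeue work once more so any
 * bios still parked on the requeue list are resubmitted and failed on the
 * dying queue rather than leaked.
 */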
void nvme_mpath_remove_disk(struct nvme_ns_head *head)
{
        if (!head->disk)
                return;
        sysfs_remove_group(&disk_to_dev(head->disk)->kobj,
                           &nvme_ns_id_attr_group);
        del_gendisk(head->disk);
        blk_set_queue_dying(head->disk->queue);
        /* make sure all pending bios are cleaned up */
        kblockd_schedule_work(&head->requeue_work);
        flush_work(&head->requeue_work);
        blk_cleanup_queue(head->disk->queue);
        put_disk(head->disk);
}